aboutsummaryrefslogtreecommitdiff
path: root/Src/libvpShared/corelibs
diff options
context:
space:
mode:
authorJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
committerJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
commit20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/libvpShared/corelibs
parent537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
downloadwinamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
Initial community commit
Diffstat (limited to 'Src/libvpShared/corelibs')
-rw-r--r--Src/libvpShared/corelibs/CpuID/CPUIdLib.plg1786
-rw-r--r--Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj354
-rw-r--r--Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters37
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm99
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm100
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm87
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm99
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp149
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/cid.c152
-rw-r--r--Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm280
-rw-r--r--Src/libvpShared/corelibs/CpuID/readme.txt22
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h71
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h34
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h21
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h100
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h67
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h607
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h69
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h33
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h27
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h498
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h65
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h112
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h31
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile72
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h21
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c79
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c371
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c1840
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c2190
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c130
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c454
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c361
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c527
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c454
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c764
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h24
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c720
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h23
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c24
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c1906
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h84
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c482
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h97
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c810
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h29
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c87
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c1687
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c1564
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def20
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c1967
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c1728
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm371
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c255
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm308
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm153
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash12
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm141
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c181
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm1000
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c470
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c160
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c478
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c350
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c195
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c687
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c225
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c2125
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c656
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c339
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c330
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c248
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c769
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c603
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c605
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c454
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile69
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c315
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c208
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c334
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c381
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c147
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c420
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c348
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj385
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters73
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj257
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln23
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj626
-rw-r--r--Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp139
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj308
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters40
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c33
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c69
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c236
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c76
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c43
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c175
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c353
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c40
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c104
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c30
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c20
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp70
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def97
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc126
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c103
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk9
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c717
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/makefile339
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h15
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk22
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm229
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm77
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm183
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash22
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj327
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj205
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj279
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters16
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c179
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c490
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h350
-rw-r--r--Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h70
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/codec_common.h101
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h108
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h75
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/postproc_if.h151
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/preproc.h40
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/preprocif.h64
-rw-r--r--Src/libvpShared/corelibs/cdxv/include/vputil_if.h149
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c257
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h43
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c765
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm88
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c391
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c96
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c2750
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c110
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c501
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c252
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/include/preproc.h343
-rw-r--r--Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h17
-rw-r--r--Src/libvpShared/corelibs/cdxv/preproc/Makefile54
-rw-r--r--Src/libvpShared/corelibs/cdxv/preproc/preproc.c693
-rw-r--r--Src/libvpShared/corelibs/cdxv/preproc/preproc.sln23
-rw-r--r--Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj302
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c315
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c204
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c369
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c377
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c147
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c380
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c198
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c484
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c285
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c710
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c439
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c815
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c410
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c1071
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c799
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c366
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c438
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c389
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c845
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c338
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c750
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c429
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h33
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h78
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h562
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h100
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h45
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h93
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h53
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h535
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h89
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h52
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h129
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h37
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj326
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters67
-rw-r--r--Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp169
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/Makefile64
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c76
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c303
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c75
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c1491
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c1166
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c92
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c976
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c796
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c1496
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c392
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/include/postp.h136
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj441
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters79
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj233
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c143
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c129
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c170
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c6692
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c2828
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c2529
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c748
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c211
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c540
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c1123
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c1267
-rw-r--r--Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c733
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/Makefile61
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c312
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c921
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c243
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c100
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c1285
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/include/dct.h74
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h11
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h60
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj388
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters58
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj213
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm1002
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c1398
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c810
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c1053
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c790
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c2156
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c856
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c351
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c507
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c1859
-rw-r--r--Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c281
-rw-r--r--Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst495
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg1786
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj354
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters74
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h108
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h126
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c48
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c418
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c43
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c39
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c49
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c107
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c96
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c21
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h216
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h1181
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h142
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h105
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h149
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c561
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj197
-rw-r--r--Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c596
256 files changed, 110635 insertions, 0 deletions
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg b/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg
new file mode 100644
index 00000000..746a4e1b
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg
@@ -0,0 +1,1786 @@
+<html>
+<body>
+<pre>
+<h1>Build Log</h1>
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB85.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB85.bat"
+Creating temporary file "C:\tmp\RSPB86.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB86.bat"
+Creating temporary file "C:\tmp\RSPB87.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB87.bat"
+Creating temporary file "C:\tmp\RSPB88.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB88.bat"
+Creating temporary file "C:\tmp\RSPB89.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\release\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB89.bat"
+Creating temporary file "C:\tmp\RSPB8A.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\ObjectCode\cpuID\release/" /Fd"..\..\..\ObjectCode\cpuID\release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB8A.tmp"
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB8B.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\Release\s_cpuid.lib"
+\NEWZIP\ObjectCode\cpuID\release\cid.obj
+\NEWZIP\ObjectCode\cpuID\release\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\release\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\release\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\release\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\release\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\release\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB8B.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Generating Code...
+Compiling...
+Wmt_CpuID.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB8C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB8C.bat"
+Creating temporary file "C:\tmp\RSPB8D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB8D.bat"
+Creating temporary file "C:\tmp\RSPB8E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB8E.bat"
+Creating temporary file "C:\tmp\RSPB8F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB8F.bat"
+Creating temporary file "C:\tmp\RSPB90.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB90.bat"
+Creating temporary file "C:\tmp\RSPB91.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\cpuID\debug/CPUIdLib.pch" /YX /Fo"..\..\..\ObjectCode\cpuID\debug/" /Fd"..\..\..\ObjectCode\cpuID\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB91.tmp"
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB92.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cpuid.lib"
+\NEWZIP\ObjectCode\cpuID\debug\cid.obj
+\NEWZIP\ObjectCode\cpuID\debug\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\debug\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\debug\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\debug\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\debug\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\debug\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB92.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Wmt_CpuID.cpp
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB93.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB93.bat"
+Creating temporary file "C:\tmp\RSPB94.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB94.bat"
+Creating temporary file "C:\tmp\RSPB95.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB95.bat"
+Creating temporary file "C:\tmp\RSPB96.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB96.bat"
+Creating temporary file "C:\tmp\RSPB97.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\..\include" /I "..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Release/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Release/" /Fd"..\..\..\ObjectCode\ColorSpaces\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB97.tmp"
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB98.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\release\s_cconv.lib"
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB98.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB99.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB99.bat"
+Creating temporary file "C:\tmp\RSPB9A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB9A.bat"
+Creating temporary file "C:\tmp\RSPB9B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB9B.bat"
+Creating temporary file "C:\tmp\RSPB9C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB9C.bat"
+Creating temporary file "C:\tmp\RSPB9D.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Debug/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Debug/" /Fd"..\..\..\ObjectCode\ColorSpaces\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB9D.tmp"
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB9E.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cconv.lib"
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB9E.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB9F.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\Release/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\Release/" /Fd"..\..\..\..\ObjectCode\dxv\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB9F.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_dxv.lib" \NEWZIP\ObjectCode\dxv\Release\vscreen.obj \NEWZIP\ObjectCode\dxv\Release\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA0.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\include" /I "..\..\include\win32" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\debug/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\debug/" /Fd"..\..\..\..\ObjectCode\dxv\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA0.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_dxv.lib" \NEWZIP\ObjectCode\dxv\debug\vscreen.obj \NEWZIP\ObjectCode\dxv\debug\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA1.tmp" with contents
+[
+/nologo /MT /W3 /GX /O2 /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\release/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\release/" /Fd"..\..\..\..\obj\on2_mem\win32\release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA1.tmp"
+Creating temporary file "C:\tmp\RSPBA2.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\release\on2_mem.lib"
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA2.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA3.tmp" with contents
+[
+/nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\debug/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\debug/" /Fd"..\..\..\..\obj\on2_mem\win32\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA3.tmp"
+Creating temporary file "C:\tmp\RSPBA4.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\debug\on2_mem.lib"
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA4.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA5.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\vp60\include" /I "..\include" /I "..\..\include" /I ".\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /Fp"Release/preproc.pch" /YX /Fo"Release/" /Fd"Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA5.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_preproc.lib" .\Release\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA6.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\vp60\include" /I "..\..\include" /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /Fo"Debug/" /Fd"Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA6.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_preproc.lib" .\Debug\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA7.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Ox /Ot /Oa /Ow /Og /Oi /Ob2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Release/" /Fd"..\..\..\..\ObjectCode\vp6d\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA7.tmp"
+Creating temporary file "C:\tmp\RSPBA8.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60d.lib"
+\NEWZIP\VP6\ObjectCode\vp6d\Release\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA8.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA9.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /I "..\..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Debug/" /Fd"..\..\..\..\ObjectCode\vp6d\Debug/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA9.tmp"
+Creating temporary file "C:\tmp\RSPBAA.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60d.lib"
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBAA.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBAB.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPBAB.bat"
+Creating temporary file "C:\tmp\RSPBAC.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPBAC.bat"
+Creating temporary file "C:\tmp\RSPBAD.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPBAD.bat"
+Creating temporary file "C:\tmp\RSPBAE.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPBAE.bat"
+Creating temporary file "C:\tmp\RSPBAF.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6E_EXPORTS" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\Release/" /Fd"..\..\..\..\ObjectCode\vp6e\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBAF.tmp"
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPBB0.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60e.lib"
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB0.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPBB1.bat"
+Creating temporary file "C:\tmp\RSPBB2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPBB2.bat"
+Creating temporary file "C:\tmp\RSPBB3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPBB3.bat"
+Creating temporary file "C:\tmp\RSPBB4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPBB4.bat"
+Creating temporary file "C:\tmp\RSPBB5.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /I "..\..\..\..\include\vp60" /D "vp6E_EXPORTS" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\debug/" /Fd"..\..\..\..\ObjectCode\vp6e\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB5.tmp"
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPBB6.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60e.lib"
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB6.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB7.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /Ob2 /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\ObjectCode\vpppd6\Release/" /Fd"..\..\..\..\ObjectCode\vpppd6\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB7.tmp"
+Creating temporary file "C:\tmp\RSPBB8.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\release\s_vpppd.lib"
+\NEWZIP\ObjectCode\vpppd6\Release\borders.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblock.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlace.obj
+\NEWZIP\ObjectCode\vpppd6\Release\dering.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopfilter.obj
+\NEWZIP\ObjectCode\vpppd6\Release\postproc.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scale.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblocker.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopf_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scaleopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB8.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+Generating Code...
+Compiling...
+simpledeblock_asm.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB9.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\ObjectCode\vppp\Debug/vppp.pch" /YX /Fo"..\..\..\..\ObjectCode\vppp\Debug/" /Fd"..\..\..\..\ObjectCode\vppp\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB9.tmp"
+Creating temporary file "C:\tmp\RSPBBA.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpppd.lib"
+\NEWZIP\ObjectCode\vppp\Debug\borders.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblock.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlace.obj
+\NEWZIP\ObjectCode\vppp\Debug\dering.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopfilter.obj
+\NEWZIP\ObjectCode\vppp\Debug\postproc.obj
+\NEWZIP\ObjectCode\vppp\Debug\scale.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblocker.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopf_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\scaleopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBA.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+simpledeblock_asm.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBB.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\..\ObjectCode\vputil\Release/" /Fd"..\..\..\..\ObjectCode\vputil\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBBB.tmp"
+Creating temporary file "C:\tmp\RSPBBC.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vputil.lib"
+\NEWZIP\ObjectCode\vputil\Release\fdct.obj
+\NEWZIP\ObjectCode\vputil\Release\idctpart.obj
+\NEWZIP\ObjectCode\vputil\Release\reconstruct.obj
+\NEWZIP\ObjectCode\vputil\Release\vputil.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\filtmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\filtwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxidct.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxrecon.obj
+\NEWZIP\ObjectCode\vputil\Release\uoptsystemdependant.obj
+\NEWZIP\ObjectCode\vputil\Release\vputilasm.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtidct.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBC.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBD.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\..\ObjectCode\vputil\Debug/vputil.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vputil\Debug/" /Fd"..\..\..\..\..\ObjectCode\vputil\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBBD.tmp"
+Creating temporary file "C:\tmp\RSPBBE.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vputil.lib"
+\ObjectCode\vputil\Debug\fdct.obj
+\ObjectCode\vputil\Debug\idctpart.obj
+\ObjectCode\vputil\Debug\reconstruct.obj
+\ObjectCode\vputil\Debug\vputil.obj
+\ObjectCode\vputil\Debug\fdctmmx.obj
+\ObjectCode\vputil\Debug\fdctwmt.obj
+\ObjectCode\vputil\Debug\filtmmx.obj
+\ObjectCode\vputil\Debug\filtwmt.obj
+\ObjectCode\vputil\Debug\mmxidct.obj
+\ObjectCode\vputil\Debug\mmxrecon.obj
+\ObjectCode\vputil\Debug\uoptsystemdependant.obj
+\ObjectCode\vputil\Debug\vputilasm.obj
+\ObjectCode\vputil\Debug\wmtidct.obj
+\ObjectCode\vputil\Debug\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBE.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBF.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPBBF.bat"
+Creating temporary file "C:\tmp\RSPBC0.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPBC0.bat"
+Creating temporary file "C:\tmp\RSPBC1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPBC1.bat"
+Creating temporary file "C:\tmp\RSPBC2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPBC2.bat"
+Creating temporary file "C:\tmp\RSPBC3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPBC3.bat"
+Creating temporary file "C:\tmp\RSPBC4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPBC4.bat"
+Creating temporary file "C:\tmp\RSPBC5.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPBC5.bat"
+Creating temporary file "C:\tmp\RSPBC6.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPBC6.bat"
+Creating temporary file "C:\tmp\RSPBC7.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPBC7.bat"
+Creating temporary file "C:\tmp\RSPBC8.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPBC8.bat"
+Creating temporary file "C:\tmp\RSPBC9.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPBC9.bat"
+Creating temporary file "C:\tmp\RSPBCA.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPBCA.bat"
+Creating temporary file "C:\tmp\RSPBCB.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBCB.tmp"
+Creating temporary file "C:\tmp\RSPBCC.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBCC.tmp"
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPBCD.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vpxblit.lib"
+\ObjectCode\vpxblit\Release\ctables.obj
+\ObjectCode\vpxblit\Release\wksetblt.obj
+\ObjectCode\vpxblit\Release\bcf00_c.obj
+\ObjectCode\vpxblit\Release\bcf10_c.obj
+\ObjectCode\vpxblit\Release\bcs00_c.obj
+\ObjectCode\vpxblit\Release\bcs10_c.obj
+\ObjectCode\vpxblit\Release\bct00_c.obj
+\ObjectCode\vpxblit\Release\bct10_c.obj
+\ObjectCode\vpxblit\Release\bcu00_c.obj
+\ObjectCode\vpxblit\Release\bcy00_c.obj
+\ObjectCode\vpxblit\Release\vpx_reg.obj
+\ObjectCode\vpxblit\Release\bct00_targa_c.obj
+\ObjectCode\vpxblit\Release\bcc00.obj
+\ObjectCode\vpxblit\Release\bcc10.obj
+\ObjectCode\vpxblit\Release\bcd00.obj
+\ObjectCode\vpxblit\Release\bcf00.obj
+\ObjectCode\vpxblit\Release\bcf10.obj
+\ObjectCode\vpxblit\Release\bcs00.obj
+\ObjectCode\vpxblit\Release\bcs10.obj
+\ObjectCode\vpxblit\Release\bct00.obj
+\ObjectCode\vpxblit\Release\bct10.obj
+\ObjectCode\vpxblit\Release\bcu00.obj
+\ObjectCode\vpxblit\Release\bcy00.obj
+\ObjectCode\vpxblit\Release\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBCD.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Generating Code...
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBCE.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPBCE.bat"
+Creating temporary file "C:\tmp\RSPBCF.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPBCF.bat"
+Creating temporary file "C:\tmp\RSPBD0.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPBD0.bat"
+Creating temporary file "C:\tmp\RSPBD1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPBD1.bat"
+Creating temporary file "C:\tmp\RSPBD2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPBD2.bat"
+Creating temporary file "C:\tmp\RSPBD3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPBD3.bat"
+Creating temporary file "C:\tmp\RSPBD4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPBD4.bat"
+Creating temporary file "C:\tmp\RSPBD5.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPBD5.bat"
+Creating temporary file "C:\tmp\RSPBD6.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPBD6.bat"
+Creating temporary file "C:\tmp\RSPBD7.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPBD7.bat"
+Creating temporary file "C:\tmp\RSPBD8.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPBD8.bat"
+Creating temporary file "C:\tmp\RSPBD9.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPBD9.bat"
+Creating temporary file "C:\tmp\RSPBDA.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBDA.tmp"
+Creating temporary file "C:\tmp\RSPBDB.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBDB.tmp"
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPBDC.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpxblit.lib"
+\ObjectCode\vpxblit\Debug\ctables.obj
+\ObjectCode\vpxblit\Debug\wksetblt.obj
+\ObjectCode\vpxblit\Debug\bcf00_c.obj
+\ObjectCode\vpxblit\Debug\bcf10_c.obj
+\ObjectCode\vpxblit\Debug\bcs00_c.obj
+\ObjectCode\vpxblit\Debug\bcs10_c.obj
+\ObjectCode\vpxblit\Debug\bct00_c.obj
+\ObjectCode\vpxblit\Debug\bct10_c.obj
+\ObjectCode\vpxblit\Debug\bcu00_c.obj
+\ObjectCode\vpxblit\Debug\bcy00_c.obj
+\ObjectCode\vpxblit\Debug\vpx_reg.obj
+\ObjectCode\vpxblit\Debug\bct00_targa_c.obj
+\ObjectCode\vpxblit\Debug\bcc00.obj
+\ObjectCode\vpxblit\Debug\bcc10.obj
+\ObjectCode\vpxblit\Debug\bcd00.obj
+\ObjectCode\vpxblit\Debug\bcf00.obj
+\ObjectCode\vpxblit\Debug\bcf10.obj
+\ObjectCode\vpxblit\Debug\bcs00.obj
+\ObjectCode\vpxblit\Debug\bcs10.obj
+\ObjectCode\vpxblit\Debug\bct00.obj
+\ObjectCode\vpxblit\Debug\bct10.obj
+\ObjectCode\vpxblit\Debug\bcu00.obj
+\ObjectCode\vpxblit\Debug\bcy00.obj
+\ObjectCode\vpxblit\Debug\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBDC.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+</pre>
+</body>
+</html>
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj
new file mode 100644
index 00000000..c95896c5
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{77A73D85-7602-42F3-BAC4-8D7F7BFF8659}</ProjectGuid>
+ <RootNamespace>CPUIdLib</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <!-- x64 configuration: select the x64 vcpkg triplet rather than the x86 one. -->
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <!-- x64 configuration: select the x64 vcpkg triplet rather than the x86 one. -->
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <!-- C/C++ sources. Optimization and BasicRuntimeChecks are taken from the -->
+ <!-- per-configuration ItemDefinitionGroup defaults of this project -->
+ <!-- (Debug: Disabled + EnableFastChecks, Release: MaxSpeed). -->
+ <ClCompile Include="Win32\cid.c" />
+ <ClCompile Include="Win32\Wmt_CpuID.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="Win32\cpuid.asm">
+ <!-- Assembles the file with MASM (ml): /c assemble only, /coff COFF object, -->
+ <!-- /Fl listing file, /Fo object file. Both are written to $(IntDir), which -->
+ <!-- already ends with a backslash, so Outputs must not add another one. -->
+ <!-- NOTE(review): the x64 configurations run the same ml /coff command as -->
+ <!-- Win32; confirm whether x64 builds of this library are expected to work. -->
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="Win32\D9xOSSupXMM.asm">
+ <!-- Assembles the file with MASM (ml): /c assemble only, /coff COFF object, -->
+ <!-- /Fl listing file, /Fo object file. Both are written to $(IntDir), which -->
+ <!-- already ends with a backslash, so Outputs must not add another one. -->
+ <!-- NOTE(review): the x64 configurations run the same ml /coff command as -->
+ <!-- Win32; confirm whether x64 builds of this library are expected to work. -->
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="Win32\InitXMMReg.asm">
+ <!-- Assembles the file with MASM (ml): /c assemble only, /coff COFF object, -->
+ <!-- /Fl listing file, /Fo object file. Both are written to $(IntDir), which -->
+ <!-- already ends with a backslash, so Outputs must not add another one. -->
+ <!-- NOTE(review): the x64 configurations run the same ml /coff command as -->
+ <!-- Win32; confirm whether x64 builds of this library are expected to work. -->
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="Win32\TrashXMMreg.asm">
+ <!-- Assembles the file with MASM (ml): /c assemble only, /coff COFF object, -->
+ <!-- /Fl listing file, /Fo object file. Both are written to $(IntDir), which -->
+ <!-- already ends with a backslash, so Outputs must not add another one. -->
+ <!-- NOTE(review): the x64 configurations run the same ml /coff command as -->
+ <!-- Win32; confirm whether x64 builds of this library are expected to work. -->
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ <CustomBuild Include="Win32\VerifyXMMReg.asm">
+ <!-- Assembles the file with MASM (ml): /c assemble only, /coff COFF object, -->
+ <!-- /Fl listing file, /Fo object file. Both are written to $(IntDir); the -->
+ <!-- same command line is used for all four configurations. -->
+ <!-- NOTE(review): the x64 configurations run the same ml /coff command as -->
+ <!-- Win32; confirm whether x64 builds of this library are expected to work. -->
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\cdxv\dxv2\dxv2.vcxproj">
+ <Project>{adac45fd-b93f-40a3-85b2-dbeca1283614}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters
new file mode 100644
index 00000000..dc027c5f
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{8ff77253-bd53-4d72-a4d0-4620071c05d4}</UniqueIdentifier>
+ <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+ </Filter>
+ <Filter Include="Source Files\Win32">
+ <UniqueIdentifier>{71967989-d210-421e-9b32-ca6c33a448ee}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="Win32\cid.c">
+ <Filter>Source Files\Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="Win32\Wmt_CpuID.cpp">
+ <Filter>Source Files\Win32</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="Win32\cpuid.asm">
+ <Filter>Source Files\Win32</Filter>
+ </CustomBuild>
+ <CustomBuild Include="Win32\D9xOSSupXMM.asm">
+ <Filter>Source Files\Win32</Filter>
+ </CustomBuild>
+ <CustomBuild Include="Win32\InitXMMReg.asm">
+ <Filter>Source Files\Win32</Filter>
+ </CustomBuild>
+ <CustomBuild Include="Win32\TrashXMMreg.asm">
+ <Filter>Source Files\Win32</Filter>
+ </CustomBuild>
+ <CustomBuild Include="Win32\VerifyXMMReg.asm">
+ <Filter>Source Files\Win32</Filter>
+ </CustomBuild>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm b/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm
new file mode 100644
index 00000000..35594caf
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm
@@ -0,0 +1,99 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+;
+; **-Does9xOSSupportXMM
+;
+; This function will verify if the operating system supports the XMM
+; instructions. According to Intel documentation
+;
+; Intel Architecture
+; Software Developer
+; Manual
+; Volume 1:
+; Basic Architecture
+;
+; The following needs to be true for the OS to support the XMM instructions
+;
+; CR0.EM(bit 2) = 0 (emulation disabled)
+; CR4.OSFXSR(bit 9) = 1 (OS supports saving SIMD floating-point state during context
+; switches)
+;
+; * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * *
+;
+; This function will NOT run on windows NT systems. The function reads control registers
+; which are protected under Windows NT. If you attempt to run this function under Windows NT a
+; protected mode access violation will be generated.
+;
+; * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * *
+;
+; Assumptions:
+; Access to system control registers CR0 and CR4 are not protected
+;
+; Input:
+; None
+;
+; Output:
+; 1 Returned if OS supports XMM instructions
+; 0 Returned if OS does not support XMM instructions
+;
+;
+
+
+ .586
+ .MODEL flat, SYSCALL, os_dos
+ .DATA
+
+NAME x86cpuid
+
+PUBLIC Does9xOSSupportXMM_
+PUBLIC _Does9xOSSupportXMM
+
+ .CODE
+
+; int Does9xOSSupportXMM( void )
+;
+; Returns 1 in EAX when CR0.EM (bit 2) is clear and CR4.OSFXSR (bit 9) is
+; set, and 0 in EAX otherwise. The control registers are read directly, so
+; the restrictions described in the file header (no Windows NT) apply.
+; All registers other than EAX are preserved.
+Does9xOSSupportXMM_:
+_Does9xOSSupportXMM:
+ push esi ; save everything except the EAX return value
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+ xor eax,eax ; default result: XMM not supported (was left
+ ; uninitialised on the failure path before)
+
+ mov edx,cr0
+ bt edx,2 ; CF = CR0.EM
+ jc Exit ; emulation enabled -> not supported
+
+ mov edx,cr4
+ bt edx,9 ; CF = CR4.OSFXSR
+ jnc Exit ; OS does not save SIMD state -> not supported
+
+ mov eax,1 ; both conditions hold: XMM is supported
+
+Exit:
+ pop edx ; restore in reverse order of the pushes
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm b/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm
new file mode 100644
index 00000000..4b827162
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm
@@ -0,0 +1,100 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-InitXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 1 in a set of three. The other
+; functions are...
+;
+; TrashXMMReg
+; VerifyXMMReg
+;
+; Assumptions:
+; None
+;
+; Input:
+; None
+;
+; Output:
+; No return value, but XMM registers
+; 0, 1, 2 are initialized to predetermined
+; values
+;
+;
+ .686P
+ .XMM
+ .MODEL flat, SYSCALL, os_dos
+ .DATA
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+PUBLIC XMM0Init
+PUBLIC XMM1Init
+PUBLIC XMM2Init
+
+
+ XMM0Init REAL4 1.1
+ REAL4 2.2
+ REAL4 3.3
+ REAL4 4.4
+
+ XMM1Init REAL4 5.5
+ REAL4 6.6
+ REAL4 7.7
+ REAL4 8.8
+
+ XMM2Init REAL4 9.9
+ REAL4 10.10
+ REAL4 11.11
+ REAL4 12.12
+
+
+NAME InitXMMReg
+
+PUBLIC InitXMMReg_
+PUBLIC _InitXMMReg
+
+ .CODE
+
+; void InitXMMReg( void )
+;
+; Loads XMM0, XMM1 and XMM2 with the 4 x REAL4 patterns XMM0Init, XMM1Init
+; and XMM2Init defined in this file's data segment. Nothing is returned;
+; the general-purpose registers pushed below are restored before returning.
+InitXMMReg_:
+_InitXMMReg:
+ push esi ; defensive save; none of these registers is modified below
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+ movaps xmm0,XMM0Init ; movaps needs a 16-byte aligned memory operand
+ movaps xmm1,XMM1Init
+ movaps xmm2,XMM2Init
+
+Exit:
+ pop edx ; restore in reverse order of the pushes
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm b/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm
new file mode 100644
index 00000000..cf783158
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm
@@ -0,0 +1,87 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-TrashXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 2 in a set of three. The other
+; functions are...
+;
+; InitXMMReg
+; VerifyXMMReg
+;
+; Assumptions:
+; Not strictly required for this function to work, but
+; InitXMMReg should have been called to initialize the
+; XMM registers to a predetermined value
+;
+; Input:
+; None
+;
+; Output:
+; No return value. But XMM registers
+; 0, 1, 2 written to 0's
+;
+;
+
+ .686P
+ .XMM
+ .MODEL flat, SYSCALL, os_dos
+ .DATA
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+ Zeros REAL4 0.0
+ REAL4 0.0
+ REAL4 0.0
+ REAL4 0.0
+
+
+NAME TrashXMMReg
+
+PUBLIC TrashXMMReg_
+PUBLIC _TrashXMMReg
+
+ .CODE
+
+; void TrashXMMReg( void )
+;
+; Overwrites XMM0, XMM1 and XMM2 with all-zero bits. Nothing is returned;
+; the general-purpose registers pushed below are restored before returning.
+TrashXMMReg_:
+_TrashXMMReg:
+ push esi ; defensive save; none of these registers is modified below
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+ ; x XOR x is always 0, so this yields the same register contents as
+ ; loading four 0.0 values, without a memory operand.
+ xorps xmm0,xmm0
+ xorps xmm1,xmm1
+ xorps xmm2,xmm2
+
+Exit:
+ pop edx ; restore in reverse order of the pushes
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm b/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm
new file mode 100644
index 00000000..b8837705
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm
@@ -0,0 +1,99 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-VerifyXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 3 in a set of three. The other
+; functions are...
+;
+; InitXMMReg
+; TrashXMMReg
+;
+; Assumptions:
+; Assumes that InitXMMReg was called to initialize the XMM registers.
+; Assumes that TrashXMMReg was called from a different thread to clear
+; the values in the XMM registers.
+;
+; Input:
+; None
+;
+; Output:
+; Return 1 (True) if the XMM registers are at the correct values.
+; (os supports XMM registers)
+;
+; Return 0 (False) if the XMM registers are not at the correct values.
+; (os does not support the XMM registers)
+;
+
+ .686P
+ .XMM
+ .MODEL flat, SYSCALL, os_dos
+ .DATA
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+
+NAME VerifyXMMReg
+
+PUBLIC VerifyXMMReg_
+PUBLIC _VerifyXMMReg
+
+
+EXTERN XMM0Init:REAL4
+EXTERN XMM1Init:REAL4
+EXTERN XMM2Init:REAL4
+
+
+ .CODE
+
+; int VerifyXMMReg( void )
+;
+; Compares XMM0, XMM1 and XMM2 against the external patterns XMM0Init,
+; XMM1Init and XMM2Init. Returns 1 in EAX if all three comparisons report
+; equal, 0 otherwise. The general-purpose registers pushed below are
+; restored before returning.
+;
+; NOTE(review): comiss is a scalar compare, so only the low single-precision
+; element of each register is checked, not all four.
+VerifyXMMReg_:
+_VerifyXMMReg:
+ push esi ; defensive save; none of these registers is modified below
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+ mov eax,0 ; assume failure until every comparison passes
+
+ comiss xmm0,XMM0Init ; sets ZF when the low element of XMM0 matches
+ jne Exit ; mismatch: return 0
+
+ comiss xmm1,XMM1Init ; same check for XMM1
+ jne Exit
+
+ comiss xmm2,XMM2Init ; same check for XMM2
+ jne Exit
+
+ mov eax,1 ; all three matched: OS supports XMM registers
+
+Exit:
+ pop edx ; restore in reverse order of the pushes
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp b/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp
new file mode 100644
index 00000000..0936b7c9
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp
@@ -0,0 +1,149 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/****************************************************************************
+ *
+ * Module Title : Wmt_CpuID.cpp
+ *
+ * Description : willamette processor detection functions
+ *
+ *
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+
+#include <excpt.h>
+#include <string.h>
+
+
+extern "C" {
+
+/****************************************************************************
+ *
+ * ROUTINE : WillametteNewInstructionHWSupport()
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : 1 if the processor reports the vendor string
+ *           "GenuineIntel" and has bit 26 (04000000h) set in the
+ *           EDX feature flags of CPUID leaf 1; 0 otherwise,
+ *           including when executing CPUID raises an exception.
+ *
+ * FUNCTION : Hardware-side detection of the Willamette new
+ *            instructions.
+ *
+ * SPECIAL NOTES : Uses MSVC inline assembly and structured exception
+ *                 handling. Non-Intel vendors always yield 0.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+int WillametteNewInstructionHWSupport()
+{
+    const unsigned int FeatureMask = 0x04000000;  // bit 26 of EDX, CPUID leaf 1
+    unsigned int FeatureBits = 0;
+    char VendorId[12];                            // 12 raw bytes, not NUL-terminated
+
+    // Leaf 0 returns the vendor string in EBX, EDX, ECX (in that order).
+    // Any exception raised here is treated as "CPUID is not supported".
+    __try
+    {
+        __asm
+        {
+            lea esi, VendorId
+            mov eax, 0
+            cpuid
+            mov [esi], ebx
+            mov [esi+4], edx
+            mov [esi+8], ecx
+        }
+    }
+    __except(EXCEPTION_EXECUTE_HANDLER)
+    {
+        return 0;
+    }
+
+    // Only Intel parts are considered; compare exactly 12 bytes.
+    if(strncmp(VendorId, "GenuineIntel", 12) != 0)
+    {
+        return 0;
+    }
+
+    // Leaf 1: capture the EDX feature flags and test the bit in C.
+    __asm
+    {
+        mov eax, 1
+        cpuid
+        mov FeatureBits, edx
+    }
+
+    return (FeatureBits & FeatureMask) ? 1 : 0;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : WillametteNewInstructionOSSupport()
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : 1 if a Willamette new instruction (xorpd) executes
+ *           without raising an exception, 0 if it faults.
+ *
+ * FUNCTION : Probes whether the Willamette new instructions can be
+ *            executed in the current environment by running one
+ *            and catching any exception.
+ *
+ * SPECIAL NOTES : Clears xmm0 as a side effect of the probe.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+int WillametteNewInstructionOSSupport()
+{
+    int Supported = 1;   // optimistic; cleared if the probe faults
+
+    __try
+    {
+        __asm xorpd xmm0, xmm0
+    }
+    __except(EXCEPTION_EXECUTE_HANDLER)
+    {
+        // every exception is treated the same way: not supported
+        Supported = 0;
+    }
+
+    return Supported;
+}
+
+} \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/cid.c b/Src/libvpShared/corelibs/CpuID/Win32/cid.c
new file mode 100644
index 00000000..988d625c
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/cid.c
@@ -0,0 +1,152 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include <windows.h>
+#include <stdarg.h>
+#include "cpuidlib.h"
+#include "cidasm.h"
+#include <process.h>
+#include <stdio.h>
+
+extern int WillametteNewInstructionOSSupport();
+extern int WillametteNewInstructionHWSupport();
+
+
+/*
+ * **-DoesOSSupportXMM
+ *
+ * Reports whether the operating system is expected to support the XMM
+ * (Pentium III) instructions. XMM adds eight 128-bit registers to the
+ * Pentium II register set, and the OS has to preserve and restore those
+ * registers on task switches.
+ *
+ * The decision is based only on the version reported by GetVersionEx:
+ *   - Windows NT, major version 4: requires service pack 4 or later.
+ *   - Windows NT, major version 5 or later: accepted.
+ *   - Any non-NT platform (Windows 9x): accepted without probing.
+ *
+ * Inputs:
+ *   None
+ *
+ * Outputs:
+ *   TRUE returned if the OS is believed to support the XMM instructions.
+ *   FALSE returned if it does not, or if the OS version cannot be queried.
+ */
+int DoesOSSupportXMM( void )
+{
+    /* ANSI variant on purpose: szCSDVersion is parsed as a char string below,
+     * which would be wrong for the WCHAR layout selected by UNICODE builds. */
+    OSVERSIONINFOA OSInformation;
+    int ServicePackNumber = 0;   /* 0 = no service pack reported / not parseable */
+
+    /* the structure size must be filled in before calling GetVersionEx */
+    OSInformation.dwOSVersionInfoSize = sizeof(OSInformation);
+
+    if( !GetVersionExA( &OSInformation ) )
+    {
+        /*
+         * Having trouble getting OS information: the structure contents
+         * are not valid, so to be safe report that XMM is not supported.
+         */
+        return FALSE;
+    }
+
+    if( OSInformation.dwPlatformId != VER_PLATFORM_WIN32_NT )
+    {
+        /*
+         * Windows 9x system. Does9xOSSupportXMM() is not consulted here;
+         * support is assumed.
+         */
+        return TRUE;
+    }
+
+    /*
+     * On Windows NT the control registers cannot be read directly, so the
+     * check relies on the service pack level. szCSDVersion is expected to
+     * look like "Service Pack N": skip the two words and read N. An empty
+     * or unexpected string leaves the service pack number at 0.
+     */
+    if( sscanf( OSInformation.szCSDVersion, "%*s %*s %d", &ServicePackNumber ) != 1 )
+    {
+        ServicePackNumber = 0;
+    }
+
+    if( ( OSInformation.dwMajorVersion == 4 && ServicePackNumber >= 4 ) ||
+        OSInformation.dwMajorVersion >= 5 )
+    {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * **-findCPUId
+ *
+ * Returns the processor type reported by getCPUType(). When that type is
+ * XMM and both WillametteNewInstructionHWSupport() and
+ * WillametteNewInstructionOSSupport() succeed, WMT is returned instead.
+ *
+ * See cpuidlib.h for a detailed description of this function
+ */
+PROCTYPE findCPUId( void )
+{
+    PROCTYPE CpuType = getCPUType();   // Get version of processor
+
+    // DoesOSSupportXMM() is not consulted on this path.
+    // NOTE(review): an earlier comment here said the Willamette check was
+    // disabled because running an illegal instruction crashed Windows 98
+    // under mplayer, yet the probes are still called below - confirm intent.
+    if( CpuType == XMM &&
+        WillametteNewInstructionHWSupport() &&
+        WillametteNewInstructionOSSupport() )
+    {
+        CpuType = WMT;
+    }
+
+    return( CpuType );
+}
+
+
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm b/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm
new file mode 100644
index 00000000..705d9d9b
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm
@@ -0,0 +1,280 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+;
+; **-getCPUType
+;
+; This function will return a code indicating the type of the processor
+; that is in the system. If the processor type is unknown the generic
+; x86 (Intel 486) type is returned
+;
+; parts taken from intel's AP-485
+;
+;put checks for cmov and mmx support ????
+;
+; Assumptions:
+; None
+;
+; Input:
+; None
+;
+; Output:
+; Code for CPU type returned. See cpuidlib.h for the supported
+; types.
+;
+
+
+
+ .586
+ .MODEL flat, SYSCALL, os_dos
+ .DATA
+
+NAME x86cpuid
+
+PUBLIC getCPUType_
+PUBLIC _getCPUType
+
+CPU_ID MACRO
+ db 0fh ; Hardcoded CPUID instruction
+ db 0a2h
+ENDM
+
+;see cpuidlib.h
+X86 EQU 0 ; /* 486, Pentium plain, or any other x86 compatible */
+PMMX EQU 1 ; /* Pentium with MMX */
+PPRO EQU 2 ; /* Pentium Pro */
+PII EQU 3 ; /* Pentium II */
+C6X86 EQU 4
+C6X86MX EQU 5
+AMDK63D EQU 6
+AMDK6 EQU 7
+AMDK5 EQU 8
+XMM EQU 11
+WMT EQU 12 ;/* Willamette */
+
+
+_486 EQU 4h
+PENT EQU 50h
+PENTMMX EQU 54h
+PENTPRO EQU 61h
+PENTII EQU 63h
+SIMD EQU 25
+
+AMD_K63D EQU 58h
+AMD_K6 EQU 56h
+AMD_K5 EQU 50h ; K5 has models 0 - 6
+
+_6X86 EQU 52h
+_6X86MX EQU 60h
+
+
+_vendor_id db "------------"
+intel_id db "GenuineIntel"
+amd_id db "AuthenticAMD"
+cyrix_id db "CyrixInstead"
+
+ .CODE
+
+;------------------------------------------------
+; getCPUType
+;
+; Returns one of the CPU type codes defined above in eax. ebp holds the
+; candidate return value throughout and is copied to eax on exit.
+;
+; Flow:
+;   1. If the ID bit (bit 21) of EFLAGS cannot be toggled -> X86.
+;   2. CPUID leaf 0: the vendor string is stored in _vendor_id and
+;      compared against intel_id, amd_id and cyrix_id in turn.
+;   3. CPUID leaf 1 (only if leaf 0 reported a maximum leaf >= 1):
+;      the value (eax >> 4) & 0ffh is compared against the model
+;      constants for the matched vendor.
+;
+; Codes this routine can return: X86, PMMX, PPRO, PII, XMM, AMDK6,
+; AMDK63D, C6X86MX. No path loads C6X86, AMDK5 or WMT into ebp.
+;
+; NOTE(review): _vendor_id is a shared data item written on every call -
+; confirm callers do not run this concurrently.
+;------------------------------------------------
+getCPUType_:
+_getCPUType:
+ push esi ;save registers; restored before every ret
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+;------------------------------------------------
+; Intel486 processor check
+; Checking for ability to set/clear ID flag (Bit 21) in EFLAGS
+; which indicates the presence of a processor with the CPUID
+; instruction.
+;------------------------------------------------
+check_80486:
+ pushfd ; push original EFLAGS
+ pop eax ; get original EFLAGS
+ mov ebp,X86 ; rv (default return value)
+ mov ecx, eax ; save original EFLAGS
+ xor eax, 200000h ; flip ID bit in EFLAGS
+ push eax ; save new EFLAGS value on stack
+ popfd ; replace current EFLAGS value
+ pushfd ; get new EFLAGS
+ pop eax ; store new EFLAGS in EAX
+ xor eax, ecx ; can not toggle ID bit,
+ je end_cpu_type486 ; processor=80486
+
+;------------------------------------------------
+; Execute CPUID instruction to determine vendor, family,
+; model, stepping and features. For the purpose of this
+; code, only the initial set of CPUID information is saved.
+;------------------------------------------------
+; push ebx ; save registers
+; push esi
+; push edi
+; push edx
+; push ecx
+
+; mov ebp,X86 ; rv
+
+ mov eax, 0 ; set up for CPUID instruction
+ CPU_ID ; get and save vendor ID
+
+ mov DWORD PTR _vendor_id, ebx
+ mov DWORD PTR _vendor_id[+4], edx
+ mov DWORD PTR _vendor_id[+8], ecx
+
+ cmp DWORD PTR intel_id, ebx
+ jne IsProc_AMD ; first dword differs: try the AMD string
+ cmp DWORD PTR intel_id[+4], edx
+ jne end_cpuid_type ; partial match only: return X86
+ cmp DWORD PTR intel_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not an Intel processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ mov ebp,XMM ; assume PIII
+
+ bt edx,SIMD ; check for SIMD support (CF = feature bit 25)
+ jnae end_cpuid_type ; CF set -> bit present, return XMM
+
+SIMDContinue:
+ shr eax, 4 ; isolate family and model
+ mov ebp,PII ; assume PII
+
+ and eax,0ffh ;mask out type and reserved
+ nop
+
+ cmp eax,PENTII
+ jge end_cpuid_type ; anything at or above 63h is reported as PII
+
+ mov ebp,PPRO
+
+ cmp eax,PENTPRO
+ je end_cpuid_type ; exact match only
+
+ mov ebp,PMMX
+
+ cmp eax,PENTMMX
+ je end_cpuid_type ; exact match only
+
+ mov ebp,X86
+
+ cmp eax,PENT
+ jge end_cpuid_type ; both outcomes reach end_cpuid_type with X86
+
+; mov ebp,X86
+
+end_cpuid_type:
+ mov eax,ebp ; move the selected type code into the return register
+
+;remove these pops ???
+
+; pop edi ; restore registers
+; pop esi
+; pop ebx
+; pop edx
+; pop ecx
+
+end_cpu_type:
+ pop edx ;restore registers in reverse push order
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+; Exit used when the CPUID instruction is not available; same epilogue as above.
+end_cpu_type486:
+ mov eax,ebp
+ pop edx ;restore registers in reverse push order
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;------------------------------------------------
+; AMD: ebx/edx/ecx still hold the vendor string, eax the maximum leaf.
+IsProc_AMD:
+ cmp DWORD PTR amd_id, ebx
+ jne IsProc_CYRIX
+
+ cmp DWORD PTR amd_id[+4], edx
+ jne end_cpuid_type
+
+ cmp DWORD PTR amd_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not an AMD processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ shr eax, 4 ; isolate family and model
+ mov ebp,AMDK63D
+
+ and eax,0ffh ;mask out type and reserved
+ nop
+
+ cmp eax,AMD_K63D
+ jge end_cpuid_type ; 58h and above -> AMDK63D
+
+ mov ebp,AMDK6
+ nop
+
+ cmp eax,AMD_K6
+ jge end_cpuid_type ; 56h..57h -> AMDK6
+
+ mov ebp,X86
+ nop
+
+ cmp eax,AMD_K5
+ jge end_cpuid_type ; K5 range is reported as plain X86
+
+ mov ebp,X86
+ jmp end_cpuid_type
+
+;------------------------------------------------
+; Cyrix: last vendor tried; any mismatch returns the current value of ebp (X86).
+IsProc_CYRIX:
+ cmp DWORD PTR cyrix_id, ebx
+ jne end_cpuid_type
+
+ cmp DWORD PTR cyrix_id[+4], edx
+ jne end_cpuid_type
+
+ cmp DWORD PTR cyrix_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not a Cyrix processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ shr eax, 4 ; isolate family and model
+ mov ebp,C6X86MX
+
+ and eax,0ffh ;mask out type and reserved
+ nop
+
+ cmp eax,_6X86MX
+ je end_cpuid_type ; exact match only
+
+ mov ebp,X86
+ jmp end_cpuid_type
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/CpuID/readme.txt b/Src/libvpShared/corelibs/CpuID/readme.txt
new file mode 100644
index 00000000..26d6e842
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/readme.txt
@@ -0,0 +1,22 @@
+This library contains functions
+that will determine the type of CPU that is in your system. See cpuidlib.h for
+a more detailed description of the functions that are available.
+
+If you want to use the library all you need to do is to fetch
+
+ - cpuidlib.h
+ - cpuidlib.lib
+
+
+October 14 1999
+Jong Chen
+
+ This is the initial revision of the library.
+
+ At the moment the code is not fully tested. The code that tests for OS support
+ of Pentium III instructions has only been tested on systems whose OS
+ supports the Pentium III instructions. It has not been tested in a
+ configuration where we would detect that the OS does not support the
+ Pentium III instructions.
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h
new file mode 100644
index 00000000..d598c5e2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h
@@ -0,0 +1,71 @@
+/****************************************************************************
+*
+* Module Title : Huffman.h
+*
+* Description : Huffman Coding header file.
+*
+****************************************************************************/
+#ifndef __INC_HUFFMAN_H
+#define __INC_HUFFMAN_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+#define HUFF_LUT_LEVELS 6
+
+/****************************************************************************
+* Types
+****************************************************************************/
+// One child slot of a Huffman tree node: a 1-bit tag plus a 7-bit payload.
+typedef struct _tokenorptr
+{
+ unsigned int selector : 1; // 1 bit selector 0->ptr, 1->token
+ unsigned int value : 7; // 7-bit payload: the "ptr" or the token, as chosen by selector
+} tokenorptr;
+
+// Huffman tree node with a left and a right child slot and a frequency byte.
+// Each child is a union so it can be accessed either as a single char
+// (l / r) or through the selector/value bit-fields of tokenorptr.
+// NOTE(review): the bit-fields are declared on unsigned int, so how they
+// line up with the char member depends on the compiler - confirm before
+// relying on l / r aliasing selector+value.
+typedef struct _huffnode
+{
+ union
+ {
+ char l; // raw access to the left child slot
+ tokenorptr left; // bit-field view of the left child slot
+ } leftunion;
+ union
+ {
+ char r; // raw access to the right child slot
+ tokenorptr right; // bit-field view of the right child slot
+ } rightunion;
+ unsigned char freq; // presumably the node's frequency/weight used while building the tree - confirm
+
+} HUFF_NODE;
+
+/****************************************************************************
+* Data structures
+****************************************************************************/
+// Huffman lookup-table entry; the four bit-fields total 16 bits (1+5+6+4).
+// NOTE(review): the struct tag is spelled _HUFF_TALBE_NODE; it is left as is
+// because other files may reference it.
+// NOTE(review): VP6_BuildHuffLookupTable takes a UINT16 * table, so entries
+// are presumably stored as packed 16-bit values - confirm the bit order
+// assumed by the code that fills the table.
+typedef struct _HUFF_TALBE_NODE
+{
+ unsigned short flag :1; // bit 0: 1-Token, 0-Index
+ unsigned short value :5; // value: the value of the Token or the Index to the huffman tree
+ unsigned short unused :6; // not used for now
+ unsigned short length :4; // Huffman code length of the token
+} HUFF_TABLE_NODE;
+
+/****************************************************************************
+* Functions
+****************************************************************************/
+extern void VP6_BuildHuffLookupTable ( HUFF_NODE * HuffTreeRoot, UINT16 * HuffTable );
+extern void VP6_BuildHuffTree ( HUFF_NODE *hn, unsigned int *counts, int values );
+extern void VP6_CreateCodeArray( HUFF_NODE *hn,
+ int node,
+ unsigned int *codearray,
+ unsigned char *lengtharray,
+ int codevalue,
+ int codelength );
+extern void VP6_EncodeValue ( BOOL_CODER *bc, HUFF_NODE *hn, int value, int length );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h
new file mode 100644
index 00000000..cabfcf13
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h
@@ -0,0 +1,34 @@
+/****************************************************************************
+*
+* Module Title : RAW_BUFFER.h
+*
+* Description : Raw bit manipulation routines header file.
+*
+****************************************************************************/
+#ifndef __INC_RAWBUFFER_H
+#define __INC_RAWBUFFER_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+// State for writing raw bits into a byte buffer; passed to
+// InitAddRawBitsToBuffer, AddRawBitsToBuffer and EndAddRawBitsToBuffer.
+typedef struct RAW_BUFFER
+{
+ UINT32 pos; // Offset of "current" UINT32 in buffer
+ INT32 byte_bit_offset; // Offset of next free bit in current UINT8
+ UINT32 DataBlock; // presumably the word currently being assembled before it is stored - confirm
+ UINT8 *Buffer; // destination buffer (supplied to InitAddRawBitsToBuffer)
+} RAW_BUFFER;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern void InitAddRawBitsToBuffer ( RAW_BUFFER *buf, UINT8 *Buffer );
+extern void AddRawBitsToBuffer( RAW_BUFFER *buf, UINT32 data, UINT32 bits );
+extern void EndAddRawBitsToBuffer( RAW_BUFFER *buf );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h
new file mode 100644
index 00000000..2788b40b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h
@@ -0,0 +1,21 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.h
+*
+* Description : Miscellaneous system dependant functions header
+*
+****************************************************************************/
+#ifndef __INC_SYSTEMDEPENDANT_H
+#define __INC_SYSTEMDEPENDANT_H
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+// Reports a warning message; how it is presented is defined by the implementation.
+extern void VP6_IssueWarning ( char * WarningMessage );
+// Suspends the caller; SleepMs is presumably a duration in milliseconds - confirm.
+extern void PauseProcess ( unsigned int SleepMs );
+
+// System dynamic memory allocation
+// NOTE(review): "SytemGlobalAlloc" is missing an 's' compared with
+// SystemGlobalFree; the name is kept because callers link against it.
+char *SytemGlobalAlloc ( unsigned int Size );
+void SystemGlobalFree ( char * MemPtr );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h
new file mode 100644
index 00000000..e23c410d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+* Module Title : TokenEntropy.h
+*
+* Description : Entropy coding header file.
+*
+****************************************************************************/
+#ifndef __INC_TOKEN_ENTROPY_H
+#define __INC_TOKEN_ENTROPY_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "boolhuff.h"
+#include "codec_common.h"
+#include "huffman.h"
+
+/****************************************************************************
+* Constants
+****************************************************************************/
+
+// VP6 Huffman table AC bands
+#define VP6_AC_BANDS 6
+
+// Tokens Value Extra Bits (range + sign)
+// NOTE(review): the value-range and extra-bit annotations below look
+// inconsistent: categories 3 and 4 both list 11-26, and categories 4 and 5
+// both list 5+1. Verify against ExtraBitLengths_VP6 and VP6_DctRangeMinVals
+// before relying on these comments.
+#define ZERO_TOKEN 0 //0 Extra Bits 0+0
+#define ONE_TOKEN 1 //1 Extra Bits 0+1
+#define TWO_TOKEN 2 //2 Extra Bits 0+1
+#define THREE_TOKEN 3 //3 Extra Bits 0+1
+#define FOUR_TOKEN 4 //4 Extra Bits 0+1
+#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1
+#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1
+#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1
+#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1
+#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1
+#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1
+#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0
+// Number of token values (0 .. DCT_EOB_TOKEN); used to size the token tables declared below.
+#define MAX_ENTROPY_TOKENS (DCT_EOB_TOKEN + 1)
+// Value outside the valid token range 0 .. DCT_EOB_TOKEN.
+#define ILLEGAL_TOKEN 255
+
+#define DC_TOKEN_CONTEXTS 3 // 00, 0!0, !0!0
+#define CONTEXT_NODES (MAX_ENTROPY_TOKENS-7)
+
+#define PREC_CASES 3
+#define ZERO_RUN_PROB_CASES 14
+
+#define DC_PROBABILITY_UPDATE_THRESH 100
+
+#define ZERO_CONTEXT_NODE 0
+#define EOB_CONTEXT_NODE 1
+#define ONE_CONTEXT_NODE 2
+#define LOW_VAL_CONTEXT_NODE 3
+#define TWO_CONTEXT_NODE 4
+#define THREE_CONTEXT_NODE 5
+#define HIGH_LOW_CONTEXT_NODE 6
+#define CAT_ONE_CONTEXT_NODE 7
+#define CAT_THREEFOUR_CONTEXT_NODE 8
+#define CAT_THREE_CONTEXT_NODE 9
+#define CAT_FIVE_CONTEXT_NODE 10
+
+#define PROB_UPDATE_BASELINE_COST 7
+
+#define MAX_PROB 254
+#define DCT_MAX_VALUE 2048
+
+#define ZRL_BANDS 2
+#define ZRL_BAND2 6
+
+#define SCAN_ORDER_BANDS 16
+#define SCAN_BAND_UPDATE_BITS 4
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+// Pair of 32-bit integer coefficients.
+// NOTE(review): presumably the slope (M) and intercept (C) of a line
+// y = M*x + C - confirm against the code that uses it.
+typedef struct LineEq
+{
+ INT32 M;
+ INT32 C;
+} LINE_EQ;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern const UINT32 VP6_ProbCost[256];
+extern const UINT8 ExtraBitLengths_VP6[MAX_ENTROPY_TOKENS];
+extern const UINT32 VP6_DctRangeMinVals[MAX_ENTROPY_TOKENS];
+
+extern const UINT8 VP6_DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 VP6_AcUpdateProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 VP6_PrevTokenIndex[MAX_ENTROPY_TOKENS];
+
+extern const UINT8 ScanBandUpdateProbs[BLOCK_SIZE];
+
+extern const UINT8 ZrlUpdateProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+extern const UINT8 ZeroRunProbDefaults[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+extern UINT8 PrecZeroRunLength[BLOCK_SIZE];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h
new file mode 100644
index 00000000..9818284e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h
@@ -0,0 +1,67 @@
+/****************************************************************************
+*
+* Module Title : boolhuff.h
+*
+* Description : Bool Coder header file.
+*
+****************************************************************************/
+#ifndef __INC_BOOLHUFF_H
+#define __INC_BOOLHUFF_H
+
+// BOOL_CODER is the state block passed to the VP6_*Encode* / VP6_*Decode*
+// bool-coder routines. Two layouts exist; NOTNORMALIZED selects the first.
+#ifdef NOTNORMALIZED
+
+typedef struct _boolcoder
+{
+ unsigned char *buffer; // byte buffer handed to VP6_StartEncode / VP6_StartDecode
+ unsigned int pos; // presumably the current offset into buffer - confirm
+ union
+ {
+ unsigned int value; // coder value as one 32-bit word ...
+ unsigned char v[4]; // ... or as its four individual bytes
+ };
+ unsigned int range;
+} BOOL_CODER;
+
+#else
+
+typedef struct
+{
+ unsigned int lowvalue;
+ unsigned int range;
+ unsigned int value;
+ int count;
+ unsigned int pos; // presumably the current offset into buffer - confirm
+ unsigned char *buffer; // byte buffer handed to VP6_StartEncode / VP6_StartDecode
+
+ // Variables used to track bit costs without outputting to the bitstream
+ unsigned int MeasureCost;
+ unsigned long BitCounter;
+} BOOL_CODER;
+
+#endif
+
+// Section cost measurement stats
+//#define MEASURE_SECTION_COSTS 1
+#if defined MEASURE_SECTION_COSTS
+
+extern unsigned int Sectionbits[10];
+extern unsigned int ActiveSection;
+
+#define HEADER_SECTION 0
+#define MODE_SECTION 1
+#define MV_SECTION 2
+#define CONTEXT_OVERHEADS_SECTION 3
+#define DC_SECTION 4
+#define AC_SECTION 5
+
+#endif
+extern void VP6_StartDecode ( BOOL_CODER *bc, unsigned char *buffer );
+extern int VP6_DecodeBool ( BOOL_CODER *bc, int context );
+extern int VP6_DecodeBool128 ( BOOL_CODER *bc );
+extern void VP6_StopDecode ( BOOL_CODER *bc );
+extern void VP6_StartEncode ( BOOL_CODER *bc, unsigned char *buffer );
+extern void VP6_EncodeBool ( BOOL_CODER *bc, int x, int context );
+extern void VP6_EncodeBool2 ( BOOL_CODER *bc, int x, int context );
+extern void VP6_StopEncode ( BOOL_CODER *bc );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h
new file mode 100644
index 00000000..bda11a8f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h
@@ -0,0 +1,607 @@
+/****************************************************************************
+*
+* Module Title : COMPDLL.H
+*
+* Description : Encoder definitions.
+*
+*****************************************************************************
+*/
+#ifndef __INC_COMPDLL_H
+#define __INC_COMPDLL_H
+
+#include "codec_common.h"
+#include "preprocif.h"
+#include "preproc.h"
+#include "pbdll.h"
+#include "vp60_comp_interface.h"
+#include "RawBuffer.h"
+#include <stdio.h>
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+// Debug/stats code
+//#define PSNR_ON
+//#define FILE_PSNR
+#define MIN_BPB_FACTOR 0.1
+#define MAX_BPB_FACTOR 10.0
+#define KEY_FRAME_CONTEXT 5
+
+// GF update constants
+#define DEFAULT_GF_UPDATE_INTERVAL 8
+#define DEFAULT_2PASS_GF_UPDATE_INTERVAL 4
+#define MIN_GF_UPDATE_INTERVAL 4
+#define MAX_GF_UPDATE_INTERVAL 8
+#define GF_UPDATE_MOTION_INTERVAL 48
+#define MAX_GF_UPDATE_MOTION 16
+#define GF_DEFAULT_MOTION_CMPLX 12
+#define GF_MODE_DIST_THRESH1 50
+#define GF_MODE_DIST_THRESH2 25
+#define GF_MAX_VAR_THRESH 36
+#define FIRSTPASS_Q 32
+//#define FULLFRAMEFDCT
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+// Rate and quality settings; CP_INSTANCE embeds one as its Configuration member.
+// NOTE(review): the comments on ActiveWorstQuality and ActiveBestQuality
+// both say "0 is worst quality" - confirm the index direction.
+typedef struct CONFIG_TYPE2
+{
+ UINT32 TargetBandwidth;
+ UINT32 OutputFrameRate;
+
+ UINT32 FirstFrameQ;
+ UINT32 BaseQ;
+ UINT32 WorstQuality; // Worst Quality allowed.
+ UINT32 ActiveWorstQuality; // Reflects worst quality Currently allowed (specified as an index where 0 is worst quality)
+ UINT32 ActiveBestQuality; // Reflects best quality currently allowed (specified as an index where 0 is worst quality)
+
+} CONFIG_TYPE2;
+
+// Kinds of token; enumerators take the default values 0..3 in declaration order.
+typedef enum
+{
+ DCT_COEF_TOKEN,
+ MODE_TOKEN,
+ BLOCKMAP_TOKEN,
+ MV_TOKEN
+} TOKENTYPE;
+
+// One buffered token together with its extra data and the last tokens of
+// the neighbouring blocks. CP_INSTANCE holds these via CoeffTokens and
+// CoeffTokenPtr.
+typedef struct _TOKENEXTRA
+{
+ INT32 Token;
+ UINT32 Extra; // presumably the extra bits that accompany Token - confirm
+
+ INT32 LastTokenL; // Last token in block LEFT
+ INT32 LastTokenA; // Last token in block ABOVE
+
+} TOKENEXTRA;
+
+// Pair of double-precision coefficients.
+// NOTE(review): presumably the slope (M) and intercept (C) of a line
+// y = M*x + C - confirm against the code that uses it.
+typedef struct LineEq2
+{
+ double M;
+ double C;
+
+} LINE_EQ2;
+
+// Three pointer/value pairs: each *Ptr member is followed by a value of the
+// type it points to.
+// NOTE(review): presumably each value is a saved copy of what the pointer
+// referenced, so it can be restored later - confirm.
+typedef struct
+{
+ BLOCK_CONTEXT * AbovePtr;
+ BLOCK_CONTEXT Above;
+ BLOCK_CONTEXT * LeftPtr;
+ BLOCK_CONTEXT Left;
+ Q_LIST_ENTRY * LastDcPtr;
+ Q_LIST_ENTRY LastDc;
+
+} MB_DC_CONTEXT;
+
+// Accumulators over a set of motion vectors.
+// NOTE(review): the field meanings below are inferred from the names - confirm.
+typedef struct MOTION_STATS
+{
+ UINT32 NumMvs; // presumably the number of vectors accumulated
+ UINT32 SumAbsX; // presumably sum of |x|
+ UINT32 SumAbsY; // presumably sum of |y|
+ INT32 SumX; // signed, unlike the other sums
+ INT32 SumY; // signed, unlike the other sums
+ UINT32 SumXSq; // presumably sum of x*x
+ UINT32 SumYSq; // presumably sum of y*y
+
+} MOTION_STATS;
+
+// Statistics record named for the first pass.
+// NOTE(review): presumably one record per frame written during the first
+// pass of two-pass encoding; units and scaling of the fields are not visible
+// here - confirm against the code that fills it.
+typedef struct
+{
+ double MotionSpeed;
+ double VarianceX;
+ double VarianceY;
+ double PercentGolden;
+ double PercentMotionY;
+ double PercentMotion;
+ double PercentNewMotion;
+ unsigned int QValue;
+ double MeanInterError;
+ double MeanIntraError;
+ double BitsPerMacroblock;
+ double SqBitsPerMacroblock;
+ double PSNR;
+ int isGolden; // flag-style int
+ int isKey; // flag-style int
+ int count;
+ int frame;
+} FIRSTPASS_STATS;
+
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 (*FiltBlockBilGetSad)(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+extern UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+extern void (*fdct_short) ( INT16 * InputData, INT16 * OutputData );
+extern void (*idctc[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+extern UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32 );
+extern UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *, UINT8 *, INT32 );
+extern UINT32 (*GetIntraError)( UINT8 *, INT32);
+extern void (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+extern void (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+
+#define HUGE_ERROR (1<<28) // Out of range test value
+
+// Number of search sites for hierarchical search (8*steps)+1
+// so for (+- 32 pixels) = 5 step = 41 (previously 4 step = 33)
+#define MAX_SEARCH_SITES 41
+
+typedef struct CP_INSTANCE * xCP_INST;
+
+typedef struct CP_INSTANCE
+{
+ PB_INSTANCE pb; // playback
+
+ CONFIG_TYPE2 Configuration;
+
+ YUV_BUFFER_CONFIG InputConfig;
+ YUV_BUFFER_CONFIG YuvInputData;
+ INT32 SizeStep;
+ INT32 LastSizeStep;
+
+ INT32 QuickCompress;
+ BOOL GoldenFrameEnabled;
+ BOOL InterPrediction;
+ BOOL MotionCompensation;
+ BOOL AutoKeyFrameEnabled;
+ INT32 ForceKeyFrameEvery;
+ INT32 AutoKeyFrameThreshold;
+ INT32 LastKeyFrame;
+ INT32 MinimumDistanceToKeyFrame;
+ INT32 KeyFrameDataTargetOrig; // Data rate target for key frames
+ INT32 KeyFrameDataTarget; // Data rate target for key frames
+ UINT32 KeyFrameFrequency;
+ BOOL DropFramesAllowed;
+ BOOL DropFrame;
+ INT32 DropCount;
+ INT32 MaxDropCount;
+ INT32 MaxConsecDroppedFrames;
+ UINT32 QualitySetting;
+ UINT32 PreProcFilterLevel;
+ BOOL AllowSpatialResampling;
+ UINT8 RdOpt; // 0 - off, 1 - basic rd on, 2 - all rd options on
+
+ // Compressor Statistics
+ double TotErrScore;
+ UINT32 InterError;
+
+ UINT32 LastInterError;
+ UINT32 LastIntraError;
+ UINT32 MVErrorPerBit;
+ UINT32 ErrorPerBit;
+ UINT32 IntraError;
+ INT64 KeyFrameCount; // Count of key frames.
+ INT64 TotKeyFrameBytes;
+ UINT32 LastKeyFrameSize;
+ UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];
+ UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
+ INT32 FrameQuality[6];
+ int DecoderErrorCode; // Decoder error flag.
+ INT32 ThreshMapThreshold;
+ INT32 TotalMotionScore;
+ INT64 TotalByteCount;
+ INT32 FixedQ;
+
+ // Used for prediction filter selection
+ UINT32 MotionInterErr;
+ UINT32 MotionIntraErr;
+ UINT8 BaselineAlpha;
+ UINT8 BaselineBicThresh;
+
+ // Frame Statistics
+ INT64 CurrentFrame;
+ UINT32 LastFrameSize;
+ UINT32 ThisFrameSize;
+ BOOL ThisIsFirstFrame;
+ BOOL ThisIsKeyFrame;
+ BOOL GfRecoveryFrame;
+ UINT32 FrameError ;
+
+ // Stats for normal inter frames (excludes GFU frames and key frames)
+ UINT32 NiFrames;
+ UINT32 NiTotQi;
+ UINT32 NiAvQi;
+
+ INT32 MotionScore;
+ UINT32 FirstSixthBoundary; // Macro block index marking the first sixth of the image
+ UINT32 LastSixthBoundary; // Macro block index marking the last sixth of the image
+
+ /* Rate Targeting variables */
+ double BpbCorrectionFactor;
+ double KeyFrameBpbCorrectionFactor;
+ double GfuBpbCorrectionFactor;
+
+ // Controlling Block Selection
+ UINT32 MVChangeFactor;
+ UINT32 FourMvChangeFactor;
+ UINT32 ExhaustiveSearchThresh;
+ UINT32 BlockExhaustiveSearchThresh;
+ UINT32 MinImprovementForFourMV;
+ UINT32 FourMVThreshold;
+ UINT32 IntraThresh;
+
+ UINT32 MinErrorForMacroBlockMVSearch;
+ UINT32 MinErrorForBlockMVSearch;
+ UINT32 MinErrorForGoldenMVSearch;
+
+ UINT16 *FrameZeroCountsAlloc;
+ UINT16 *FrameZeroCounts;
+ UINT32 FrameNzCount[BLOCK_SIZE][2];
+ UINT8 NewScanOrderBands[BLOCK_SIZE];
+
+ // Frames
+ YUV_BUFFER_ENTRY *yuv0ptr; // Un-pre-processed raw input (but scaled if appropriate)
+ YUV_BUFFER_ENTRY *yuv1ptr;
+
+ // Token Buffers
+ TOKENEXTRA *CoeffTokens;
+ TOKENEXTRA *CoeffTokenPtr;
+
+ INT16 LastDC[3];
+
+ BOOL_CODER bc;
+ BOOL_CODER bc2;
+
+ UINT8 *DataOutputBuffer;
+ UINT8 MBCodingMode; // Coding mode flags
+
+ INT32 MVPixelOffsetY[MAX_SEARCH_SITES];
+ UINT32 InterTripOutThresh;
+ INT32 MVSearchSteps;
+ INT32 MVOffsetX[MAX_SEARCH_SITES];
+ INT32 MVOffsetY[MAX_SEARCH_SITES];
+ INT8 SubPixelXOffset[9]; // Half pixel MV offsets for X
+ INT8 SubPixelYOffset[9]; // Half pixel MV offsets for Y
+
+ Q_LIST_ENTRY *quantized_list;
+
+ MOTION_VECTOR MVector;
+ INT16 *DCT_codes; //Buffer that stores the result of Forward DCT
+ INT16 *DCTDataBuffer; //Input data buffer for Forward DCT
+
+ // Motion compensation related variables
+ UINT32 MvMaxExtent;
+
+ INT32 byte_bit_offset;
+
+ UINT32 NearestError[4];
+ UINT32 NearError[4];
+ UINT32 ZeroError[4];
+ UINT32 BestError[4];
+ UINT32 ErrorBins[128];
+
+ xPP_INST pp; // preprocessor
+
+#if defined PSNR_ON
+ double TotPsnr;
+ double MinPsnr;
+ double MaxPsnr;
+ double TotYPsnr;
+ double MinYPsnr;
+ double MaxYPsnr;
+ double TotUPsnr;
+ double MinUPsnr;
+ double MaxUPsnr;
+ double TotVPsnr;
+ double MinVPsnr;
+ double MaxVPsnr;
+ double TotalSqError;
+#endif
+#if defined FULLFRAMEFDCT
+ Q_LIST_ENTRY (*FDCTCoeffs)[64];
+#endif
+
+ // Structures for entropy contexts
+ UINT32 FrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+ //UINT32 FrameAcTokenDist[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ UINT32 FrameAcTokenDist[PREC_CASES][2][8][16];
+ // Extra structures needed to decide if we choose huffman and DC / EOB runs
+ UINT32 FrameDcTokenDist2[2][MAX_ENTROPY_TOKENS];
+ //UINT32 FrameAcTokenDist2[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ UINT32 FrameAcTokenDist2[PREC_CASES][2][8][16];
+
+ // AWG Debug Accumulate token count for entire run
+ UINT32 CumulativeFrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+ UINT32 CumulativeFrameAcTokenDist[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+ // Storage for the first frame entropy probabilities.
+ // These are re-used for all subsequent key frames when we are operating in
+ // error (drop frame) ressiliant mode.
+ UINT8 FirstFrameDcProbs[2*(MAX_ENTROPY_TOKENS-1)];
+ UINT8 FirstFrameAcProbs[2*PREC_CASES*VP6_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];
+
+ UINT32 FrameZrlDist[ZRL_BANDS][64];
+ UINT32 FrameZeroCount[ZRL_BANDS];
+ UINT8 FrameZrlProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+ UINT32 FrameZrlBranchHits[ZRL_BANDS][ZERO_RUN_PROB_CASES][2];
+
+ // Last token coded this block.
+ UINT32 MBModeCount[4][MAX_MODES+1];
+ UINT32 BModeCount[MAX_MODES+1];
+ UINT32 CountModeSameAsLast[4][MAX_MODES+1];
+ UINT32 CountModeDiffFrLast[4][MAX_MODES+1];
+
+ UINT32 ModeCodeArray[4][MAX_MODES+1][MAX_MODES+1];
+ UINT8 ModeLengthArray[4][MAX_MODES+1][MAX_MODES+1];
+
+ UINT32 MBModeCostBoth[11];
+ UINT32 MBModeCostNoNear[11];
+ UINT32 MBModeCostNoNearest[11];
+ UINT32 BModeCost[11];
+ UINT32 MvBaselineDist[2][MV_ENTROPY_TOKENS];
+ UINT32 FrameMvCount;
+ UINT32 EstModeCost[2][MAX_MODES];
+ UINT32 EstMVCost[2][MV_ENTROPY_TOKENS];
+ UINT32 * EstMvCostPtrX;
+ UINT32 * EstMvCostPtrY;
+
+ // Data structure used in re-calculating MV probability nodes
+ UINT8 NewMvSignProbs[2];
+ UINT8 NewIsMvShortProb[2];
+ UINT8 NewMvShortProbs[2][7];
+ UINT8 NewMvSizeProbs[2][LONG_MV_BITS];
+
+ UINT32 NewMvSignHits[2][2];
+ UINT32 NewIsMvShortHits[2][2];
+ UINT32 NewMvShortHits[2][7][2];
+ UINT32 NewMvSizeHits[2][LONG_MV_BITS][2];
+
+
+ UINT32 nExperimentals;
+ INT32 Experimental[C_SET_EXPERIMENTAL_MAX - C_SET_EXPERIMENTAL_MIN + 1];
+
+ // Bandwidth and buffer control variables
+ INT32 PerFrameBandwidth; // Target for average bandwidth per frame.
+ INT32 InterFrameTarget; // Average "inter" frame bit target corrected for key frame costs
+ INT32 ThisFrameTarget; // Modified rate target for this frame
+
+ BOOL BufferedMode; // FALSE = Tight buffering (Video Conferencing mode); TRUE = normal buffered/streaming mode.
+ BOOL ErrorResilliantMode; // A mode used for VC etc. to make the codec more resilliant to dropped frames.
+ INT32 StartingBufferLevel; // The initial encoder buffer level
+ INT32 BytesOffTarget; // How far off target are we in repect of target bytes for clip
+ INT32 OptimalBufferLevel; // The buffer level target we strive to reach / maintain.
+ INT32 BufferLevel; // Buffer level based upon the max sustainable rate used for rate targeting
+ INT32 MaxBufferLevel; // The maximum permited value for the buffer level.
+ INT32 DropFramesWaterMark; // Buffer fullness watermark for forced drop frames.
+ INT32 ResampleDownWaterMark; // Buffer fullness watermark for downwards spacial re-sampling
+ INT32 ResampleUpWaterMark; // Buffer fullness watermark where returning to larger image size is consdered
+ INT32 LastKeyFrameBufferLevel; // Used to monitor changes in buffer level when considering re-sampling.
+
+ INT32 Speed;
+ INT32 CPUUsed;
+
+ UINT32 ModeMvCostEstimate; // Running total of cost estimates for modes and MVs in this frame.
+
+ // Variables used in regulating cost of new motion vectors based upon an estimate of new MV frequency.
+ UINT32 FrameNewMvCounter;
+ UINT32 FrameModeCounter;
+ UINT32 MvEpbCorrection;
+ UINT32 LastFrameNewMvUsage; // 0 = Low 9 = High
+
+ UINT32 * MbBestErr;
+
+ UINT32 EstDcTokenCosts[2][MAX_ENTROPY_TOKENS];
+ UINT32 EstAcTokenCosts[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ UINT32 EstZrlCosts[ZRL_BANDS][64];
+
+ // Data structures used to save and restor MB and DC contexts during rate distortion
+ MACROBLOCK_INFO CopyMbi;
+ BLOCK_CONTEXT AboveCopyY[2];
+ BLOCK_CONTEXT AboveCopyU;
+ BLOCK_CONTEXT AboveCopyV;
+ BLOCK_CONTEXT LeftYCopy[2];
+ BLOCK_CONTEXT LeftUCopy;
+ BLOCK_CONTEXT LeftVCopy;
+ Q_LIST_ENTRY LastDcYCopy[3];
+ Q_LIST_ENTRY LastDcUCopy[3];
+ Q_LIST_ENTRY LastDcVCopy[3];
+
+ MB_DC_CONTEXT MbDcContexts[MAX_MODES][6]; // Per mode, per block position data structure for and MB
+
+ UINT32 avgPickModeTime;
+ UINT32 avgEncodeTime;
+ UINT32 avgPackVideoTime;
+
+ UINT32 ForceHScale;
+ UINT32 ForceHRatio;
+ UINT32 ForceVScale;
+ UINT32 ForceVRatio;
+ BOOL ForceInternalSize;
+
+ PreProcInstance preproc;
+
+ // Buffers for output bitstream partitions
+ UINT8 *OutputBuffer2;
+ RAW_BUFFER RawBuffer;
+
+ // In Huffman mode runs of zeros at DC position & runs
+ // of EOB at first AC position are used
+ INT32 CurrentDcZeroRun[2];
+ TOKENEXTRA *DcZeroRunStartPtr[2];
+ INT32 CurrentAc1EobRun[2];
+ TOKENEXTRA *Ac1EobRunStartPtr[2];
+
+ // DEBUG
+ UINT32 HuffCost;
+ UINT32 CostShannon;
+
+ BOOL AllowScanOrderUpdates;
+ INT32 FrameRateInput;
+ INT32 FrameRateDropFrames;
+ INT32 FrameRateDropCount;
+
+ // Stats for monitoring frame mode and MV data
+ UINT32 ModeDist[MAX_MODES];
+
+ // Stats collected about the use of motion vectors in the curent frame
+ MOTION_STATS FrameMvStats;
+
+ // Variables used in control of GF update
+ UINT32 FramesTillGfUpdateDue;
+ INT32 GfUpdateInterval;
+ UINT32 GfuMotionSpeed;
+ UINT32 GfuMotionComplexity;
+ UINT32 GfuBoost;
+ UINT32 GfUsage; // GF usage metric
+ UINT32 LastGfOrKFrameQ;
+
+ // variables for 5 region diamond MV search
+ INT32 DSMVSearchSteps;
+ INT32 DSMVPixelOffsetY[MAX_SEARCH_SITES];
+ INT32 DSMVOffsetX[MAX_SEARCH_SITES];
+ INT32 DSMVOffsetY[MAX_SEARCH_SITES];
+
+ // 2 pass stats
+ INT32 pass;
+ FIRSTPASS_STATS fps;
+ FIRSTPASS_STATS fpmss;
+ FILE *fs;
+ FILE *ss;
+ INT32 GoldenFrameBoost;
+ INT32 MbsSinceGolden;
+ INT32 OneGoldenFrame;
+ INT32 KFBoost;
+ INT32 InterBoostFreq;
+ INT32 InterBoost;
+ INT32 GoldenMbsSinceGolden;
+ INT32 GoldenMbsThisFrame;
+ INT32 InterErrorb;
+ INT32 FramesToKey;
+ double FirstPassPSNR;
+ INT32 ActualTargetBitRate;
+ INT32 KFForced;
+ INT32 NextKFForced;
+ INT32 CalculatedWorstQ;
+ INT32 PassedInWorstQ;
+
+
+ // new parameters
+
+ BOOL DisableGolden; // disable golden frame updates
+ BOOL VBMode; // run in variable bandwidth 1 pass mode
+ BOOL EndUsage; // Local file playback mode / vs streamed
+ BOOL AutoWorstQ; // Auto adjust worst quality.... 1 pass vbr within buffering constraints
+ UINT32 BestAllowedQ; // best allowed quality ( save bits by disallowings frames that are too high quality )
+ INT32 UnderShootPct; // target a percentage of the actual frame to allow for sections that go over
+
+ INT32 MaxAllowedDatarate; // maximum the datarate is allowed to go.
+ INT32 MaximumBufferSize; // maximum buffer size.
+
+ BOOL TwoPassVBREnabled; // two pass variable bandwidth enabled
+ INT32 TwoPassVBRBias; // how variable do we want to target?
+ INT32 TwoPassVBRMaxSection; // maximum
+ INT32 TwoPassVBRMinSection; // minimum
+ INT32 Pass; // which pass of the compression are we running.
+ double TotalBitsLeftInClip;
+ double FramesYetToEncode;
+ double TotalBitsPerMB;
+
+ // Prediction mode parameters for VP6.2
+ UINT8 LastPredictionFilterMode;
+ UINT8 LastPredictionFilterMvSizeThresh;
+ UINT32 LastPredictionFilterVarThresh;
+ UINT8 LastPredictionFilterAlpha;
+
+
+ UINT32 (*FindMvViaSearch)
+ (xCP_INST cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT8 **BestBlockPtr,
+ UINT32 BlockSize);
+
+ void (*FindBestHalfPixelMv)
+ (xCP_INST cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT32 BlockSize,
+ UINT32 *MinError,
+ UINT8 BitShift);
+
+ void (*FindBestQuarterPixelMv)
+ (xCP_INST cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT32 BlockSize,
+ UINT32 *MinError,
+ UINT8 BitShift);
+
+} CP_INSTANCE;
+
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+UINT32 (*GetMBFrameVertVar)(CP_INSTANCE *cpi); // Function-pointer variables, not prototypes. NOTE(review): declared without 'extern', unlike the export sections of the other headers, so every including file gets its own tentative definition -- confirm this is intended
+UINT32 (*GetMBFieldVertVar)(CP_INSTANCE *cpi);
+UINT32 (*GetBlockReconErr)(CP_INSTANCE *cpi, UINT32 bp);
+
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void UpdateFrame(CP_INSTANCE *cpi);
+extern UINT32 EncodeData(CP_INSTANCE *cpi);
+
+// Loop optimizations
+extern void InitMapArrays(void); // explicit (void): an empty list would leave the parameters unspecified in C
+
+// Codec
+extern void SUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+ INT32 SourceStride, INT32 ReconStride );
+extern void SUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+ INT32 SourceStride );
+extern void SUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+ INT32 SourceStride, INT32 ReconStride );
+
+extern CP_INSTANCE * CreateCPInstance(void);
+extern void DeleteCPInstance(CP_INSTANCE **cpi); // takes the address of the instance pointer; presumably clears it after freeing -- confirm
+extern void CMachineSpecificConfig(void);
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+
+extern BOOL EAllocateFragmentInfo(CP_INSTANCE *cpi);
+extern BOOL EAllocateFrameInfo(CP_INSTANCE *cpi);
+extern void EDeleteFragmentInfo(CP_INSTANCE *cpi);
+extern void EDeleteFrameInfo(CP_INSTANCE *cpi);
+extern UINT32 PickIntra( CP_INSTANCE *cpi );
+extern UINT32 PickModes( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError);
+
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex); // also declared, with the same signature, in misc_common.h
+extern void EncodeFrameMbs(CP_INSTANCE *cpi);
+extern void CCONV ChangeEncoderSize(CP_INSTANCE* cpi, UINT32 Width, UINT32 Height); // CCONV is a project macro, presumably a calling-convention specifier -- confirm
+extern void CopyOrResize(CP_INSTANCE* cpi, BOOL ResetPreproc );
+extern UINT32 TokenizeFrag(CP_INSTANCE* cpi, INT16* RawData, UINT32 Plane, BLOCK_CONTEXT* Above, BLOCK_CONTEXT* Left);
+extern void PredictScanOrder( CP_INSTANCE *cpi );
+extern void BuildScanOrder( PB_INSTANCE *pbi, UINT8 * ); // second parameter is unnamed in this prototype
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h
new file mode 100644
index 00000000..96580a24
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h
@@ -0,0 +1,69 @@
+/****************************************************************************
+*
+* Module Title : decodemode.h
+*
+* Description : Functions for decoding modes and motion vectors
+*
+****************************************************************************/
+#ifndef __INC_DECODEMODE_H
+#define __INC_DECODEMODE_H
+
+#ifndef STRICT
+#define STRICT /* Strict type checking */
+#endif
+
+/****************************************************************************
+* Module statics
+****************************************************************************/
+#define MODETYPES 3 // first dimension of VP6_ModeVq; NOTE(review): the MODETYPE enum below has four enumerators -- confirm 3 is intended
+#define MODEVECTORS 16 // second dimension of VP6_ModeVq
+#define PROBVECTORXMIT 174 // NOTE(review): meaning not evident from this header; presumably a probability out of 256 -- confirm
+#define PROBIDEALXMIT 254 // NOTE(review): as above -- confirm
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+typedef struct _modeContext // Three byte-sized context values; not referenced elsewhere in this header, so the field notes below are inferred from names only
+{
+ UINT8 left; // presumably the value taken from the left neighbour -- confirm
+ UINT8 above; // presumably the value taken from the neighbour above -- confirm
+ UINT8 last; // presumably the most recently decoded value -- confirm
+} MODE_CONTEXT;
+
+typedef struct _htorp // One branch of an HNODE, packed into a single byte; the name presumably stands for "token or pointer"
+{
+ unsigned char selector : 1; // 1 bit selector 0->ptr, 1->token
+ unsigned char value : 7; // 7-bit payload, interpreted according to 'selector'. NOTE(review): bit-fields of type unsigned char are implementation-defined in ISO C
+} torp;
+
+typedef struct _hnode // Two-way tree node: one torp per branch
+{
+ torp left; // left branch (token or pointer, see torp.selector)
+ torp right; // right branch (token or pointer, see torp.selector)
+} HNODE;
+
+typedef enum _MODETYPE // Enumerators take the implicit values 0..3 in the order listed
+{
+ MACROBLOCK,
+ NONEAREST_MACROBLOCK,
+ NONEAR_MACROBLOCK,
+ BLOCK, // NOTE(review): the trailing comma is accepted by C99 and later but not by C89
+} MODETYPE;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern UINT8 Stats[9][4][4][4]; // non-const global; NOTE(review): purpose not evident from this header
+extern const UINT8 VP6_ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2]; // MAX_MODES comes from pbdll.h; this header includes nothing itself, so pbdll.h must already have been included
+extern const UINT8 VP6_BaselineXmittedProbs[4][2][MAX_MODES]; // same dimensions as PB_INSTANCE.probXmitted
+
+extern void VP6_BuildModeTree ( PB_INSTANCE *pbi );
+extern void VP6_decodeModeAndMotionVector ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol ); // MBrow/MBcol: presumably the macroblock row and column being decoded -- confirm
+
+/****************************************************************************
+* Function Prototypes
+****************************************************************************/
+INLINE int mbClass(int i); // INLINE is a project macro that is not defined in this header
+void VP6_DecodeModeProbs(PB_INSTANCE *pbi);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h
new file mode 100644
index 00000000..ca4f56bc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h
@@ -0,0 +1,33 @@
+/****************************************************************************
+*
+* Module Title : decodemv.h
+*
+* Description : Functions for decoding modes and motion vectors
+*
+****************************************************************************/
+#ifndef __INC_DECODEMV_H
+#define __INC_DECODEMV_H
+
+#ifndef STRICT
+#define STRICT /* Strict type checking */
+#endif
+
+/****************************************************************************
+* Module statics
+****************************************************************************/
+#define MV_NODES 17 // second dimension of VP6_MvUpdateProbs. NOTE(review): 17 == 1 + 1 + 7 + LONG_MV_BITS (8), the combined size per component of the is-short, sign, short and long tables below -- presumably one update probability per node; confirm
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern const UINT8 DefaultMvShortProbs[2][7]; // same dimensions as PB_INSTANCE.MvShortProbs
+extern const UINT8 VP6_MvUpdateProbs[2][MV_NODES];
+extern const UINT8 DefaultMvLongProbs[2][LONG_MV_BITS]; // same dimensions as PB_INSTANCE.MvSizeProbs; LONG_MV_BITS comes from pbdll.h
+extern const UINT8 DefaultIsShortProbs[2]; // same length as PB_INSTANCE.IsMvShortProb
+extern const UINT8 DefaultSignProbs[2]; // same length as PB_INSTANCE.MvSignProbs
+
+extern void VP6_FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol, UINT8 Frame, int *type); // 'type' is passed by pointer, presumably as an output -- confirm
+extern void VP6_ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType );
+extern void VP6_decodeMotionVector( PB_INSTANCE *pbi, MOTION_VECTOR *mv, CODING_MODE Mode ); // 'mv' is passed by pointer, presumably receiving the decoded vector -- confirm
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h
new file mode 100644
index 00000000..d470bcb5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h
@@ -0,0 +1,27 @@
+/****************************************************************************
+*
+* Module Title : misc_common.h
+*
+* Description : Miscellaneous common routines header file
+*
+*****************************************************************************
+*/
+#ifndef __MISC_COMMON_H
+#define __MISC_COMMON_H
+
+#include "type_aliases.h"
+#include "compdll.h"
+
+/****************************************************************************
+* Function Prototypes
+****************************************************************************/
+extern double GetEstimatedBpb( CP_INSTANCE *cpi, UINT32 TargetQIndex );
+extern void UpdateBpbCorrectionFactor( CP_INSTANCE *cpi, UINT32 FrameSize );
+extern void UpRegulateMB( CP_INSTANCE *cpi, UINT32 RegulationQ, UINT32 SB, UINT32 MB, BOOL NoCheck );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex ); // also declared, with the same signature, in compdll.h (included above)
+extern void RegulateQ( CP_INSTANCE *cpi, INT32 TargetBits );
+extern void ConfigureQuality( CP_INSTANCE *cpi, UINT32 QualityValue );
+extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
+extern void VP6_PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, UINT32 bp); // also declared, with the same signature, in pbdll.h
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h
new file mode 100644
index 00000000..530ba62f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h
@@ -0,0 +1,498 @@
+/****************************************************************************
+*
+* Module Title : pbdll.h
+*
+* Description : Decoder definition header file.
+*
+****************************************************************************/
+#ifndef __INC_PBDLL_H
+#define __INC_PBDLL_H
+
+/****************************************************************************
+* Module statics.
+****************************************************************************/
+#define VAL_RANGE 256 // Must come before header files--REMOVE THIS DEPENDENCY!! Also sizes LimitVal_VP31[VAL_RANGE * 3] in the exports of this header
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "Huffman.h" // spelled exactly as the file is named on disk (Include/Huffman.h) so case-sensitive filesystems resolve it
+#include "TokenEntropy.h" // on-disk name is Include/TokenEntropy.h
+#include "vfw_pb_interface.h"
+#include "postproc_if.h"
+#include "vputil_if.h"
+#include "quantize.h"
+#include "boolhuff.h"
+#include "RawBuffer.h" // on-disk name is Include/RawBuffer.h
+
+/****************************************************************************
+* MACROS
+****************************************************************************/
+
+
+// Enumeration of how block is coded
+// VP6.2 version is >= 8
+#define CURRENT_ENCODE_VERSION 8
+#define CURRENT_DECODE_VERSION 8
+
+#define SIMPLE_PROFILE 0 // profile identifiers; presumably the values stored in PB_INSTANCE.VpProfile -- confirm
+#define PROFILE_1 1
+#define PROFILE_2 2
+#define ADVANCED_PROFILE 3
+
+// Loop filter options
+#define NO_LOOP_FILTER 0
+#define LOOP_FILTER_BASIC 2 // note: no option is defined with the value 1
+#define LOOP_FILTER_DERING 3
+
+#define UMV_BORDER 48
+#define STRIDE_EXTRA (UMV_BORDER * 2) // 96: one UMV_BORDER for each of two sides
+#define BORDER_MBS (UMV_BORDER>>4) // 3: the border in units of 16 (presumably 16-pixel macroblocks -- confirm)
+
+#define MAX_MV_EXTENT 63 // Max search distance in half pixel increments
+#define MV_ENTROPY_TOKENS 511
+#define LONG_MV_BITS 8 // second dimension of PB_INSTANCE.MvSizeProbs
+
+#define PPROC_QTHRESH 64
+
+#define MAX_MODES 10 // CODING_MODE values 0x0..0x9; DO_NOT_CODE (0x10) lies outside this range
+
+#define MAX_NEAREST_ADJ_INDEX 2
+
+#define Y_MVSHIFT 0x2
+#define UV_MVSHIFT 0x3
+#define Y_MVMODMASK 0x3 // == (1 << Y_MVSHIFT) - 1
+#define UV_MVMODMASK 0x7 // == (1 << UV_MVSHIFT) - 1
+
+// INT32 MvShift; // motion vector shift value
+// INT32 MvModMask;
+
+// Prediction filter modes:
+// Note: when trying to use an enum here we ran into an odd compiler bug in
+// the WriteFrameHeader() code. Also an enum type is implicitly an int which
+// is a bit big for something that can only have 3 values
+#define BILINEAR_ONLY_PM 0
+#define BICUBIC_ONLY_PM 1
+#define AUTO_SELECT_PM 2
+
+#define DCProbOffset(A,B) \
+ ( (A) * (MAX_ENTROPY_TOKENS-1) \
+ + (B) ) // flat index for an [A][MAX_ENTROPY_TOKENS-1] layout; PB_INSTANCE.DcProbs has 2*(MAX_ENTROPY_TOKENS-1) entries
+
+#define ACProbOffset(A,B,C,D) \
+ ( (A) * PREC_CASES * VP6_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+ + (B) * VP6_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+ + (C) * (MAX_ENTROPY_TOKENS-1) \
+ + (D) ) // flat index for an [A][PREC_CASES][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1] layout, matching the size of PB_INSTANCE.AcProbs
+
+#define DcNodeOffset(A,B,C) \
+ ( (A) * DC_TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (B) * CONTEXT_NODES \
+ + (C) ) // flat index for an [A][DC_TOKEN_CONTEXTS][CONTEXT_NODES] layout, matching the size of PB_INSTANCE.DcNodeContexts
+
+
+#define MBOffset(row,col) ( (row) * pbi->MBCols + (col) ) // row-major macroblock index; expands to a reference to a variable named 'pbi' that must be in scope at the point of use
+
+/****************************************************************************
+* Types
+****************************************************************************/
+typedef enum // How a block / macroblock is coded; values 0x0..0x9 are the MAX_MODES (10) real modes
+{
+ CODE_INTER_NO_MV = 0x0, // INTER prediction, (0,0) motion vector implied.
+ CODE_INTRA = 0x1, // INTRA i.e. no prediction.
+ CODE_INTER_PLUS_MV = 0x2, // INTER prediction, non zero motion vector.
+ CODE_INTER_NEAREST_MV = 0x3, // Use Last Motion vector
+ CODE_INTER_NEAR_MV = 0x4, // Prior last motion vector
+ CODE_USING_GOLDEN = 0x5, // 'Golden frame' prediction (no MV).
+ CODE_GOLDEN_MV = 0x6, // 'Golden frame' prediction plus MV.
+ CODE_INTER_FOURMV = 0x7, // Inter prediction 4MV per macro block.
+ CODE_GOLD_NEAREST_MV = 0x8, // Use Last Motion vector
+ CODE_GOLD_NEAR_MV = 0x9, // Prior last motion vector
+ DO_NOT_CODE = 0x10 // Fake Mode. Note: 0x10 is hexadecimal (16), not 10; it also sizes VP6_Mode2Frame[] and does not fit in the 4-bit FRAG_INFO.FragCodingMode
+} CODING_MODE;
+
+typedef struct // Packed per-fragment record: a coding mode plus one motion vector (20 bits of bit-fields)
+{
+ unsigned int FragCodingMode : 4; // holds 0..15 only, so DO_NOT_CODE (0x10) cannot be stored here; presumably a CODING_MODE value -- confirm
+ int MVectorX : 8; // NOTE(review): whether a plain 'int' bit-field is signed is implementation-defined in ISO C
+ int MVectorY : 8; // same caveat as MVectorX
+} FRAG_INFO;
+
+typedef struct _DCINFO // A DC value paired with a frame identifier
+{
+ Q_LIST_ENTRY dc; // DC value
+ short frame; // presumably which reference frame the DC value belongs to -- confirm
+} DCINFO;
+
+// defined so i don't have to remember which block goes where
+typedef enum // Index of each of the six blocks of a macroblock: four Y blocks, then U, then V
+{
+ TOP_LEFT_Y_BLOCK = 0,
+ TOP_RIGHT_Y_BLOCK = 1,
+ BOTTOM_LEFT_Y_BLOCK = 2,
+ BOTTOM_RIGHT_Y_BLOCK = 3,
+ U_BLOCK = 4,
+ V_BLOCK = 5 // six positions in total, matching the [6] arrays in MACROBLOCK_INFO
+} BLOCK_POSITION;
+
+// all the information gathered from a block to be used as context in the next block
+typedef struct // Context carried from one block to the next (used as the Left*/Above* entries of FRAME_CONTEXT)
+{
+ UINT8 Token; // presumably the token recorded for the block -- confirm
+ CODING_MODE Mode; // coding mode of the block
+ UINT16 Frame; // presumably identifies the frame the block was predicted from -- confirm
+ Q_LIST_ENTRY Dc; // DC value of the block
+ UINT8 unused[3]; // not used; presumably padding -- confirm
+} BLOCK_CONTEXT;
+
+// all the contexts maintained for a frame
+typedef struct // Left contexts are stored inline; above contexts are reached through pointers
+{
+ BLOCK_CONTEXT LeftY[2]; // 1 for each block row in a macroblock
+ BLOCK_CONTEXT LeftU;
+ BLOCK_CONTEXT LeftV;
+
+ BLOCK_CONTEXT *AboveY; // pointer to above contexts for Y; allocation and length are not visible in this header
+ BLOCK_CONTEXT *AboveU;
+ BLOCK_CONTEXT *AboveV;
+
+// BLOCK_CONTEXT *AboveYAlloc;
+// BLOCK_CONTEXT *AboveUAlloc;
+// BLOCK_CONTEXT *AboveVAlloc;
+
+ Q_LIST_ENTRY LastDcY[4]; // 1 for each frame
+ Q_LIST_ENTRY LastDcU[4];
+ Q_LIST_ENTRY LastDcV[4];
+
+} FRAME_CONTEXT;
+
+// Structure to hold last token values at each position in block
+typedef UINT8 TOKENBUFFER[256]; // array type of 256 bytes; used for PB_INSTANCE.LastToken
+
+
+
+typedef struct // Per-block working state; MACROBLOCK_INFO holds six of these (blockDxInfo[6])
+{
+ INT16 *dequantPtr;
+ INT16 *coeffsPtr;
+ INT8 *reconPtr;
+
+ INT32 MvShift; // motion vector shift value (presumably Y_MVSHIFT or UV_MVSHIFT -- confirm)
+ INT32 MvModMask; // motion vector mod mask (presumably Y_MVMODMASK or UV_MVMODMASK -- confirm)
+
+ INT32 FrameReconStride; // Stride of the frame
+ INT32 CurrentReconStride; // pitch of reconstruction
+
+ INT32 CurrentSourceStride; // pitch of source (compressor only)
+ INT32 FrameSourceStride; // Stride of the frame (compressor only)
+ UINT32 Plane; // plane block is from (compressor only)
+
+ BLOCK_CONTEXT *Above; // above block context
+ BLOCK_CONTEXT *Left; // left block context
+ Q_LIST_ENTRY *LastDc; // last dc value seen
+
+ UINT32 thisRecon; // index for recon
+ UINT32 Source; // index for source (compressor only)
+
+ UINT32 EobPos;
+
+ UINT8 *BaselineProbsPtr;
+ UINT8 *ContextProbsPtr;
+
+ UINT8 *AcProbsBasePtr; // presumably points into PB_INSTANCE.AcProbs -- confirm
+ UINT8 *DcProbsBasePtr; // presumably points into PB_INSTANCE.DcProbs -- confirm
+ UINT8 *DcNodeContextsBasePtr; // presumably points into PB_INSTANCE.DcNodeContexts -- confirm
+ UINT8 *ZeroRunProbsBasePtr; // presumably points into PB_INSTANCE.ZeroRunProbs -- confirm
+
+// BOOL_CODER *br;
+// INT32 token;
+// UINT8 *MergedScanOrder;
+// UINT8 *MergedScanOrderPtr;
+
+}BLOCK_DX_INFO;
+
+
+typedef struct // State for one macroblock; embedded in PB_INSTANCE as 'mbi'
+{
+ BOOL_CODER *br;
+
+ BLOCK_DX_INFO blockDxInfo[6]; // one entry per block position (see BLOCK_POSITION)
+
+ CODING_MODE Mode; // mode macroblock coded as
+
+//note: these should be moved into blockDxInfo
+ CODING_MODE BlockMode[6]; // mode macroblock coded as (one entry per block, so presumably the per-block mode -- confirm)
+ MOTION_VECTOR Mv[6]; // one motion vector per block u and v calculated from rest
+
+
+ MOTION_VECTOR NearestInterMVect;// nearest mv in last frame
+ MOTION_VECTOR NearInterMVect; // near mv in last frame
+ INT32 NearestMvIndex; // Indicates how near nearest is.
+ MOTION_VECTOR NearestGoldMVect; // nearest mv in gold frame
+ MOTION_VECTOR NearGoldMVect; // near mv in gold frame
+ INT32 NearestGMvIndex; // Indicates how near nearest is.
+
+ INT32 Interlaced; // is the macroblock interlaced?
+
+// Q_LIST_ENTRY *CoeffsAlloc; // coefficients 64 per frag 4 y in raster order, u then v
+} MACROBLOCK_INFO;
+
+// Frame Header type
+typedef struct FRAME_HEADER // Bit-reading state over a byte buffer; used for PB_INSTANCE.Header
+{
+ UINT8 *buffer; // data being read
+ UINT32 value; // presumably the bits currently loaded from 'buffer' -- confirm
+ INT32 bits_available; // presumably the number of unread bits left in 'value' -- confirm
+ UINT32 pos; // presumably the current read offset into 'buffer' -- confirm
+} FRAME_HEADER;
+
+typedef struct _BITREADER // Bit-reading state over read-only data; used for PB_INSTANCE.br3
+{
+ int bitsinremainder; // # of bits still used in remainder
+ UINT32 remainder; // remaining bits from original long
+ const unsigned char * position; // character pointer position within data
+} BITREADER;
+
+// Playback Instance Definition
+typedef struct PB_INSTANCE // Decoder (playback) instance: all state for one decoder, passed as 'pbi' to the VP6_* functions
+{
+ MACROBLOCK_INFO mbi; // all the information needed for one macroblock
+ FRAME_CONTEXT fc; // all of the context information needed for a frame
+ QUANTIZER *quantizer;
+
+ // Should be able to delete these entries when VP5 complete
+ INT32 CodedBlockIndex;
+ UINT8 *DataOutputInPtr;
+ FRAG_INFO *FragInfo;
+// FRAG_INFO *FragInfoAlloc;
+
+ /* Current access points for input and output buffers */
+ BOOL_CODER br;
+ BOOL_CODER br2;
+ BITREADER br3;
+
+ // Decoder and Frame Type Information
+ UINT8 Vp3VersionNo;
+ UINT8 VpProfile; // presumably one of SIMPLE_PROFILE .. ADVANCED_PROFILE -- confirm
+
+ UINT32 PostProcessingLevel; /* Perform post processing */
+ UINT32 ProcessorFrequency; /* CPU frequency */
+ UINT32 CPUFree;
+ UINT8 FrameType;
+
+ CONFIG_TYPE Configuration; // frame configuration
+ UINT32 CurrentFrameSize;
+
+ UINT32 YPlaneSize;
+ UINT32 UVPlaneSize;
+ UINT32 VFragments;
+ UINT32 HFragments;
+ UINT32 UnitFragments;
+ UINT32 YPlaneFragments;
+ UINT32 UVPlaneFragments;
+
+ UINT32 ReconYPlaneSize;
+ UINT32 ReconUVPlaneSize;
+
+ UINT32 YDataOffset;
+ UINT32 UDataOffset;
+ UINT32 VDataOffset;
+ UINT32 ReconYDataOffset;
+ UINT32 ReconUDataOffset;
+ UINT32 ReconVDataOffset;
+
+ UINT32 MacroBlocks; // Number of Macro-Blocks in Y component
+ UINT32 MBRows; // Number of rows of MacroBlocks in a Y frame
+ UINT32 MBCols; // Number of cols of MacroBlocks in a Y frame (read by the MBOffset macro)
+ UINT32 ScaleWidth;
+ UINT32 ScaleHeight;
+ UINT32 OutputWidth;
+ UINT32 OutputHeight;
+
+ // Frame Buffers
+ YUV_BUFFER_ENTRY *ThisFrameRecon;
+// YUV_BUFFER_ENTRY *ThisFrameReconAlloc;
+ YUV_BUFFER_ENTRY *GoldenFrame;
+// YUV_BUFFER_ENTRY *GoldenFrameAlloc;
+ YUV_BUFFER_ENTRY *LastFrameRecon;
+// YUV_BUFFER_ENTRY *LastFrameReconAlloc;
+ YUV_BUFFER_ENTRY *PostProcessBuffer;
+// YUV_BUFFER_ENTRY *PostProcessBufferAlloc;
+ YUV_BUFFER_ENTRY *ScaleBuffer; /* new buffer for testing new loop filtering scheme */
+// YUV_BUFFER_ENTRY *ScaleBufferAlloc;
+
+ Q_LIST_ENTRY *quantized_list;
+// INT16 *ReconDataBuffer;
+ INT16 *ReconDataBuffer[6]; // six pointers, presumably one per block position (see BLOCK_POSITION) -- confirm
+// INT16 *ReconDataBufferAlloc;
+// UINT8 FragCoefEOB; // Position of last non 0 coef within QFragData
+ INT16 *TmpReconBuffer;
+// INT16 *TmpReconBufferAlloc;
+ INT16 *TmpDataBuffer;
+// INT16 *TmpDataBufferAlloc;
+
+// UINT8 *LoopFilteredBlockAlloc;
+ UINT8 *LoopFilteredBlock;
+
+ void (**idct)(INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData ); // pointer to function pointer(s); presumably a table of IDCT variants -- confirm
+
+ POSTPROC_INST postproc;
+
+ TOKENBUFFER LastToken; // LTIndex of tokens at each position in block
+
+ CODING_MODE LastMode; // Last Mode decoded;
+
+ UINT8 DcProbs[2*(MAX_ENTROPY_TOKENS-1)]; // flat table; size matches the layout indexed by DCProbOffset()
+ UINT8 AcProbs[2*PREC_CASES*VP6_AC_BANDS*(MAX_ENTROPY_TOKENS-1)]; // flat table; size matches the layout indexed by ACProbOffset()
+
+ //3 MAX_ENTROPY_TOKENS-7
+// UINT8 DcNodeContexts[2][DC_TOKEN_CONTEXTS][CONTEXT_NODES]; // Plane, Contexts, Node
+ UINT8 DcNodeContexts[2 * DC_TOKEN_CONTEXTS * CONTEXT_NODES]; // Plane, Contexts, Node
+
+ UINT8 ZeroRunProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+ UINT8 MergedScanOrder[BLOCK_SIZE + 65];
+ UINT8 ModifiedScanOrder[BLOCK_SIZE];
+ UINT8 EobOffsetTable[BLOCK_SIZE];
+ UINT8 ScanBands[BLOCK_SIZE];
+
+ UINT8 MBModeProb[11]; // NOTE(review): 11 == MAX_MODES + 1; literal rather than the macro -- confirm the relationship
+ UINT8 BModeProb[11];
+
+ UINT8 PredictionFilterMode; // presumably one of BILINEAR_ONLY_PM / BICUBIC_ONLY_PM / AUTO_SELECT_PM -- confirm
+ UINT8 PredictionFilterMvSizeThresh;
+ UINT32 PredictionFilterVarThresh;
+ UINT8 PredictionFilterAlpha;
+
+ BOOL RefreshGoldenFrame;
+
+ UINT8 Inter00Prob;
+ UINT32 AvgFrameQIndex;
+
+ BOOL testMode;
+
+ UINT32 mvNearOffset[16];
+
+ int probInterlaced;
+ char *MBInterlaced;
+ char *predictionMode;
+ MOTION_VECTOR *MBMotionVector;
+// char *MBInterlacedAlloc;
+// char *predictionModeAlloc;
+// MOTION_VECTOR *MBMotionVectorAlloc;
+
+ UINT8 MvSignProbs[2];
+ UINT8 IsMvShortProb[2];
+ UINT8 MvShortProbs[2][7];
+ UINT8 MvQPelProbs[2];
+ UINT8 MvHalfPixelProbs[2];
+ UINT8 MvLowBitProbs[2];
+ UINT8 MvSizeProbs[2][LONG_MV_BITS];
+
+ UINT8 probXmitted[4][2][MAX_MODES];
+ UINT8 probModeSame[4][MAX_MODES];
+ UINT8 probMode[4][MAX_MODES][MAX_MODES-1]; // nearest+near,nearest only, nonearest+nonear, 10 preceding modes, 9 nodes
+
+ UINT32 maxTimePerFrame;
+ UINT32 thisDecodeTime;
+ UINT32 avgDecodeTime;
+ UINT32 avgPPTime[10];
+ UINT32 avgBlitTime;
+
+ // Does this frame use multiple data streams
+ // Multistream is implicit for SIMPLE_PROFILE
+ BOOL MultiStream;
+
+ // Huffman code tables for DC, AC & Zero Run Length
+ UINT32 DcHuffCode[2][MAX_ENTROPY_TOKENS];
+ UINT8 DcHuffLength[2][MAX_ENTROPY_TOKENS];
+ UINT32 DcHuffProbs[2][MAX_ENTROPY_TOKENS];
+ HUFF_NODE DcHuffTree[2][MAX_ENTROPY_TOKENS];
+
+ UINT32 AcHuffCode[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ UINT8 AcHuffLength[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ UINT32 AcHuffProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+ HUFF_NODE AcHuffTree[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+ UINT32 ZeroHuffCode[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+ UINT8 ZeroHuffLength[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+ UINT32 ZeroHuffProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+ HUFF_NODE ZeroHuffTree[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+ /* FAST look-up-table for huffman Trees */
+ UINT16 DcHuffLUT[2][1<<HUFF_LUT_LEVELS];
+ UINT16 AcHuffLUT[PREC_CASES][2][VP6_AC_BANDS][1<<HUFF_LUT_LEVELS];
+ UINT16 ZeroHuffLUT[ZRL_BANDS][1<<HUFF_LUT_LEVELS];
+
+ RAW_BUFFER HuffBuffer;
+
+ // Second partition buffer details
+ FRAME_HEADER Header;
+ UINT32 Buff2Offset;
+
+ // Note: Use of huffman codes for DCT data is only allowed
+ // when using multiple data streams / partitions
+ BOOL UseHuffman;
+
+ // Counters for runs of zeros at DC & EOB at first AC position in Huffman mode
+ INT32 CurrentDcRunLen[2];
+ INT32 CurrentAc1RunLen[2];
+
+ // Should we do loop filtering.
+ // In simple profile this is ignored and there is no loop filtering
+ UINT8 UseLoopFilter; // presumably one of NO_LOOP_FILTER / LOOP_FILTER_BASIC / LOOP_FILTER_DERING -- confirm
+
+ // Control of dering loop/prediction filter
+ UINT32 DrCutOff;
+ UINT32 DrThresh[256];
+
+ UINT32 BlackClamp;
+ UINT32 WhiteClamp;
+
+ UINT32 DeInterlaceMode;
+
+ UINT32 AddNoiseMode;
+
+} PB_INSTANCE;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3]; // 768 entries with VAL_RANGE == 256
+extern BOOL VP6_ModeUsesMC[MAX_MODES]; // table to indicate if the given mode uses motion estimation
+extern const int VP6_Mode2Frame[DO_NOT_CODE]; // DO_NOT_CODE is 0x10, so 16 entries (more than MAX_MODES)
+extern const INT32 VP6_CoeffToBand[65];
+extern const UINT8 DefaultNonInterlacedScanBands[BLOCK_SIZE];
+extern const UINT8 DefaultInterlacedScanBands[BLOCK_SIZE];
+
+extern PB_INSTANCE *VP6_CreatePBInstance ( void );
+extern void VP6_DeletePBInstance ( PB_INSTANCE** ); // takes the address of the instance pointer; parameter is unnamed in this prototype
+extern BOOL VP6_LoadFrame ( PB_INSTANCE *pbi );
+extern void VP6_SetFrameType ( PB_INSTANCE *pbi, UINT8 FrType );
+extern UINT8 VP6_GetFrameType ( PB_INSTANCE *pbi );
+extern BOOL VP6_InitFrameDetails ( PB_INSTANCE *pbi );
+extern void VP6_ErrorTrap ( PB_INSTANCE *pbi, int ErrorCode );
+extern BOOL VP6_AllocateFragmentInfo ( PB_INSTANCE *pbi );
+extern BOOL VP6_AllocateFrameInfo ( PB_INSTANCE *pbi, unsigned int FrameSize );
+extern void VP6_DeleteFragmentInfo ( PB_INSTANCE *pbi );
+extern void VP6_DeleteFrameInfo ( PB_INSTANCE *pbi );
+extern void VP6_DMachineSpecificConfig ( void );
+extern UINT32 VP6_bitread1 ( BOOL_CODER *br ) ;
+extern UINT32 VP6_bitread ( BOOL_CODER *br, int bits );
+extern void vp6_appendframe ( PB_INSTANCE *pbi );
+extern void VP6_readTSC ( unsigned long *tsc );
+extern void VP6_ConfigureContexts ( PB_INSTANCE *pbi );
+extern void VP6_ResetAboveContext ( PB_INSTANCE *pbi );
+extern void VP6_ResetLeftContext ( PB_INSTANCE *pbi );
+extern void VP6_UpdateContext ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp );
+extern void VP6_UpdateContextA ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp );
+
+extern void VP6_PredictDC ( PB_INSTANCE *pbi, BLOCK_POSITION bp );
+extern void VP6_PredictDC_MB ( PB_INSTANCE *pbi );
+
+extern void VP6_ReconstructBlock ( PB_INSTANCE *pbi, BLOCK_POSITION bp );
+//extern void VP6_ReconstructMacroBlock ( PB_INSTANCE *pbi);
+extern void VP6_PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, UINT32 bp); // also declared, with the same signature, in misc_common.h; 'bp' is UINT32 here but BLOCK_POSITION in the prototypes above
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h
new file mode 100644
index 00000000..747cc160
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h
@@ -0,0 +1,65 @@
+/****************************************************************************
+*
+* Module Title : quantize.h
+*
+* Description : Quantizer header file.
+*
+****************************************************************************/
+#ifndef __INC_QUANTIZE_H
+#define __INC_QUANTIZE_H
+
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+
+
+
+/****************************************************************************
+* Structures
+****************************************************************************/
+typedef struct // Quantizer state: quality indices, quantizer tables and dequantizer table pointers
+{
+ UINT32 FrameQIndex; // Quality specified as a table index
+ UINT32 LastFrameQIndex; // presumably the FrameQIndex of the previous frame -- TODO confirm
+ short round[8];
+ short mult[8];
+ short zbin[8];
+ UINT32 QThreshTable[Q_TABLE_SIZE]; // AC quantizer scale values
+
+ UINT32 *transIndex; // Array to reorder zig-zag to the IDCT's ordering
+ UINT8 quant_index[64]; // Array to reorder from raster to zig-zag
+
+ // Used by the dequantizer
+ Q_LIST_ENTRY * dequant_coeffs[2]; // Pointers to the current dequantization tables
+ Q_LIST_ENTRY * dequant_coeffsAlloc[2]; // Underlying allocations, kept so the tables can be aligned
+
+ INT32 QuantCoeffs[2][64]; // Quantizer values table
+ INT32 QuantRound[2][64]; // Quantizer rounding table
+ INT32 ZeroBinSize[2][64]; // Quantizer zero bin table
+ INT32 ZlrZbinCorrections[2][64]; // Zero bin corrections based upon zero run length
+
+} QUANTIZER;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern const UINT8 VP6_QTableSelect[6];
+extern const Q_LIST_ENTRY VP6_DcQuant[Q_TABLE_SIZE];
+
+extern void (*VP6_quantize) ( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void (*VP6_BuildQuantIndex)( QUANTIZER * pbi);
+extern void VP6_InitQTables ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern void VP6_UpdateQ ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern void VP6_UpdateQC ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern QUANTIZER * VP6_CreateQuantizer ( void );
+extern void VP6_DeleteQuantizer ( QUANTIZER **pbi );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h
new file mode 100644
index 00000000..116fd54a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h
@@ -0,0 +1,112 @@
+#ifndef vp5d_h
+#define vp5d_h 1
+
+// Interface between vp3d.dll and Albany's DXV adaptor/blitter.
+// Timothy S. Murphy 13 September 1999.
+
+
+// The main object "defined" here.
+
+struct VP3decompressor;
+
+
+// Some conveniences.
+
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+
+// FourCC codes. Should agree with microsoft's definition
+// sans their stupid types and include files.
+
+typedef ulong FourCC;
+
+#define MakeFourCC( a, b, c, d) ( /* a is the lowest byte, d the highest; each argument is parenthesized so expressions are cast as a whole */ \
+    (ulong) (uchar) (a) \
+    | (ulong) (uchar) (b) << 8 \
+    | (ulong) (uchar) (c) << 16 \
+    | (ulong) (uchar) (d) << 24 \
+)
+
+// A temporary fourCC for Eric & I to use til the bit stream stabilizes.
+// (Eric - "hurl4cc" should NOT appear anywhere in your code, I just put it
+// here so you can check the fourCC representations in memory and files.)
+
+#define hurl4cc MakeFourCC( 'H', 'U', 'R', 'L')
+
+// The actual fourCC for now; similar remarks apply.
+
+#define VP30 1
+
+#if VP30
+# define wilk4cc MakeFourCC( 'V', 'P', '3', '0')
+#else
+# define wilk4cc MakeFourCC( 'W', 'I', 'L', 'K')
+#endif
+
+
+// Array of fourCC codes, has length _and_ is null-terminated.
+// As Donald Knuth once said,
+// "Some people occasionally like a little extra redundancy sometimes."
+
+typedef struct { const FourCC * codes; uint numCodes;} FourCClist;
+
+
+// YUV buffer configuration.
+
+typedef struct {
+
+ ulong Ywidth, Yheight, UVwidth, UVheight;
+
+ long Ystride, UVstride;
+
+ const uchar *Ybuf, *Ubuf, *Vbuf;
+
+} YUVbufferLayout;
+
+
+#if __cplusplus
+# define Decompressor VP3decompressor
+ extern "C" {
+#else
+# define Decompressor struct VP3decompressor
+#endif
+
+#if defined(MACPPC)
+#define _stdcall
+#endif
+
+
+// Return array of fourCC codes supported.
+
+const FourCClist * _stdcall VP3DfourCClist();
+
+
+// Create a decompressor for a particular supported stream type.
+// Returns 0 on failure.
+
+Decompressor * _stdcall VP3DcreateDecompressor( FourCC streamType);
+
+void _stdcall VP3DdestroyDecompressor( Decompressor *);
+
+
+// Advance to next frame, returning reference to updated YUV buffer.
+
+const YUVbufferLayout * _stdcall VP3DnextFrame
+(
+ Decompressor *, const uchar * CXdata, ulong CXdataLengthInBytes
+);
+
+void _stdcall VP3DblitBGR(
+ const Decompressor *, uchar * outRGB, long outStride, long outHeight
+);
+
+
+#if __cplusplus
+ }
+#endif
+
+#undef Decompressor
+
+#endif // vp5d_h
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h
new file mode 100644
index 00000000..fb9d4c14
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h
@@ -0,0 +1,31 @@
+/****************************************************************************
+*
+* Module Title : xprintf.h
+*
+* Description : Debug print interface header file.
+*
+****************************************************************************/
+#ifndef __INC_XPRINTF_H
+#define __INC_XPRINTF_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+
+/****************************************************************************
+* Functions
+****************************************************************************/
+#if __cplusplus
+extern "C"
+{
+#endif
+
+// Display a printf style message on the current video frame
+extern int vp6_xprintf(const PB_INSTANCE* ppbi, long pixel, const char* format, ...);
+
+#if __cplusplus
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile
new file mode 100644
index 00000000..7bf40ed2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile
@@ -0,0 +1,72 @@
+## Target to build
+
+TARGET =libvp6e
+
+## TOOLS
+CC = ecc
+LD = ecc
+AR = ar
+OBJDUMP = objdump
+RM = rm -f
+
+## Directories
+TOPDIR =C:\DuckSoft
+PRIVATEINCLUDE =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
+VP6INCLUDE =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\include
+CXGENERIC =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\cx\generic
+OBJDIR =${TOPDIR}\ObjectCode\bspvp6e
+CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\vp60\vp60
+LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
+
+## Compile Flags
+ALLINCLUDES =-I${CXGENERIC} -I${VP6INCLUDE} -I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2}
+VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES =-DMAPCA
+ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
+DEBUG =-O2
+CFLAGS =-msvc -align 8 -ms -etswp -mP3OPT_nonlocal_calls_through_register=true \
+ -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+ -magen_interroutine_padding
+ALLFLAGS = $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS = cx\bsp\PackVideo.o \
+ cx\bsp\PickModes.o \
+ cx\generic\RawBuffer.o \
+ cx\bsp\bspTokenize.o \
+ cx\generic\Transform.o \
+ cx\bsp\encode.o \
+ cx\bsp\encodembs.o \
+ cx\bsp\encodemode.o \
+ cx\generic\encodemv.o \
+ cx\bsp\mcomp.o \
+ cx\generic\misc_common.o \
+ cx\generic\twopass.o \
+ cx\bsp\vfwcomp.o \
+ cx\generic\vfwcomp_if.o \
+ cx\bsp\bspComp_Globals.o \
+ cx\bsp\mcompopt.o \
+ cx\bsp\bsptransform.o \
+ cx\bsp\CSystemDependant.o
+
+SRCS = $(OBJS:.o=.c)
+
+ARTARGET = ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+ ${AR} -cr ${ARTARGET} ${OBJS}
+ mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+ $(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+ ${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h
new file mode 100644
index 00000000..d2ceb50d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h
@@ -0,0 +1,21 @@
+/****************************************************************************
+*
+* Module Title : CFrameW.h
+*
+* Description : Frame writing functions.
+*
+****************************************************************************/
+#ifndef __INC_CFRAMEW_H
+#define __INC_CFRAMEW_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+* Functions
+****************************************************************************/
+extern void WriteFrameHeader ( CP_INSTANCE *cpi );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c
new file mode 100644
index 00000000..4ba43888
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c
@@ -0,0 +1,79 @@
+/****************************************************************************
+*
+* Module Title : CSystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+****************************************************************************/
+#define STRICT /* Strict type checking. */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <string.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "pbdll.h"
+#include "compdll.h"
+#include "mcomp.h"
+#include "quantize.h"
+#include "resource.h" /* Resource IDs. */
+
+/****************************************************************************
+* Explicit imports
+****************************************************************************/
+#if defined(POSTPROCESS)
+extern void FDct1d4 (INT16 *InputData, INT16 * OutputData);
+extern void IDct4( INT16 *InputData, INT16 *OutputData);
+#endif
+
+extern UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp);
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetIntraErrorC( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 GetInterErr( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 GetSumAbsDiffs( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 GetHalfPixelSumAbsDiffs( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern void VP6_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern UINT32 GetMBFieldVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 FiltBlockBilGetSad_C(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+
+
+/****************************************************************************
+ *
+ * ROUTINE : CMachineSpecificConfig
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Binds the encoder's global function pointers to the
+ *            generic "C" implementations.
+ * SPECIAL NOTES : The assignments do not depend on one another.
+ *
+ ****************************************************************************/
+void CMachineSpecificConfig ( void )
+{
+    // Sum-of-absolute-differences routines
+    GetSAD = GetSumAbsDiffs;
+    GetSAD16 = GetSumAbsDiffs16;
+    GetSadHalfPixel = GetHalfPixelSumAbsDiffs;
+    GetSadHalfPixel16 = GetHalfPixelSumAbsDiffs16;
+    FiltBlockBilGetSad = FiltBlockBilGetSad_C;
+    // Error and variance measures
+    GetInterError = GetInterErr;
+    GetIntraError = GetIntraErrorC;
+    GetBlockReconErr = ComputeBlockReconError;
+    GetMBFrameVertVar = GetMBFrameVerticalVariance;
+    GetMBFieldVertVar = GetMBFieldVerticalVariance;
+    // Forward DCT, quantizer and subtraction routines
+    fdct_short = fdct_short_C;
+    VP6_quantize = VP6_quantize_c;
+    Sub8 = SUB8;
+    Sub8_128 = SUB8_128;
+    Sub8Av2 = SUB8AV2;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c
new file mode 100644
index 00000000..a58c8aff
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c
@@ -0,0 +1,371 @@
+/****************************************************************************
+*
+* Module Title : Comp_Globals.c
+*
+* Description : Global compressor functions & declarations.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For Abs()
+#include "compdll.h"
+#include "mcomp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~31UL ) /* round X up to a multiple of 32; the mask is as wide as unsigned long so no upper bits are lost */
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+INT32 *XX_LUT; // Set by VPEInitLibrary() to &XSquaredTable[255]; XX_LUT[i] == i*i for i in [-255, 255]
+static INT32 XSquaredTable[511]; // Backing storage for XX_LUT
+
+// Motion compensation related variables
+INT32 *AbsX_LUT = NULL; // Set by VPEInitLibrary() to &AbsXTable[255]; AbsX_LUT[i] == abs(i) for i in [-255, 255]
+static INT32 AbsXTable[511]; // Backing storage for AbsX_LUT
+// Function pointers defined here; they are assigned in CMachineSpecificConfig()
+UINT32 (*FiltBlockBilGetSad)(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *, UINT8 *, INT32 );
+UINT32 (*GetIntraError)( UINT8 *, INT32 );
+void (*fdct_short) ( INT16 * InputData, INT16 * OutputData );
+void (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride);
+void (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+void (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+
+/****************************************************************************
+* Explicit Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+extern void VP6_DeleteTmpBuffers(PB_INSTANCE * pbi);
+extern BOOL VP6_AllocateTmpBuffers(PB_INSTANCE * pbi);
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+extern void FillValueTokens ( void );
+
+/****************************************************************************
+ *
+ * ROUTINE : EDeleteFragmentInfo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Frees the buffers set up by EAllocateFragmentInfo.
+ *
+ * SPECIAL NOTES : Every pointer is reset, so repeated calls are harmless.
+ *
+ ****************************************************************************/
+void EDeleteFragmentInfo ( CP_INSTANCE *cpi )
+{
+    // DCT_codes buffer
+    if ( cpi->DCT_codes != NULL )
+        duck_free ( cpi->DCT_codes );
+    cpi->DCT_codes = NULL;
+    // DCTDataBuffer buffer
+    if ( cpi->DCTDataBuffer != NULL )
+        duck_free ( cpi->DCTDataBuffer );
+    cpi->DCTDataBuffer = NULL;
+    // quantized_list buffer
+    if ( cpi->quantized_list != NULL )
+        duck_free ( cpi->quantized_list );
+    cpi->quantized_list = NULL;
+    // Per-macroblock MbBestErr array
+    if ( cpi->MbBestErr != NULL )
+        duck_free ( cpi->MbBestErr );
+    cpi->MbBestErr = NULL;
+#ifdef FULLFRAMEFDCT
+    // Full-frame FDCT coefficient store (only present in FULLFRAMEFDCT builds)
+    if ( cpi->FDCTCoeffs != NULL )
+        duck_free ( cpi->FDCTCoeffs );
+    cpi->FDCTCoeffs = NULL;
+#endif
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EAllocateFragmentInfo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : BOOL: TRUE on success, FALSE if allocation failure.
+ *
+ * FUNCTION : Allocates memory for encoder data structures.
+ *
+ * SPECIAL NOTES : Buffers come from duck_memalign() with 32-byte alignment.
+ *
+ ****************************************************************************/
+BOOL EAllocateFragmentInfo ( CP_INSTANCE *cpi )
+{
+    // De-allocate existing memory
+    EDeleteFragmentInfo(cpi);
+
+    // Allocate new memory; any failure releases every fragment buffer obtained so far
+    cpi->DCT_codes = duck_memalign(32, 64*sizeof(INT16), DMEM_GENERAL);
+    if(!cpi->DCT_codes) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    cpi->quantized_list = duck_memalign(32, 64*sizeof(Q_LIST_ENTRY), DMEM_GENERAL);
+    if(!cpi->quantized_list) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    cpi->DCTDataBuffer = duck_memalign(32, 64*sizeof(INT16), DMEM_GENERAL);
+    if(!cpi->DCTDataBuffer) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    // Unwind with EDeleteFragmentInfo here too: EDeleteFrameInfo would leak the buffers above
+    cpi->MbBestErr = (UINT32 *) duck_memalign(32, cpi->pb.MacroBlocks * sizeof(UINT32), DMEM_GENERAL);
+    if(!cpi->MbBestErr) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+#if defined FULLFRAMEFDCT
+    cpi->FDCTCoeffs= (Q_LIST_ENTRY(*)[64]) duck_memalign(32, sizeof(Q_LIST_ENTRY)*64* cpi->pb.UnitFragments , DMEM_GENERAL);
+    if(!cpi->FDCTCoeffs) {EDeleteFragmentInfo(cpi); return FALSE;}
+#endif
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EDeleteFrameInfo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Frees the buffers set up by EAllocateFrameInfo.
+ *
+ * SPECIAL NOTES : Every pointer is reset, so repeated calls are harmless.
+ *
+ ****************************************************************************/
+void EDeleteFrameInfo ( CP_INSTANCE *cpi )
+{
+    // The two frame buffers
+    if ( cpi->yuv0ptr != NULL )
+        duck_free ( cpi->yuv0ptr );
+    cpi->yuv0ptr = NULL;
+    if ( cpi->yuv1ptr != NULL )
+        duck_free ( cpi->yuv1ptr );
+    cpi->yuv1ptr = NULL;
+    // Coefficient token buffer
+    if ( cpi->CoeffTokens != NULL )
+        duck_free ( cpi->CoeffTokens );
+    cpi->CoeffTokens = NULL;
+    // Temporary output buffer for packed dct data
+    if ( cpi->OutputBuffer2 != NULL )
+        duck_free ( cpi->OutputBuffer2 );
+    cpi->OutputBuffer2 = NULL;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EAllocateFrameInfo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : BOOL: TRUE on success, FALSE if allocation failure.
+ *
+ * FUNCTION : Allocates the frame-sized encoder buffers.
+ *
+ * SPECIAL NOTES : Buffers come from duck_memalign() with 32-byte alignment.
+ *
+ ****************************************************************************/
+BOOL EAllocateFrameInfo ( CP_INSTANCE *cpi )
+{
+    int FrameSize = cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize;
+    // Release whatever an earlier call may have left behind
+    EDeleteFrameInfo ( cpi );
+    // Two frame buffers of FrameSize YUV_BUFFER_ENTRY elements each
+    cpi->yuv0ptr = duck_memalign(32, FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if ( cpi->yuv0ptr == NULL ) goto AllocFailed;
+    cpi->yuv1ptr = duck_memalign(32, FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if ( cpi->yuv1ptr == NULL ) goto AllocFailed;
+    // Token buffer of FrameSize TOKENEXTRA elements
+    cpi->CoeffTokens = duck_memalign(32, FrameSize*sizeof(TOKENEXTRA), DMEM_GENERAL);
+    if ( cpi->CoeffTokens == NULL ) goto AllocFailed;
+    // Temporary output buffer for packed dct data
+    cpi->OutputBuffer2 = duck_memalign(32, FrameSize, DMEM_GENERAL);
+    if ( cpi->OutputBuffer2 == NULL ) goto AllocFailed;
+
+    return TRUE;
+
+AllocFailed:
+    EDeleteFrameInfo ( cpi );
+    return FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeleteCPInstance
+ *
+ * INPUTS : CP_INSTANCE **cpi : Address of the encoder instance pointer.
+ *
+ * OUTPUTS : CP_INSTANCE **cpi : *cpi is set to NULL after the instance is freed.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Calls DeletePreProc and VP6_DeleteTmpBuffers on the
+ *            instance's members, then frees the instance itself.
+ *
+ * SPECIAL NOTES : Does nothing when *cpi is already NULL.
+ *
+ ****************************************************************************/
+void DeleteCPInstance ( CP_INSTANCE **cpi )
+{
+    CP_INSTANCE *Instance = *cpi;
+    if ( Instance == NULL )
+        return;
+    DeletePreProc ( &Instance->preproc );
+    VP6_DeleteTmpBuffers ( &Instance->pb );
+    duck_free ( Instance );
+    *cpi = NULL;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CreateCPInstance
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : CP_INSTANCE *: Pointer to new encoder instance or NULL.
+ *
+ * FUNCTION : Allocates a zeroed encoder instance and fills in defaults.
+ *
+ * SPECIAL NOTES : NULL is returned if either allocation step fails.
+ *
+ ****************************************************************************/
+CP_INSTANCE *CreateCPInstance ( void )
+{
+    CP_INSTANCE *cpi;
+    UINT32 Slot = 0;
+    int InstanceBytes = sizeof( CP_INSTANCE );
+
+    // Obtain storage for the encoder instance and clear every field
+    cpi = duck_malloc ( InstanceBytes, DMEM_GENERAL );
+    if ( cpi == NULL )
+        return NULL;
+    memset ( (unsigned char *)cpi, 0, InstanceBytes );
+    // Temporary buffers for the embedded decoder instance
+    if ( !VP6_AllocateTmpBuffers(&cpi->pb) )
+    {
+        DeleteCPInstance(&cpi);
+        return NULL;
+    }
+
+    // Defaults held in the embedded decoder instance
+    cpi->pb.idct = idctc;
+    cpi->pb.ProcessorFrequency = CPUFrequency;
+    memset ( cpi->pb.DcProbs, 0, sizeof(cpi->pb.DcProbs) );
+    memset ( cpi->pb.AcProbs, 0, sizeof(cpi->pb.AcProbs) );
+
+    // Initialise Configuration structure to legal values
+    cpi->Configuration.BaseQ = 32;
+    cpi->Configuration.FirstFrameQ = 32;
+    cpi->Configuration.WorstQuality = 32;
+    cpi->Configuration.ActiveWorstQuality = 8;
+    cpi->Configuration.ActiveBestQuality = Q_TABLE_SIZE - 4;
+    cpi->Configuration.OutputFrameRate = 30;
+    cpi->Configuration.TargetBandwidth = 100*1024;
+
+    // Threshold and correction factor defaults
+    cpi->MVChangeFactor = 14;
+    cpi->FourMvChangeFactor = 8;
+    cpi->ExhaustiveSearchThresh = 2500;
+    cpi->MinImprovementForFourMV = 100;
+    cpi->FourMVThreshold = 10000;
+    cpi->IntraThresh = 25;
+    cpi->InterTripOutThresh = 5000;
+    cpi->ThreshMapThreshold = 5;
+    cpi->BpbCorrectionFactor = 1.0;
+    cpi->KeyFrameBpbCorrectionFactor = 1.0;
+
+    // Feature switches and remaining defaults
+    cpi->GoldenFrameEnabled = TRUE;
+    cpi->InterPrediction = TRUE;
+    cpi->MotionCompensation = TRUE;
+    cpi->QuickCompress = TRUE;
+    cpi->RdOpt = 0;
+    cpi->PreProcFilterLevel = 4;
+    cpi->FixedQ = -1;
+
+    // No experimental settings are active to begin with
+    cpi->nExperimentals = 0;
+    while ( Slot < C_SET_EXPERIMENTAL_MAX-C_SET_EXPERIMENTAL_MIN+1 )
+        cpi->Experimental[Slot++] = 0;
+    // Access pointers to MV cost array
+    cpi->EstMvCostPtrX = &cpi->EstMVCost[0][MV_ENTROPY_TOKENS / 2];
+    cpi->EstMvCostPtrY = &cpi->EstMVCost[1][MV_ENTROPY_TOKENS / 2];
+    return cpi;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VPEInitLibrary
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initializes the decoder library, then the encoder's
+ *            function pointers and look-up tables.
+ * SPECIAL NOTES : AbsX_LUT and XX_LUT accept indices from -255 to 255.
+ *
+ ****************************************************************************/
+void VPEInitLibrary ( void )
+{
+    int Diff;
+
+    // Initialise the decompressor, then bind the encoder function pointers
+    VP6_VPInitLibrary();
+    CMachineSpecificConfig();
+
+    // Both tables are indexed by a signed value in [-255, 255], so the
+    // exported pointers refer to the middle entry of the 511-entry arrays
+    AbsX_LUT = &AbsXTable[255];
+    XX_LUT = &XSquaredTable[255];
+    for ( Diff = -255; Diff <= 255; Diff++ )
+    {
+        AbsX_LUT[Diff] = abs(Diff);     // absolute difference
+        XX_LUT[Diff] = Diff*Diff;       // squared error
+    }
+
+    // Prepare table of tokens for fast look-up
+    FillValueTokens();
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VPEDeInitLibrary
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-initializes the library by calling VP6_VPDeInitLibrary.
+ *
+ * SPECIAL NOTES : Counterpart of VPEInitLibrary.
+ *
+ ****************************************************************************/
+void VPEDeInitLibrary ( void )
+{
+ VP6_VPDeInitLibrary();
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c
new file mode 100644
index 00000000..da2a1c5c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c
@@ -0,0 +1,1840 @@
+/****************************************************************************
+*
+* Module Title : PackVideo.c
+*
+* Description : Bitstream Packing Routines for VP6.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "encodemode.h"
+#include "encodemv.h"
+#include "TokenEntropy.h"
+#include "systemdependant.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define PROB_UPDATE_CORECTION (-1)
+
+#define MAX_DC_ZRL 74 // Maximum run of zeros at DC position (11 + 63)
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+UINT32 scanupdates[64][2];
+
+const UINT8 DcZrlHuffCode[5] = { 0, 1, 2, 6, 7 }; // Code value for each of the 5 run length bands
+const UINT8 DcZrlHuffLength[5] = { 2, 2, 2, 3, 3 }; // Length in bits of the matching DcZrlHuffCode entry
+const UINT8 DcZrlExtraOffset[5] = { 1, 2, 3, 7, 11 }; // First run length in each band (agrees with DcZrlHuffBand below)
+const UINT8 DcZrlExtraLength[5] = { 0, 0, 2, 2, 6 }; // Each band spans 2^n consecutive run lengths (presumably n extra bits -- TODO confirm)
+// Band for each run length 0..MAX_DC_ZRL: 0-1 -> 0, 2 -> 1, 3-6 -> 2, 7-10 -> 3, 11-74 -> 4
+const UINT8 DcZrlHuffBand[MAX_DC_ZRL+1] =
+{
+ 0, 0, 1, 2, 2, 2, 2, 3,
+ 3, 3, 3, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4
+};
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void ConvertBoolTrees ( PB_INSTANCE *pbi );
+
+/****************************************************************************
+ *
+ * ROUTINE : SetZrlNodeStats
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *          UINT32 Band : ZRL band being updated.
+ *          UINT32 Node : Tree node index within the band.
+ *          UINT32 ZeroHits : Hit count for the index-0 branch.
+ *          UINT32 Total : Hit count for both branches together.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Stores the two branch hit counts for a node and, if the
+ *            node was reached at all, its probability scaled by 255.
+ *
+ * SPECIAL NOTES : The stored probability is not modified when Total is 0.
+ *
+ ****************************************************************************/
+static void SetZrlNodeStats ( CP_INSTANCE *cpi, UINT32 Band, UINT32 Node, UINT32 ZeroHits, UINT32 Total )
+{
+    // Index 0 takes the hits passed in, index 1 the remainder of the total
+    cpi->FrameZrlBranchHits[Band][Node][0] = ZeroHits;
+    cpi->FrameZrlBranchHits[Band][Node][1] = Total - ZeroHits;
+
+    // A node that was never reached keeps its existing probability
+    if ( Total )
+        cpi->FrameZrlProbs[Band][Node] = (ZeroHits * 255) / Total;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetOptimalFrameZrlProbs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Calculate optimal ZRL node probabilities and
+ *            hit counts from ZRL distribution data.
+ *
+ * SPECIAL NOTES : Node layout within each band:
+ *                   0    : runs 1-4 vs. all other runs in FrameZeroCount
+ *                   1    : runs 1-2 vs. runs 3-4
+ *                   2    : run 1 vs. run 2
+ *                   3    : run 3 vs. run 4
+ *                   4    : runs 5-8 vs. the remaining runs
+ *                   5    : runs 5-6 vs. runs 7-8
+ *                   6    : run 5 vs. run 6
+ *                   7    : run 7 vs. run 8
+ *                   8-13 : bits 0-5 of (run - 9), tallied over runs 9-63
+ *
+ ****************************************************************************/
+void GetOptimalFrameZrlProbs ( CP_INSTANCE *cpi )
+{
+    UINT32 Band;
+    UINT32 Run;
+    UINT32 Bit;
+    UINT32 Offset, Hits;
+    UINT32 Run1to2, Run3to4, Run1to4;
+    UINT32 Run5to6, Run7to8, Run5to8;
+    UINT32 BitTally[ZRL_BANDS][6][2];
+
+    // Pass 1: nodes 0-7, which relate to the explicit run lengths 1-8
+    for ( Band=0; Band<ZRL_BANDS; Band++ )
+    {
+        // Partial sums of the run length histogram shared by several nodes
+        Run1to2 = cpi->FrameZrlDist[Band][1] + cpi->FrameZrlDist[Band][2];
+        Run3to4 = cpi->FrameZrlDist[Band][3] + cpi->FrameZrlDist[Band][4];
+        Run5to6 = cpi->FrameZrlDist[Band][5] + cpi->FrameZrlDist[Band][6];
+        Run7to8 = cpi->FrameZrlDist[Band][7] + cpi->FrameZrlDist[Band][8];
+        Run1to4 = Run1to2 + Run3to4;
+        Run5to8 = Run5to6 + Run7to8;
+
+        // Top node: runs of 1-4 against every other run counted for the band
+        SetZrlNodeStats ( cpi, Band, 0, Run1to4, cpi->FrameZeroCount[Band] );
+
+        // Second node: 1,2 vs 3,4
+        SetZrlNodeStats ( cpi, Band, 1, Run1to2, Run1to4 );
+
+        // Third node: 1 vs 2
+        SetZrlNodeStats ( cpi, Band, 2, cpi->FrameZrlDist[Band][1], Run1to2 );
+
+        // Fourth node: 3 vs 4
+        SetZrlNodeStats ( cpi, Band, 3, cpi->FrameZrlDist[Band][3], Run3to4 );
+
+        // Fifth node: 5-8 vs >8
+        SetZrlNodeStats ( cpi, Band, 4, Run5to8, cpi->FrameZeroCount[Band] - Run1to4 );
+
+        // Sixth node: 5,6 vs 7,8
+        SetZrlNodeStats ( cpi, Band, 5, Run5to6, Run5to8 );
+
+        // Seventh node: 5 vs 6
+        SetZrlNodeStats ( cpi, Band, 6, cpi->FrameZrlDist[Band][5], Run5to6 );
+
+        // Eighth node: 7 vs 8
+        SetZrlNodeStats ( cpi, Band, 7, cpi->FrameZrlDist[Band][7], Run7to8 );
+    }
+
+    // Pass 2: nodes 8-13, one per bit of (run - 9), using a histogram of
+    // each bit's value over the runs of 9 and above
+    memset ( BitTally, 0, sizeof(BitTally) );
+
+    for ( Band=0; Band<ZRL_BANDS; Band++ )
+    {
+        for ( Run=9; Run<64; Run++ )
+        {
+            Offset = Run - 9;
+            Hits = cpi->FrameZrlDist[Band][Run];
+
+            for ( Bit=0; Bit<6; Bit++ )
+                BitTally[Band][Bit][(Offset >> Bit) & 1] += Hits;
+        }
+
+        // Bit k of the offset is recorded at node k + 8
+        for ( Bit=0; Bit<6; Bit++ )
+        {
+            SetZrlNodeStats ( cpi, Band, Bit + 8, BitTally[Band][Bit][0],
+                              BitTally[Band][Bit][0] + BitTally[Band][Bit][1] );
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     SetTreeNodeProb
+ *
+ *  INPUTS        :     UINT32 Node  : Index of the tree node to fill in.
+ *                      UINT32 Hits  : Number of 0-branch decisions at the node.
+ *                      UINT32 Total : Number of decisions (0 or 1) at the node.
+ *                      UINT32 Round : Non-zero to round the quotient to nearest,
+ *                                     zero to truncate it.
+ *
+ *  OUTPUTS       :     UINT8 *Probabilities      : Probabilities[Node] is written
+ *                                                  only when Total != 0.
+ *                      UINT32 BranchChoices[][2] : BranchChoices[Node][0] = Hits,
+ *                                                  BranchChoices[Node][1] = Total - Hits.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Records the branch histogram for one tree node and
+ *                      derives its probability as (Hits * 255) / Total,
+ *                      limited to the range 1..MAX_PROB.
+ *
+ *  SPECIAL NOTES :     Private helper of ConvertDistribution. When Total is 0
+ *                      the probability entry is left untouched.
+ *
+ ****************************************************************************/
+static void SetTreeNodeProb
+(
+    UINT8 *Probabilities,
+    UINT32 BranchChoices[][2],
+    UINT32 Node,
+    UINT32 Hits,
+    UINT32 Total,
+    UINT32 Round
+)
+{
+    UINT8 Prob;
+
+    BranchChoices[Node][0] = Hits;
+    BranchChoices[Node][1] = Total - Hits;
+
+    // A node that was never visited keeps whatever probability it already has
+    if ( Total == 0 )
+        return;
+
+    Prob = (UINT8)(((Hits * 255) + (Round ? (Total >> 1) : 0)) / Total);
+
+    // 0 is not a legal probability and nothing above MAX_PROB may be used
+    if ( Prob == 0 )
+        Prob = 1;
+    else if ( Prob > MAX_PROB )
+        Prob = MAX_PROB;
+
+    Probabilities[Node] = Prob;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     ConvertDistribution
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi      : Pointer to encoder instance (NOT USED).
+ *                      UINT32 *Distribution  : Token histogram array, indexed by
+ *                                              token, MAX_ENTROPY_TOKENS entries.
+ *
+ *  OUTPUTS       :     UINT8 *Probabilities      : Array of MAX_ENTROPY_TOKENS-1
+ *                                                  node probabilities.
+ *                      UINT32 BranchChoices[][2] : Histogram of 0/1 branch
+ *                                                  decisions per node.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Converts a token distribution array into a set of tree
+ *                      node probabilities.
+ *
+ *  SPECIAL NOTES :     The format of the binary decision tree is fixed.
+ *                      Every node starts at probability 128 with an empty
+ *                      histogram; nodes are only recomputed when the
+ *                      distribution contains at least one token, and a node
+ *                      whose own total is zero keeps the default of 128.
+ *                      The ZERO_CONTEXT_NODE probability uses a truncating
+ *                      divide, all other nodes round to nearest.
+ *                      Finally the low bit of every probability is cleared
+ *                      and a resulting 0 is replaced by 1.
+ *
+ ****************************************************************************/
+void ConvertDistribution
+(
+    CP_INSTANCE *cpi,
+    UINT32 *Distribution,
+    UINT8 *Probabilities,
+    UINT32 BranchChoices[][2]
+)
+{
+    UINT32 i;
+    UINT32 SumTokens = 0;
+    UINT32 ZeroOrEob;       // EOB + ZERO tokens
+    UINT32 NonZero;         // everything except EOB and ZERO
+    UINT32 TwoToFour;       // TWO + THREE + FOUR tokens
+    UINT32 Cat12;           // CATEGORY1 + CATEGORY2 tokens
+    UINT32 Cat34;           // CATEGORY3 + CATEGORY4 tokens
+    UINT32 Cat56;           // CATEGORY5 + CATEGORY6 tokens
+
+    // Total number of tokens in the histogram
+    for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+        SumTokens += Distribution[i];
+
+    // Default outputs: even odds and an empty branch histogram for every node
+    for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+    {
+        Probabilities[i]    = 128;
+        BranchChoices[i][0] = 0;
+        BranchChoices[i][1] = 0;
+    }
+
+    // Nothing more to derive when there are no tokens at all
+    if ( SumTokens > 0 )
+    {
+        ZeroOrEob = Distribution[DCT_EOB_TOKEN] + Distribution[ZERO_TOKEN];
+        NonZero   = SumTokens - ZeroOrEob;
+        TwoToFour = Distribution[TWO_TOKEN] + Distribution[THREE_TOKEN] + Distribution[FOUR_TOKEN];
+        Cat12     = Distribution[DCT_VAL_CATEGORY1] + Distribution[DCT_VAL_CATEGORY2];
+        Cat34     = Distribution[DCT_VAL_CATEGORY3] + Distribution[DCT_VAL_CATEGORY4];
+        Cat56     = Distribution[DCT_VAL_CATEGORY5] + Distribution[DCT_VAL_CATEGORY6];
+
+        // Zero context: (EOB or ZERO) against everything else. Truncating divide.
+        SetTreeNodeProb ( Probabilities, BranchChoices, ZERO_CONTEXT_NODE, ZeroOrEob, SumTokens, 0 );
+
+        // EOB against ZERO
+        SetTreeNodeProb ( Probabilities, BranchChoices, EOB_CONTEXT_NODE, Distribution[DCT_EOB_TOKEN], ZeroOrEob, 1 );
+
+        // ONE against all larger values
+        SetTreeNodeProb ( Probabilities, BranchChoices, ONE_CONTEXT_NODE, Distribution[ONE_TOKEN], NonZero, 1 );
+
+        // Low values (TWO..FOUR) against the category tokens
+        SetTreeNodeProb ( Probabilities, BranchChoices, LOW_VAL_CONTEXT_NODE, TwoToFour, NonZero - Distribution[ONE_TOKEN], 1 );
+
+        // TWO against THREE/FOUR
+        SetTreeNodeProb ( Probabilities, BranchChoices, TWO_CONTEXT_NODE, Distribution[TWO_TOKEN], TwoToFour, 1 );
+
+        // THREE against FOUR
+        SetTreeNodeProb ( Probabilities, BranchChoices, THREE_CONTEXT_NODE, Distribution[THREE_TOKEN], Distribution[THREE_TOKEN] + Distribution[FOUR_TOKEN], 1 );
+
+        // Categories 1/2 against categories 3..6
+        SetTreeNodeProb ( Probabilities, BranchChoices, HIGH_LOW_CONTEXT_NODE, Cat12, Cat12 + Cat34 + Cat56, 1 );
+
+        // Category 1 against category 2
+        SetTreeNodeProb ( Probabilities, BranchChoices, CAT_ONE_CONTEXT_NODE, Distribution[DCT_VAL_CATEGORY1], Cat12, 1 );
+
+        // Categories 3/4 against categories 5/6
+        SetTreeNodeProb ( Probabilities, BranchChoices, CAT_THREEFOUR_CONTEXT_NODE, Cat34, Cat34 + Cat56, 1 );
+
+        // Category 3 against category 4
+        SetTreeNodeProb ( Probabilities, BranchChoices, CAT_THREE_CONTEXT_NODE, Distribution[DCT_VAL_CATEGORY3], Cat34, 1 );
+
+        // Category 5 against category 6
+        SetTreeNodeProb ( Probabilities, BranchChoices, CAT_FIVE_CONTEXT_NODE, Distribution[DCT_VAL_CATEGORY5], Cat56, 1 );
+    }
+
+    // Adjust the probabilities to a 7 bit resolution
+    for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+    {
+        Probabilities[i] &= ~1;
+        if ( Probabilities[i] == 0 )        // 0 not legal.
+            Probabilities[i] = 1;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     AddBitsToBuffer
+ *
+ *  INPUTS        :     BOOL_CODER *bc : Pointer to a bool coder instance.
+ *                      UINT32 data    : Data value to be encoded by bc.
+ *                      UINT32 bits    : Number of bits of data to be encoded.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Uses the specified Bool Coder to encode the specified
+ *                      data value which has the specified number of bits.
+ *
+ *  SPECIAL NOTES :     Fixed probability of 128 (0x80) is used to encode
+ *                      each bit in turn. Bit (bits-1) of data is encoded
+ *                      first and bit 0 last, i.e. the most-significant of
+ *                      the requested bits goes out first.
+ *                      Nothing is encoded when bits is 0. bits must not
+ *                      exceed the width of data (32).
+ *
+ ****************************************************************************/
+void AddBitsToBuffer ( BOOL_CODER *bc, UINT32 data, UINT32 bits )
+{
+    // Count down on the unsigned width itself so that bits == 0 needs no
+    // unsigned-to-signed conversion of (bits - 1) to terminate the loop.
+    while ( bits > 0 )
+    {
+        bits--;
+        VP6_EncodeBool ( bc, (1&(data>>bits)), 0x80 );
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WriteFrameHeader
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Writes a frame header to the bitstream.
+ *
+ * SPECIAL NOTES : The leading fields (frame type, quantizer index, the
+ * two-stream flag and, on key frames, version, profile and
+ * interlace flag) are written as raw bits to cpi->RawBuffer.
+ * All later fields are written through the bool coder cpi->bc.
+ * Whenever prediction filter settings are transmitted the
+ * cpi->LastPredictionFilter* fields are updated to match.
+ *
+ ****************************************************************************/
+void WriteFrameHeader ( CP_INSTANCE *cpi )
+{
+ RAW_BUFFER *Buffer = &cpi->RawBuffer; // target of the AddRawBitsToBuffer calls below
+ BOOL_CODER *bc = &cpi->bc; // target of the AddBitsToBuffer calls below
+ PB_INSTANCE *pbi = &cpi->pb;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+ // Output the frame type (base/key frame or inter frame) as 1 raw bit
+ AddRawBitsToBuffer( Buffer, (UINT32)pbi->FrameType, 1 );
+
+ // Quantizer index for the frame, 6 raw bits
+ AddRawBitsToBuffer( Buffer, pbi->quantizer->FrameQIndex, 6 );
+
+ // Flag to indicate if we are using two bool coder streams.
+ // Note that this flag is ignored by the decoder in SIMPLE_PROFILE
+ // where the use of two streams is implicit (the flag is still written as 1 in that profile)
+ AddRawBitsToBuffer ( Buffer, (UINT32)((pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE)) ? 1 : 0), 1);
+
+ // If the frame was a base frame then write out the version, profile and frame dimensions.
+ if ( pbi->FrameType == BASE_FRAME )
+ {
+ // Add the version (5 raw bits) and profile (2 raw bits) details
+ AddRawBitsToBuffer ( Buffer, (UINT32)pbi->Vp3VersionNo, 5 );
+ AddRawBitsToBuffer ( Buffer, (UINT32)pbi->VpProfile, 2 );
+
+ // is this keyframe section of the file interlaced
+ AddRawBitsToBuffer ( Buffer, (UINT32)(pbi->Configuration.Interlaced), 1);
+
+ // encoded size vertical and horizontal: fragment counts halved, 8 bits each, via the bool coder
+ AddBitsToBuffer( bc, (UINT32)(pbi->VFragments>>1), 8 );
+ AddBitsToBuffer( bc, (UINT32)(pbi->HFragments>>1), 8 );
+
+ if( ( pbi->Configuration.HScale > 1 || pbi->Configuration.VScale > 1 ) &&
+ ( cpi->AllowSpatialResampling == 0 && !cpi->ForceInternalSize ))
+ {
+ // output size: input dimension * Scale / Ratio, then >> 4, 8 bits each
+ AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YHeight * pbi->Configuration.VScale / pbi->Configuration.VRatio >> 4), 8 );
+ AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YWidth * pbi->Configuration.HScale / pbi->Configuration.HRatio >> 4), 8 );
+ }
+ else
+ {
+ // output size: input dimension >> 4 with no scale factor applied, 8 bits each
+ AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YHeight >> 4), 8 );
+ AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YWidth >> 4), 8 );
+ }
+
+ // scaling mode, 2 bits
+ AddBitsToBuffer( bc, (UINT32)(pbi->Configuration.ScalingMode), 2);
+
+ // Unless in SIMPLE_PROFILE transmit data to describe the filter
+ // strategy for fractional pels (Applies until next key frame)
+ if ( pbi->VpProfile != SIMPLE_PROFILE )
+ {
+ // Indicate what type of filtering we should use in motion prediction.
+ // Applies until next key frame.
+ if ( pbi->PredictionFilterMode == AUTO_SELECT_PM )
+ {
+ AddBitsToBuffer( bc, (UINT32)1, 1 );
+ AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterVarThresh >> ((cpi->pb.Vp3VersionNo > 7) ? 0 : 5)), 5 ); // threshold is sent >> 5 unless the version is > 7
+ AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterMvSizeThresh), 3 );
+ }
+ else
+ {
+ AddBitsToBuffer( bc, (UINT32)0, 1 );
+ AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterMode == BICUBIC_ONLY_PM) ? 1 : 0, 1 ); // the cast applies to the comparison only; the value sent is still 0 or 1
+ }
+
+ // If the ENCODER VERSION is > 7 then we add the VP6.2 specific stuff (4-bit filter alpha)
+ if ( cpi->pb.Vp3VersionNo > 7 )
+ AddBitsToBuffer( bc, (UINT32)pbi->PredictionFilterAlpha, 4 );
+
+ cpi->LastPredictionFilterMode = pbi->PredictionFilterMode; // remembered so inter frames can detect a change
+ cpi->LastPredictionFilterVarThresh = pbi->PredictionFilterVarThresh;
+ cpi->LastPredictionFilterMvSizeThresh = pbi->PredictionFilterMvSizeThresh;
+ cpi->LastPredictionFilterAlpha = pbi->PredictionFilterAlpha;
+ }
+ }
+ // Non key frame specific stuff
+ else
+ {
+ // Flag whether or not the golden frame should be updated this frame
+ AddBitsToBuffer( bc, (pbi->RefreshGoldenFrame) ? 1 : 0, 1 );
+
+ // Indicate whether loop filter is to be used.
+ // This flag is not written at all if we are in SIMPLE_PROFILE
+ if ( pbi->VpProfile != SIMPLE_PROFILE )
+ {
+ if ( pbi->UseLoopFilter == NO_LOOP_FILTER )
+ {
+ AddBitsToBuffer( bc, 0, 1 );
+ }
+ else if ( pbi->UseLoopFilter == LOOP_FILTER_BASIC )
+ {
+ AddBitsToBuffer( bc, 1, 1 );
+ AddBitsToBuffer( bc, 0, 1 );
+ }
+ else // LOOP_FILTER_DERING
+ {
+ AddBitsToBuffer( bc, 1, 1 );
+ AddBitsToBuffer( bc, 1, 1 );
+ }
+
+ // Should we update prediction modes etc. VP6.2 and later
+ if ( cpi->pb.Vp3VersionNo > 7 )
+ {
+ if ( (pbi->PredictionFilterMode != cpi->LastPredictionFilterMode) ||
+ (pbi->PredictionFilterVarThresh != cpi->LastPredictionFilterVarThresh) ||
+ (pbi->PredictionFilterMvSizeThresh != cpi->LastPredictionFilterMvSizeThresh) ||
+ (pbi->PredictionFilterAlpha != cpi->LastPredictionFilterAlpha) )
+ {
+ // Indicate a change
+ AddBitsToBuffer( bc, 1, 1 );
+
+ // Indicate what type of filtering we should use in motion prediction.
+ // Applies until next key frame.
+ if ( pbi->PredictionFilterMode == AUTO_SELECT_PM )
+ {
+ AddBitsToBuffer( bc, (UINT32)1, 1 );
+ AddBitsToBuffer( bc, (UINT32)pbi->PredictionFilterVarThresh, 5 ); // sent unshifted; this path is only reached when the version is > 7
+ AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterMvSizeThresh), 3 );
+ }
+ else
+ {
+ AddBitsToBuffer( bc, (UINT32)0, 1 );
+ AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterMode == BICUBIC_ONLY_PM) ? 1 : 0, 1 ); // the cast applies to the comparison only; the value sent is still 0 or 1
+ }
+
+ AddBitsToBuffer( bc, (UINT32)pbi->PredictionFilterAlpha, 4 );
+
+ cpi->LastPredictionFilterMode = pbi->PredictionFilterMode; // record the settings just transmitted
+ cpi->LastPredictionFilterVarThresh = pbi->PredictionFilterVarThresh;
+ cpi->LastPredictionFilterMvSizeThresh = pbi->PredictionFilterMvSizeThresh;
+ cpi->LastPredictionFilterAlpha = pbi->PredictionFilterAlpha;
+ }
+ else
+ {
+ // No change this frame
+ AddBitsToBuffer( bc, 0, 1 );
+ }
+ }
+ }
+ }
+
+ // All frames (key frame and inter): one bit reflecting pbi->UseHuffman
+ if ( pbi->UseHuffman )
+ AddBitsToBuffer( bc, 1, 1 );
+ else
+ AddBitsToBuffer( bc, 0, 1 );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP6AddHuffmanToken
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi       : Pointer to encoder instance.
+ *                      TOKENEXTRA *TokenExtra : Token & extrabits to be encoded.
+ *                      UINT32 *HuffCode       : Array of Huffman codes for tokens.
+ *                      UINT8 *HuffLength      : Array of lengths of each HuffCode entry.
+ *                      UINT32 *ZeroCode       : Array of Huffman codes for zero runs.
+ *                      UINT8 *ZeroLength      : Array of lengths of each ZeroCode entry.
+ *                      UINT8 *CoefIndex       : DCT coeff position token occurs at.
+ *                                               Advanced by Extra for a ZERO_TOKEN
+ *                                               at a position > 0.
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Adds a single token and any associated extra-bits
+ *                      to the bitstream using Huffman tokens.
+ *
+ *  SPECIAL NOTES :     All output goes to cpi->pb.HuffBuffer.
+ *                      A ZERO_TOKEN at position 0, or a DCT_EOB_TOKEN at
+ *                      position 0 or 1, is only written when Extra > 0 and
+ *                      is then followed by a run code from the DcZrl tables.
+ *
+ ****************************************************************************/
+INLINE
+void VP6AddHuffmanToken
+(
+    CP_INSTANCE *cpi,
+    TOKENEXTRA *TokenExtra,
+    UINT32 *HuffCode,
+    UINT8 *HuffLength,
+    UINT32 *ZeroCode,
+    UINT8 *ZeroLength,
+    UINT8 *CoefIndex
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    INT32  Token     = TokenExtra->Token;
+    UINT32 Extra     = TokenExtra->Extra;
+    UINT32 RunBand;
+
+    // Ordinary value tokens: Huffman code followed by any extra bits
+    if ( (Token != ZERO_TOKEN) && (Token != DCT_EOB_TOKEN) )
+    {
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+        if ( ExtraBitLengths_VP6[Token] )
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra, ExtraBitLengths_VP6[Token] );
+
+        return;
+    }
+
+    if ( Token == ZERO_TOKEN )
+    {
+        if ( *CoefIndex > 0 )
+        {
+            // Zero token away from DC: token code, then the zero run length
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+            if ( Extra < 8 )
+            {
+                // Short runs have a Huffman code of their own
+                AddRawBitsToBuffer ( &pbi->HuffBuffer, ZeroCode[Extra], ZeroLength[Extra] );
+            }
+            else
+            {
+                // Longer runs: escape code ZeroCode[8] then (Extra - 8) in a fixed 6 bits
+                AddRawBitsToBuffer ( &pbi->HuffBuffer, ZeroCode[8], ZeroLength[8] );
+                AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra-8, 6 );
+            }
+
+            // Step the coefindex on by run length - 1 for AC zero runs
+            // Note that TokenExtra->Extra = run length - 1
+            *CoefIndex += Extra;
+            return;
+        }
+    }
+    else if ( *CoefIndex > 1 )
+    {
+        // EOB token beyond first AC in scan: just the token code
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+        return;
+    }
+
+    // What remains is a zero at DC or an EOB at the first AC position.
+    // Both share the DC zero run codes and write nothing when Extra is 0.
+    if ( Extra == 0 )
+        return;
+
+    RunBand = DcZrlHuffBand[Extra];
+
+    // Token code
+    AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+    // Run length band code, plus the offset within the band where the band has extra bits
+    AddRawBitsToBuffer ( &pbi->HuffBuffer, DcZrlHuffCode[RunBand], DcZrlHuffLength[RunBand] );
+    if ( DcZrlExtraLength[RunBand] )
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra-DcZrlExtraOffset[RunBand], DcZrlExtraLength[RunBand] );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6AddToken
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance (NOT USED).
+ * BOOL_CODER *bc : Pointer to Bool Coder to be used.
+ * TOKENEXTRA *TokenExtra : Token & extrabits to be encoded.
+ * UINT8 *BaselineProbsPtr : Array of tree node probs used for the THREE,
+ * HIGH_LOW and CAT_* nodes.
+ * UINT8 *ContextProbsPtr : Array of tree node probs used for the ZERO,
+ * EOB, ONE, LOW_VAL and TWO nodes.
+ * UINT8 *ZeroRunProbsPtr : Array of probs for zero run lengths.
+ * UINT8 *CoefIndex : DCT coeff position token occurs at.
+ * Advanced by Extra for a ZERO_TOKEN at a
+ * position > 0.
+ * BOOL NonZeroImplicit : Flag indicating whether a zero token
+ * is prohibited due to context. When set the
+ * ZERO_CONTEXT_NODE decision is not coded
+ * for non-zero tokens.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Adds a single token and any associated extra-bits
+ * to the bitstream using a Bool Coder.
+ *
+ * SPECIAL NOTES : For a ZERO_TOKEN with Extra >= 8 at a position > 0,
+ * TokenExtra->Extra is reduced by 8 in place.
+ *
+ ****************************************************************************/
+void VP6AddToken
+(
+ CP_INSTANCE *cpi,
+ BOOL_CODER *bc,
+ TOKENEXTRA *TokenExtra,
+ UINT8 *BaselineProbsPtr,
+ UINT8 *ContextProbsPtr,
+ UINT8 *ZeroRunProbsPtr,
+ UINT8 *CoefIndex,
+ BOOL NonZeroImplicit
+)
+{
+ // Case statement to output code patterns for the token.
+ switch ( TokenExtra->Token )
+ {
+ case DCT_EOB_TOKEN: // 00
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[EOB_CONTEXT_NODE ] ); // EOB vs 0 branch
+ break;
+
+ case ZERO_TOKEN: // 01
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+
+ // For DC there is no run length and EOB is not allowed, so only the zero branch above is coded
+ if ( *CoefIndex > 0 )
+ {
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[EOB_CONTEXT_NODE] );// EOB vs 0 branch
+
+ // Step the coefindex on by run length - 1
+ // Note that TokenExtra->Extra = run length - 1
+ *CoefIndex += TokenExtra->Extra;
+
+ // Now code the zero run length
+ if ( TokenExtra->Extra < 8 ) // Extra 0..7: coded with the small tree below
+ {
+ switch ( TokenExtra->Extra )
+ {
+ case 0:
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[1] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[2] );
+ break;
+ case 1:
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[1] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[2] );
+ break;
+ case 2:
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[1] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[3] );
+ break;
+ case 3:
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[1] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[3] );
+ break;
+ case 4:
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[4] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[5] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[6] );
+ break;
+ case 5:
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[4] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[5] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[6] );
+ break;
+ case 6:
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[4] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[5] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[7] );
+ break;
+ case 7:
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[4] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[5] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[7] );
+ break;
+ }
+ }
+ else
+ {
+ TokenExtra->Extra -= 8; // NOTE(review): modifies the caller's token in place - confirm no caller reads Extra afterwards
+
+ // Run length > 8
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+ VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[4] );
+
+ // Code run length -8 as six bits, least significant bit first
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), ZeroRunProbsPtr[8] ); // Bit 0
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), ZeroRunProbsPtr[9] ); // Bit 1
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), ZeroRunProbsPtr[10] ); // Bit 2
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>3)), ZeroRunProbsPtr[11] ); // Bit 3
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>4)), ZeroRunProbsPtr[12] ); // Bit 4
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>5)), ZeroRunProbsPtr[13] ); // Bit 5
+ }
+ }
+ break;
+
+ case ONE_TOKEN: // 10 X
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+
+ VP6_EncodeBool ( bc, TokenExtra->Extra, 128 ); // Sign
+ break;
+
+ case TWO_TOKEN: // 1100 X
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[TWO_CONTEXT_NODE] ); // 2 Branch
+
+ VP6_EncodeBool ( bc, TokenExtra->Extra, 128); // Sign
+ break;
+
+ case THREE_TOKEN: // 11010 X
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[TWO_CONTEXT_NODE] ); // 2 Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[THREE_CONTEXT_NODE] ); // Three Branch
+
+ VP6_EncodeBool ( bc, TokenExtra->Extra, 128 ); // Sign
+ break;
+
+ case FOUR_TOKEN: // 11011 X
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 0, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[TWO_CONTEXT_NODE] ); // 2 Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[THREE_CONTEXT_NODE] ); // Three Branch
+
+ VP6_EncodeBool ( bc, TokenExtra->Extra, 128 ); // Sign
+ break;
+
+ case DCT_VAL_CATEGORY1: // 11100 XX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] );// HighLow Value Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE] ); // Cat1 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 159 ); // Data Bit
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128 ); // Sign Bit
+ break;
+
+ case DCT_VAL_CATEGORY2: // 11101 XXX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] );// HighLow Value Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE] ); // Cat1 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), 165 ); // Data Bits
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 145 );
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128 ); // Sign Bit
+ break;
+
+ case DCT_VAL_CATEGORY3: // 111100 XXXX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] ); // HighLow Value Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] ); // Cat3/4 Value Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE] ); // Cat3 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>3)), 173 ); // Data Bits
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), 148 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 140 );
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128 ); // Sign Bit
+ break;
+
+ case DCT_VAL_CATEGORY4: // 111101 XXXXX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] ); // HighLow Value Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] ); // Cat3/4 Value Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE] ); // Cat3 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>4)), 176 ); // More significant bits more likely to be 0
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>3)), 155 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), 140 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 135 );
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128 ); // Sign Bit
+ break;
+
+ case DCT_VAL_CATEGORY5: // 111110 XXXXXX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] ); // HighLow Value Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] ); // Cat3/4 Value Branch
+ VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE] ); // Cat5/6 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>5)), 180 ); // Data Bits
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>4)), 157 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>3)), 141 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), 134 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 130 );
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128); // Sign Bit
+ break;
+
+ case DCT_VAL_CATEGORY6: // 111111 XXXXXXXXXXXX
+ if ( !NonZeroImplicit )
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] ); // Zero value branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[ONE_CONTEXT_NODE] ); // One Branch
+ VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] ); // Low Value Branch
+
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] ); // HighLow Value Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] ); // Cat3/4 Value Branch
+ VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE] ); // Cat5/6 Value Branch
+
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>11)), 254 ); // Data Bits
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>10)), 254 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>9)), 243 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>8)), 230 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>7)), 196 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>6)), 177 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>5)), 153 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>4)), 140 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>3)), 133 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>2)), 130 );
+ VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>1)), 129 );
+ VP6_EncodeBool ( bc, (1&TokenExtra->Extra), 128 ); // Sign Bit
+ break;
+ } // no default case: any other token value writes nothing
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateContextProbs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Determines which context probabilities to update and
+ * encodes the changes to the bitstream.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void UpdateContextProbs ( CP_INSTANCE *cpi )
+{
+ UINT32 i,j;
+ UINT32 Plane;
+ UINT32 Band;
+ INT32 Prec;
+ INT32 OldBits;
+ INT32 NewBits;
+ UINT8 PrecNonZero;
+ INT32 ProbUpdateCost;
+ UINT8 Probs[MAX_ENTROPY_TOKENS-1];
+ UINT8 LastProb[MAX_ENTROPY_TOKENS-1];
+ UINT32 BranchChoices[MAX_ENTROPY_TOKENS-1][2];
+
+ BOOL_CODER *bc = &cpi->bc;
+ PB_INSTANCE *pbi = &cpi->pb;
+ UINT8 FrameType = VP6_GetFrameType ( pbi );
+
+ // Clear down last prob structure
+ memset ( LastProb, 128, MAX_ENTROPY_TOKENS-1 );
+
+ // Baseline DC probabilities for Y and then UV Planes.
+ for ( Plane=0; Plane<2; Plane++ )
+ {
+ if ( FrameType == BASE_FRAME )
+ memcpy ( pbi->DcProbs+DCProbOffset(Plane,0), LastProb, MAX_ENTROPY_TOKENS-1 );
+
+ ConvertDistribution ( cpi, cpi->FrameDcTokenDist[Plane], Probs, BranchChoices );
+
+ // Are there any updates for this set.
+ for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+ {
+ OldBits = ((BranchChoices[i][0] * VP6_ProbCost[pbi->DcProbs[DCProbOffset(Plane,i)]])/256) +
+ ((BranchChoices[i][1] * VP6_ProbCost[255 - pbi->DcProbs[DCProbOffset(Plane,i)]])/256);
+ NewBits = ((BranchChoices[i][0] * VP6_ProbCost[Probs[i]])/256) +
+ ((BranchChoices[i][1] * VP6_ProbCost[255 - Probs[i]])/256);
+
+ ProbUpdateCost = PROB_UPDATE_BASELINE_COST + PROB_UPDATE_CORECTION + (VP6_ProbCost[255 - VP6_DcUpdateProbs[Plane][i]]/256);
+
+ if ( (OldBits - NewBits) > ProbUpdateCost )
+ {
+ // Probabilities sent
+ VP6_EncodeBool ( bc, 1, VP6_DcUpdateProbs[Plane][i] );
+
+ AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+ // Update the last probability records
+ pbi->DcProbs[DCProbOffset(Plane,i)] = Probs[i];
+ LastProb[i] = Probs[i];
+ }
+ else
+ {
+ // Probabilities not sent
+ VP6_EncodeBool ( bc, 0, VP6_DcUpdateProbs[Plane][i] );
+ }
+ }
+ }
+
+ // If we are in Error resilliant mode and this was the first frame then take a copy of the
+ // entropy probabilities used for re-use on subsequent key frames.
+ if ( (cpi->ErrorResilliantMode) && (cpi->CurrentFrame == 1) )
+ {
+ memcpy( cpi->FirstFrameDcProbs, pbi->DcProbs, sizeof(cpi->FirstFrameDcProbs) );
+ }
+
+ // Are we supporting dynamic scan order updates
+ if ( ( (pbi->Configuration.Interlaced) || (cpi->AllowScanOrderUpdates) ) &&
+ ( !cpi->ErrorResilliantMode ) )
+ {
+ VP6_EncodeBool ( bc, 1, 128 );
+
+ // Transmit changes to the AC scan order banding
+ for ( i=1; i<BLOCK_SIZE; i++ )
+ {
+ // Should we update the ceoffs band
+ if ( cpi->NewScanOrderBands[i] != pbi->ScanBands[i] )
+ {
+ VP6_EncodeBool ( bc, 1, ScanBandUpdateProbs[i] );
+ AddBitsToBuffer ( bc, cpi->NewScanOrderBands[i], SCAN_BAND_UPDATE_BITS );
+ pbi->ScanBands[i] = cpi->NewScanOrderBands[i];
+ scanupdates[i][1]++;
+ }
+ else
+ {
+ VP6_EncodeBool ( bc, 0, ScanBandUpdateProbs[i] );
+ scanupdates[i][0]++;
+ }
+ }
+ }
+ else
+ {
+ VP6_EncodeBool ( bc, 0, 128 );
+ }
+
+ // Reset Zero run probabilities to defaults values for key frames
+ if ( FrameType == BASE_FRAME )
+ {
+ memcpy ( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+ }
+
+ // Update the Zero Run probabilities
+ memcpy ( cpi->FrameZrlProbs, pbi->ZeroRunProbs, sizeof(cpi->FrameZrlProbs) );
+ if ( !cpi->ErrorResilliantMode )
+ GetOptimalFrameZrlProbs( cpi );
+
+ // Transmit any changes needed
+ for ( i=0; i<ZRL_BANDS; i++ )
+ {
+ for ( j=0; j<ZERO_RUN_PROB_CASES; j++ )
+ {
+ // Work out if saving enough to justify update TBD,
+ OldBits = ((cpi->FrameZrlBranchHits[i][j][0] * VP6_ProbCost[pbi->ZeroRunProbs[i][j]])/256) +
+ ((cpi->FrameZrlBranchHits[i][j][1] * VP6_ProbCost[255 - pbi->ZeroRunProbs[i][j]])/256);
+ NewBits = ((cpi->FrameZrlBranchHits[i][j][0] * VP6_ProbCost[cpi->FrameZrlProbs[i][j]])/256) +
+ ((cpi->FrameZrlBranchHits[i][j][1] * VP6_ProbCost[255 - cpi->FrameZrlProbs[i][j]])/256);
+
+ ProbUpdateCost = PROB_UPDATE_BASELINE_COST + (VP6_ProbCost[255 - ZrlUpdateProbs[i][j]]/256);
+
+ if ( (OldBits - NewBits) > ProbUpdateCost )
+ {
+ // Probabilities sent
+ VP6_EncodeBool ( bc, 1, ZrlUpdateProbs[i][j] );
+ AddBitsToBuffer( bc, cpi->FrameZrlProbs[i][j] >> 1, PROB_UPDATE_BASELINE_COST );
+ pbi->ZeroRunProbs[i][j] = (cpi->FrameZrlProbs[i][j] & ~1);
+ pbi->ZeroRunProbs[i][j] += (pbi->ZeroRunProbs[i][j] == 0) ? 1 : 0;
+ }
+ else
+ {
+ // Probability not sent
+ VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[i][j] );
+ }
+ }
+ }
+
+ // Baseline probabilities for each AC band.
+ // Prec=0 means last token in current block was 0: Prec=1 means it was !0
+ for ( Prec=0; Prec<PREC_CASES; Prec++ )
+ {
+ PrecNonZero = (Prec > 0) ? 1 : 0;
+
+ // Baseline probabilities for each AC band.
+ for ( Plane=0; Plane<2; Plane++ )
+ {
+ for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+ {
+ // Decide whether to transmit probability data based upon number of tokens represented
+ ConvertDistribution ( cpi, cpi->FrameAcTokenDist[Prec][Plane][Band], Probs, BranchChoices );
+
+ if ( FrameType == BASE_FRAME )
+ memcpy( pbi->AcProbs+ACProbOffset(Plane,Prec,Band,0), LastProb, MAX_ENTROPY_TOKENS-1 );
+
+ for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+ {
+ OldBits = ((BranchChoices[i][0] * VP6_ProbCost[pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)]])/256) +
+ ((BranchChoices[i][1] * VP6_ProbCost[255 - pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)]])/256);
+ NewBits = ((BranchChoices[i][0] * VP6_ProbCost[Probs[i]])/256) +
+ ((BranchChoices[i][1] * VP6_ProbCost[255 - Probs[i]])/256);
+
+ ProbUpdateCost = PROB_UPDATE_BASELINE_COST + PROB_UPDATE_CORECTION + (VP6_ProbCost[255 - VP6_AcUpdateProbs[Prec][Plane][Band][i]]/256);
+
+ if ( (OldBits - NewBits) > ProbUpdateCost )
+ {
+ // Probabilities sent
+ VP6_EncodeBool ( bc, 1, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+ AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+ // Update the last probability records
+ pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = Probs[i];
+ LastProb[i] = Probs[i];
+ }
+ else
+ {
+ // Probabilities not sent
+ VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+ }
+ }
+ }
+ }
+ }
+
+ // If we are in Error resilliant mode and this was the first frame then take a copy of the
+ // entropy probabilities used for re-use on subsequent key frames.
+ if ( cpi->ErrorResilliantMode && (cpi->CurrentFrame == 1) )
+ memcpy ( cpi->FirstFrameAcProbs, pbi->AcProbs, sizeof(cpi->FirstFrameAcProbs) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : UpdateContextProbs2
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes a fixed pattern of context probability updates to
+ *                  the bool coder cpi->bc. For each plane / prec case a fixed
+ *                  number of leading tree nodes is always re-sent (values
+ *                  derived from this frame's token distributions); every
+ *                  other node gets a "not updated" flag and keeps its
+ *                  current value. The scan order banding and the zero run
+ *                  probabilities are never updated by this routine.
+ *
+ *  SPECIAL NOTES : PackHuffmanCoeffs and PackArithmeticCoeffs call this for
+ *                  non-key frames when cpi->ErrorResilliantMode is set.
+ *                  The zero run flag loop is bounded by ZRL_BANDS, the same
+ *                  bound UpdateContextProbs uses, so both routines emit the
+ *                  same number of "not updated" flags.
+ *
+ ****************************************************************************/
+void UpdateContextProbs2 ( CP_INSTANCE *cpi )
+{
+    UINT32 i, j;
+    UINT32 Plane;
+    UINT32 Band;
+    INT32  Prec;
+    UINT8  Probs[MAX_ENTROPY_TOKENS-1];
+    UINT32 BranchChoices[MAX_ENTROPY_TOKENS-1][2];
+
+    BOOL_CODER  *bc  = &cpi->bc;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    // Fixed update pattern: how many leading nodes / bands are always re-sent.
+    static const UINT8 ActiveDcNodes[2]    = { 3, 2 };                          // Y, UV
+    static const UINT8 ActiveAcNodes[3][2] = { { 1, 1 }, { 2, 0 }, { 1, 0 } };  // {Y, UV} for each prec case
+    static const UINT8 ActiveAcBands[3][2] = { { 1, 1 }, { 1, 0 }, { 1, 0 } };  // {Y, UV} for each prec case
+
+    // Baseline DC probabilities for Y and then UV Planes.
+    for ( Plane=0; Plane<2; Plane++ )
+    {
+        ConvertDistribution ( cpi, cpi->FrameDcTokenDist[Plane], Probs, BranchChoices );
+
+        // Some nodes are always updated
+        // The rest are never updated but are left at the key frame values
+        for ( i=0; i<ActiveDcNodes[Plane]; i++ )
+        {
+            // Probabilities sent
+            VP6_EncodeBool ( bc, 1, VP6_DcUpdateProbs[Plane][i] );
+            AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+            // Update the last probability records
+            pbi->DcProbs[DCProbOffset(Plane,i)] = Probs[i];
+        }
+        for ( i=ActiveDcNodes[Plane]; i<MAX_ENTROPY_TOKENS-1; i++ )
+        {
+            // Probabilities not sent
+            VP6_EncodeBool ( bc, 0, VP6_DcUpdateProbs[Plane][i] );
+        }
+    }
+
+    // Do not change the scan order banding in error resilient mode
+    VP6_EncodeBool ( bc, 0, 128 );
+
+    // For now do not update ZRL probabilities in error resilient mode.
+    // One "not sent" flag per (band, case), as in UpdateContextProbs.
+    for ( i=0; i<ZRL_BANDS; i++ )
+    {
+        for ( j=0; j<ZERO_RUN_PROB_CASES; j++ )
+        {
+            // Probability not sent
+            VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[i][j] );
+        }
+    }
+
+    // Baseline probabilities for each AC band.
+    // Prec=0 means last token in current block was 0: Prec=1 means it was !0
+    for ( Prec=0; Prec<PREC_CASES; Prec++ )
+    {
+        for ( Plane=0; Plane<2; Plane++ )
+        {
+            // For the first couple of AC bands we always update the first few probabilities.
+            // For the higher AC bands we never update probabilities
+            for ( Band=0; Band<ActiveAcBands[Prec][Plane]; Band++ )
+            {
+                // Derive this band's node probabilities from the frame's token distribution
+                ConvertDistribution ( cpi, cpi->FrameAcTokenDist[Prec][Plane][Band], Probs, BranchChoices );
+
+                for ( i=0; i<ActiveAcNodes[Prec][Plane]; i++ )
+                {
+                    // Probabilities sent
+                    VP6_EncodeBool ( bc, 1, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+                    AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+                    // Update the last probability records
+                    pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = Probs[i];
+                }
+
+                for ( i=ActiveAcNodes[Prec][Plane]; i<MAX_ENTROPY_TOKENS-1; i++ )
+                {
+                    // Probabilities not sent
+                    VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+                }
+            }
+
+            for ( Band=ActiveAcBands[Prec][Plane]; Band<VP6_AC_BANDS; Band++ )
+            {
+                for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+                {
+                    // Probabilities not sent
+                    VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+                }
+            }
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : UpdateContextProbs3
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Re-sends the baseline probabilities saved from the first
+ *                  frame. Copies cpi->FirstFrameDcProbs / FirstFrameAcProbs
+ *                  into pbi->DcProbs / pbi->AcProbs and transmits every node
+ *                  whose value differs from the last value sent for the same
+ *                  node index (the record starts at 128 for every node).
+ *                  The zero run probabilities are reset to
+ *                  ZeroRunProbDefaults and not transmitted; the scan order
+ *                  banding is left unchanged.
+ *
+ *  SPECIAL NOTES : PackHuffmanCoeffs and PackArithmeticCoeffs call this for
+ *                  key frames other than frame 1 when
+ *                  cpi->ErrorResilliantMode is set.
+ *                  LastProb is carried on from the DC planes into the AC
+ *                  bands; it is not reset in between.
+ *                  The zero run flag loop is bounded by ZRL_BANDS, the same
+ *                  bound UpdateContextProbs uses.
+ *
+ ****************************************************************************/
+void UpdateContextProbs3(CP_INSTANCE *cpi)
+{
+    UINT32 i, j;
+    UINT32 Plane;
+    UINT32 Band;
+    INT32  Prec;
+    UINT8  Prob;
+    UINT8  LastProb[MAX_ENTROPY_TOKENS-1];
+
+    BOOL_CODER  *bc  = &cpi->bc;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    // Clear down last prob structure (128 for every node)
+    memset ( LastProb, 128, sizeof(LastProb) );
+
+    // Copy over the DC probabilities used for the first frame
+    memcpy ( pbi->DcProbs, cpi->FirstFrameDcProbs, sizeof(pbi->DcProbs) );
+
+    // Baseline DC probabilities for Y and then UV Planes.
+    for ( Plane=0; Plane<2; Plane++ )
+    {
+        // Are there any updates for this set.
+        for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+        {
+            Prob = pbi->DcProbs[DCProbOffset(Plane,i)];
+            if ( Prob != LastProb[i] )
+            {
+                // Send probabilities
+                VP6_EncodeBool ( bc, 1, VP6_DcUpdateProbs[Plane][i] );
+                AddBitsToBuffer ( bc, Prob >> 1, PROB_UPDATE_BASELINE_COST );
+
+                LastProb[i] = Prob;
+            }
+            else
+            {
+                // Probabilities not sent
+                VP6_EncodeBool ( bc, 0, VP6_DcUpdateProbs[Plane][i] );
+            }
+        }
+    }
+
+    // Reset Zero run probabilities to defaults values for key frames
+    memcpy ( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+
+    // Do not change the scan order banding in error resilient mode
+    VP6_EncodeBool ( bc, 0, 128 );
+
+    // For now do not update ZRL probabilities in error resilient mode.
+    // One "not sent" flag per (band, case), as in UpdateContextProbs.
+    for ( i=0; i<ZRL_BANDS; i++ )
+    {
+        for ( j=0; j<ZERO_RUN_PROB_CASES; j++ )
+        {
+            // Probability not sent
+            VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[i][j] );
+        }
+    }
+
+    // Copy over the AC probabilities used for the first frame
+    memcpy ( pbi->AcProbs, cpi->FirstFrameAcProbs, sizeof(pbi->AcProbs) );
+
+    // Baseline probabilities for each AC band.
+    // Prec=0 means last token in current block was 0: Prec=1 means it was !0
+    for ( Prec=0; Prec<PREC_CASES; Prec++ )
+    {
+        for ( Plane=0; Plane<2; Plane++ )
+        {
+            for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+            {
+                for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+                {
+                    Prob = pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)];
+                    if ( Prob != LastProb[i] )
+                    {
+                        // Probabilities sent
+                        VP6_EncodeBool ( bc, 1, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+                        AddBitsToBuffer ( bc, Prob >> 1, PROB_UPDATE_BASELINE_COST );
+
+                        LastProb[i] = Prob;
+                    }
+                    else
+                    {
+                        // Probabilities not sent
+                        VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+                    }
+                }
+            }
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PackHuffmanCoeffs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Outputs the list of tokens generated for the frame
+ * using Huffman coding.
+ * Steps, in order: write the context probability updates
+ * (one of the UpdateContextProbs* routines), configure the
+ * contexts, write the interlaced probability when interlaced,
+ * convert the bool trees to Huffman codes, then for every
+ * non-border macroblock write the optional interlaced flag,
+ * the mode and motion vector (non-key frames) and the DC and
+ * AC tokens of its 6 blocks through VP6AddHuffmanToken.
+ *
+ * SPECIAL NOTES : Tokens are consumed sequentially starting at
+ * cpi->CoeffTokens. The locals Last and now are assigned but
+ * never read in this routine.
+ *
+ ****************************************************************************/
+void PackHuffmanCoeffs ( CP_INSTANCE *cpi )
+{
+ UINT32 Plane;
+ UINT8 PrecTokenIndex;
+ TOKENEXTRA *j; // Cursor into the frame's token list
+ TOKENEXTRA *First;
+ TOKENEXTRA *Last; // NOTE(review): assigned below but never read in this routine
+
+ BOOL_CODER *bc = &cpi->bc;
+ PB_INSTANCE *pbi = &cpi->pb;
+ UINT8 FrameType = VP6_GetFrameType ( pbi );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+ // Work out which context probabilities need to be updated
+ // and output the changes to the bitstream.
+ //
+ // Error resilient mode uses a fixed probability update pattern to make the entropy
+ // code more resilient to dropped frames
+ if ( cpi->ErrorResilliantMode )
+ {
+ // In "error resilient / VC" mode use an update mechanism that is more tolerant of dropped frames.
+ if ( FrameType == BASE_FRAME )
+ {
+ if ( cpi->CurrentFrame == 1 )
+ UpdateContextProbs( cpi );
+ else
+ UpdateContextProbs3( cpi );
+ }
+ else
+ UpdateContextProbs2( cpi );
+ }
+ else
+ {
+ UpdateContextProbs( cpi );
+ }
+
+ // Create all the context specific probabilities
+ VP6_ConfigureContexts ( pbi );
+
+ // Frame-level probability that a macroblock is interlaced (8 bits, only written for interlaced content)
+ if(pbi->Configuration.Interlaced)
+ AddBitsToBuffer ( bc, (UINT32)(pbi->probInterlaced), 8 );
+
+ // Create Huffman codes for tokens based on tree probabilities
+ ConvertBoolTrees ( pbi );
+
+ // encode coefficients
+ First=cpi->CoeffTokens;
+ Last=cpi->CoeffTokenPtr;
+ {
+ UINT8 coef;
+ UINT32 now;
+ unsigned int MBrow, MBcol, block;
+
+ j = First;
+ now = bc->pos * 8 - 4; // NOTE(review): 'now' is never read afterwards
+ for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows - BORDER_MBS; MBrow++ )
+ {
+ for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ )
+ {
+ // dumb way to encode the interlaced decision but it works!!!
+ {
+ UINT8 prob = pbi->probInterlaced;
+
+ // super simple context adjustment based on the macroblock to the left
+ // (skipped for the first non-border column)
+ if(MBcol>BORDER_MBS)
+ {
+ if(pbi->MBInterlaced[MBOffset(MBrow,MBcol-1)])
+ prob = prob - (prob>>1);
+ else
+ prob = prob + ((256-prob)>>1);
+ }
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+ if ( pbi->Configuration.Interlaced )
+ VP6_EncodeBool ( bc, pbi->MBInterlaced[MBOffset(MBrow,MBcol)], prob );
+ }
+
+ if ( pbi->FrameType != BASE_FRAME )
+ encodeModeAndMotionVector ( cpi, MBrow, MBcol );
+
+ for ( block=0 ; block<6 ; block++ )
+ {
+ Plane = block>3; // Blocks 0-3 use plane index 0, blocks 4-5 use plane index 1
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = DC_SECTION;
+#endif
+
+ // DC Token
+ coef = 0;
+ VP6AddHuffmanToken ( cpi, j,
+ pbi->DcHuffCode[Plane],
+ pbi->DcHuffLength[Plane],
+ pbi->ZeroHuffCode[0],
+ pbi->ZeroHuffLength[0],
+ &coef );
+
+ PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+ j++;
+
+ for ( coef=1; coef<64; coef++ ) // coef is also passed by address below; presumably the callee may advance it - TODO confirm
+ {
+ UINT32 ZrlBand = (coef >= ZRL_BAND2) ? 1 : 0;
+
+ // Restrict to 4 AC bands when using Huffman
+ UINT32 AcBand = VP6_CoeffToBand[coef];
+ AcBand = (AcBand < 4) ? AcBand : 3;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = AC_SECTION;
+#endif
+
+ VP6AddHuffmanToken ( cpi, j,
+ pbi->AcHuffCode[PrecTokenIndex][Plane][AcBand],
+ pbi->AcHuffLength[PrecTokenIndex][Plane][AcBand],
+ pbi->ZeroHuffCode[ZrlBand],
+ pbi->ZeroHuffLength[ZrlBand], &coef );
+
+ PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+
+ if( j->Token == DCT_EOB_TOKEN )
+ coef=64; // End of block: forces the coefficient loop to terminate
+ j++;
+ }
+ }
+ }
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PackArithmeticCoeffs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Outputs the list of tokens generated for the frame
+ * using a Bool Coder.
+ * Steps, in order: write the context probability updates
+ * (one of the UpdateContextProbs* routines), configure the
+ * contexts, write the interlaced probability when interlaced,
+ * then for every non-border macroblock write the optional
+ * interlaced flag, the mode and motion vector (non-key frames)
+ * and the DC and AC tokens of its 6 blocks through VP6AddToken.
+ *
+ * SPECIAL NOTES : Tokens go to cpi->bc2 when pbi->MultiStream is set or the
+ * profile is SIMPLE_PROFILE, otherwise to cpi->bc. The
+ * interlaced probability and per-macroblock interlaced flags
+ * are always written to cpi->bc.
+ * The locals Last and now are assigned but never read here.
+ *
+ ****************************************************************************/
+void PackArithmeticCoeffs ( CP_INSTANCE *cpi )
+{
+ UINT32 Plane;
+ UINT8 PrecTokenIndex;
+ TOKENEXTRA *j; // Cursor into the frame's token list
+ TOKENEXTRA *First;
+ TOKENEXTRA *Last; // NOTE(review): assigned below but never read in this routine
+ BOOL_CODER *nbc; // Bool coder that receives the coefficient tokens
+
+ BOOL_CODER *bc = &cpi->bc;
+ PB_INSTANCE *pbi = &cpi->pb;
+ UINT8 FrameType = VP6_GetFrameType ( pbi );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = CONTEXT_OVERHEADS_SECTION;
+#endif
+
+ // Select which bool coder partition to use
+ if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+ nbc = &cpi->bc2;
+ else
+ nbc = &cpi->bc;
+
+ // Work out which context probabilities need to be updated
+ // and output the changes to the bitstream.
+ //
+ // Error resilient mode uses a fixed probability update pattern to make the entropy
+ // code more resilient to dropped frames
+ if ( cpi->ErrorResilliantMode )
+ {
+ // In "error resilient / VC" mode use an update mechanism that is more tolerant of dropped frames.
+ if ( FrameType == BASE_FRAME )
+ {
+ if ( cpi->CurrentFrame == 1 )
+ UpdateContextProbs( cpi );
+ else
+ UpdateContextProbs3( cpi );
+ }
+ else
+ UpdateContextProbs2( cpi );
+ }
+ else
+ {
+ UpdateContextProbs( cpi );
+ }
+
+ // Create all the context specific probabilities
+ VP6_ConfigureContexts ( pbi );
+
+ // Frame-level probability that a macroblock is interlaced (8 bits, only written for interlaced content)
+ if ( pbi->Configuration.Interlaced )
+ AddBitsToBuffer ( bc, (UINT32)(pbi->probInterlaced), 8 );
+
+ // encode coefficients
+ First=cpi->CoeffTokens;
+ Last=cpi->CoeffTokenPtr;
+ {
+ UINT8 coef;
+ UINT32 now;
+ unsigned int MBrow,MBcol,block;
+
+ j = First;
+
+ now = bc->pos * 8 - 4; // NOTE(review): 'now' is never read afterwards
+ for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows - BORDER_MBS; MBrow++ )
+ {
+ for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ )
+ {
+ // dumb way to encode the interlaced decision but it works!!!
+ {
+ UINT8 prob = pbi->probInterlaced;
+
+ // super simple context adjustment based on the macroblock to the left
+ // (skipped for the first non-border column)
+ if ( MBcol>BORDER_MBS )
+ {
+ if ( pbi->MBInterlaced[MBOffset(MBrow,MBcol-1)] )
+ prob = prob - (prob>>1);
+ else
+ prob = prob + ((256-prob)>>1);
+ }
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = CONTEXT_OVERHEADS_SECTION;
+#endif
+
+ if ( pbi->Configuration.Interlaced )
+ VP6_EncodeBool( bc, pbi->MBInterlaced[MBOffset(MBrow,MBcol)], prob );
+ }
+
+ if ( pbi->FrameType != BASE_FRAME )
+ encodeModeAndMotionVector ( cpi, MBrow, MBcol );
+
+ for ( block=0 ; block<6 ; block++ )
+ {
+ Plane = block>3; // Blocks 0-3 use plane index 0, blocks 4-5 use plane index 1
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = DC_SECTION;
+#endif
+
+ // DC Token
+ coef = 0;
+ VP6AddToken ( cpi, nbc,
+ j,
+ pbi->DcProbs+DCProbOffset(Plane,0),
+// pbi->DcNodeContexts[Plane][j->LastTokenL + j->LastTokenA],
+ (pbi->DcNodeContexts + DcNodeOffset(Plane, (j->LastTokenL + j->LastTokenA), 0)),
+ pbi->ZeroRunProbs[0], &coef, FALSE );
+
+ PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+ j++;
+
+ for ( coef=1; coef<64; coef++ ) // coef is also passed by address below; presumably the callee may advance it - TODO confirm
+ {
+
+ UINT32 band = VP6_CoeffToBand[coef];
+ UINT8 *AcProbsPtr = pbi->AcProbs + ACProbOffset(Plane,PrecTokenIndex,band,0 );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = AC_SECTION;
+#endif
+
+ VP6AddToken ( cpi, nbc,
+ j, AcProbsPtr, AcProbsPtr, pbi->ZeroRunProbs[(coef >= ZRL_BAND2) ? 1 : 0],
+ &coef, ((coef>1) && (PrecTokenIndex == 0)) );
+
+ PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+
+ if ( j->Token == DCT_EOB_TOKEN )
+ coef=64; // End of block: forces the coefficient loop to terminate
+ j++;
+ }
+ }
+ }
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PackCodedVideo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Outputs the compressed frame to the bitstream: writes
+ * a frame header and entropy codes associated lists
+ * of tokens.
+ * Also sets cpi->ThisFrameSize (in bits) and updates the
+ * running average cpi->avgPackVideoTime.
+ *
+ * SPECIAL NOTES : Uses either Huffman or Bool coding depending on
+ * pbi->UseHuffman flag.
+ * When pbi->MultiStream is set or the profile is
+ * SIMPLE_PROFILE a second partition is built in
+ * cpi->OutputBuffer2 and copied behind the first one.
+ *
+ ****************************************************************************/
+extern double ModeBits; // NOTE(review): not referenced in PackCodedVideo
+extern double ModeBits2; // NOTE(review): not referenced in PackCodedVideo
+
+void PackCodedVideo ( CP_INSTANCE *cpi )
+{
+ UINT32 Buffer2Offset;
+ unsigned int duration;
+ unsigned int starttsc,endtsc;
+
+ BOOL_CODER *bc = &cpi->bc;
+ PB_INSTANCE *pbi = &cpi->pb;
+ BOOL KeyFrame = (pbi->FrameType == BASE_FRAME);
+
+ VP6_readTSC ( &starttsc );
+
+ // Initialise the raw buffer i/o and the two bool coders.
+ InitAddRawBitsToBuffer ( &cpi->RawBuffer, pbi->DataOutputInPtr );
+
+ // Start the bool coder or coders
+ if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+ {
+ // Start the first bool coder: Allow for the raw header bytes (4 on key frames, 3 otherwise).
+ VP6_StartEncode ( bc, (pbi->DataOutputInPtr + ((KeyFrame) ? 4 : 3)) );
+
+ // Create either second Bool or Huffman coded partition
+ if ( pbi->UseHuffman )
+ InitAddRawBitsToBuffer ( &pbi->HuffBuffer, cpi->OutputBuffer2 );
+ else
+ VP6_StartEncode ( &cpi->bc2, cpi->OutputBuffer2 );
+ }
+ else
+ {
+ // Start the first bool coder: Allow for the raw header bytes (2 on key frames, 1 otherwise).
+ VP6_StartEncode( bc, (pbi->DataOutputInPtr + ((KeyFrame) ? 2 : 1)) );
+ }
+
+ // Set flag to ensure output to the bitstream rather than simulated cost analysis
+ bc->MeasureCost = FALSE;
+
+ if ( pbi->UseHuffman )
+ {
+ // AWG Using runs so copy correct distribution
+ memcpy ( cpi->FrameDcTokenDist, cpi->FrameDcTokenDist2, sizeof(cpi->FrameDcTokenDist2) );
+ memcpy ( cpi->FrameAcTokenDist, cpi->FrameAcTokenDist2, sizeof(cpi->FrameAcTokenDist2) );
+ }
+
+ // Write out the frame header information including size.
+ WriteFrameHeader ( cpi );
+
+ // The tree is not needed (implicit) for key frames
+ if ( !KeyFrame )
+ {
+ // Error resilient mode uses a fixed probability update pattern to make the entropy
+ // code more resilient to dropped frames
+ if ( cpi->ErrorResilliantMode )
+ {
+ UpdateModeProbs(cpi);
+ BuildandPackMvTree2( cpi );
+ }
+ else
+ {
+ UpdateModeProbs(cpi);
+ BuildandPackMvTree( cpi );
+ }
+ }
+
+ if ( pbi->UseHuffman )
+ PackHuffmanCoeffs ( cpi );
+ else
+ PackArithmeticCoeffs ( cpi );
+
+ // Stop the first bool coder; the frame size is worked out below.
+ VP6_StopEncode ( bc );
+
+ // ThisFrameSize is measured in bits
+ if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+ {
+ // Offset to second bitstream partition from start of buffer
+ Buffer2Offset = 4 + bc->pos; // NOTE(review): 4 is used for all frame types although the first bool coder starts at offset 3 on non-key frames - confirm intended
+
+ // Write the offset of the second partition (Buffer2Offset) as 16 raw bits
+ AddRawBitsToBuffer ( &cpi->RawBuffer, Buffer2Offset, 16 );
+
+ if ( pbi->UseHuffman )
+ {
+ // Flush buffer for second Huffman coded output partition
+ EndAddRawBitsToBuffer ( &pbi->HuffBuffer );
+
+ // ThisFrameSize is measured in bits
+ cpi->ThisFrameSize = (Buffer2Offset + pbi->HuffBuffer.pos)*8;
+
+ memcpy ( &cpi->RawBuffer.Buffer[Buffer2Offset], pbi->HuffBuffer.Buffer, pbi->HuffBuffer.pos ); // Append the Huffman partition after the first partition
+ }
+ else
+ {
+ // Stop the second bool coder
+ VP6_StopEncode ( &cpi->bc2);
+
+ // Work out the frame size
+ cpi->ThisFrameSize = (Buffer2Offset + cpi->bc2.pos)*8;
+
+ // Assemble output bitstream from two bitstream partitions
+ memcpy ( &pbi->DataOutputInPtr[Buffer2Offset], cpi->bc2.buffer, cpi->bc2.pos );
+ }
+ }
+ else
+ {
+ // Raw header bits + coded bits
+ cpi->ThisFrameSize = ((KeyFrame ? 2 : 1) + bc->pos)*8;
+ }
+
+ // Stop and flush the raw bits encoder used for the first part of the header
+ EndAddRawBitsToBuffer ( &cpi->RawBuffer );
+
+ // Get time & compute duration
+ VP6_readTSC ( &endtsc );
+ duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ; // NOTE(review): no check here that ProcessorFrequency is non-zero
+
+ // Running average: first sample taken as is, afterwards 7/8 old value + 1/8 new sample
+ if( cpi->avgPackVideoTime == 0)
+ cpi->avgPackVideoTime = duration;
+ else
+ cpi->avgPackVideoTime = ( 7 * cpi->avgPackVideoTime + duration ) >> 3;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c
new file mode 100644
index 00000000..9573d1c2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c
@@ -0,0 +1,2190 @@
+/****************************************************************************
+*
+* Module Title : PickModes.c
+*
+* Description : Coding mode selection functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For abs()
+#include "mcomp.h"
+#include "tokenentropy.h"
+#include "compdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "encodemode.h"
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void PredictBlock ( CP_INSTANCE *cpi, BLOCK_POSITION bp );
+extern UINT8 TokenizeFrag_RD ( CP_INSTANCE *cpi, INT16 * RawData, UINT32 Plane, UINT32 *MbCost );
+extern INT32 *XX_LUT;
+extern void GetQuantizedCoeffsMSE_RD( INT16 * DctCodes,INT16 * Coeffs,INT16 * DequantMatrix,UINT32 *MSE);
+
+extern void PredictDCE
+(
+ CP_INSTANCE *cpi,
+ BLOCK_POSITION bp
+);
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define KF_INDICATOR_THRESH (5 << 12) // = 20480; earlier values noted by the author: 12800, (3 << 12)
+
+#define EPB (cpi->ErrorPerBit) // Shorthand; only usable where a variable named cpi is in scope
+
+#define MIN_ERR 100
+#define MAX_ERR 20000
+
+/****************************************************************************
+* Module Statics
+*
+* Each Q_TABLE_SIZE table below lists 64 values. Presumably they are indexed
+* by quantizer index - TODO confirm against their users, which are outside
+* this section of the file.
+****************************************************************************/
+static const UINT32 IntraThreshTable[Q_TABLE_SIZE] =
+{
+ 47, 46, 45, 40, 39, 38, 37, 36,
+ 35, 34, 33, 32, 31, 30, 29, 28,
+ 27, 26, 25, 25, 24, 24, 23, 23,
+ 22, 21, 21, 20, 19, 19, 18, 18,
+ 17, 17, 17, 16, 16, 15, 15, 14,
+ 14, 13, 13, 12, 12, 11, 11, 10,
+ 9, 9, 9, 7, 6, 6, 5, 4,
+ 4, 3, 3, 2, 1, 0, 0, 0
+};
+
+static const UINT32 IntraFactors[Q_TABLE_SIZE] =
+{
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 126, 122,
+ 120, 118, 116, 114, 112, 110, 108, 106,
+ 104, 102, 100, 98, 94, 90, 88, 84,
+ 80, 76, 72, 64, 56, 48, 32, 32
+};
+
+static const UINT32 ErrorPerBit[Q_TABLE_SIZE] =
+{
+ 300, 250, 200, 180, 170, 160, 150, 145,
+ 140, 130, 120, 114, 110, 102, 98, 95,
+ 90, 85, 80, 78, 76, 74, 72, 70,
+ 68, 64, 62, 58, 56, 54, 52, 50,
+ 49, 48, 47, 46, 45, 44, 43, 42,
+ 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 33, 32, 31, 30, 27, 24, 19,
+ 17, 15, 12, 9, 7, 4, 2, 1
+};
+
+static const UINT32 FourModeImprovement[Q_TABLE_SIZE] = // Same values as ErrorPerBit except for the first 8 entries
+{
+ 250, 225, 210, 200, 195, 180, 165, 150,
+ 140, 130, 120, 114, 110, 102, 98, 95,
+ 90, 85, 80, 78, 76, 74, 72, 70,
+ 68, 64, 62, 58, 56, 54, 52, 50,
+ 49, 48, 47, 46, 45, 44, 43, 42,
+ 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 33, 32, 31, 30, 27, 24, 19,
+ 17, 15, 12, 9, 7, 4, 2, 1
+};
+
+static const UINT32 MvEpbCorrectionTable[10] = // 10 entries; index meaning is not visible in this section
+{
+ 650, 500, 400, 300, 250, 200, 150, 100, 75, 50
+};
+
+/***************** RATE DISTORTION STATIC TABLES *****************
+ * RateMult and RateDiv have Q_TABLE_SIZE entries each (64 values listed).
+ * NOTE(review): presumably applied together as RateMult[q] / RateDiv[q];
+ * that would explain why RateMult alone is not monotonic near its end
+ * while RateDiv is 1 everywhere except its last rows - confirm at the
+ * point of use.
+ *****************************************************************/
+static const UINT32 RateMult[Q_TABLE_SIZE] =
+{
+ 700, 650, 600, 550, 450, 450, 400, 375,
+ 350, 325, 300, 275, 250, 225, 200, 190,
+ 180, 170, 160, 151, 142, 134, 126, 119,
+ 112, 106, 100, 95, 90, 85, 80, 75,
+ 70, 66, 62, 58, 54, 50, 47, 44,
+ 41, 38, 35, 33, 31, 29, 27, 25,
+ 23, 21, 19, 17, 15, 13, 11, 9,
+ 7, 5, 3, 2, 3, 1, 1, 1
+};
+
+
+static const UINT32 RateDiv[Q_TABLE_SIZE] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 2, 1, 2, 4
+};
+
+// Using the proportion of new mvs in the last frame as a measure of complexity
+// this table is used to apply a correction to the rate multiplier used in RD.
+// 128 is neutral, higher prefers rate, lower prefers dist.
+static const UINT32 RateMultCorrection[10] =
+{
+ 120, 125, 130, 140, 150, 165, 180, 195, 200, 220
+};
+
+static const INT32 RdMvCostCorrection[10] = // 10 entries like RateMultCorrection; presumably indexed the same way - TODO confirm
+{
+
+ 36, 15, 12, 4, 3, 2, 1, 0, 0, 0
+};
+
+/****************************************************************************
+*
+* RD SPECIFIC CODE
+*
+*****************************************************************************/
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ComputeBlockReconError
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 bp        : Index into cpi->pb.mbi.blockDxInfo[]
+ *                                     selecting the block to measure.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Distortion metric for the block
+ *
+ *  FUNCTION      : Computes a reconstruction distortion metric for a block.
+ *                  Every source pixel is compared with the co-located pixel
+ *                  of the current frame reconstruction; the per-pixel
+ *                  difference is mapped through XX_LUT. The result is
+ *                  (sum of mapped values + 2 * largest mapped value) << 6.
+ *
+ *  SPECIAL NOTES : XX_LUT is indexed with the signed pixel difference, so it
+ *                  must be addressable at negative offsets (presumably it
+ *                  points at the centre of a squared-difference table -
+ *                  TODO confirm where XX_LUT is set up).
+ *
+ ****************************************************************************/
+UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp)
+{
+    UINT32 i, j;
+
+    UINT8 *NewDataPtr  = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[bp].Source];
+    UINT8 *RefDataPtr1 = &cpi->pb.ThisFrameRecon[cpi->pb.mbi.blockDxInfo[bp].thisRecon];
+    INT32 SourceStride = cpi->pb.mbi.blockDxInfo[bp].CurrentSourceStride;
+    INT32 ReconStride  = cpi->pb.mbi.blockDxInfo[bp].CurrentReconStride;
+
+    INT32 XXDiff;
+    INT32 XXSum = 0;
+    INT32 MaxXXDiff = 0;
+
+    // Accumulate the mapped differences over the whole block and track the largest one
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+        {
+            XXDiff = XX_LUT[(int)NewDataPtr[j] - (int)RefDataPtr1[j]];
+            XXSum += XXDiff;
+
+            if ( XXDiff > MaxXXDiff )
+                MaxXXDiff = XXDiff;
+        }
+
+        // Step to next row of block.
+        NewDataPtr += SourceStride;
+        RefDataPtr1 += ReconStride;
+    }
+
+    // Compute distortion value: the worst pixel is counted two extra times
+    return (UINT32)(XXSum + (2 * MaxXXDiff)) << 6;
+}
+
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : RdSaveMbContext
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 MBcol     : Macroblock column number.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Takes a snapshot of the current macroblock info and of
+ *                  the frame DC contexts (above, left and last DC entries)
+ *                  into the copy fields of the encoder instance.
+ *
+ *  SPECIAL NOTES : RdRestoresMbContext copies the same fields back.
+ *
+ ****************************************************************************/
+void RdSaveMbContext ( CP_INSTANCE *cpi, UINT32 MBcol )
+{
+    PB_INSTANCE *pb = &cpi->pb;
+    const UINT32 LumaCol = MBcol*2;     // Two Y context entries per macroblock column
+
+    // Macroblock info, restored later
+    memcpy ( &cpi->CopyMbi, &pb->mbi, sizeof(MACROBLOCK_INFO) );
+
+    // "Above" contexts belonging to this macroblock column
+    memcpy ( cpi->AboveCopyY,  &pb->fc.AboveY[LumaCol], 2 * sizeof(BLOCK_CONTEXT) );
+    memcpy ( &cpi->AboveCopyU, &pb->fc.AboveU[MBcol],   sizeof(BLOCK_CONTEXT) );
+    memcpy ( &cpi->AboveCopyV, &pb->fc.AboveV[MBcol],   sizeof(BLOCK_CONTEXT) );
+
+    // "Left" contexts
+    memcpy ( cpi->LeftYCopy,  pb->fc.LeftY,  2 * sizeof(BLOCK_CONTEXT) );
+    memcpy ( &cpi->LeftUCopy, &pb->fc.LeftU, sizeof(BLOCK_CONTEXT) );
+    memcpy ( &cpi->LeftVCopy, &pb->fc.LeftV, sizeof(BLOCK_CONTEXT) );
+
+    // Last DC values, three entries per plane
+    memcpy ( cpi->LastDcYCopy, pb->fc.LastDcY, 3 * sizeof(Q_LIST_ENTRY) );
+    memcpy ( cpi->LastDcUCopy, pb->fc.LastDcU, 3 * sizeof(Q_LIST_ENTRY) );
+    memcpy ( cpi->LastDcVCopy, pb->fc.LastDcV, 3 * sizeof(Q_LIST_ENTRY) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : RdRestoresMbContext
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 MBcol     : Macroblock column number.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies the saved frame DC contexts (above, left and last
+ *                  DC entries) and the saved macroblock info from the copy
+ *                  fields of the encoder instance back into cpi->pb.
+ *
+ *  SPECIAL NOTES : Counterpart of RdSaveMbContext; MBcol selects which
+ *                  "above" context entries are overwritten.
+ *
+ ****************************************************************************/
+void RdRestoresMbContext( CP_INSTANCE *cpi, UINT32 MBcol )
+{
+    PB_INSTANCE *pb = &cpi->pb;
+    const UINT32 LumaCol = MBcol*2;     // Two Y context entries per macroblock column
+
+    // "Above" contexts belonging to this macroblock column
+    memcpy ( &pb->fc.AboveY[LumaCol], cpi->AboveCopyY,  2 * sizeof(BLOCK_CONTEXT) );
+    memcpy ( &pb->fc.AboveU[MBcol],   &cpi->AboveCopyU, sizeof(BLOCK_CONTEXT) );
+    memcpy ( &pb->fc.AboveV[MBcol],   &cpi->AboveCopyV, sizeof(BLOCK_CONTEXT) );
+
+    // "Left" contexts
+    memcpy ( pb->fc.LeftY,  cpi->LeftYCopy,  2 * sizeof(BLOCK_CONTEXT) );
+    memcpy ( &pb->fc.LeftU, &cpi->LeftUCopy, sizeof(BLOCK_CONTEXT) );
+    memcpy ( &pb->fc.LeftV, &cpi->LeftVCopy, sizeof(BLOCK_CONTEXT) );
+
+    // Last DC values, three entries per plane
+    memcpy ( pb->fc.LastDcY, cpi->LastDcYCopy, 3 * sizeof(Q_LIST_ENTRY) );
+    memcpy ( pb->fc.LastDcU, cpi->LastDcUCopy, 3 * sizeof(Q_LIST_ENTRY) );
+    memcpy ( pb->fc.LastDcV, cpi->LastDcVCopy, 3 * sizeof(Q_LIST_ENTRY) );
+
+    // Finally put back the macroblock info for use in the rest of pickmodes
+    memcpy ( &pb->mbi, &cpi->CopyMbi, sizeof(MACROBLOCK_INFO) );
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeBlock_RD
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : Macro-block row number (not referenced in the body).
+ * UINT32 MBcol : Macro-block column number (not referenced in the body).
+ * BLOCK_POSITION bp : Position of block in MB (0-5).
+ * BOOL SaveBlockDcContext : Flag whether to save block context.
+ *
+ * OUTPUTS : UINT32 *Rate : Approximation of number of bits required to code block.
+ * Passed on to TokenizeFrag_RD as its MbCost argument.
+ * UINT32 *Dist : Distortion of the encoded block.
+ * The block's error is ADDED to *Dist, so the caller must
+ * initialise it.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a block in rate-distortion mode: predict, forward
+ * DCT, DC prediction, quantize, tokenize for the rate estimate,
+ * measure the quantization error for the distortion, then
+ * update the above / left contexts for the next block.
+ *
+ * SPECIAL NOTES : When SaveBlockDcContext is set the above / left / last DC
+ * entries of this block, and the pointers to them, are stored
+ * in cpi->MbDcContexts[pbi->mbi.Mode][bp].
+ *
+ ****************************************************************************/
+void EncodeBlock_RD
+(
+ CP_INSTANCE *cpi,
+ UINT32 MBrow,
+ UINT32 MBcol,
+ BLOCK_POSITION bp,
+ UINT32 *Rate,
+ UINT32 *Dist,
+ BOOL SaveBlockDcContext
+)
+{
+ PB_INSTANCE *pbi = &cpi->pb;
+
+ UINT32 T_Error1; // Error reported by GetQuantizedCoeffsMSE_RD for this block
+
+ // build a block predictor & subtract predictor from source we are trying to compress
+ PredictBlock ( cpi, bp );
+
+ // forward DCT
+ fdct_short(cpi->DCTDataBuffer, cpi->DCT_codes);
+
+ // predict our dc values from the surrounding guys (PredictDCE, called before quantization)
+ PredictDCE (cpi, bp);
+
+ // quantize the coefficients
+ VP6_quantize ( pbi->quantizer, cpi->DCT_codes, pbi->mbi.blockDxInfo[bp].coeffsPtr, (UINT8)bp );
+
+ // convert coefficients to tokens
+ // The return value is discarded; formerly: pbi->FragCoefEOB = (UINT8)
+ TokenizeFrag_RD ( cpi, pbi->mbi.blockDxInfo[bp].coeffsPtr, pbi->mbi.blockDxInfo[bp].Plane, Rate );
+
+
+ GetQuantizedCoeffsMSE_RD(cpi->DCT_codes,
+ pbi->mbi.blockDxInfo[bp].coeffsPtr,
+ pbi->mbi.blockDxInfo[bp].dequantPtr,
+ &T_Error1);
+ *Dist += T_Error1; // Accumulates: the caller owns the running total
+
+ // predict our dc values from the surrounding guys (VP6_PredictDC, called on the pbi state after quantization)
+ VP6_PredictDC ( pbi, bp );
+
+ // update the context info for the next block
+ VP6_UpdateContextA ( pbi, pbi->mbi.blockDxInfo[bp].Above, bp );
+ VP6_UpdateContext ( pbi, pbi->mbi.blockDxInfo[bp].Left, bp );
+
+
+ // If requested then save the DC context for this block in a data structure indexed by mode and block position.
+ // The saved values are used to update the DC context once the best coding method has been decided.
+ if ( SaveBlockDcContext )
+ {
+ memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].Above, pbi->mbi.blockDxInfo[bp].Above, sizeof(BLOCK_CONTEXT) );
+ cpi->MbDcContexts[pbi->mbi.Mode][bp].AbovePtr = pbi->mbi.blockDxInfo[bp].Above;
+
+ memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].Left, pbi->mbi.blockDxInfo[bp].Left, sizeof(BLOCK_CONTEXT) );
+ cpi->MbDcContexts[pbi->mbi.Mode][bp].LeftPtr = pbi->mbi.blockDxInfo[bp].Left;
+
+ memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].LastDc, pbi->mbi.blockDxInfo[bp].LastDc, sizeof(Q_LIST_ENTRY) );
+ cpi->MbDcContexts[pbi->mbi.Mode][bp].LastDcPtr = pbi->mbi.blockDxInfo[bp].LastDc;
+ }
+
+}
+/****************************************************************************
+ *
+ * ROUTINE : EncodeMacroBlock_RD
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 *FragsToCheck : Indices of the 6 blocks that make up the MB.
+ * UINT32 MBrow : Macro-block row number.
+ * UINT32 MBcol : Macro-block column number.
+ *
+ * OUTPUTS : UINT32 *Rate : Reset to 0, then passed to every block encode.
+ * UINT32 *Dist : Reset to 0, then accumulated over the 6 blocks.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Trial-encodes all 6 blocks of the macro-block with
+ * EncodeBlock_RD so that the caller can estimate coding
+ * cost and reconstruction error. The MB context is saved
+ * with RdSaveMbContext before the trial and put back with
+ * RdRestoresMbContext afterwards.
+ *
+ * SPECIAL NOTES : Only the Above context pointers are set up here; the Left
+ * pointers are used as found.
+ * NOTE(review): the Y Above pointers are indexed from
+ * AboveY[MBcol*2] here, while PickBetterBMode indexes from
+ * AboveY[1+MBcol*2] -- confirm which offset is intended.
+ *
+ ****************************************************************************/
+void EncodeMacroBlock_RD
+(
+    CP_INSTANCE *cpi,
+    UINT32 *FragsToCheck,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT32 *Rate,
+    UINT32 *Dist
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 YCol = MBcol*2;      // column of the MB's left-hand Y blocks in AboveY
+    UINT32 b;
+
+    // Snapshot the MB and DC context so the trial encode can be undone
+    RdSaveMbContext ( cpi, MBcol );
+
+    // Start the MB totals from zero
+    *Rate = 0;
+    *Dist = 0;
+
+    // Load the per-block motion vectors from the fragment info
+    for ( b=0; b<6; b++ )
+    {
+        pbi->mbi.Mv[b].x = pbi->FragInfo[FragsToCheck[b]].MVectorX;
+        pbi->mbi.Mv[b].y = pbi->FragInfo[FragsToCheck[b]].MVectorY;
+    }
+
+    // Above context: Y blocks 0/2 share the left column, 1/3 the right column
+    pbi->mbi.blockDxInfo[0].Above = &pbi->fc.AboveY[YCol];
+    pbi->mbi.blockDxInfo[2].Above = &pbi->fc.AboveY[YCol];
+    pbi->mbi.blockDxInfo[1].Above = &pbi->fc.AboveY[YCol+1];
+    pbi->mbi.blockDxInfo[3].Above = &pbi->fc.AboveY[YCol+1];
+    pbi->mbi.blockDxInfo[4].Above = &pbi->fc.AboveU[MBcol];
+    pbi->mbi.blockDxInfo[5].Above = &pbi->fc.AboveV[MBcol];
+
+    // Trial-encode blocks 0..5 in order, saving each block's DC context
+    for ( b=0; b<6; b++ )
+        EncodeBlock_RD ( cpi, MBrow, MBcol, b, Rate, Dist, TRUE );
+
+    // Put the MB and DC context back as it was
+    RdRestoresMbContext ( cpi, MBcol );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : RdFunction
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 Rate : Rate value (in bits).
+ * UINT32 Dist : Distortion value.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Dist + (Multiplier * Rate) / Divisor.
+ *
+ * FUNCTION : Combines rate and distortion into a single cost. The
+ * multiplier and divisor come from the RateMult and RateDiv
+ * tables, indexed by the frame Q index. The multiplier is
+ * then scaled by RateMultCorrection (indexed by last frame
+ * new-MV usage), shifted right by 7 and floored at 1.
+ *
+ * SPECIAL NOTES : All arithmetic is done in UINT32.
+ *
+ ****************************************************************************/
+UINT32 RdFunction ( CP_INSTANCE *cpi, UINT32 Rate, UINT32 Dist )
+{
+    UINT32 Multiplier = RateMult[cpi->pb.quantizer->FrameQIndex];
+    UINT32 Divisor = RateDiv[cpi->pb.quantizer->FrameQIndex];
+
+    // Correct the multiplier using the complexity estimate derived from
+    // last frame MV usage; never let it drop to zero.
+    Multiplier = (Multiplier*RateMultCorrection[cpi->LastFrameNewMvUsage]) >> 7;
+    if ( Multiplier == 0 )
+        Multiplier = 1;
+
+    return Dist + ((Multiplier * Rate) / Divisor);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : RdModeCost
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : Macro-block row number.
+ * UINT32 MBcol : Macro-block column number.
+ * UINT8 Mode : Coding mode for MB.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: The value returned by modeCost for this mode.
+ *
+ * FUNCTION : Rate-distortion entry point for the mode cost estimate.
+ * Currently a straight pass-through to modeCost.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 RdModeCost ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol, UINT8 Mode )
+{
+    UINT32 Cost = modeCost ( cpi, MBrow, MBcol, Mode );
+
+    return Cost;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetFragMotionVectorAndMode
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * INT32 FragIndex : Index into pbi->FragInfo of the block to update.
+ * MOTION_VECTOR *ThisMotionVector : MV to record for the block.
+ * CODING_MODE mode : Coding mode to record for the block.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Stores the coding mode and both MV components in the
+ * FragInfo entry for the block.
+ *
+ ****************************************************************************/
+void SetFragMotionVectorAndMode
+(
+    PB_INSTANCE *pbi,
+    INT32 FragIndex,
+    MOTION_VECTOR *ThisMotionVector,
+    CODING_MODE mode
+)
+{
+    MOTION_VECTOR Vec = *ThisMotionVector;
+
+    // Record the vector, then the coding mode, for this block
+    pbi->FragInfo[FragIndex].MVectorX = Vec.x;
+    pbi->FragInfo[FragIndex].MVectorY = Vec.y;
+    pbi->FragInfo[FragIndex].FragCodingMode = mode;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickIntra
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Sum over all visited macro-blocks of (intra error >> 8),
+ * each term clamped to the range MIN_ERR..MAX_ERR.
+ *
+ * FUNCTION : Selects INTRA coding mode for all macro-blocks in the
+ * frame. This is a suitable way to code key-frames as
+ * there is then no dependency on previously decoded data.
+ *
+ * SPECIAL NOTES : Also resets cpi->ErrorBins and cpi->ModeMvCostEstimate,
+ * sets the per-MB interlaced flags and the source pointers
+ * and strides for the four Y blocks, and updates
+ * pbi->probInterlaced from the count of interlaced MBs.
+ *
+ ****************************************************************************/
+UINT32 PickIntra ( CP_INSTANCE *cpi )
+{
+ UINT32 Temp;
+ UINT32 i;
+ UINT32 B;
+ UINT32 MBrow;
+ UINT32 MBcol;
+ UINT32 UVRow;
+ UINT32 UVColumn;
+ INT32 FragIndex;
+ UINT32 IntraError;
+ UINT32 UVFragOffset;
+ INT32 TopLeftIndex = 0; // fragment index of the current MB's top-left Y block
+ UINT32 TotIntraError = 0;
+ UINT32 CountInterlaced = 0;
+ PB_INSTANCE *pbi = &cpi->pb;
+ //UINT32 BlockOffset[4] = { 0, 1, pbi->HFragments, pbi->HFragments+1 };
+ UINT32 BlockOffset[4]; // offsets of the 4 Y blocks of an MB relative to its top-left fragment
+
+ BlockOffset[0] = 0;
+ BlockOffset[1] = 1;
+ BlockOffset[2] = pbi->HFragments;
+ BlockOffset[3] = pbi->HFragments+1;
+
+ for ( i=0; i<128; i++ )
+ cpi->ErrorBins[i] = pbi->UnitFragments / 4; // every one of the 128 bins starts at the same value
+
+ // Reset the mode+mv frame cost estimate (no modes or mvs for a key frame).
+ cpi->ModeMvCostEstimate = 0;
+
+ for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows-BORDER_MBS; MBrow++ ) // BORDER_MBS rows are skipped at each edge
+ {
+ for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ ) // likewise for columns
+ {
+ cpi->MBCodingMode = CODE_INTRA;
+
+ pbi->mbi.blockDxInfo[0].Source = pbi->YDataOffset + 16*(MBrow-BORDER_MBS) *pbi->Configuration.VideoFrameWidth + 16*(MBcol-BORDER_MBS); // source offset excludes the border MBs
+ pbi->mbi.blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + 16*MBrow * pbi->Configuration.YStride + 16*MBcol; // recon offset includes the border MBs
+
+ if ( pbi->Configuration.Interlaced /*&& GetMBFrameVertVar(cpi) > GetMBFieldVertVar(cpi)*/ ) // variance test is disabled: only the configuration flag decides
+ {
+ // Code MB as two separate fields
+ pbi->mbi.Interlaced = 1;
+ pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 1;
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = 2 * pbi->Configuration.VideoFrameWidth; // step two source lines at a time
+
+ pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+ pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + pbi->Configuration.VideoFrameWidth; // blocks 2/3 start one line below blocks 0/1
+ pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+
+ CountInterlaced++;
+ }
+ else
+ {
+ // Code MB as a single progressive-scan MB
+ pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 0;
+ pbi->mbi.Interlaced = 0;
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+
+ pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+ pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3); // blocks 2/3 start eight lines below blocks 0/1
+ pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+
+ }
+
+ for ( B=0; B<4; B++ )
+ {
+ FragIndex = TopLeftIndex + BlockOffset[B];
+ pbi->FragInfo[FragIndex].FragCodingMode = cpi->MBCodingMode;
+ }
+
+ // Matching fragments in the U and V planes (FragIndex still holds the index of Y block 3 from the loop above)
+ UVRow = (FragIndex / (pbi->HFragments * 2));
+ UVColumn = (FragIndex % pbi->HFragments) / 2;
+ UVFragOffset = (UVRow * (pbi->HFragments / 2)) + UVColumn;
+
+ pbi->FragInfo[pbi->YPlaneFragments + UVFragOffset].FragCodingMode = cpi->MBCodingMode; // U fragment
+ pbi->FragInfo[pbi->YPlaneFragments + pbi->UVPlaneFragments + UVFragOffset].FragCodingMode = cpi->MBCodingMode; // V fragment
+
+ // Keep a note of the total error score for the Y macro blocks for rate targeting purposes
+ IntraError = GetMBIntraError( cpi );
+
+ Temp = (IntraError>>8); // scale down, then clamp to MIN_ERR..MAX_ERR before accumulating
+ if ( Temp < MIN_ERR )
+ Temp = MIN_ERR;
+ else if ( Temp > MAX_ERR )
+ Temp = MAX_ERR;
+ TotIntraError += Temp;
+
+ TopLeftIndex += 2; // next MB is two fragments to the right
+ }
+
+ TopLeftIndex += pbi->HFragments; // skip the second fragment row covered by the MB row just done
+ }
+
+ pbi->probInterlaced = 256-(1+254*CountInterlaced/pbi->MacroBlocks); // integer arithmetic: 255 when no MB is interlaced, 1 when all are
+
+ return TotIntraError;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetMBMotionVectorsAndMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 *FragIndexes : Indices of the 6 blocks in the MB.
+ * UINT32 Mode : Coding mode for the MB.
+ * MOTION_VECTOR *MotionVector : MV for the MB.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Records the coding mode in the MB info structure, then
+ * applies the same coding mode and motion vector to each of
+ * the MB's 6 constituent blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SetMBMotionVectorsAndMode
+(
+    CP_INSTANCE *cpi,
+    UINT32 *FragIndexes,
+    UINT32 Mode,
+    MOTION_VECTOR *MotionVector
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 b;
+
+    pbi->mbi.Mode = Mode;
+
+    // Same mode and vector for blocks 0..5, in order
+    for ( b=0; b<6; b++ )
+        SetFragMotionVectorAndMode ( pbi, FragIndexes[b], MotionVector, Mode );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickBetterMBMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 *FragsToCheck : Pointer to list of 6 blocks in this MB.
+ * CODING_MODE mode : Coding mode to evaluate.
+ * MOTION_VECTOR *ThisMVector : Pointer to MV associated with this mode.
+ * UINT32 MBrow : MB row.
+ * UINT32 MBcol : MB column.
+ * UINT8 *Frame : Pointer to MB in previous frame reconstruction.
+ * CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *Error : Best error found so far.
+ * MOTION_VECTOR *mv : Pointer to MV for best mode found so far.
+ * UINT32 *FourError : Pointer to errors for 4 Y-blocks in MB.
+ * UINT32 *BestRate : Pointer to best rate found so far.
+ * UINT32 *BestDist : Pointer to best distortion found so far.
+ * UINT32 *BestRd : Pointer to best RD-value found so far.
+ *
+ * OUTPUTS : CODING_MODE *BestMode, UINT32 *Error, MOTION_VECTOR *mv :
+ * Updated when the candidate is accepted.
+ * UINT32 *FourError : Passed to GetMBInterError.
+ * UINT32 *BestRate, *BestDist, *BestRd :
+ * Updated only in RD mode when the candidate is accepted.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Scores the candidate mode as (mode cost * EPB) plus the MB
+ * inter error. Without RD the candidate wins if its score is
+ * below *Error. With RD the MB is trial-encoded and the
+ * candidate wins if its RD value is below *BestRd, or if both
+ * its score and its distortion are below half of the
+ * current bests.
+ *
+ * SPECIAL NOTES : In RD mode the trial encode does not output to the
+ * bitstream, but it does set the mode and MV of the MB's
+ * blocks via SetMBMotionVectorsAndMode.
+ *
+ ****************************************************************************/
+void PickBetterMBMode
+(
+    CP_INSTANCE *cpi,
+    UINT32 * FragsToCheck,
+    CODING_MODE mode,
+    MOTION_VECTOR *ThisMVector,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT8 *Frame,
+    CODING_MODE *BestMode,
+    UINT32 *Error,
+    MOTION_VECTOR *mv,
+    UINT32 *FourError,
+    UINT32 *BestRate,
+    UINT32 *BestDist,
+    UINT32 *BestRd
+)
+{
+    UINT32 ModeBits;
+    UINT32 Score;
+    UINT32 Rate;
+    UINT32 Dist;
+    UINT32 RdValue;
+
+    // Estimate the cost of signalling this mode
+    if ( cpi->RdOpt )
+    {
+        ModeBits = RdModeCost ( cpi, MBrow, MBcol, mode );
+    }
+    else
+    {
+        ModeBits = modeCost ( cpi, MBrow, MBcol, mode );
+    }
+    Score = ModeBits * EPB;
+
+    if ( !cpi->RdOpt )
+    {
+        // Mode cost alone already rules this candidate out
+        if ( Score > *Error )
+            return;
+
+        Score += GetMBInterError ( cpi, cpi->yuv1ptr, Frame, ThisMVector, FourError );
+
+        if ( Score < *Error )
+        {
+            *Error = Score;
+            *BestMode = mode;
+            mv->x = ThisMVector->x;
+            mv->y = ThisMVector->y;
+        }
+        return;
+    }
+
+    // RD path: no early-out on mode cost
+    Score += GetMBInterError ( cpi, cpi->yuv1ptr, Frame, ThisMVector, FourError );
+
+    // Trial-encode the MB with this mode and vector to get rate and distortion
+    SetMBMotionVectorsAndMode ( cpi, FragsToCheck, mode, ThisMVector );
+    EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+    Rate += ModeBits;
+
+    RdValue = RdFunction ( cpi, Rate, Dist );
+
+    if ( RdValue >= *BestRd )
+    {
+        // Not better on RD: still accept if both the score and the
+        // distortion are less than half the current bests.
+        if ( (Score >= (*Error >> 1)) || (Dist >= (*BestDist >> 1)) )
+            return;
+    }
+
+    *BestRd = RdValue;
+    *BestRate = Rate;
+    *BestDist = Dist;
+    *BestMode = mode;
+    *Error = Score;
+    mv->x = ThisMVector->x;
+    mv->y = ThisMVector->y;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickBetterMBModeAndMV
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 *FragsToCheck : Pointer to list of 6 blocks in this MB.
+ * CODING_MODE mode : Coding mode to evaluate.
+ * UINT8 *Frame : Pointer to MB in previous frame reconstruction.
+ * UINT32 MBrow : MB row.
+ * UINT32 MBcol : MB column.
+ * CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *Error : Best error found so far.
+ * MOTION_VECTOR *BestMV : Pointer to MV for best mode found so far.
+ * BOOL FullSearchEnabled : Flag as to whether the exhaustive MV search may be used.
+ * UINT32 *FourErrors : Pointer to errors for 4 Y-blocks in MB.
+ * UINT32 *BestRate : Pointer to best rate found so far.
+ * UINT32 *BestDist : Pointer to best distortion found so far.
+ * UINT32 *BestRd : Pointer to best RD-value found so far.
+ *
+ * OUTPUTS : CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *Error : Best error found so far.
+ * MOTION_VECTOR *BestMV : Pointer to MV for best mode found so far.
+ * UINT32 *FourErrors : Pointer to errors for 4 Y-blocks in MB.
+ * UINT32 *BestRate : Pointer to best rate found so far.
+ * UINT32 *BestDist : Pointer to best distortion found so far.
+ * UINT32 *BestRd : Pointer to best RD-value found so far.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Evaluates the specified coding mode and if better than
+ * the best mode found so far, updates the relevant variables.
+ * As part of the evaluation of the mode a motion vector
+ * search is carried out.
+ *
+ * SPECIAL NOTES : If rate-distortion mode is enabled then an estimate of
+ * the rate & distortion is found by a dummy coding of the
+ * MB that does not output to the bitstream.
+ * Any mode other than CODE_INTER_PLUS_MV is treated as a
+ * golden frame mode when forming the differential vector.
+ *
+ ****************************************************************************/
+void PickBetterMBModeAndMV
+(
+ CP_INSTANCE *cpi,
+ UINT32 *FragsToCheck,
+ CODING_MODE mode,
+ UINT8 *Frame,
+ UINT32 MBrow,
+ UINT32 MBcol,
+ CODING_MODE *BestMode,
+ UINT32 *Error,
+ MOTION_VECTOR *BestMV,
+ BOOL FullSearchEnabled,
+ UINT32 *FourErrors,
+ UINT32 *BestRate,
+ UINT32 *BestDist,
+ UINT32 *BestRd
+)
+{
+ UINT32 ThisError;
+ UINT32 EstMvCost;
+ UINT32 EstModeCost;
+ MOTION_VECTOR ThisMV;
+ MOTION_VECTOR InterMVectEx;
+ MOTION_VECTOR DifferentialVector;
+
+ // Get an estimate of the mode cost
+ if ( cpi->RdOpt )
+ EstModeCost = RdModeCost ( cpi, MBrow, MBcol, mode );
+ else
+ EstModeCost = modeCost ( cpi, MBrow, MBcol, mode );
+
+ if ( !cpi->RdOpt && ((EstModeCost * EPB) > *Error) ) // non-RD only: mode cost alone already exceeds the best error
+ return;
+
+ // If the best error is above the required threshold search for a new inter MV
+ // Use a mix of hierarchical and exhaustive searches for quick mode.
+ ThisError = GetMBMVInterError ( cpi, mode, Frame, &ThisMV, FourErrors );
+
+ // If we still do not have a good match try an exhaustive MBMV search
+ if ( FullSearchEnabled &&
+ (ThisError > cpi->ExhaustiveSearchThresh) &&
+ (*Error > cpi->ExhaustiveSearchThresh) )
+ {
+ UINT32 NewError;
+ NewError = GetMBMVExhaustiveSearch ( cpi, mode, Frame, &InterMVectEx, FourErrors ); // NOTE(review): FourErrors is handed to this search too, so it may hold this search's values even when NewError is rejected below -- confirm
+
+ // Is the Variance measure for the EX search better... If so then use it.
+ if ( NewError < ThisError )
+ {
+ ThisError = NewError;
+ ThisMV.x = InterMVectEx.x;
+ ThisMV.y = InterMVectEx.y;
+ }
+ }
+
+ cpi->bc.BitCounter = 0; // NOTE(review): purpose of this reset is not evident from this function -- confirm
+
+ // Convert the motion vector to a differential vector relative to "nearest"
+ DifferentialVector.x = ThisMV.x;
+ DifferentialVector.y = ThisMV.y;
+ if ( mode == CODE_INTER_PLUS_MV )
+ {
+ if ( cpi->pb.mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= cpi->pb.mbi.NearestInterMVect.x;
+ DifferentialVector.y -= cpi->pb.mbi.NearestInterMVect.y;
+ }
+ }
+ else // Golden frame
+ {
+ if ( cpi->pb.mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= cpi->pb.mbi.NearestGoldMVect.x;
+ DifferentialVector.y -= cpi->pb.mbi.NearestGoldMVect.y;
+ }
+ else
+ {
+ DifferentialVector.x = ThisMV.x; // redundant: DifferentialVector already holds ThisMV at this point
+ DifferentialVector.y = ThisMV.y;
+ }
+ }
+
+
+ // The MV error adjustment comprises a MVEPB which is a constant set according
+ // to the number of new motion vectors in the last frame and an estimate of the cost
+ // in bits(*64) of the vector.
+ EstMvCost = cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y];
+ ThisError += (cpi->MVErrorPerBit + (ThisError >> 13)) * EstMvCost;
+ ThisError += EstModeCost * EPB;
+
+ // Are we using RD
+ if ( cpi->RdOpt )
+ {
+ UINT32 Rate;
+ UINT32 Dist;
+ UINT32 RdValue;
+
+ // Trial-encode the MB with this mode and vector to obtain rate and distortion
+ SetMBMotionVectorsAndMode ( cpi, FragsToCheck, mode, &ThisMV );
+ EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+ Rate += EstModeCost;
+ Rate += EstMvCost;
+ Rate -= RdMvCostCorrection[cpi->LastFrameNewMvUsage]; // Apply mv re-use estimate correction. NOTE(review): Rate is unsigned, so this wraps if the correction exceeds Rate -- confirm table range
+
+ // Calculate Best RD value
+ RdValue = RdFunction ( cpi, Rate, Dist );
+
+ if ( (RdValue < *BestRd) || ( (ThisError<(*Error >> 1)) && (Dist<(*BestDist >> 1)) ) ) // better RD, or both error and distortion under half the current bests
+ {
+ *BestRd = RdValue;
+ *BestRate = Rate;
+ *BestDist = Dist;
+ *BestMode = mode;
+ *Error = ThisError;
+ BestMV->x = ThisMV.x;
+ BestMV->y = ThisMV.y;
+ }
+ }
+ else
+ {
+ // Is the improvement, if any, good enough to justify a new MV
+ if ( ThisError < *Error )
+ {
+ *BestMode = mode;
+ *Error = ThisError;
+ BestMV->x = ThisMV.x;
+ BestMV->y = ThisMV.y;
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickBetterBMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 *Frame : Pointer to block in previous frame reconstruction (NOT USED).
+ * UINT32 MBrow : MB row of parent MB.
+ * UINT32 MBcol : MB column of parent MB.
+ * UINT32 Block : Block number in its parent MB (0-3).
+ * CODING_MODE ThisMode : Coding mode to evaluate.
+ * MOTION_VECTOR *ThisMv : Pointer to MV of the mode being evaluated.
+ * UINT32 ThisError : Prediction error of the mode being evaluated (by value).
+ * CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *BestError : Pointer to best error found so far.
+ * MOTION_VECTOR *BestMv : Pointer to MV for best mode found so far.
+ * UINT32 *BestRdValue : Pointer to best RD-value found so far.
+ *
+ * OUTPUTS : CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *BestError : Pointer to best error found so far.
+ * MOTION_VECTOR *BestMv : Pointer to MV for best mode found so far.
+ * UINT32 *BestRdValue : Pointer to best RD-value found so far.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Evaluates the specified coding mode for coding the block
+ * and if better than the best mode found so far, updates
+ * the relevant variables.
+ *
+ * SPECIAL NOTES : If rate-distortion mode is enabled (cpi->RdOpt > 1) then an
+ * estimate of the rate & distortion is found by a dummy
+ * coding of the block that does not output to the bitstream.
+ * In that mode *BestError is set to the block distortion,
+ * not to the mode-cost-adjusted prediction error.
+ *
+ ****************************************************************************/
+void PickBetterBMode
+(
+    CP_INSTANCE *cpi,
+    UINT8 *Frame,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT32 Block,
+    CODING_MODE ThisMode,
+    MOTION_VECTOR *ThisMv,
+    UINT32 ThisError,
+    CODING_MODE *BestMode,
+    UINT32 *BestError,
+    MOTION_VECTOR *BestMv,
+    UINT32 *BestRdValue
+)
+{
+    UINT32 EstModeCost;
+
+    // Add the weighted mode cost to the supplied prediction error
+    EstModeCost = blockModeCost ( cpi, MBrow, MBcol, ThisMode );
+    ThisError += EstModeCost * EPB;
+
+    // Are we using RD or modified prediction error
+    if ( cpi->RdOpt > 1 )
+    {
+        UINT32 Rate = 0;
+        UINT32 Dist = 0;
+        UINT32 RdValue = 0;
+        PB_INSTANCE *pbi = &cpi->pb;
+
+        // Save the Macro Block and DC context
+        RdSaveMbContext ( cpi, MBcol );
+
+        // Set up relevant parts of the mbi structure
+        pbi->mbi.Mode = ThisMode;
+        pbi->mbi.Mv[Block].x = ThisMv->x;
+        pbi->mbi.Mv[Block].y = ThisMv->y;
+
+        // Blocks 0/2 use the left Above column and blocks 1/3 the right one;
+        // blocks 0/1 use LeftY[0] and blocks 2/3 use LeftY[1].
+        switch ( Block )
+        {
+        case 0:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[0];
+            break;
+        case 1:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[0];
+            break;
+        case 2:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[1];
+            break;
+        case 3:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[1];
+            break;
+        default:
+//sjlhack -- what the heck is this?????? If it is an error then return one... don't fake it out!!!!!!!!!!
+            // Error - Block should always be in range 0-3.
+            // NOTE(review): this sets up blockDxInfo[0], but the encode below
+            // still uses blockDxInfo[Block] -- confirm callers never pass Block > 3.
+            pbi->mbi.blockDxInfo[0].Above = &pbi->fc.AboveY[1+MBcol*2];
+            pbi->mbi.blockDxInfo[0].Left = &pbi->fc.LeftY[0];
+            break;
+        }
+
+
+        // Encode the block to get a rate and a distortion value
+        EncodeBlock_RD ( cpi, MBrow, MBcol, Block, &Rate, &Dist, FALSE );
+
+        // Restore the MB and dc context
+        RdRestoresMbContext ( cpi, MBcol );
+
+        // Add in the mode cost to the rate.
+        Rate += EstModeCost;
+
+        // Calculate Best RD value
+        RdValue = RdFunction ( cpi, Rate, Dist );
+
+        // Does this mode give an improvement in RD
+        if ( (RdValue < *BestRdValue) || (ThisError < (*BestError >> 1)) )
+        {
+            *BestMode = ThisMode;
+            // The original stored ThisError here and then immediately
+            // overwrote it with Dist; the dead store has been removed.
+            // NOTE(review): confirm Dist (rather than ThisError) is the value
+            // callers expect, since the test above compares ThisError with it.
+            *BestError = Dist;
+            *BestRdValue = RdValue;
+            BestMv->x = ThisMv->x;
+            BestMv->y = ThisMv->y;
+        }
+    }
+    else
+    {
+        // Non RD case.
+        if ( ThisError < *BestError )
+        {
+            *BestMode = ThisMode;
+            *BestError = ThisError;
+            BestMv->x = ThisMv->x;
+            BestMv->y = ThisMv->y;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickBetterBModeAndMV
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 *Frame : Pointer to block in previous frame reconstruction.
+ * UINT32 MBrow : MB row of parent MB.
+ * UINT32 MBcol : MB column of parent MB.
+ * UINT32 Block : Block number in its parent MB (0-3).
+ * CODING_MODE ThisMode : Coding mode to evaluate.
+ * CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *BestError : Pointer to best error found so far.
+ * MOTION_VECTOR *BestMV : Pointer to MV for best mode found so far.
+ * UINT32 *BestRdValue : Pointer to best RD-value found so far.
+ * BOOL FullSearchEnabled : Flag as to whether exhaustive MV search is enabled (NOT USED).
+ *
+ * OUTPUTS : CODING_MODE *BestMode : Pointer to best mode found so far.
+ * UINT32 *BestError : Pointer to best error found so far.
+ * MOTION_VECTOR *BestMV : Pointer to MV for best mode found so far.
+ * UINT32 *BestRdValue : Pointer to best RD-value found so far.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Evaluates the specified coding mode for coding the block
+ * and if better than the best mode found so far, updates
+ * the relevant variables. As part of the evaluation of the
+ * mode a motion vector search is carried out.
+ *
+ * SPECIAL NOTES : If rate-distortion mode is enabled (cpi->RdOpt > 1) then an
+ * estimate of the rate & distortion is found by a dummy
+ * coding of the block that does not output to the bitstream.
+ * In that mode *BestError is set to the block distortion,
+ * not to the cost-adjusted prediction error.
+ * The early exit on mode cost applies only when cpi->RdOpt
+ * is 0, so with cpi->RdOpt == 1 the search always runs but
+ * the non-RD comparison is used.
+ *
+ ****************************************************************************/
+void PickBetterBModeAndMV
+(
+    CP_INSTANCE *cpi,
+    UINT8 *Frame,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT32 Block,
+    CODING_MODE ThisMode,
+    CODING_MODE *BestMode,
+    UINT32 *BestError,
+    MOTION_VECTOR *BestMV,
+    UINT32 *BestRdValue,
+    BOOL FullSearchEnabled
+)
+{
+    UINT32 ThisError;
+    UINT32 EstMvCost;
+    UINT32 EstModeCost;
+    MOTION_VECTOR ThisMV;
+    MOTION_VECTOR DifferentialVector;
+
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    // Weight the mode according to last mode
+    EstModeCost = blockModeCost ( cpi, MBrow, MBcol, ThisMode );
+    ThisError = EstModeCost * EPB;
+
+    // Non-RD only: the mode cost alone already exceeds the best error
+    if ( !cpi->RdOpt && (ThisError > *BestError) )
+        return;
+
+    // Use the exhaustive block search when the best error so far is above
+    // the threshold, otherwise the ordinary block search.
+    if ( *BestError > cpi->BlockExhaustiveSearchThresh )
+    {
+        ThisError += GetBMVExhaustiveSearch( cpi, Frame, &ThisMV, Block);
+    }
+    else
+    {
+        ThisError += GetBMVSearch( cpi, Frame, &ThisMV, Block );
+    }
+
+    // Convert the motion vector to a differential vector relative to "nearest"
+    DifferentialVector.x = ThisMV.x;
+    DifferentialVector.y = ThisMV.y;
+    if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+        DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+    }
+
+    // Add the weighted cost of coding the differential vector
+    EstMvCost = cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y];
+    ThisError += (cpi->MVErrorPerBit + (ThisError >> 13)) * EstMvCost;
+
+    // Are we using RD or modified prediction error
+    if ( cpi->RdOpt > 1 )
+    {
+        UINT32 Rate = 0;
+        UINT32 Dist = 0;
+        UINT32 RdValue = 0;
+
+        // Save the Macro Block and DC context
+        RdSaveMbContext ( cpi, MBcol );
+
+        // Set up relevant parts of the mbi structure
+        pbi->mbi.Mode = ThisMode;
+        pbi->mbi.Mv[Block].x = ThisMV.x;
+        pbi->mbi.Mv[Block].y = ThisMV.y;
+
+        // Blocks 0/2 use the left Above column and blocks 1/3 the right one;
+        // blocks 0/1 use LeftY[0] and blocks 2/3 use LeftY[1].
+        switch ( Block )
+        {
+        case 0:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[0];
+            break;
+        case 1:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[0];
+            break;
+        case 2:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[1];
+            break;
+        case 3:
+            pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+            pbi->mbi.blockDxInfo[Block].Left = &pbi->fc.LeftY[1];
+            break;
+        default:
+            // Block outside 0-3: context pointers are left as they were
+            break;
+        }
+
+        // Encode the block to get a rate and a distortion value
+        EncodeBlock_RD ( cpi, MBrow, MBcol, Block, &Rate, &Dist, FALSE );
+
+        // Restore the MB and dc context
+        RdRestoresMbContext ( cpi, MBcol );
+
+        // Add in the mode and mv costs to the rate.
+        Rate += EstModeCost;
+        Rate += EstMvCost;
+
+        // Calculate Best RD value
+        RdValue = RdFunction ( cpi, Rate, Dist );
+
+        // Does this mode give an improvement in RD
+        if ( (RdValue < *BestRdValue) || (ThisError < (*BestError >> 1)) )
+        {
+            *BestMode = ThisMode;
+            // The original stored ThisError here and then immediately
+            // overwrote it with Dist; the dead store has been removed.
+            // NOTE(review): confirm Dist (rather than ThisError) is the value
+            // callers expect, since the test above compares ThisError with it.
+            *BestError = Dist;
+            *BestRdValue = RdValue;
+            BestMV->x = ThisMV.x;
+            BestMV->y = ThisMV.y;
+        }
+    }
+    else
+    {
+        // Non RD case.
+        if ( ThisError < *BestError )
+        {
+            *BestMode = ThisMode;
+            *BestError = ThisError;
+            BestMV->x = ThisMV.x;
+            BestMV->y = ThisMV.y;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickBlockMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : MB row of parent MB.
+ * UINT32 MBcol : MB column of parent MB.
+ * UINT32 Block : Block number in its parent MB (0-3).
+ *
+ * OUTPUTS : CODING_MODE *BestMode : Receives the chosen coding mode.
+ * MOTION_VECTOR *BestMVect : Receives the MV of the chosen mode.
+ * UINT32 *BestError : Receives the error of the chosen mode.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Picks the best coding mode for a block. Candidates are
+ * tried in this order: no MV, nearest MV (if non-zero),
+ * near MV (if non-zero), and finally a new MV found by
+ * search. The search is tried only when cpi->RdOpt > 1 or
+ * the best error so far exceeds
+ * cpi->MinErrorForBlockMVSearch.
+ *
+ * SPECIAL NOTES : The incoming values of the three outputs are not read.
+ *
+ ****************************************************************************/
+void PickBlockMode
+(
+    CP_INSTANCE *cpi,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT32 Block,
+    CODING_MODE *BestMode,
+    MOTION_VECTOR *BestMVect,
+    UINT32 *BestError
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    MOTION_VECTOR NoMotion = { 0, 0 };
+    MOTION_VECTOR WinnerMv = { 0, 0 };
+    CODING_MODE WinnerMode = CODE_INTER_NO_MV;
+    UINT32 WinnerError = HUGE_ERROR;
+    UINT32 WinnerRd = HUGE_ERROR;
+
+    // Candidate 1: zero motion vector
+    PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block, CODE_INTER_NO_MV,
+                      &NoMotion, cpi->ZeroError[Block],
+                      &WinnerMode, &WinnerError, &WinnerMv, &WinnerRd );
+
+    // Candidate 2: nearest MV, only when it is not the zero vector
+    if ( (pbi->mbi.NearestInterMVect.x != 0) || (pbi->mbi.NearestInterMVect.y != 0) )
+    {
+        PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block, CODE_INTER_NEAREST_MV,
+                          &pbi->mbi.NearestInterMVect, cpi->NearestError[Block],
+                          &WinnerMode, &WinnerError, &WinnerMv, &WinnerRd );
+    }
+
+    // Candidate 3: near MV, only when it is not the zero vector
+    if ( (pbi->mbi.NearInterMVect.x != 0) || (pbi->mbi.NearInterMVect.y != 0) )
+    {
+        PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block, CODE_INTER_NEAR_MV,
+                          &pbi->mbi.NearInterMVect, cpi->NearError[Block],
+                          &WinnerMode, &WinnerError, &WinnerMv, &WinnerRd );
+    }
+
+    // Candidate 4: search for a new MV
+    if ( (cpi->RdOpt > 1) || (WinnerError > cpi->MinErrorForBlockMVSearch) )
+    {
+        PickBetterBModeAndMV ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block, CODE_INTER_PLUS_MV,
+                               &WinnerMode, &WinnerError, &WinnerMv, &WinnerRd, TRUE );
+    }
+
+    // Hand the winner back to the caller
+    *BestMode = WinnerMode;
+    *BestError = WinnerError;
+    BestMVect->x = WinnerMv.x;
+    BestMVect->y = WinnerMv.y;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickMacroBlockMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : MB row number (border-inclusive: BORDER_MBS is
+ * subtracted below when indexing fragments and source).
+ * UINT32 MBcol : MB column number (border-inclusive, as above).
+ *
+ * OUTPUTS : UINT32 *InterError : Running total. The best-mode error of this MB,
+ * less its mode cost, >>8 and clipped to
+ * MIN_ERR..MAX_ERR, is ADDED to it.
+ * UINT32 *IntraError : Running total. The intra error of this MB,
+ * >>8 and clipped to MIN_ERR..MAX_ERR, is ADDED to it.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Picks the best coding mode for a macro-block.
+ *
+ * Order of work as written below:
+ * 1. Compute fragment indices and source/recon offsets and strides
+ * for the four Y blocks (and U/V when cpi->RdOpt is set).
+ * 2. Score CODE_INTRA and use it as the initial best.
+ * 3. Offer the candidates with known vectors: last-frame zero,
+ * nearest and near MVs (skipped when cpi->GfRecoveryFrame),
+ * then golden-frame zero, nearest and near MVs.
+ * 4. If the best error is still above the relevant threshold,
+ * offer a new-MV search on the last frame and on the golden frame.
+ * 5. Consider CODE_INTER_FOURMV (one mode/vector per Y block).
+ * 6. Update the frame statistics, store the chosen mode and
+ * vectors, and (RdOpt only) restore the saved DC contexts.
+ *
+ * SPECIAL NOTES : Writes pbi->mbi.blockDxInfo[], pbi->mbi.Interlaced,
+ * pbi->MBInterlaced[], pbi->predictionMode[] and
+ * pbi->MBMotionVector[] for this MB, sets cpi->MBCodingMode,
+ * and updates cpi->ErrorBins[], cpi->MotionScore, cpi->ModeDist[]
+ * and the mode / MV counters and cost estimates in cpi.
+ * The PickBetter* helpers receive the running best by address;
+ * their exact update rule is not visible in this function.
+ *
+ ****************************************************************************/
+void PickMacroBlockMode
+(
+ CP_INSTANCE *cpi,
+ UINT32 MBrow,
+ UINT32 MBcol,
+ UINT32 *InterError,
+ UINT32 *IntraError
+)
+{
+ UINT32 i;
+ UINT32 Temp;
+ UINT32 TempError[4]; // Passed to the golden-frame candidate calls only; never read back in this function
+ UINT32 BestRate; // The "rate" of the current best mode choice (when RD enabled else unused)
+ UINT32 BestDist; // The "distortion" of the current best mode choice (when RD enabled else unused)
+ UINT32 BestRd; // The best RD compromise so far (only assigned here when cpi->RdOpt is set)
+ int type, type2; // Written by VP6_FindNearestandNextNearest; type is recomputed before use, type2 is never read
+ UINT32 EstModeCost;
+ UINT32 ThisError;
+ UINT32 ThisIntraError;
+ UINT32 FragsToCheck[6]; // Fragment indices of this MB: 4 x Y, then U, then V
+ MOTION_VECTOR FourMVect[6]; // Four-MV candidate vectors: [0..3] per Y block, [4]/[5] derived for U/V
+ CODING_MODE FourMode[6]; // Four-MV candidate modes; only [0..3] are used
+ MOTION_VECTOR DifferentialVector;
+
+ MOTION_VECTOR MVect = { 0, 0 };
+ MOTION_VECTOR ZeroMVect = { 0, 0 };
+ UINT32 BestError = HUGE_ERROR;
+ CODING_MODE BestMode = CODE_INTRA;
+ PB_INSTANCE *pbi = &cpi->pb;
+
+ UINT32 YFragIndex = (MBrow-BORDER_MBS) * pbi->HFragments * 2 + (MBcol-BORDER_MBS)*2;
+ UINT32 UVFragOffset = (MBrow-BORDER_MBS) * pbi->HFragments / 2 + (MBcol-BORDER_MBS);
+ UINT32 UFragIndex = pbi->YPlaneFragments + UVFragOffset;
+ UINT32 VFragIndex = pbi->YPlaneFragments + pbi->UVPlaneFragments + UVFragOffset;
+
+ // Intra and inter errors for this mb ignoring mode cost corrections etc.
+ UINT32 ThisMbIntraErr;
+ UINT32 ThisMbInterErr;
+
+//note: should be able to move FragsToCheck into the blockDxInfo struct
+//then in the MB loop, we should be able to inc the values instead of doing these multiplies
+//it may not affect the pc performance, but it may help other processors
+ FragsToCheck[0] = YFragIndex;
+ FragsToCheck[1] = YFragIndex+1;
+ FragsToCheck[2] = YFragIndex+pbi->HFragments;
+ FragsToCheck[3] = YFragIndex+pbi->HFragments+1;
+ FragsToCheck[4] = UFragIndex;
+ FragsToCheck[5] = VFragIndex;
+
+ // Root offsets for this MB (source is indexed without the border, recon with it)
+ pbi->mbi.blockDxInfo[0].Source = pbi->YDataOffset + 16*(MBrow-BORDER_MBS) *pbi->Configuration.VideoFrameWidth + 16*(MBcol-BORDER_MBS);
+ pbi->mbi.blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + 16*MBrow * pbi->Configuration.YStride + 16*MBcol;
+
+ // AWG Add function here to compute variance for each block in MB
+ // in progressive & interlaced mode. Use the resulting values to
+ // determine which coding pattern to use from (initially):
+ // (P,P,P,P), (P,I,P,I), (I,P,I,P), (I,I,I,I)
+ // Selected pattern encoded instead of interlaced flag.
+
+ // Values that depend on whether or not we are coding an interlaced block.
+ // As written, every MB is marked interlaced whenever the stream is configured interlaced.
+ if ( pbi->Configuration.Interlaced /*&& GetMBFrameVertVar(cpi) > GetMBFieldVertVar(cpi)*/ )
+ {
+ pbi->mbi.Interlaced = 1;
+ pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 1;
+
+ pbi->mbi.blockDxInfo[0].CurrentReconStride =
+ pbi->mbi.blockDxInfo[1].CurrentReconStride =
+ pbi->mbi.blockDxInfo[2].CurrentReconStride =
+ pbi->mbi.blockDxInfo[3].CurrentReconStride = 2 * pbi->Configuration.YStride;
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = 2 * pbi->Configuration.VideoFrameWidth;
+
+ pbi->mbi.blockDxInfo[1].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + 8;
+ pbi->mbi.blockDxInfo[2].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + pbi->Configuration.YStride; // one line down: blocks 2/3 start on the next line and use the doubled stride
+ pbi->mbi.blockDxInfo[3].thisRecon = pbi->mbi.blockDxInfo[2].thisRecon + 8;
+
+ pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+ pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + pbi->Configuration.VideoFrameWidth;
+ pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+ }
+ else
+ {
+ pbi->mbi.Interlaced = 0;
+ pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 0;
+
+ pbi->mbi.blockDxInfo[0].CurrentReconStride =
+ pbi->mbi.blockDxInfo[1].CurrentReconStride =
+ pbi->mbi.blockDxInfo[2].CurrentReconStride =
+ pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+
+ pbi->mbi.blockDxInfo[1].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + 8;
+ pbi->mbi.blockDxInfo[2].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3); // eight lines down
+ pbi->mbi.blockDxInfo[3].thisRecon = pbi->mbi.blockDxInfo[2].thisRecon + 8;
+
+ pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+ pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3);
+ pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+ }
+
+ // Calculate the U and V pointers (not affected by interlaced mode) for use in Rd code.
+ if ( cpi->RdOpt )
+ {
+ pbi->mbi.blockDxInfo[4].Source = pbi->UDataOffset + ((MBrow-BORDER_MBS) * 8) * (pbi->Configuration.VideoFrameWidth/2) + ((MBcol-BORDER_MBS) * 8);
+ pbi->mbi.blockDxInfo[5].Source = pbi->VDataOffset + ((MBrow-BORDER_MBS) * 8) * (pbi->Configuration.VideoFrameWidth/2) + ((MBcol-BORDER_MBS) * 8);
+
+ pbi->mbi.blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + (MBrow * 8) * pbi->Configuration.UVStride + (MBcol * 8);
+ pbi->mbi.blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + (MBrow * 8) * pbi->Configuration.UVStride + (MBcol * 8);
+ }
+
+ // What are the two nearest motion vectors. (Called with 1 and then 2; presumably last frame then golden frame - TODO confirm against VP6_FindNearestandNextNearest)
+ VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 1, &type );
+ VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 2, &type2 );
+
+ // Look at the intra coding error.
+ ThisIntraError = GetMBIntraError ( cpi );
+
+ // Keep a cumulative Intra error score for the frame (clip individual values to an allowed range)
+ Temp = ThisIntraError >> 8;
+ if ( Temp < MIN_ERR )
+ Temp = MIN_ERR;
+ else if ( Temp > MAX_ERR )
+ Temp = MAX_ERR;
+ *IntraError += Temp;
+ ThisMbIntraErr = Temp;
+
+ EstModeCost = RdModeCost ( cpi, MBrow, MBcol, CODE_INTRA );
+ ThisError = EstModeCost*EPB;
+ ThisIntraError += ThisError; // from here on ThisIntraError includes the intra mode cost
+
+ // To start with set best mode etc to Intra values
+ BestMode = CODE_INTRA;
+ BestError = ThisIntraError; // NOTE(review): dead store - replaced by the weighted value just below
+
+ // Apply Intra weighting factors to best error
+ BestError = (ThisIntraError >> 7) * IntraFactors[pbi->quantizer->FrameQIndex];
+ if ( cpi->MBCodingMode != CODE_INTRA ) // cpi->MBCodingMode still holds the mode chosen for the previous MB; only half the penalty is added when that was intra
+ BestError += (cpi->IntraThresh);
+ else
+ BestError += (cpi->IntraThresh >> 1);
+
+ // Set Best Rate and Dist if appropriate.
+ if ( cpi->RdOpt )
+ {
+ SetMBMotionVectorsAndMode ( cpi, FragsToCheck, CODE_INTRA, &ZeroMVect );
+ EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &BestRate, &BestDist );
+ BestRate += EstModeCost;
+
+ // Calculate a BestRd value for Intra
+ BestRd = RdFunction ( cpi, BestRate, BestDist );
+ }
+
+ // pick the best of the set of inter modes with known motion vectors
+ // (all last-frame candidates are skipped when cpi->GfRecoveryFrame is set)
+ if ( !cpi->GfRecoveryFrame )
+ {
+ PickBetterMBMode ( cpi,
+ FragsToCheck,
+ CODE_INTER_NO_MV,
+ &ZeroMVect,
+ MBrow,
+ MBcol,
+ pbi->LastFrameRecon,
+ &BestMode,
+ &BestError,
+ &MVect,
+ cpi->ZeroError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ // Nearest / near candidates are only tried when the vector is non-zero (a zero vector duplicates the no-MV candidate above)
+ if( (!cpi->GfRecoveryFrame) && (pbi->mbi.NearestInterMVect.x || pbi->mbi.NearestInterMVect.y) )
+ {
+ PickBetterMBMode ( cpi,
+ FragsToCheck,
+ CODE_INTER_NEAREST_MV,
+ &pbi->mbi.NearestInterMVect,
+ MBrow,
+ MBcol,
+ pbi->LastFrameRecon,
+ &BestMode,
+ &BestError,
+ &MVect,
+ cpi->NearestError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ if( (!cpi->GfRecoveryFrame) && ( pbi->mbi.NearInterMVect.x || pbi->mbi.NearInterMVect.y) )
+ {
+ PickBetterMBMode ( cpi,
+ FragsToCheck,
+ CODE_INTER_NEAR_MV,
+ &pbi->mbi.NearInterMVect,
+ MBrow,
+ MBcol,
+ pbi->LastFrameRecon,
+ &BestMode,
+ &BestError,
+ &MVect,
+ cpi->NearError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ PickBetterMBMode ( cpi, // golden-frame candidates are tried regardless of GfRecoveryFrame
+ FragsToCheck,
+ CODE_USING_GOLDEN,
+ &ZeroMVect,
+ MBrow,
+ MBcol,
+ pbi->GoldenFrame,
+ &BestMode,
+ &BestError,
+ &MVect,
+ TempError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+
+ if(pbi->mbi.NearestGoldMVect.x || pbi->mbi.NearestGoldMVect.y)
+ {
+ PickBetterMBMode ( cpi,
+ FragsToCheck,
+ CODE_GOLD_NEAREST_MV,
+ &pbi->mbi.NearestGoldMVect,
+ MBrow,
+ MBcol,
+ pbi->GoldenFrame,
+ &BestMode,
+ &BestError,
+ &MVect,
+ TempError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ if ( pbi->mbi.NearGoldMVect.x || pbi->mbi.NearGoldMVect.y )
+ {
+ PickBetterMBMode ( cpi,
+ FragsToCheck,
+ CODE_GOLD_NEAR_MV,
+ &pbi->mbi.NearGoldMVect,
+ MBrow,
+ MBcol,
+ pbi->GoldenFrame,
+ &BestMode,
+ &BestError,
+ &MVect,
+ TempError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ // Histogram of the best error so far in units of (1<<17), top bin saturating. Originally marked "DEBUG Code...", but PickModes reads cpi->ErrorBins to derive search thresholds when QuickCompress == 2.
+ {
+ int a = (BestError >> 17);
+ if ( a>127 )
+ cpi->ErrorBins[127]++;
+ else
+ cpi->ErrorBins[a]++;
+ }
+
+ // (Note: ignoring this threshold for RD doesn't seem to help much)
+ if ( (!cpi->GfRecoveryFrame) && (BestError > cpi->MinErrorForMacroBlockMVSearch) )
+ {
+ PickBetterMBModeAndMV ( cpi,
+ FragsToCheck,
+ CODE_INTER_PLUS_MV,
+ pbi->LastFrameRecon,
+ MBrow,
+ MBcol,
+ &BestMode,
+ &BestError,
+ &MVect,
+ TRUE,
+ cpi->BestError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ // (Note: ignoring this threshold for RD doesn't seem to help much)
+ if ( BestError > cpi->MinErrorForGoldenMVSearch )
+ {
+ PickBetterMBModeAndMV ( cpi,
+ FragsToCheck,
+ CODE_GOLDEN_MV,
+ pbi->GoldenFrame,
+ MBrow,
+ MBcol,
+ &BestMode,
+ &BestError,
+ &MVect,
+ FALSE,
+ TempError,
+ &BestRate,
+ &BestDist,
+ &BestRd );
+ }
+
+ // Finally... If the best error is still too high then consider the 4MV mode
+ EstModeCost = RdModeCost(cpi,MBrow,MBcol,CODE_INTER_FOURMV);
+ ThisError = EstModeCost * EPB;
+
+ // Only consider 4-Mode mode if the best prediction error so far is above a threshold
+ // (Note that ignoring this threshold for RD doesn't seem to help much)
+ // At this point ThisError holds only the cost of signalling four-MV mode.
+ if ( (!cpi->GfRecoveryFrame) && ((ThisError + cpi->MinImprovementForFourMV) < BestError) )
+ {
+ UINT32 Error;
+ UINT32 RdValue;
+ UINT32 Rate = 0;
+ UINT32 Dist = 0;
+
+ for ( i=0; i<4; i++ )
+ {
+ PickBlockMode ( cpi, MBrow, MBcol, i, &FourMode[i], &FourMVect[i], &Error );
+ ThisError += Error; // accumulate the four per-block errors on top of the mode cost
+ }
+
+ // Calculate the UV vectors as the average of the Y plane ones.
+ // First .x component (sum/4 rounded to nearest; the +2 / -2 bias rounds ties away from zero under truncating division)
+ FourMVect[4].x = FourMVect[0].x + FourMVect[1].x + FourMVect[2].x + FourMVect[3].x;
+ if ( FourMVect[4].x >= 0 )
+ FourMVect[4].x = (FourMVect[4].x + 2) / 4;
+ else
+ FourMVect[4].x = (FourMVect[4].x - 2) / 4;
+ FourMVect[5].x = FourMVect[4].x;
+
+ // Then .y component
+ FourMVect[4].y = FourMVect[0].y + FourMVect[1].y + FourMVect[2].y + FourMVect[3].y;
+ if ( FourMVect[4].y >= 0 )
+ FourMVect[4].y = (FourMVect[4].y + 2) / 4;
+ else
+ FourMVect[4].y = (FourMVect[4].y - 2) / 4;
+ FourMVect[5].y = FourMVect[4].y;
+
+ // Do Rd for selected modes
+ if ( cpi->RdOpt )
+ {
+ // Set up the individual block modes and motion vector structures
+ pbi->mbi.Mode = CODE_INTER_FOURMV;
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[0], &FourMVect[0], FourMode[0] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[1], &FourMVect[1], FourMode[1] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[2], &FourMVect[2], FourMode[2] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[3], &FourMVect[3], FourMode[3] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[4], &FourMVect[4], CODE_INTER_FOURMV );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[5], &FourMVect[5], CODE_INTER_FOURMV );
+
+ // Now calculate Rate and distortion
+ EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+ Rate += EstModeCost; // Add in the cost of specifying 4-Mode mode in the first place
+
+ // Add in the cost of the 4 individual modes and Mvs
+ for ( i=0; i<4; i++ )
+ {
+ Rate += blockModeCost ( cpi, MBrow, MBcol, FourMode[i] );
+ if ( FourMode[i] == CODE_INTER_PLUS_MV )
+ {
+ // New vectors are costed as a difference from the nearest vector when NearestMvIndex is below MAX_NEAREST_ADJ_INDEX, otherwise as-is
+ DifferentialVector.x = FourMVect[i].x;
+ DifferentialVector.y = FourMVect[i].y;
+ if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+ DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+ }
+
+ Rate += cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y]; // index may be negative: presumably the pointers address the middle of a cost table - TODO confirm
+ }
+ }
+
+ // Finally plug the combined Rate and distortion values into the RD function.
+ RdValue = RdFunction ( cpi, Rate, Dist );
+
+ if ( RdValue < BestRd )
+ {
+ BestRd = RdValue;
+ BestRate = Rate;
+ BestDist = Dist;
+ BestError = ThisError;
+ BestMode = CODE_INTER_FOURMV;
+ }
+ }
+ else if ( (ThisError + cpi->MinImprovementForFourMV) < BestError ) // same test as the gate above, but ThisError now includes the four block errors
+ {
+ BestError = ThisError;
+ BestMode = CODE_INTER_FOURMV;
+ }
+ }
+
+ // Keep a cumulative best error score for the frame (clip individual values to an allowed range)
+ // For The Intra mode case use ThisIntraError not BestError because BestError has been modified
+ // by intra weighting factors and could be less than mode cost.
+ if ( BestMode != CODE_INTRA )
+ Temp = ( (BestError - (modeCost( cpi, MBrow, MBcol, BestMode )*EPB)) >>8 ); // NOTE(review): BestError is unsigned, so this presumably wraps if BestError is below the mode cost - confirm modeCost() range
+ else
+ Temp = ( (ThisIntraError - (modeCost( cpi, MBrow, MBcol, BestMode )*EPB)) >>8 );
+
+ if ( Temp < MIN_ERR )
+ Temp = MIN_ERR;
+ else if ( Temp > MAX_ERR )
+ Temp = MAX_ERR;
+ *InterError += Temp;
+ ThisMbInterErr = Temp;
+
+ // Record of intra and inter error for motion modes
+ if ( (BestMode != CODE_INTRA) && (BestMode != CODE_INTER_NO_MV) && (BestMode != CODE_USING_GOLDEN) )
+ {
+ // Keep a record of motion related intra and inter prediction errors
+ cpi->MotionIntraErr += ThisMbIntraErr;
+ cpi->MotionInterErr += ThisMbInterErr;
+ }
+
+ // Keep running total of the approximate cost of the chosen mode / MVs etc
+ cpi->ModeMvCostEstimate += modeCost ( cpi, MBrow, MBcol, BestMode );
+
+ // keep track of how many times this mode is the same as the last one we encountered
+ // (type is recomputed here from the last-frame nearest/near vectors, replacing the value returned by VP6_FindNearestandNextNearest)
+ if ( (pbi->mbi.NearestInterMVect.x == 0) && (pbi->mbi.NearestInterMVect.y == 0) )
+ type = NONEAREST_MACROBLOCK;
+ else if ( (pbi->mbi.NearInterMVect.x == 0) && (pbi->mbi.NearInterMVect.y == 0) )
+ type = NONEAR_MACROBLOCK;
+ else
+ type = MACROBLOCK;
+
+ //type = 0;
+ cpi->CountModeSameAsLast[type][BestMode] += (cpi->MBCodingMode == BestMode);
+ cpi->CountModeDiffFrLast[type][BestMode] += (cpi->MBCodingMode != BestMode);
+ cpi->MBModeCount[type][BestMode]++;
+ cpi->MBCodingMode = BestMode; // becomes the "last mode" seen by the next MB
+
+ switch ( BestMode )
+ {
+ case CODE_INTER_FOURMV:
+
+ for ( i=0; i<4; i++ )
+ {
+ cpi->BModeCount[FourMode[i]]++;
+
+ // Running total modeMv costs
+ cpi->ModeMvCostEstimate += blockModeCost ( cpi, MBrow, MBcol, FourMode[i] );
+ }
+
+ // Set up mb mode and mv structures for four mv
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[0], &FourMVect[0], FourMode[0] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[1], &FourMVect[1], FourMode[1] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[2], &FourMVect[2], FourMode[2] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[3], &FourMVect[3], FourMode[3] );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[4], &FourMVect[4], CODE_INTER_FOURMV );
+ SetFragMotionVectorAndMode ( pbi, FragsToCheck[5], &FourMVect[5], CODE_INTER_FOURMV );
+
+ for ( i=0; i<4; i++ )
+ {
+ if ( FourMode[i] == CODE_INTER_PLUS_MV )
+ {
+ DifferentialVector.x = FourMVect[i].x;
+ DifferentialVector.y = FourMVect[i].y;
+ if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+ DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+ }
+
+ cpi->MvBaselineDist[0][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.x]++; // histogram of coded MV differences, offset so that zero lands at MV_ENTROPY_TOKENS/2
+ cpi->MvBaselineDist[1][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.y]++;
+
+ // Running total of estimated mode+mv costs
+ cpi->ModeMvCostEstimate += (cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y]);
+
+ // Store mv stats
+ cpi->FrameMvStats.NumMvs++;
+ cpi->FrameMvStats.SumAbsX += abs(FourMVect[i].x);
+ cpi->FrameMvStats.SumAbsY += abs(FourMVect[i].y);
+ cpi->FrameMvStats.SumX += FourMVect[i].x;
+ cpi->FrameMvStats.SumY += FourMVect[i].y;
+ cpi->FrameMvStats.SumXSq += FourMVect[i].x * FourMVect[i].x;
+ cpi->FrameMvStats.SumYSq += FourMVect[i].y * FourMVect[i].y;
+ }
+ }
+
+ // Update the new MV and Mode counters (always by 4, however many of the blocks actually use a new MV)
+ cpi->FrameNewMvCounter += 4;
+ cpi->FrameModeCounter += 4;
+
+ // Update KeyFrameIndicator
+ if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) && // Exclude top and bottom for "letterbox in 4:3" video
+ (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+ cpi->MotionScore ++;
+ break;
+
+ case CODE_INTRA:
+ SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &ZeroMVect );
+
+ // Update KeyFrameIndicator (intra MBs in the middle band always count)
+ if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) ) // Exclude top and bottom for "letterbox in 4:3" video
+ cpi->MotionScore++;
+
+ // Update the Mode counter
+ cpi->FrameModeCounter++;
+
+ break;
+
+ case CODE_INTER_PLUS_MV:
+ case CODE_GOLDEN_MV:
+
+ // Work out the vector difference that is costed and histogrammed: relative to the nearest last-frame or golden vector when the matching index is below MAX_NEAREST_ADJ_INDEX
+ DifferentialVector.x = MVect.x;
+ DifferentialVector.y = MVect.y;
+ if ( BestMode == CODE_INTER_PLUS_MV )
+ {
+ if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+ DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+ }
+ else
+ {
+ DifferentialVector.x = MVect.x; // redundant: already holds MVect from the assignment above
+ DifferentialVector.y = MVect.y;
+ }
+ }
+ else
+ {
+ if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+ {
+ DifferentialVector.x -= pbi->mbi.NearestGoldMVect.x;
+ DifferentialVector.y -= pbi->mbi.NearestGoldMVect.y;
+ }
+ else
+ {
+ DifferentialVector.x = MVect.x; // redundant: already holds MVect from the assignment above
+ DifferentialVector.y = MVect.y;
+ }
+ }
+
+ SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &MVect );
+
+ // Update KeyFrameIndicator
+ if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) && // Exclude top and bottom for "letterbox in 4:3" video
+ (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+ cpi->MotionScore++;
+
+ cpi->MvBaselineDist[0][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.x]++;
+ cpi->MvBaselineDist[1][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.y]++;
+
+ // Update the new MV and Mode counters
+ cpi->FrameNewMvCounter++;
+ cpi->FrameModeCounter++;
+
+ // Running total of estimated mode+mv costs
+ cpi->ModeMvCostEstimate += (cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y]);
+
+ // Store mv stats (exclude GF)
+ if ( BestMode == CODE_INTER_PLUS_MV)
+ {
+ cpi->FrameMvStats.NumMvs++;
+ cpi->FrameMvStats.SumAbsX += abs(MVect.x);
+ cpi->FrameMvStats.SumAbsY += abs(MVect.y);
+ cpi->FrameMvStats.SumX += MVect.x;
+ cpi->FrameMvStats.SumY += MVect.y;
+ cpi->FrameMvStats.SumXSq += MVect.x * MVect.x;
+ cpi->FrameMvStats.SumYSq += MVect.y * MVect.y;
+ }
+ break;
+
+ default: // every remaining mode: no-MV, nearest/near and the golden equivalents (no new vector is coded)
+ SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &MVect );
+
+ // Update KeyFrameIndicator
+ if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) && // Exclude top and bottom for "letterbox in 4:3" video
+ (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+ cpi->MotionScore++;
+
+ // Update the Mode counters
+ cpi->FrameModeCounter++;
+
+ // Store mv stats (exclude GF mv modes)
+ if ( (BestMode == CODE_INTER_NEAREST_MV) || (BestMode == CODE_INTER_NEAR_MV) )
+ {
+ cpi->FrameMvStats.NumMvs++;
+ cpi->FrameMvStats.SumAbsX += abs(MVect.x);
+ cpi->FrameMvStats.SumAbsY += abs(MVect.y);
+ cpi->FrameMvStats.SumX += MVect.x;
+ cpi->FrameMvStats.SumY += MVect.y;
+ cpi->FrameMvStats.SumXSq += MVect.x * MVect.x;
+ cpi->FrameMvStats.SumYSq += MVect.y * MVect.y;
+ }
+
+ break;
+ }
+
+ // Keep a record of the distribution of mode choices in this frame
+ cpi->ModeDist[BestMode]++;
+ pbi->predictionMode[MBOffset(MBrow,MBcol)] = BestMode;
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = pbi->FragInfo[FragsToCheck[3]].MVectorX; // the MB-level vector is taken from the fourth Y fragment (FragsToCheck[3])
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = pbi->FragInfo[FragsToCheck[3]].MVectorY;
+
+ // If Rd Opt is enabled then restore the macro block Dc Prediction context for chosen mode.
+ // The saved Above / Left / LastDc copies for BestMode are written back through their stored pointers.
+ if ( cpi->RdOpt )
+ {
+ for ( i=0; i<6; i++ )
+ {
+ memcpy ( cpi->MbDcContexts[BestMode][i].AbovePtr, &cpi->MbDcContexts[BestMode][i].Above, sizeof(BLOCK_CONTEXT) );
+ memcpy ( cpi->MbDcContexts[BestMode][i].LeftPtr, &cpi->MbDcContexts[BestMode][i].Left, sizeof(BLOCK_CONTEXT) );
+ memcpy ( cpi->MbDcContexts[BestMode][i].LastDcPtr, &cpi->MbDcContexts[BestMode][i].LastDc, sizeof(Q_LIST_ENTRY) );
+
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PickModes
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : UINT32 *InterError : Reset to 0, then accumulated per macro-block.
+ * UINT32 *IntraError : Reset to 0, then accumulated per macro-block.
+ *
+ * RETURNS : UINT32: 0 Always.
+ *
+ * FUNCTION : Per-frame mode decision driver. Resets the per-frame
+ * statistics, derives the error-per-bit values and search
+ * thresholds from the frame Q index and cpi->QuickCompress,
+ * then calls PickMacroBlockMode for every non-border
+ * macro-block and finally updates the interlaced
+ * probability and the running pick-mode time average.
+ *
+ * SPECIAL NOTES : QuickCompress values 0 and 3 force cpi->RdOpt to 2.
+ * QuickCompress value 2 adapts cpi->Speed from timing
+ * averages and consumes (then clears) cpi->ErrorBins.
+ *
+ ****************************************************************************/
+UINT32 PickModes ( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError )
+{
+    PB_INSTANCE *pb = &cpi->pb;
+    UINT32 interlacedMBs = 0;
+    UINT32 row, col;
+    UINT8 q;
+    unsigned int tscBegin, tscEnd;
+    unsigned int elapsed;
+
+    // Time stamp on entry; used at the bottom to update avgPickModeTime.
+    VP6_readTSC ( &tscBegin );
+
+    // New-MV usage in the previous frame (in tenths of all coded modes)
+    // selects the motion vector cost correction for this frame.
+    if ( cpi->FrameModeCounter )
+        cpi->LastFrameNewMvUsage = (cpi->FrameNewMvCounter * 10)/cpi->FrameModeCounter;
+    else
+        cpi->LastFrameNewMvUsage = 0;
+    cpi->MvEpbCorrection = MvEpbCorrectionTable[cpi->LastFrameNewMvUsage];
+
+    cpi->FrameNewMvCounter = 0;
+    cpi->FrameModeCounter = 0;
+
+    // Bit costs are measured only; nothing is output during mode selection.
+    cpi->bc.MeasureCost = TRUE;
+    q = pb->quantizer->FrameQIndex;
+
+    // Reset the per-frame statistics.
+    memset ( (void *)cpi->BModeCount, 0, sizeof(cpi->BModeCount) );
+    memset ( (void *)cpi->MBModeCount, 0, sizeof(cpi->MBModeCount) );
+    memset ( (void *)cpi->CountModeSameAsLast, 0, sizeof(cpi->CountModeSameAsLast) );
+    memset ( (void *)cpi->CountModeDiffFrLast, 0, sizeof(cpi->CountModeDiffFrLast) );
+    memset ( (void *)cpi->MvBaselineDist, 0, sizeof(cpi->MvBaselineDist) );
+    memset ( cpi->ModeDist, 0, sizeof(cpi->ModeDist) );
+    memset ( &cpi->FrameMvStats, 0, sizeof(cpi->FrameMvStats) );
+    cpi->ModeMvCostEstimate = 0;
+
+    // Error-per-bit for this Q, never below 1.
+    cpi->ErrorPerBit = ErrorPerBit[q];
+    if ( cpi->ErrorPerBit < 1 )
+        cpi->ErrorPerBit = 1;
+
+    // Provisional MV error-per-bit: the (unclamped) table value scaled by the
+    // correction chosen above.
+    cpi->MVErrorPerBit = (ErrorPerBit[q] << 8) / cpi->MvEpbCorrection;
+
+    // Frame accumulators start from zero.
+    *InterError = 0;
+    *IntraError = 0;
+    cpi->MotionScore = 0;
+
+    // Seed for the "same mode as previous macro-block" weighting.
+    cpi->MBCodingMode = CODE_INTER_NO_MV;
+
+    // Error threshold where we consider forcing INTRA mode.
+    cpi->InterTripOutThresh = (5000<<12);
+
+    // Intra penalty for this Q.
+    cpi->IntraThresh = (IntraThreshTable[q] << 12);
+
+    switch ( cpi->QuickCompress )
+    {
+    case 0:
+        cpi->RdOpt = 2;
+        cpi->MinErrorForMacroBlockMVSearch = 25 << 12;
+        cpi->MinErrorForGoldenMVSearch = 25 << 12;
+        cpi->MinErrorForBlockMVSearch = 20 << 12;
+        cpi->ExhaustiveSearchThresh = 300 << 12;
+        cpi->BlockExhaustiveSearchThresh = 40 << 12;
+        break;
+
+    case 3:
+        // Same thresholds as case 1, with rate-distortion optimisation forced on.
+        cpi->RdOpt = 2;
+        // fall through
+
+    case 1:
+        cpi->MinErrorForMacroBlockMVSearch = 25 << 12;
+        cpi->MinErrorForGoldenMVSearch = 25 << 12;
+        cpi->MinErrorForBlockMVSearch = 50 << 12;
+        cpi->ExhaustiveSearchThresh = 1000 << 12;
+        cpi->BlockExhaustiveSearchThresh = HUGE_ERROR;
+        break;
+
+    case 2:
+    {
+        // this auto speed selection code needs some work !!!
+        // Time budget for one frame, scaled down by CPUUsed/16.
+        // NOTE(review): the original local was named "millisecondsForCompress",
+        // but 1000000/rate looks like microseconds if OutputFrameRate is in
+        // frames per second - confirm the units of the avg*Time fields.
+        UINT32 budget = 1000000 / cpi->Configuration.OutputFrameRate;
+        unsigned int bin = 0;
+        unsigned int total = 0;
+
+        budget = budget * (16-cpi->CPUUsed) / 16;
+
+        if ( budget > cpi->avgEncodeTime+cpi->avgPackVideoTime )
+        {
+            // What is left of the budget once encode + pack time is taken out.
+            budget -= cpi->avgEncodeTime + cpi->avgPackVideoTime;
+
+            if ( cpi->avgPickModeTime != 0 )
+            {
+                // why just go up by 1 and not try to calculate the value
+                // that would compress fast enough (etc)??
+                if ( budget < cpi->avgPickModeTime )
+                {
+                    // Too slow: speed up and restart the average.
+                    cpi->Speed += 3;
+                    cpi->avgPickModeTime = 0;
+                }
+                else if ( budget*100 > cpi->avgPickModeTime*130 )
+                {
+                    // More than 30% headroom: slow down one step.
+                    cpi->Speed -= 1;
+                    cpi->avgPickModeTime = 0;
+                }
+
+                if ( cpi->Speed < 4 )
+                    cpi->Speed = 4;
+                else if ( cpi->Speed > 16 )
+                    cpi->Speed = 16;
+            }
+            else
+            {
+                // No timing history yet.
+                cpi->Speed = 4;
+            }
+        }
+        else
+        {
+            // Encode + pack alone already use up the budget.
+            cpi->Speed = 16;
+        }
+
+        // Full-pixel search method.
+        cpi->FindMvViaSearch = FindMvVia3StepSearch;
+        if ( cpi->Speed >= 1 )
+            cpi->FindMvViaSearch = FindMvViaDiamondSearch;
+
+        // Sub-pixel refinement steps.
+        cpi->FindBestHalfPixelMv = FindBestFractionalPixelStep;
+        if ( cpi->Speed >= 14 )
+            cpi->FindBestHalfPixelMv = SkipFractionalPixelStep;
+
+        cpi->FindBestQuarterPixelMv = FindBestFractionalPixelStep;
+        if ( cpi->Speed >= 2 )
+            cpi->FindBestQuarterPixelMv = SkipFractionalPixelStep;
+
+        // Golden and per-block searches are switched off at higher speeds.
+        cpi->MinErrorForGoldenMVSearch = (cpi->Speed >= 3) ? HUGE_ERROR : (40 << 12);
+        cpi->MinErrorForBlockMVSearch = (cpi->Speed >= 4) ? HUGE_ERROR : (50 << 12);
+        cpi->BlockExhaustiveSearchThresh = HUGE_ERROR;
+
+        // Macro-block search thresholds: fixed defaults, or derived from the
+        // error histogram gathered by PickMacroBlockMode on the previous pass.
+        cpi->MinErrorForMacroBlockMVSearch = 25 << 12;
+        cpi->ExhaustiveSearchThresh = 1000 << 12;
+        if ( cpi->Speed >= 5 )
+        {
+            // Walk the bins until the running count exceeds (Speed-6)/10 of
+            // (MBRows-4)*(MBCols-4); the thresholds use the bin after that one.
+            while ( bin < 128 )
+            {
+                total += cpi->ErrorBins[bin];
+                if ( 10*total>(cpi->Speed-6)*(pb->MBRows-4)*(pb->MBCols-4) )
+                    break;
+                bin++;
+            }
+            bin++;
+            cpi->MinErrorForMacroBlockMVSearch = bin << 17;
+            cpi->ExhaustiveSearchThresh = bin << 23;
+        }
+        if ( cpi->Speed >= 12 )
+            cpi->ExhaustiveSearchThresh = HUGE_ERROR;
+
+        // Start a fresh histogram for this frame.
+        memset ( cpi->ErrorBins, 0, sizeof(cpi->ErrorBins) );
+    }
+        break;
+    }
+
+    // Extra cost penalty to prevent spurious use of 4mv mode.
+    // The reason this is needed probably has something to do with
+    // poorer dc prediction with a 4mv macro block than within a
+    // macro block where all are coded with the same mode.
+    cpi->MinImprovementForFourMV = FourModeImprovement[q]<<12;
+
+    // Row boundaries used in the key frame selection process: macro-block rows
+    // marking the first and the last sixth of the image.
+    cpi->FirstSixthBoundary = (pb->MBRows-(2*BORDER_MBS))/6+2;
+    cpi->LastSixthBoundary = ((pb->MBRows-(2*BORDER_MBS))*5)/6+2;
+
+    // With RdOpt the Above dc context is reset once per frame ...
+    if ( cpi->RdOpt )
+        VP6_ResetAboveContext ( pb );
+
+    // Decide mode and motion vectors for every macro-block inside the border.
+    for ( row=BORDER_MBS; row<pb->MBRows-BORDER_MBS; row++ )
+    {
+        // ... and the Left dc context once per row of macro-blocks.
+        if ( cpi->RdOpt )
+            VP6_ResetLeftContext ( pb );
+
+        for ( col=BORDER_MBS; col < pb->MBCols-BORDER_MBS; col++ )
+        {
+            PickMacroBlockMode ( cpi, row, col, InterError, IntraError );
+
+            if ( pb->MBInterlaced[MBOffset(row,col)] )
+                interlacedMBs++;
+        }
+    }
+
+    // Map the fraction of interlaced macro-blocks onto 255 (none) .. 1 (all).
+    pb->probInterlaced = 256-(1+254*interlacedMBs/((pb->MBRows-(2*BORDER_MBS))*(pb->MBCols-(2*BORDER_MBS))));
+
+    // system state should be cleared here....
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Time stamp on exit, then fold the elapsed time into the running average
+    // (7/8 old + 1/8 new; a zero average is simply replaced).
+    VP6_readTSC(&tscEnd);
+    elapsed = (tscEnd - tscBegin) / pb->ProcessorFrequency;
+
+    if ( cpi->avgPickModeTime != 0 )
+        cpi->avgPickModeTime = (7*cpi->avgPickModeTime+elapsed)>>3;
+    else
+        cpi->avgPickModeTime = elapsed;
+
+    return 0;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c
new file mode 100644
index 00000000..3d222359
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c
@@ -0,0 +1,130 @@
+/****************************************************************************
+*
+* Module Title : RawBuffer.c
+*
+* Description : Functions to handle bit-wise writing to raw buffer.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "RawBuffer.h"
+#include "codec_common.h"
+/****************************************************************************
+ *
+ * ROUTINE : WriteLongToBuffer
+ *
+ * INPUTS : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Appends the four bytes of buf->DataBlock to buf->Buffer,
+ * most significant byte first, advancing buf->pos by 4.
+ *
+ * SPECIAL NOTES : buf->DataBlock is left unchanged. No bounds check is
+ * made on buf->Buffer.
+ *
+ ***************************************************************************/
+INLINE
+void WriteLongToBuffer ( RAW_BUFFER *buf )
+{
+    int shift;
+
+    // Big-endian order: bits 31..24 first, bits 7..0 last.
+    for ( shift = 24; shift >= 0; shift -= 8 )
+        buf->Buffer[buf->pos++] = (buf->DataBlock >> shift) & 0x000000FF;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitAddRawBitsToBuffer
+ *
+ * INPUTS : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ * UINT8 *Buffer : Array to be used by RAW_BUFFER to write to.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Attaches the caller's byte array to the Raw Buffer
+ * instance and puts the writer into its empty state:
+ * write position 0, accumulator cleared, 32 bits free.
+ *
+ * SPECIAL NOTES : The array is only referenced, not copied.
+ *
+ ****************************************************************************/
+void InitAddRawBitsToBuffer ( RAW_BUFFER *buf, UINT8 *Buffer )
+{
+    // Output destination and write cursor
+    buf->pos = 0;
+    buf->Buffer = Buffer;
+
+    // Empty 32-bit accumulator: all 32 bit positions still available
+    buf->DataBlock = 0;
+    buf->byte_bit_offset = 32;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AddRawBitsToBuffer
+ *
+ * INPUTS : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ * UINT32 data : Bit pattern to be written to the buffer. Bits above
+ * the low 'bits' bits are expected to be zero.
+ * UINT32 bits : Number of significant bits of data to write (0..32).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Appends the low 'bits' bits of data to the 32-bit
+ * accumulator buf->DataBlock, most significant bit first.
+ * When the accumulator overflows it is written out with
+ * WriteLongToBuffer and the remaining bits start a new one.
+ *
+ * SPECIAL NOTES : buf->byte_bit_offset holds the number of free bits left
+ * in the accumulator. A full accumulator (0 free bits) is
+ * only flushed by the next write or by EndAddRawBitsToBuffer.
+ * Shift counts of 32 are avoided explicitly: they are
+ * undefined in C and, on targets that mask the count, would
+ * OR the whole of data into the block.
+ *
+ ****************************************************************************/
+void AddRawBitsToBuffer( RAW_BUFFER *buf, UINT32 data, UINT32 bits )
+{
+    // Free bits remaining once this value is in; negative means overflow.
+    buf->byte_bit_offset -= bits;
+
+    if ( buf->byte_bit_offset < 0 )
+    {
+        // Number of low-order bits of data that do not fit in this block.
+        const UINT32 overflow = (UINT32)(-buf->byte_bit_offset);
+
+        // Only the left most bits go into this datablock. If the block was
+        // already full and a 32-bit value arrives, overflow is 32: none of
+        // data belongs here and the shift must be skipped.
+        if ( overflow < 32 )
+            buf->DataBlock |= (data >> overflow);
+
+        // output block and start a new one
+        WriteLongToBuffer ( buf );
+        buf->DataBlock = 0;
+        buf->byte_bit_offset += 32;
+    }
+
+    // Place the value (or its remaining low-order bits) at the top of the
+    // free space. After an overflow the bits already written are shifted off
+    // the left side here. An offset of 32 only occurs for a zero-bit write
+    // into an empty block, where there is nothing to store.
+    if ( buf->byte_bit_offset < 32 )
+        buf->DataBlock |= (data << buf->byte_bit_offset);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EndAddRawBitsToBuffer
+ *
+ * INPUTS : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Flushes the partially filled accumulator: writes its
+ * bytes, most significant first, until every byte that
+ * holds pending bits has been output, then resets the
+ * accumulator to the empty state.
+ *
+ * SPECIAL NOTES : Unused low-order bits of the last byte are written as
+ * they stand in buf->DataBlock. Nothing is written when
+ * the accumulator is already empty.
+ *
+ ****************************************************************************/
+void EndAddRawBitsToBuffer ( RAW_BUFFER *buf )
+{
+    UINT8 byteShift;
+
+    // One byte per 8 bits (or part thereof) in use, starting with bits 31..24.
+    for ( byteShift = 24; buf->byte_bit_offset < 32; byteShift -= 8, buf->byte_bit_offset += 8 )
+        buf->Buffer[buf->pos++] = (buf->DataBlock>>byteShift) & 0xff;
+
+    // Back to the empty state: cleared accumulator with 32 free bits.
+    buf->DataBlock = 0;
+    buf->byte_bit_offset = 32;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c
new file mode 100644
index 00000000..432164ed
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+* Module Title : Tokenize.C
+*
+* Description : Tokenizing fragments for output by pack video
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For abs()
+#include "compdll.h"
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+// Look-up table from a quantized coefficient value to its token and
+// extra-bits. The tokenizers in this module read the entry for value v at
+// index DCT_MAX_VALUE + v; FillValueTokens() populates the table.
+static TOKENEXTRA DctValueTokens[DCT_MAX_VALUE*2];
+
+/****************************************************************************
+ *
+ * ROUTINE : FillValueTokens
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Fills in the DctValueTokens array used during
+ * compression for fast look-up of token and extra-bits
+ * information. The entry for coefficient value v is stored
+ * at index DCT_MAX_VALUE + v, which is how the tokenizers
+ * read the table.
+ *
+ * SPECIAL NOTES : Every entry of the table is written, i.e. the values
+ * -DCT_MAX_VALUE .. DCT_MAX_VALUE-1 inclusive.
+ * In Extra, bit 0 is the sign (1 = negative) and the bits
+ * above it hold the offset of the magnitude from the minimum
+ * value of its category. Single-value tokens carry only the
+ * sign bit.
+ *
+ ****************************************************************************/
+void FillValueTokens ( void )
+{
+    // Signed copy of the bias so the loop bounds compare as signed values.
+    const INT32 MaxValue = (INT32)DCT_MAX_VALUE;
+    INT32 i;
+
+    // The upper bound must reach MaxValue-1, otherwise the last table entry
+    // stays zero-initialised and that value would be looked up as token 0.
+    for ( i=-MaxValue; i<MaxValue; i++ )
+    {
+        UINT32 IsNegative = (i < 0);
+        UINT32 AbsDataVal = (UINT32)(IsNegative ? -i : i);
+        TOKENEXTRA *TokenExtra = DctValueTokens + MaxValue + i;
+
+        // Values are tokenised as category value and a number of
+        // additional bits that define the position within the category.
+        if ( i == 0 )
+        {
+            TokenExtra->Token = 0;
+            TokenExtra->Extra = 0;
+        }
+        else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY1] )
+        {
+            // Small magnitudes have a token of their own; only the sign is extra.
+            TokenExtra->Token = AbsDataVal;
+            TokenExtra->Extra = IsNegative;
+        }
+        else
+        {
+            UINT32 Category;
+
+            // Pick the category whose range contains the magnitude.
+            if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY2] )
+                Category = DCT_VAL_CATEGORY1;
+            else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY3] )
+                Category = DCT_VAL_CATEGORY2;
+            else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY4] )
+                Category = DCT_VAL_CATEGORY3;
+            else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY5] )
+                Category = DCT_VAL_CATEGORY4;
+            else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY6] )
+                Category = DCT_VAL_CATEGORY5;
+            else
+                Category = DCT_VAL_CATEGORY6;
+
+            // Offset within the category in the upper bits, sign in bit 0.
+            TokenExtra->Token = Category;
+            TokenExtra->Extra = ((AbsDataVal - VP6_DctRangeMinVals[Category]) << 1) | IsNegative;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : TokenizeFrag
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * INT16 *RawData : Array of quantized DCT coefficients.
+ * UINT32 Plane : Plane block belongs to (Y=0, UV=1)
+ * BLOCK_CONTEXT *Above : Pointer to an above context.
+ * BLOCK_CONTEXT *Left : Pointer to a left context.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: The cpi->pb.EobOffsetTable entry for the position of
+ * the last token produced for the block.
+ *
+ * FUNCTION : Takes a set of quantized DCT coefficients for a block
+ * and produces a set of representative tokens. Each token
+ * consists of a token identifier and, for most tokens, a
+ * set of 'extra-bits'.
+ *
+ * SPECIAL NOTES : Tokens are appended at cpi->CoeffTokenPtr, which is advanced
+ * past them. The frame token/zero-run statistics and the
+ * DC-zero / AC1-EOB run state held in cpi are updated, and
+ * Left->Token and Above->Token are overwritten with a flag
+ * saying whether the DC token was non-zero.
+ *
+ ****************************************************************************/
+UINT32 TokenizeFrag
+(
+ CP_INSTANCE *cpi,
+ INT16 *RawData,
+ UINT32 Plane,
+ BLOCK_CONTEXT *Above,
+ BLOCK_CONTEXT *Left
+)
+{
+ INT32 i;
+ UINT32 Token;
+ INT32 Offset;
+ INT32 ZeroCount;
+ INT32 LastNonZeroCoeff=0;
+
+ UINT32 token_pos = 0;
+ UINT32 PlaneX = Plane;
+ UINT32 PrevTokenIndex;
+ UINT32 LastTokenNonZero;
+
+
+ // Scan the AC coefficients: remember the scan position of the last non-zero
+ // one and count, per coefficient, how often it is zero / non-zero.
+ for ( i=1; i<64; i++ )
+ {
+ // j is the RawData index of the i-th coefficient in scan order
+ int j = cpi->pb.ModifiedScanOrder[i];
+
+ if ( RawData[j] )
+ {
+ LastNonZeroCoeff = i;
+ cpi->FrameNzCount[j][1]++;
+ }
+ else
+ cpi->FrameNzCount[j][0]++;
+ }
+
+ // Tokenize the DC value
+ if ( RawData[0] )
+ {
+ if ( cpi->CurrentDcZeroRun[PlaneX] > 0 )
+ {
+ // Termination of run of zeros in DC positions:
+ // store the run length in the token that started the run
+ cpi->DcZeroRunStartPtr[PlaneX]->Extra = cpi->CurrentDcZeroRun[PlaneX];
+ cpi->CurrentDcZeroRun[PlaneX] = 0;
+ }
+
+ // Table entries are biased by DCT_MAX_VALUE
+ Offset = DCT_MAX_VALUE + RawData[0];
+
+ Token = DctValueTokens[Offset].Token;
+ cpi->CoeffTokenPtr->Token = Token;
+ cpi->CoeffTokenPtr->Extra = DctValueTokens[Offset].Extra;
+
+ cpi->FrameDcTokenDist2[Plane][Token]++;
+ }
+ else
+ {
+ Token = ZERO_TOKEN;
+ cpi->CoeffTokenPtr->Token = Token;
+ cpi->CoeffTokenPtr->Extra = 0;
+
+ // Check for run of zeros at DC position (Huffman mode)
+ if ( cpi->CurrentDcZeroRun[PlaneX] == 0 )
+ {
+ // New run starts; only the first zero of a run is counted in FrameDcTokenDist2
+ cpi->DcZeroRunStartPtr[PlaneX] = cpi->CoeffTokenPtr;
+ cpi->FrameDcTokenDist2[Plane][Token]++;
+ }
+
+ cpi->CurrentDcZeroRun[PlaneX]++;
+
+ if ( cpi->CurrentDcZeroRun[PlaneX] >= 74/*11+63*/ )
+ {
+ // Maximum run-length is 11+63
+ cpi->DcZeroRunStartPtr[PlaneX]->Extra = cpi->CurrentDcZeroRun[PlaneX];
+ cpi->CurrentDcZeroRun[PlaneX] = 0;
+ }
+ }
+
+ // Record the contexts the DC token was coded in, before they are updated below
+ cpi->CoeffTokenPtr->LastTokenL = Left->Token;
+ cpi->CoeffTokenPtr->LastTokenA = Above->Token;
+ cpi->FrameDcTokenDist[Plane][Token]++;
+ PrevTokenIndex = VP6_PrevTokenIndex[Token];
+ cpi->CoeffTokenPtr++;
+ token_pos++;
+
+ // Update the context
+ LastTokenNonZero = (Token != ZERO_TOKEN);
+ Left->Token = LastTokenNonZero;
+ Above->Token = LastTokenNonZero;
+
+ // Tokenize the rest of the block
+ for ( i=1; i<=LastNonZeroCoeff; i++ )
+ {
+
+ UINT32 Band;
+ ZeroCount = 0;
+ // Skip over a run of zeros. No bounds test is needed here: the coefficient
+ // at scan position LastNonZeroCoeff is non-zero, so the scan stops there
+ // at the latest.
+ while ( !RawData[cpi->pb.ModifiedScanOrder[i]] )
+ {
+ i++;
+ ZeroCount++;
+
+ }
+ // Trap the end of a run of EOBs at AC1
+ if ( cpi->CurrentAc1EobRun[PlaneX] > 0 )
+ {
+ // End of run of EOBs at first AC position
+ cpi->Ac1EobRunStartPtr[PlaneX]->Extra = cpi->CurrentAc1EobRun[PlaneX];
+ cpi->CurrentAc1EobRun[PlaneX] = 0;
+ }
+
+ // Code the zero token and zero run length
+ if ( ZeroCount > 0 )
+ {
+ int ZeroBand;
+
+ Band = VP6_CoeffToBand[token_pos];
+ cpi->CoeffTokenPtr->Token = ZERO_TOKEN;
+ // Extra holds the run length minus one
+ cpi->CoeffTokenPtr->Extra = ZeroCount - 1;
+
+ cpi->FrameAcTokenDist [PrevTokenIndex][Plane][Band][ZERO_TOKEN]++;
+ cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][ZERO_TOKEN]++;
+
+ PrevTokenIndex = VP6_PrevTokenIndex[ZERO_TOKEN];
+
+ // ZeroBand = 0:1
+ ZeroBand = (token_pos >= ZRL_BAND2);
+
+ cpi->FrameZrlDist[ZeroBand][ZeroCount]++;
+ cpi->FrameZeroCount[ZeroBand]++;
+
+ // Update token_pos
+ token_pos += ZeroCount;
+
+ // Step on to next token
+ cpi->CoeffTokenPtr++;
+ }
+
+ // Code the non zero value
+ Offset = DCT_MAX_VALUE + RawData[cpi->pb.ModifiedScanOrder[i]];
+ cpi->CoeffTokenPtr->Token = DctValueTokens[Offset].Token;
+ cpi->CoeffTokenPtr->Extra = DctValueTokens[Offset].Extra;
+ Band = VP6_CoeffToBand[token_pos];
+
+ cpi->FrameAcTokenDist [PrevTokenIndex][Plane][Band][cpi->CoeffTokenPtr->Token]++;
+ cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][cpi->CoeffTokenPtr->Token]++;
+ PrevTokenIndex = VP6_PrevTokenIndex [cpi->CoeffTokenPtr->Token];
+
+ cpi->CoeffTokenPtr++;
+ token_pos++;
+ }
+
+ // i is now one past the last non-zero coefficient. If that is still inside
+ // the block, everything that remains is zero, so code an EOB.
+ if ( i < BLOCK_SIZE )
+ {
+ UINT32 Band;
+ cpi->CoeffTokenPtr->Token = DCT_EOB_TOKEN;
+ cpi->CoeffTokenPtr->Extra = 0;
+ Band = VP6_CoeffToBand[token_pos];
+
+ // if EOB at first AC pos
+ if ( token_pos == 1 )
+ {
+ // The start of an EOB run; only the first EOB of a run is counted in FrameAcTokenDist2
+ if ( cpi->CurrentAc1EobRun[PlaneX] == 0 )
+ {
+ cpi->Ac1EobRunStartPtr[PlaneX] = cpi->CoeffTokenPtr;
+ cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+ }
+
+ cpi->CurrentAc1EobRun[PlaneX]++;
+
+ // Run reached the 11+63 limit: store its length and start over
+ if ( cpi->CurrentAc1EobRun[PlaneX] >= 74 /*11+63*/ )
+ {
+ cpi->Ac1EobRunStartPtr[PlaneX]->Extra = cpi->CurrentAc1EobRun[PlaneX];
+ cpi->CurrentAc1EobRun[PlaneX] = 0;
+ }
+ }
+ else
+ {
+ cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+ }
+
+ cpi->FrameAcTokenDist[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+ PrevTokenIndex = VP6_PrevTokenIndex [DCT_EOB_TOKEN];
+
+ cpi->CoeffTokenPtr++;
+ token_pos++;
+
+ }
+
+
+ // token_pos was advanced past the last token; step back onto it
+ token_pos--;
+
+ // Return the position of the last token.
+ return cpi->pb.EobOffsetTable[token_pos];
+}
+
+/****************************************************************************
+*
+* Rate Distortion Specific Code...
+*
+****************************************************************************/
+
+/****************************************************************************
+ *
+ * ROUTINE : TokenCost_RD
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 Token : Token to be costed.
+ * int Band : Band that the token belongs in; -1 selects
+ * the DC cost table.
+ * UINT8 Plane : Plane that the token belongs in.
+ * UINT8 PrecCase : Previous token context type (AC tokens only).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Estimated cost of coding this token: the table
+ * cost plus the token's extra-bit length shifted left by 6.
+ *
+ * FUNCTION : Produces an estimate of the cost, i.e. number of bits
+ * required to code, the token using statistics derived
+ * from the distribution of tokens in the previous frame.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ***************************************************************************/
+UINT32 TokenCost_RD ( CP_INSTANCE *cpi, UINT8 Token, int Band, UINT8 Plane, UINT8 PrecCase )
+{
+    // AC token: cost depends on the preceding-token case, plane and band
+    if ( Band != -1 )
+        return cpi->EstAcTokenCosts[PrecCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);
+
+    // DC token: cost depends on the plane only
+    return cpi->EstDcTokenCosts[Plane][Token] + (ExtraBitLengths_VP6[Token] << 6);
+}
+/****************************************************************************
+ *
+ * ROUTINE : TokenizeFrag_RD
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * INT16 *RawData : Array of quantized DCT coeffs to be tokenized.
+ * UINT32 Plane : Plane that the block belongs to.
+ *
+ * OUTPUTS : UINT32 *MbCost : Pointer to variable that the estimated cost
+ * of tokenizing the block is ADDED to; the
+ * caller initialises it.
+ *
+ * RETURNS : UINT8: Position of the last token produced for the block.
+ *
+ * FUNCTION : Cut down RD version of the tokenize function. It only
+ * accumulates estimated token costs and does not emit tokens
+ * or update any of the context or statistics held in cpi.
+ *
+ * SPECIAL NOTES : Token costs are the estimated table cost plus the token's
+ * extra-bit length shifted left by 6.
+ *
+ ****************************************************************************/
+UINT8 TokenizeFrag_RD
+(
+    CP_INSTANCE *cpi,
+    INT16 *RawData,
+    UINT32 Plane,
+    UINT32 *MbCost
+)
+{
+    UINT32 i;
+    UINT8 Token;
+    INT32 ZeroCount;
+
+    UINT8 TokenPos = 1;
+    INT32 Band;
+    INT32 PrevTokenCase;
+
+    // Tokenize the DC value (table entries are biased by DCT_MAX_VALUE)
+    Token = DctValueTokens[DCT_MAX_VALUE + RawData[0]].Token;
+    *MbCost += cpi->EstDcTokenCosts[Plane][Token] + (ExtraBitLengths_VP6[Token] << 6);
+    PrevTokenCase = VP6_PrevTokenIndex[Token];
+
+    // Tokenize the rest of the block
+    for ( i=1; i<BLOCK_SIZE; i++ )
+    {
+        // Skip over a run of zeros. The bounds test must come first so that
+        // the scan-order table and RawData are never read at index BLOCK_SIZE
+        // when the block ends in zeros.
+        ZeroCount = 0;
+        while ( (i < BLOCK_SIZE) && !RawData[cpi->pb.ModifiedScanOrder[i]] )
+        {
+            i++;
+            ZeroCount++;
+        }
+
+        // If we have reached the end of the block then code EOB
+        if ( i == BLOCK_SIZE )
+        {
+            Token = DCT_EOB_TOKEN;
+            Band = VP6_CoeffToBand[TokenPos];
+            *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);
+            PrevTokenCase = VP6_PrevTokenIndex[Token];
+            TokenPos++;
+        }
+        else
+        {
+            INT32 Offset = DCT_MAX_VALUE + RawData[cpi->pb.ModifiedScanOrder[i]];
+
+            // A preceding run of zeros costs one zero token plus its run length
+            if ( ZeroCount > 0 )
+            {
+                // 0:1
+                UINT8 ZBand = (TokenPos >= ZRL_BAND2);
+
+                Token = ZERO_TOKEN;
+                Band = VP6_CoeffToBand[TokenPos];
+                *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);
+                PrevTokenCase = VP6_PrevTokenIndex[Token];
+                TokenPos += ZeroCount;
+
+                // Get estimated cost of zero run bits (based upon previous frame stats)
+                *MbCost += cpi->EstZrlCosts[ZBand][ZeroCount];
+            }
+
+            // Cost the non-zero value itself
+            Token = DctValueTokens[Offset].Token;
+            Band = VP6_CoeffToBand[TokenPos];
+            *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);
+            PrevTokenCase = VP6_PrevTokenIndex[Token];
+            TokenPos++;
+        }
+    }
+
+    // TokenPos was advanced past the last token; step back onto it
+    TokenPos--;
+
+    // Return the position of the last token.
+    return TokenPos;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c
new file mode 100644
index 00000000..6ee4f44a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c
@@ -0,0 +1,361 @@
+/****************************************************************************
+*
+* Module Title : Transform.c
+*
+* Description : DCT transform & inverse transform functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For Abs()
+#include "type_aliases.h"
+#include "codec_common.h"
+
+/****************************************************************************
+ *
+ * ROUTINE : SUB8
+ *
+ * INPUTS : UINT8 *FiltPtr : Pointer to 8x8 source block.
+ * UINT8 *ReconPtr : Pointer to 8x8 block to be subtracted from FiltPtr.
+ * UINT8 *old_ptr1 : NOT USED.
+ * UINT8 *new_ptr1 : NOT USED.
+ * INT32 SourceStride : Stride of FiltPtr.
+ * INT32 ReconStride : Stride of ReconPtr.
+ *
+ * OUTPUTS : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Computes FiltPtr - ReconPtr sample by sample for an 8x8
+ * block and stores the signed differences, row after row,
+ * in DctInputPtr.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SUB8
+(
+    UINT8 *FiltPtr,
+    UINT8 *ReconPtr,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1, /* NOT USED */
+    UINT8 *new_ptr1, /* NOT USED */
+    INT32 SourceStride,
+    INT32 ReconStride
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples are differenced in every row
+        for ( Col=0; Col<8; Col++ )
+            DctInputPtr[Col] = (INT16)((int)(FiltPtr[Col]) - ((int)ReconPtr[Col]) );
+
+        // Step each pointer on to its next row
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+        ReconPtr += ReconStride;
+        FiltPtr += SourceStride;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SUB8_128
+ *
+ * INPUTS : UINT8 *FiltPtr : Pointer to 8x8 source block.
+ * UINT8 *old_ptr1 : NOT USED.
+ * UINT8 *new_ptr1 : NOT USED.
+ * INT32 SourceStride : Stride of FiltPtr.
+ *
+ * OUTPUTS : INT16 *DctInputPtr : Pointer to 8x8 array to hold modified block.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Stores each sample of the input block FiltPtr, reduced by
+ * 128, in DctInputPtr.
+ *
+ * SPECIAL NOTES : Used when coding a block in INTRA mode to convert the
+ * pixel range (0,255) to (-128,127). This reduces the
+ * internal precision required by the DCT transform.
+ *
+ ****************************************************************************/
+void SUB8_128
+(
+    UINT8 *FiltPtr,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1, /* NOT USED */
+    UINT8 *new_ptr1, /* NOT USED */
+    INT32 SourceStride
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples are re-centred in every row
+        for ( Col=0; Col<8; Col++ )
+            DctInputPtr[Col] = (INT16)((int)(FiltPtr[Col]) - 128);
+
+        // Step each pointer on to its next row
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+        FiltPtr += SourceStride;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SUB8AV2
+ *
+ * INPUTS : UINT8 *FiltPtr : Pointer to 8x8 source block.
+ * UINT8 *ReconPtr1 : Pointer to first 8x8 reference block.
+ * UINT8 *ReconPtr2 : Pointer to second 8x8 reference block.
+ * UINT8 *old_ptr1 : NOT USED.
+ * UINT8 *new_ptr1 : NOT USED.
+ * INT32 SourceStride : Stride of FiltPtr.
+ * INT32 ReconStride : Stride of ReconPtr1 & ReconPtr2.
+ *
+ * OUTPUTS : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Subtracts the average of the two reconstruction blocks
+ * from the FiltPtr block, sample by sample.
+ *
+ * SPECIAL NOTES : The average is (a + b) / 2 in integer arithmetic, i.e. it
+ * is truncated rather than rounded.
+ *
+ ****************************************************************************/
+void SUB8AV2
+(
+    UINT8 *FiltPtr,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1, /* NOT USED */
+    UINT8 *new_ptr1, /* NOT USED */
+    INT32 SourceStride,
+    INT32 ReconStride
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples are differenced in every row
+        for ( Col=0; Col<8; Col++ )
+            DctInputPtr[Col] = (INT16)((int)(FiltPtr[Col]) - (((int)ReconPtr1[Col] + (int)ReconPtr2[Col]) / 2) );
+
+        // Step each pointer on to its next row
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+        ReconPtr2 += ReconStride;
+        ReconPtr1 += ReconStride;
+        FiltPtr += SourceStride;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AllZeroDctData
+ *
+ * INPUTS : Q_LIST_ENTRY *QuantList : Array of 64 quantized DCT coefficients.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : BOOL: TRUE if all quantized DCT coeffs are zero, FALSE otherwise.
+ *
+ * FUNCTION : Checks for case where all DCT data will be zero.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+BOOL AllZeroDctData ( Q_LIST_ENTRY * QuantList )
+{
+    UINT32 Index = 0;
+
+    // Walk forward while the coefficients are zero; the walk stops early at
+    // the first non-zero entry.
+    while ( (Index < 64) && (QuantList[Index] == 0) )
+        Index++;
+
+    // Getting all the way to the end means nothing non-zero was found.
+    return (Index == 64) ? TRUE : FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Filter3x3AtPixel
+ *
+ * INPUTS : const UINT8 *Centre : Pointer to the sample to be filtered.
+ * INT32 Stride : Stride of the image Centre points into.
+ * const INT32 *Kernel : Nine filter taps in row order followed
+ * by the normalization factor.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: Normalized 3x3 filter output for the sample.
+ *
+ * FUNCTION : Applies the 3x3 kernel to the sample at Centre and its
+ * eight neighbours and divides the sum by Kernel[9].
+ *
+ * SPECIAL NOTES : Reads the rows above and below Centre and the columns to
+ * its left and right, so those samples must be readable.
+ * Kernel[9] must be non-zero. The sum is accumulated in
+ * signed arithmetic so that negative taps are handled without
+ * relying on unsigned wrap-around.
+ *
+ ****************************************************************************/
+static INT32 Filter3x3AtPixel ( const UINT8 *Centre, INT32 Stride, const INT32 *Kernel )
+{
+    const UINT8 *Above = Centre - Stride;
+    const UINT8 *Below = Centre + Stride;
+    INT32 Sum;
+
+    // Top row of filter...
+    Sum  = (INT32)Above[-1] * Kernel[0];
+    Sum += (INT32)Above[0] * Kernel[1];
+    Sum += (INT32)Above[1] * Kernel[2];
+
+    // Middle row of filter...
+    Sum += (INT32)Centre[-1] * Kernel[3];
+    Sum += (INT32)Centre[0] * Kernel[4];
+    Sum += (INT32)Centre[1] * Kernel[5];
+
+    // Bottom row of filter...
+    Sum += (INT32)Below[-1] * Kernel[6];
+    Sum += (INT32)Below[0] * Kernel[7];
+    Sum += (INT32)Below[1] * Kernel[8];
+
+    // Normalize filter output...
+    return Sum / Kernel[9];
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Sub8Filtered
+ *
+ * INPUTS : UINT8 *FiltPtr : Pointer to 8x8 source block.
+ * UINT8 *ReconPtr : Pointer to 8x8 block to be subtracted from FiltPtr.
+ * INT32 SourceStride : Stride of FiltPtr.
+ * INT32 ReconStride : Stride of ReconPtr.
+ * INT32 *Kernel : Pointer to filter taps to filter source.
+ *
+ * OUTPUTS : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Does a pixel-by-pixel subtraction of the two 8x8 blocks
+ * and stores the results in DctInputPtr. At any pixel where
+ * the absolute difference exceeds 4, the source pixel is also
+ * run through a 3x3 filter; the filtered difference is used
+ * instead if its magnitude is more than 4 below that of the
+ * unfiltered difference.
+ *
+ * SPECIAL NOTES : The Kernel actually has 10 entries, the first 9 are the
+ * taps of the 3x3 filter, the last is the filter normalization
+ * factor, which must be non-zero. The filter reads source
+ * samples one row and one column outside the 8x8 block.
+ *
+ ****************************************************************************/
+void Sub8Filtered
+(
+    UINT8 *FiltPtr,
+    UINT8 *ReconPtr,
+    INT16 *DctInputPtr,
+    INT32 SourceStride,
+    INT32 ReconStride,
+    INT32 *Kernel
+)
+{
+    int i,j;
+    INT32 Tmp;
+    INT32 Diff;
+
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+        {
+            Diff = (INT32)FiltPtr[j] - (INT32)ReconPtr[j];
+
+            // Start from the unfiltered result...
+            DctInputPtr[j] = (INT16)Diff;
+
+            if ( abs( Diff ) > 4 )
+            {
+                // ...and see whether filtering the source gets close enough
+                // to the reconstruction to be worth using instead.
+                Tmp = Filter3x3AtPixel ( &FiltPtr[j], SourceStride, Kernel ) - (INT32)ReconPtr[j];
+
+                if ( abs(Tmp)+4 < abs(Diff) )
+                    DctInputPtr[j] = (INT16)Tmp;
+            }
+        }
+
+        // Next row...
+        FiltPtr += SourceStride;
+        ReconPtr += ReconStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Sub8_128Filtered
+ *
+ * INPUTS : UINT8 *FiltPtr : Pointer to 8x8 source block.
+ * INT32 SourceStride : Stride of FiltPtr.
+ * INT32 *Kernel : Pointer to filter taps to filter source.
+ *
+ * OUTPUTS : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 3x3 filter to the source data and then subtracts
+ * 128 from each pixel value. The resulting block is stored in
+ * DctInputPtr.
+ *
+ * SPECIAL NOTES : The Kernel actually has 10 entries, the first 9 are the
+ * taps of the 3x3 filter, the last is the filter normalization
+ * factor, which must be non-zero. The filter reads source
+ * samples one row and one column outside the 8x8 block.
+ *
+ ****************************************************************************/
+void Sub8_128Filtered
+(
+    UINT8 *FiltPtr,
+    INT16 *DctInputPtr,
+    INT32 SourceStride,
+    INT32 *Kernel
+)
+{
+    int i, j;
+
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        // Filter every source sample of the row, then re-centre it on zero
+        for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+            DctInputPtr[j] = (INT16)(Filter3x3AtPixel ( &FiltPtr[j], SourceStride, Kernel ) - (INT32)128);
+
+        // Next row...
+        FiltPtr += SourceStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c
new file mode 100644
index 00000000..c663cfef
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c
@@ -0,0 +1,527 @@
+/****************************************************************************
+*
+* Module Title : Encode.c
+*
+* Description : Main encode function.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+#include "misc_common.h"
+#include "encodemv.h"
+#include "encodemode.h"
+
+/****************************************************************************
+* Explicit imports
+****************************************************************************/
+extern void PackCodedVideo ( CP_INSTANCE *cpi );
+extern void InitLoopDeringThresholds ( PB_INSTANCE *pbi );
+
+#if defined FULLFRAMEFDCT
+extern void BuildFrameMbs ( CP_INSTANCE *cpi );
+extern void FDCTFrameMbs ( CP_INSTANCE *cpi );
+#endif
+
+extern const UINT32 VP6_QThreshTable[Q_TABLE_SIZE];
+extern const UINT32 VP6_ZBinTable[Q_TABLE_SIZE];
+extern const UINT32 VP6_RTable[Q_TABLE_SIZE];
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ShannonCost
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Computed Shannon cost.
+ *
+ * FUNCTION : Computes the Shannon cost of coding the frame based
+ * on the observed distribution of tokens for the frame.
+ *
+ * SPECIAL NOTES : As a side effect the per-token cost tables used by the RD
+ * code (cpi->EstDcTokenCosts, cpi->EstAcTokenCosts and
+ * cpi->EstZrlCosts) are refreshed from the same statistics.
+ * A context with no observations gets a fixed default cost
+ * in every one of its entries.
+ *
+ ****************************************************************************/
+UINT32 VP6_ShannonCost ( CP_INSTANCE *cpi )
+{
+    UINT32 Cost = 0;
+    UINT32 i, j;
+    UINT32 Sum;
+    UINT32 Band;
+    UINT32 Plane;
+    UINT32 Prob;
+
+    // First cost the DC tokens...
+    for ( Plane=0; Plane<2; Plane++ )
+    {
+        Sum = 0;
+        for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+        {
+            Sum += cpi->FrameDcTokenDist[Plane][i];
+        }
+
+        if ( Sum>0 )
+        {
+            for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+            {
+                // Observed frequency scaled to 0..255, kept within 1..254
+                Prob = (cpi->FrameDcTokenDist[Plane][i] * 255) / Sum;
+                if ( Prob > 254 )
+                    Prob = 254;
+                else if ( Prob == 0 )
+                    Prob = 1;
+
+                Cost += (VP6_ProbCost[Prob] * cpi->FrameDcTokenDist[Plane][i])/256;
+                Cost += cpi->FrameDcTokenDist[Plane][i] * ExtraBitLengths_VP6[i];
+
+                // Save individual token costs for use in next frames RD code
+                // Cost in bits x 256.... convert to bits x 64
+                cpi->EstDcTokenCosts[Plane][i] = VP6_ProbCost[Prob] >> 2;
+                if ( cpi->EstDcTokenCosts[Plane][i] == 0 )
+                    cpi->EstDcTokenCosts[Plane][i] = 1;
+            }
+        }
+        // Set defaults for predictive cost tables used in RD code
+        else
+        {
+            for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+            {
+                cpi->EstDcTokenCosts[Plane][i] = 4 << 6;
+            }
+        }
+    }
+
+    // Then cost the AC tokens...
+    for ( Plane=0; Plane<2; Plane++ )
+    {
+        for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+        {
+            for ( j=0; j<PREC_CASES; j++ )
+            {
+                Sum = 0;
+                for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+                {
+                    Sum += cpi->FrameAcTokenDist[j][Plane][Band][i];
+                }
+
+                if ( Sum>0 )
+                {
+                    for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+                    {
+                        // Observed frequency scaled to 0..255, kept within 1..254
+                        Prob = (cpi->FrameAcTokenDist[j][Plane][Band][i] * 255) / Sum;
+                        if ( Prob > 254 )
+                            Prob = 254;
+                        else if ( Prob == 0 )
+                            Prob = 1;
+
+                        Cost += (VP6_ProbCost[Prob] * cpi->FrameAcTokenDist[j][Plane][Band][i])/256;
+                        Cost += cpi->FrameAcTokenDist[j][Plane][Band][i] * ExtraBitLengths_VP6[i];
+
+                        // Save individual token costs for use in next frames RD code
+                        // Cost in bits x 256.... convert to bits x 64
+                        cpi->EstAcTokenCosts[j][Plane][Band][i] = VP6_ProbCost[Prob] >> 2;
+                        if ( cpi->EstAcTokenCosts[j][Plane][Band][i] == 0 )
+                            cpi->EstAcTokenCosts[j][Plane][Band][i] = 1;
+                    }
+                }
+                // Set defaults for predictive cost tables used in RD code
+                else
+                {
+                    for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+                    {
+                        cpi->EstAcTokenCosts[j][Plane][Band][i] = 4 << 6;
+                    }
+                }
+            }
+        }
+    }
+
+    // Finally cost the zero run lengths...
+    for ( i=0; i<ZRL_BANDS; i++ )
+    {
+        Sum = 0;
+        for ( j=0; j<64; j++ )
+        {
+            Sum += cpi->FrameZrlDist[i][j];
+        }
+
+        // Now work out Shannon cost approximations for each run length
+        if ( Sum>0 )
+        {
+            for ( j=0; j<64; j++ )
+            {
+                Prob = (cpi->FrameZrlDist[i][j] * 255) / Sum;
+                if ( Prob > 255 )
+                    Prob = 255;
+                else if ( Prob == 0 )
+                    Prob = 1;
+
+                // Add in to our total cost estimate
+                Cost += (VP6_ProbCost[Prob] * cpi->FrameZrlDist[i][j])/256;
+
+                // Cost in bits x 256.... convert to bits x 64
+                cpi->EstZrlCosts[i][j] = VP6_ProbCost[Prob] >> 2;
+            }
+        }
+        // Set a default for predictive cost tables used in RD code
+        else
+        {
+            // j is left at 64 by the summing loop above, so the default has to
+            // be written with a loop of its own over every run length.
+            for ( j=0; j<64; j++ )
+            {
+                cpi->EstZrlCosts[i][j] = 3 << 6;
+            }
+        }
+    }
+    return Cost;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeData
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Always TRUE (This needs fixing!)
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * SPECIAL NOTES : Applies rate targetting heuristics.
+ *
+ ****************************************************************************/
+UINT32 EncodeData ( CP_INSTANCE *cpi )
+{
+ unsigned char *tmp;
+ BOOL RedoY = FALSE;
+ UINT32 FrameOverShootLimit;
+ UINT32 FrameUnderShootLimit;
+ UINT32 ShannonBits;
+ UINT32 TopIndex;
+ UINT32 BottomIndex;
+ INT32 ModeMvCost = cpi->ModeMvCostEstimate/64; // Estimated overhed in bits for modes and MVs
+ INT32 QuantizePasses = 0;
+ PB_INSTANCE *pbi = &cpi->pb;
+
+ // Set tolerance values for frame overshoot and undershoot.
+ if ( VP6_GetFrameType(pbi) == BASE_FRAME )
+ {
+ if ( cpi->BufferedMode )
+ {
+ if ( cpi->BufferLevel < cpi->OptimalBufferLevel )
+ {
+ FrameOverShootLimit = cpi->ThisFrameTarget * 10/8;
+ FrameUnderShootLimit = 0;
+ }
+ else
+ {
+ FrameOverShootLimit = cpi->ThisFrameTarget * 14/8;
+ FrameUnderShootLimit = 0;
+ }
+ }
+ // Unbuffered video mode (eg video conferencing)
+ else
+ {
+ FrameOverShootLimit = cpi->ThisFrameTarget * 10/8;
+ FrameUnderShootLimit = 0;
+ }
+
+ // Limit Q range for the adaptive loop.
+ BottomIndex = cpi->Configuration.ActiveWorstQuality;
+ if ( BottomIndex < 20)
+ BottomIndex = 20;
+ TopIndex = cpi->Configuration.ActiveBestQuality;
+ if ( TopIndex > 60 )
+ TopIndex = 60;
+ }
+ else
+ {
+ // Normal streamed video mode
+ if ( cpi->BufferedMode && cpi->pass != 2 )
+ {
+ if ( cpi->BufferLevel < cpi->OptimalBufferLevel )
+ {
+ // Looser frame size constraints for local file playback
+ if ( cpi->EndUsage == USAGE_LOCAL_FILE_PLAYBACK )
+ {
+ if ( cpi->ThisFrameTarget > cpi->PerFrameBandwidth )
+ FrameOverShootLimit = cpi->ThisFrameTarget * 2;
+ else
+ FrameOverShootLimit = cpi->PerFrameBandwidth * 2;
+
+ FrameUnderShootLimit = cpi->ThisFrameTarget * 3/8;
+ }
+ else
+ {
+ if ( cpi->MaxAllowedDatarate > 125 )
+ FrameOverShootLimit = (cpi->ThisFrameTarget * cpi->MaxAllowedDatarate)/100;
+ else
+ FrameOverShootLimit = cpi->ThisFrameTarget * 125/100;
+
+ FrameUnderShootLimit = cpi->ThisFrameTarget * 3/8;
+ }
+ }
+ else
+ {
+ // Looser frame size constraints for local file playback
+ if ( cpi->EndUsage == USAGE_LOCAL_FILE_PLAYBACK)
+ {
+ if ( cpi->ThisFrameTarget > cpi->PerFrameBandwidth )
+ FrameOverShootLimit = cpi->ThisFrameTarget * 2;
+ else
+ FrameOverShootLimit = cpi->PerFrameBandwidth * 2;
+
+ FrameUnderShootLimit = cpi->ThisFrameTarget * 4/8;
+ }
+ else
+ {
+ if ( cpi->MaxAllowedDatarate > 150 )
+ FrameOverShootLimit = (cpi->ThisFrameTarget * cpi->MaxAllowedDatarate)/100;
+ else
+ FrameOverShootLimit = cpi->ThisFrameTarget * 150/100;
+
+ FrameUnderShootLimit = cpi->ThisFrameTarget * 5/8;
+ }
+ }
+ }
+
+ // Unbuffered video mode (eg video conferencing)
+ // jbb upped this from 10/8 to 14/8 and shut off lower
+ // limit. This basically eliminated the multiple
+ // go round issue?
+ else
+ {
+ FrameOverShootLimit = cpi->ThisFrameTarget * 14/8;
+ FrameUnderShootLimit = cpi->ThisFrameTarget * 0/8;
+ }
+
+ // Limit Q range for the adaptive loop.
+ BottomIndex = cpi->Configuration.ActiveWorstQuality;
+ TopIndex = cpi->Configuration.ActiveBestQuality;
+ if ( TopIndex > 60)
+ TopIndex = 60;
+ }
+
+
+ // Q adjustment loop (Only loops around if our rate targeting huristic is badly off).
+ do
+ {
+ #if defined FULLFRAMEFDCT
+ FDCTFrameMbs ( cpi );
+ #endif
+
+ // Zero down the structures used to count token distributions
+ memset ( cpi->FrameDcTokenDist, 0, sizeof(cpi->FrameDcTokenDist) );
+ memset ( cpi->FrameDcTokenDist2, 0, sizeof(cpi->FrameDcTokenDist2) );
+ memset ( cpi->FrameAcTokenDist, 0, sizeof(cpi->FrameAcTokenDist) );
+ memset ( cpi->FrameAcTokenDist2, 0, sizeof(cpi->FrameAcTokenDist2) );
+ memset ( cpi->FrameNzCount, 0, sizeof(cpi->FrameNzCount) );
+
+ // Zero down run distribution counts
+ memset( cpi->FrameZrlDist, 0, sizeof(cpi->FrameZrlDist) );
+ cpi->FrameZeroCount[0] = 0;
+ cpi->FrameZeroCount[1] = 0;
+
+ // Pack DC tokens and adjust the ones we couldn't predict 2d
+ pbi->CodedBlockIndex = 0;
+
+ // reset our token list
+ cpi->CoeffTokenPtr = cpi->CoeffTokens;
+
+ // Set loop/predictionfilter thresholds based upon Q
+ if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+ InitLoopDeringThresholds( pbi );
+
+#if defined FULLFRAMEFDCT
+ BuildFrameMbs ( cpi );
+#else
+ // Encode frame MB-by-MB
+ EncodeFrameMbs(cpi);
+#endif
+ // Increment the counter on the number of passes through the dct quantize loop
+ QuantizePasses++;
+
+ // Clear MMX state so floating point can work again
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+
+ // If we are in buffered (streaming) mode and have selected fastest speed
+ // then disallow the re-code loop
+ if ( (cpi->QuickCompress == 2) && (cpi->BufferedMode) )
+ break;
+
+ // Test for severe over-run or under-run conditions. If necessary adjust Q and try again.
+ ShannonBits = VP6_ShannonCost(cpi) + ModeMvCost;
+
+
+ // Are we are overshooting and up against the limit of active max Q.
+ if ( (pbi->quantizer->FrameQIndex == cpi->Configuration.ActiveWorstQuality) &&
+ (cpi->Configuration.ActiveWorstQuality > cpi->Configuration.WorstQuality) &&
+ (ShannonBits > FrameOverShootLimit) )
+ {
+ INT32 OverSizePercent = ((ShannonBits - FrameOverShootLimit) * 100) / FrameOverShootLimit;
+
+ // If so is there any scope for relaxing it
+ while ( (cpi->Configuration.ActiveWorstQuality > cpi->Configuration.WorstQuality) &&
+ (OverSizePercent > 0) )
+ {
+
+ cpi->Configuration.ActiveWorstQuality --;
+ BottomIndex = cpi->Configuration.ActiveWorstQuality;
+
+ OverSizePercent -= 6; // Assume 1 qstep = about 65 on frame size.
+ }
+ }
+
+ // Should we try and recode
+ if ( ((ShannonBits > FrameOverShootLimit) && (pbi->quantizer->FrameQIndex > BottomIndex)) ||
+ ((ShannonBits < FrameUnderShootLimit) && (pbi->quantizer->FrameQIndex < TopIndex)) )
+ {
+ UINT32 LastQIndex = pbi->quantizer->FrameQIndex;
+
+ if ( ShannonBits > FrameOverShootLimit )
+ {
+ // Truncate TmpBottomIndex
+ UINT32 TmpBottomIndex = (pbi->quantizer->FrameQIndex + BottomIndex) >> 1;
+
+ if ( pbi->quantizer->FrameQIndex > 0 )
+ TopIndex = pbi->quantizer->FrameQIndex - 1;
+ else
+ TopIndex = 0;
+
+ // Tweak the appropriate BpbCorrectionFactor.
+ UpdateBpbCorrectionFactor( cpi, ShannonBits );
+
+ if ( VP6_GetFrameType(pbi) == BASE_FRAME )
+ RegulateQ(cpi, FrameOverShootLimit );
+ else
+ RegulateQ(cpi, cpi->ThisFrameTarget );
+
+ // Do not allow jumps to be to large and to go out of range.
+ if ( pbi->quantizer->FrameQIndex < TmpBottomIndex )
+ ClampAndUpdateQ ( cpi, (UINT32)TmpBottomIndex );
+ else if ( pbi->quantizer->FrameQIndex > TopIndex )
+ ClampAndUpdateQ ( cpi, (UINT32)TopIndex );
+ }
+ else
+ {
+ // Round TmpTopIndex Up
+ UINT32 TmpTopIndex = (TopIndex + pbi->quantizer->FrameQIndex + 1) >> 1;
+
+ if ( pbi->quantizer->FrameQIndex < (Q_TABLE_SIZE-1) )
+ BottomIndex = pbi->quantizer->FrameQIndex + 1;
+ else
+ BottomIndex = (Q_TABLE_SIZE-1);
+
+ // Tweak the appropriate BpbCorrectionFactor.
+ UpdateBpbCorrectionFactor( cpi, ShannonBits );
+ RegulateQ(cpi, cpi->ThisFrameTarget );
+
+ // Clamp Q to upper and lower limits
+ if ( pbi->quantizer->FrameQIndex < BottomIndex )
+ ClampAndUpdateQ ( cpi, (UINT32)BottomIndex );
+ else if ( pbi->quantizer->FrameQIndex > TmpTopIndex )
+ ClampAndUpdateQ ( cpi, (UINT32)TmpTopIndex );
+ }
+
+ // If we were able to adjust Q index
+ // given current constraints, then cycle round again.
+ if ( pbi->quantizer->FrameQIndex != LastQIndex )
+ {
+ // Loop round and try again at the modified Q
+ RedoY = TRUE;
+ }
+ else
+ {
+ RedoY = FALSE;
+ }
+ }
+ else
+ RedoY = FALSE;
+ }
+ while ( RedoY );
+
+
+ // Optimize the scan order and then repeat dct and tokenize phases
+ if ( ( (cpi->pb.Configuration.Interlaced) || (cpi->AllowScanOrderUpdates) ) &&
+ (!cpi->ErrorResilliantMode) &&
+ (cpi->QuickCompress !=2) )
+ {
+ // Work out the optimal scan bands based upon the frame zero counts for this frame
+ PredictScanOrder( cpi );
+
+ // Build the scan order
+ BuildScanOrder( &(cpi->pb), cpi->NewScanOrderBands );
+
+ // Zero down the structures used to count token distributions
+ memset ( cpi->FrameDcTokenDist, 0, sizeof(cpi->FrameDcTokenDist) );
+ memset ( cpi->FrameDcTokenDist2, 0, sizeof(cpi->FrameDcTokenDist2) );
+ memset ( cpi->FrameAcTokenDist, 0, sizeof(cpi->FrameAcTokenDist) );
+ memset ( cpi->FrameAcTokenDist2, 0, sizeof(cpi->FrameAcTokenDist2) );
+ memset ( cpi->FrameNzCount, 0, sizeof(cpi->FrameNzCount) );
+
+ // Zero run distribution counts
+ memset( cpi->FrameZrlDist, 0, sizeof(cpi->FrameZrlDist) );
+ cpi->FrameZeroCount[0] = 0;
+ cpi->FrameZeroCount[1] = 0;
+
+ // Pack DC tokens and adjust the ones we couldn't predict 2d
+ pbi->CodedBlockIndex = 0;
+
+ // reset our token list
+ cpi->CoeffTokenPtr = cpi->CoeffTokens;
+
+ // Set loop/prediction filter thresholds based upon Q
+ if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+ InitLoopDeringThresholds( pbi );
+
+ // Encode frame MB-by-MB
+#if defined FULLFRAMEFDCT
+ BuildFrameMbs ( cpi );
+#else
+ EncodeFrameMbs(cpi);
+#endif
+
+ // Clear MMX state so floating point can work again
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+ }
+ // Decide whether to drop back to using Huffman entropy coding or not
+ if ( cpi->pb.VpProfile == SIMPLE_PROFILE )
+ {
+ if( ShannonBits > 9000*8 )
+ pbi->UseHuffman = TRUE;
+ else
+ pbi->UseHuffman = FALSE;
+ }
+
+ // Entropy code the tokens generated & output bits to the bitstream
+ PackCodedVideo(cpi);
+
+ // switch pointers so that this frame recon becomes last frame recon
+ tmp = pbi->LastFrameRecon;
+ pbi->LastFrameRecon = pbi->ThisFrameRecon;
+ pbi->ThisFrameRecon = tmp;
+
+ // update UMV border
+ UpdateUMVBorder ( pbi->postproc, pbi->LastFrameRecon );
+
+ // Update the golden frame buffer.
+ if( (pbi->FrameType == BASE_FRAME) || pbi->RefreshGoldenFrame )
+ memcpy ( pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize );
+
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+
+ BuildMVCostEstimates(cpi);
+ BuildModeCostEstimates(cpi);
+
+ // AWG This function returns a UINT32 __NOT__ a BOOL !!
+ return TRUE;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c
new file mode 100644
index 00000000..91b0a642
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+* Module Title : Encodembs.c
+*
+* Description : Compressor functions for block order transmittal
+*
+* AUTHOR : Paul Wilkins
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "misc_common.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "quantize.h"
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PredictBlock
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * BLOCK_POSITION bp : Position of block in MB (0-5)
+ * (There are no MBrow/MBcol arguments: strides and source/recon
+ * offsets for the block are read from cpi->pb.mbi.blockDxInfo[bp].)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Creates a prediction for an 8x8 block given a coding
+ * mode and other data stored at the MB level.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void PredictBlock ( CP_INSTANCE *cpi, BLOCK_POSITION bp)
+{
+ MACROBLOCK_INFO *mbi=&cpi->pb.mbi;
+ INT32 CurrentReconStride = cpi->pb.mbi.blockDxInfo[bp].CurrentReconStride;
+ INT32 CurrentSourceStride = cpi->pb.mbi.blockDxInfo[bp].CurrentSourceStride;
+ UINT32 thisRecon = cpi->pb.mbi.blockDxInfo[bp].thisRecon; // offset used to index LastFrameRecon / GoldenFrame below
+ UINT32 Source = cpi->pb.mbi.blockDxInfo[bp].Source; // offset used to index cpi->yuv1ptr below
+
+ if ( VP6_ModeUsesMC[mbi->Mode] ) // mode flagged in VP6_ModeUsesMC: predictor is built by VP6_PredictFilteredBlock
+ {
+ VP6_PredictFilteredBlock ( &cpi->pb, cpi->DCTDataBuffer, bp );
+
+ SubtractBlock ( &cpi->yuv1ptr[Source], cpi->DCTDataBuffer, CurrentSourceStride );
+ }
+ else if ( mbi->Mode==CODE_INTER_NO_MV ) // reference is the block at the same offset in LastFrameRecon
+ {
+ Sub8 ( &cpi->yuv1ptr[Source], &cpi->pb.LastFrameRecon[thisRecon], cpi->DCTDataBuffer, 0, 0, CurrentSourceStride, CurrentReconStride );
+ }
+ else if ( mbi->Mode==CODE_USING_GOLDEN ) // reference is the block at the same offset in GoldenFrame
+ {
+ Sub8 ( &cpi->yuv1ptr[Source], &cpi->pb.GoldenFrame[thisRecon], cpi->DCTDataBuffer, 0, 0, CurrentSourceStride, CurrentReconStride );
+ }
+ else if ( mbi->Mode==CODE_INTRA ) // no reference frame is passed; Sub8_128 receives only the source block
+ {
+ Sub8_128 ( &cpi->yuv1ptr[Source], cpi->DCTDataBuffer, 0, 0, CurrentSourceStride );
+ } // any other mode: cpi->DCTDataBuffer is left untouched
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PredictDCE
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * BLOCK_POSITION bp : Position of block in MB (0-5)
+ * NOTE: the following are not arguments; they are read from cpi->pb.mbi.blockDxInfo[bp]:
+ * LastDc (array of DC values last used, one per prediction frame type),
+ * Above / Left (pointers to the above and left contexts for the block).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Computes a DC predictor for the block from the above and left contexts
+ * and subtracts it, scaled by dequantPtr[0], from cpi->DCT_codes[0].
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void PredictDCE
+(
+ CP_INSTANCE *cpi,
+ BLOCK_POSITION bp
+)
+{
+ PB_INSTANCE *pbi = &cpi->pb;
+ UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode]; // frame type looked up from the current MB coding mode
+
+ Q_LIST_ENTRY *LastDC = pbi->mbi.blockDxInfo[bp].LastDc;
+ BLOCK_CONTEXT *Above = pbi->mbi.blockDxInfo[bp].Above;
+ BLOCK_CONTEXT *Left = pbi->mbi.blockDxInfo[bp].Left;
+
+ INT32 Avg;
+
+ Avg = LastDC[Frame]; // fallback predictor when neither neighbour matches Frame
+
+ if(Frame == Left->Frame) // left context has the same frame type: use its DC
+ {
+ Avg = Left->Dc;
+ }
+ if(Frame == Above->Frame) // above context has the same frame type: it takes precedence
+ {
+ Avg = Above->Dc;
+ if(Frame == Left->Frame) // both match: average the two DC values
+ {
+ #define HIGHBITDUPPED(X) (((signed short) X) >> 15)
+ Avg += Left->Dc;
+ Avg += (HIGHBITDUPPED(Avg)&1); // adds 1 when the 16-bit sum is negative so the shift rounds toward zero (relies on arithmetic >> of negatives)
+ Avg >>= 1;
+
+ }
+ }
+
+//Jim says that y,u,v all use the same quantizer so we probably do not need to have a separate dequant ptr
+ // remove the prediction, scaled by the block's first dequantizer entry, from the DC coefficient
+ cpi->DCT_codes[0] -= ((Avg * pbi->mbi.blockDxInfo[bp].dequantPtr[0]));
+}
+/****************************************************************************
+ *
+ * ROUTINE : EncodeMacroBlock
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : MB row.
+ * UINT32 MBcol : MB column.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a single macro-block by coding each of
+ * its six constituent blocks in turn.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void EncodeMacroBlock ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol )
+{
+ UINT32 bp;
+ UINT32 fragCoefEOB; // TokenizeFrag result (truncated to UINT8); selects the idct routine below
+// UINT32 MBPointer;
+// UINT32 MBSourcePointer;
+ //INT32 NextBlock;
+ //INT32 NextLineInBlock;
+ UINT32 FragsToCheck[6]; // FragInfo index for each of the six blocks of this MB
+ PB_INSTANCE *pbi = &cpi->pb;
+ UINT32 FragIndex = 2*(MBrow-BORDER_MBS) * pbi->HFragments + 2*(MBcol-BORDER_MBS); // FragInfo index of block 0 (border MBs excluded)
+
+ pbi->mbi.Interlaced = pbi->MBInterlaced[MBOffset(MBrow,MBcol)];
+
+ //NextBlock = 8;
+ //NextLineInBlock = 1;
+
+ if ( pbi->mbi.Interlaced == 1 )
+ {
+ //NextBlock = 1;
+ //NextLineInBlock = 2;
+ pbi->mbi.blockDxInfo[0].CurrentReconStride =
+ pbi->mbi.blockDxInfo[1].CurrentReconStride =
+ pbi->mbi.blockDxInfo[2].CurrentReconStride =
+ pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride * 2;
+
+ pbi->mbi.blockDxInfo[2].thisRecon -= (pbi->Configuration.YStride * 7); // moved up 7 rows; the matching += at the end of this function undoes it
+ pbi->mbi.blockDxInfo[3].thisRecon -= (pbi->Configuration.YStride * 7);
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth * 2;
+
+ pbi->mbi.blockDxInfo[2].Source -= (pbi->Configuration.VideoFrameWidth * 7); // same 7-row adjustment for the source offsets
+ pbi->mbi.blockDxInfo[3].Source -= (pbi->Configuration.VideoFrameWidth * 7);
+ }
+
+
+//note: should be able to move FragsToCheck into the blockDxInfo struct
+//then in the MB loop, we should be able to inc the values instead of doing these multiplies
+//it may not affect the pc performance, but it may help other processors
+ FragsToCheck[0] = FragIndex;
+ FragsToCheck[1] = FragIndex+1;
+ FragsToCheck[2] = FragIndex+cpi->pb.HFragments;
+ FragsToCheck[3] = FragIndex+cpi->pb.HFragments+1;
+ FragsToCheck[4] = cpi->pb.YPlaneFragments + (MBrow-BORDER_MBS) * (cpi->pb.HFragments / 2) + MBcol-BORDER_MBS;
+ FragsToCheck[5] = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments + (MBrow-BORDER_MBS) * ( cpi->pb.HFragments / 2 ) + MBcol-BORDER_MBS;
+
+ cpi->pb.mbi.Mode = -1; // placeholder only: the loop below always overwrites it
+
+ for( bp=0 ; bp<6 ; bp++ )
+ {
+ cpi->pb.mbi.Mode = cpi->pb.FragInfo[FragsToCheck[bp]].FragCodingMode; // overwritten every pass, so it ends as the mode of block 5's fragment
+ cpi->pb.mbi.Mv[bp].x = cpi->pb.FragInfo[FragsToCheck[bp]].MVectorX;
+ cpi->pb.mbi.Mv[bp].y = cpi->pb.FragInfo[FragsToCheck[bp]].MVectorY;
+ }
+
+ for( bp=0 ; bp<6 ; bp++ )
+ {
+ // Build a block predictor, subtract from source to get prediction error for block
+ PredictBlock ( cpi, bp );
+
+ // Transform the error signal using the forward DCT to get set of transform coefficients
+ fdct_short ( cpi->DCTDataBuffer, cpi->DCT_codes );
+
+ // Predict the DCT DC value from those in surrounding blocks
+ PredictDCE ( cpi, bp );
+
+ // Quantize the resulting DCT coefficients at prevailing Q
+ VP6_quantize ( cpi->pb.quantizer, cpi->DCT_codes, cpi->pb.mbi.blockDxInfo[bp].coeffsPtr, (UINT8)bp );
+
+ // Tokenize the resulting quantized coefficients
+ fragCoefEOB = (UINT8)TokenizeFrag ( cpi,
+ cpi->pb.mbi.blockDxInfo[bp].coeffsPtr,
+ cpi->pb.mbi.blockDxInfo[bp].Plane,
+ pbi->mbi.blockDxInfo[bp].Above,
+ pbi->mbi.blockDxInfo[bp].Left );
+
+ // Produce reconstructed block so encoder has __exactly__ the same
+ // data for last frame reconstruction as the decoder
+
+ // Re-form the DC value from the prediction
+ VP6_PredictDC ( &cpi->pb, bp );
+
+ // Invert the transform to re-create the prediction error
+ cpi->pb.idct[fragCoefEOB]( cpi->pb.mbi.blockDxInfo[bp].coeffsPtr,
+ cpi->pb.mbi.blockDxInfo[bp].dequantPtr,
+ cpi->pb.ReconDataBuffer[bp] );
+
+ // Add prediction error to predictor to re-create block as it appears at decoder
+ VP6_ReconstructBlock(&cpi->pb, bp);
+
+ // DEBUG Code: Store prediction block in Post-processing buffer
+ //PredictBlockToPostProcessBuffer ( &cpi->pb, bp );
+
+ // Update the context info for the next block
+ cpi->pb.CodedBlockIndex++;
+ VP6_UpdateContextA ( &cpi->pb, pbi->mbi.blockDxInfo[bp].Above, bp );
+ VP6_UpdateContext ( &cpi->pb, pbi->mbi.blockDxInfo[bp].Left, bp );
+ }
+
+ if ( pbi->mbi.Interlaced == 1 )
+ {
+ /* reset to non interlaced: exact inverse of the adjustments made at the top of this function */
+ pbi->mbi.blockDxInfo[0].CurrentReconStride =
+ pbi->mbi.blockDxInfo[1].CurrentReconStride =
+ pbi->mbi.blockDxInfo[2].CurrentReconStride =
+ pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+
+ pbi->mbi.blockDxInfo[2].thisRecon += (pbi->Configuration.YStride * 7);
+ pbi->mbi.blockDxInfo[3].thisRecon += (pbi->Configuration.YStride * 7);
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+
+ pbi->mbi.blockDxInfo[2].Source += (pbi->Configuration.VideoFrameWidth * 7);
+ pbi->mbi.blockDxInfo[3].Source += (pbi->Configuration.VideoFrameWidth * 7);
+ }
+
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeFrameMbs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a frame by encoding each of its constituent
+ * macro-blocks in turn.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void EncodeFrameMbs ( CP_INSTANCE *cpi )
+{
+ PB_INSTANCE *pbi = &cpi->pb;
+
+ unsigned int duration;
+ unsigned int starttsc;
+ unsigned int endtsc;
+
+ // Record start time
+ VP6_readTSC ( &starttsc );
+
+ // Reset Dc zero & Ac EOB run counters
+ cpi->CurrentDcZeroRun[0] = 0;
+ cpi->CurrentDcZeroRun[1] = 0;
+ cpi->DcZeroRunStartPtr[0] = NULL;
+ cpi->DcZeroRunStartPtr[1] = NULL;
+ cpi->CurrentAc1EobRun[0] = 0;
+ cpi->CurrentAc1EobRun[1] = 0;
+ cpi->Ac1EobRunStartPtr[0] = NULL;
+ cpi->Ac1EobRunStartPtr[1] = NULL;
+
+ if ( cpi->pb.FrameType == BASE_FRAME )
+ {
+ // Initialise probability distributions with baseline default values
+ memcpy ( cpi->pb.IsMvShortProb, DefaultIsShortProbs, sizeof(cpi->pb.IsMvShortProb) );
+ memcpy ( cpi->pb.MvShortProbs, DefaultMvShortProbs, sizeof(cpi->pb.MvShortProbs) );
+ memcpy ( cpi->pb.MvSignProbs, DefaultSignProbs, sizeof(cpi->pb.MvSignProbs) );
+ memcpy ( cpi->pb.MvSizeProbs, DefaultMvLongProbs, sizeof(cpi->pb.MvSizeProbs) );
+ memcpy ( cpi->pb.probXmitted, VP6_BaselineXmittedProbs, sizeof(cpi->pb.probXmitted) );
+ memset ( cpi->pb.MBModeProb, 128, sizeof(cpi->pb.MBModeProb) );
+ memset ( cpi->pb.BModeProb, 128, sizeof(cpi->pb.BModeProb) );
+ memset ( cpi->pb.probModeSame, 128, sizeof(cpi->pb.probModeSame) );
+ memset ( cpi->pb.probMode, 128, sizeof(cpi->pb.probMode) );
+ memset ( cpi->pb.predictionMode, 1, sizeof(char)*cpi->pb.MacroBlocks ); // sized by MacroBlocks rather than sizeof: presumably a heap buffer -- confirm
+
+ memset ( cpi->MBModeCostNoNearest, 0, sizeof(cpi->MBModeCostNoNearest) );
+ memset ( cpi->MBModeCostNoNear, 0, sizeof(cpi->MBModeCostNoNear) );
+ memset ( cpi->MBModeCostBoth, 0, sizeof(cpi->MBModeCostBoth) );
+ memset ( cpi->BModeCost, 0, sizeof(cpi->BModeCost) );
+ }
+ else
+ {
+ cpi->pb.LastMode = CODE_INTER_NO_MV;
+ }
+
+ // since we are on a new frame reset the above contexts
+ VP6_ResetAboveContext( &cpi->pb );
+
+ {
+ UINT32 MBrow;
+ UINT32 MBRows = cpi->pb.MBRows;
+ UINT32 MBCols = cpi->pb.MBCols;
+
+ MBCols -= BORDER_MBS; // loops below run from BORDER_MBS up to (but excluding) these reduced bounds
+ MBRows -= BORDER_MBS;
+
+ // AWG Code Added: Initialize strides for source & recon
+ pbi->mbi.blockDxInfo[0].CurrentReconStride =
+ pbi->mbi.blockDxInfo[1].CurrentReconStride =
+ pbi->mbi.blockDxInfo[2].CurrentReconStride =
+ pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+
+ pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+ pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+ // AWG End Added Code
+
+ // for each row of macroblocks
+ MBrow=BORDER_MBS;
+ do
+ {
+ MACROBLOCK_INFO *mbi = &cpi->pb.mbi;
+ UINT32 MBcol;
+
+ VP6_ResetLeftContext(&cpi->pb);
+
+ // set up the per-block above-context pointers and recon/source offsets for the first MB of this row
+
+ mbi->blockDxInfo[0].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+ mbi->blockDxInfo[1].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+ mbi->blockDxInfo[2].Above = &pbi->fc.AboveY[BORDER_MBS*2]; // blocks 2/3 share the above-context slots of blocks 0/1
+ mbi->blockDxInfo[3].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+ mbi->blockDxInfo[4].Above = &pbi->fc.AboveU[BORDER_MBS];
+ mbi->blockDxInfo[5].Above = &pbi->fc.AboveV[BORDER_MBS];
+
+
+ mbi->blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + ((MBrow * pbi->Configuration.YStride) << 4) + (BORDER_MBS * 16);
+ mbi->blockDxInfo[1].thisRecon = mbi->blockDxInfo[0].thisRecon + 8;
+ mbi->blockDxInfo[2].thisRecon = mbi->blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3);
+ mbi->blockDxInfo[3].thisRecon = mbi->blockDxInfo[1].thisRecon + (pbi->Configuration.YStride << 3);
+ mbi->blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+ mbi->blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+
+
+ mbi->blockDxInfo[0].Source = pbi->YDataOffset + ((MBrow * 16) - UMV_BORDER) * pbi->Configuration.VideoFrameWidth;
+ mbi->blockDxInfo[1].Source = mbi->blockDxInfo[0].Source + 8;
+ mbi->blockDxInfo[2].Source = mbi->blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3);
+ mbi->blockDxInfo[3].Source = mbi->blockDxInfo[1].Source + (pbi->Configuration.VideoFrameWidth << 3);
+ mbi->blockDxInfo[4].Source = pbi->UDataOffset + ((MBrow * 8) - (UMV_BORDER>>1)) * (pbi->Configuration.VideoFrameWidth/2);
+ mbi->blockDxInfo[5].Source = pbi->VDataOffset + ((MBrow * 8) - (UMV_BORDER>>1)) * (pbi->Configuration.VideoFrameWidth/2);
+
+ MBcol=BORDER_MBS;
+ do
+ {
+
+ // Encode the macroblock
+ EncodeMacroBlock(cpi, MBrow, MBcol);
+
+
+ mbi->blockDxInfo[0].Above += 2; // step every per-block pointer/offset on to the next MB in the row
+ mbi->blockDxInfo[1].Above += 2;
+ mbi->blockDxInfo[2].Above += 2;
+ mbi->blockDxInfo[3].Above += 2;
+ mbi->blockDxInfo[4].Above += 1;
+ mbi->blockDxInfo[5].Above += 1;
+
+ mbi->blockDxInfo[0].thisRecon += 16;
+ mbi->blockDxInfo[1].thisRecon += 16;
+ mbi->blockDxInfo[2].thisRecon += 16;
+ mbi->blockDxInfo[3].thisRecon += 16;
+ mbi->blockDxInfo[4].thisRecon += 8;
+ mbi->blockDxInfo[5].thisRecon += 8;
+
+ mbi->blockDxInfo[0].Source += 16;
+ mbi->blockDxInfo[1].Source += 16;
+ mbi->blockDxInfo[2].Source += 16;
+ mbi->blockDxInfo[3].Source += 16;
+ mbi->blockDxInfo[4].Source += 8;
+ mbi->blockDxInfo[5].Source += 8;
+
+ } while(++MBcol < MBCols); // do/while: the body runs at least once per row
+
+
+ } while(++MBrow < MBRows);
+ }
+
+
+ // Terminate current DC run of zeros or AC run of EOB
+ if ( cpi->CurrentDcZeroRun[0] > 0 ) // NOTE(review): assumes the start pointer was set when the run began -- confirm in the tokenizer
+ {
+ cpi->DcZeroRunStartPtr[0]->Extra = cpi->CurrentDcZeroRun[0];
+ cpi->CurrentDcZeroRun[0] = 0;
+ }
+ if ( cpi->CurrentDcZeroRun[1] > 0 )
+ {
+ cpi->DcZeroRunStartPtr[1]->Extra = cpi->CurrentDcZeroRun[1];
+ cpi->CurrentDcZeroRun[1] = 0;
+ }
+ if ( cpi->CurrentAc1EobRun[0] > 0 )
+ {
+ cpi->Ac1EobRunStartPtr[0]->Extra = cpi->CurrentAc1EobRun[0];
+ cpi->CurrentAc1EobRun[0] = 0;
+ }
+ if ( cpi->CurrentAc1EobRun[1] > 0 )
+ {
+ cpi->Ac1EobRunStartPtr[1]->Extra = cpi->CurrentAc1EobRun[1];
+ cpi->CurrentAc1EobRun[1] = 0;
+ }
+
+ // Record end time and compute duration
+ VP6_readTSC ( &endtsc );
+ duration = (endtsc - starttsc)/cpi->pb.ProcessorFrequency; // NOTE(review): assumes ProcessorFrequency is non-zero -- confirm where it is initialised
+
+ if( cpi->avgEncodeTime==0 )
+ cpi->avgEncodeTime = duration;
+ else
+ cpi->avgEncodeTime = ( 7 * cpi->avgEncodeTime + duration ) >> 3; // running average: 7/8 previous + 1/8 this frame
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c
new file mode 100644
index 00000000..0dc81031
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c
@@ -0,0 +1,764 @@
+/****************************************************************************
+*
+* Module Title : encodemode.c
+*
+* Description : Functions for encoding modes and motion vectors.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For abs()
+#include "compdll.h"
+#include "boolhuff.h"
+#include "decodemode.h"
+#include "encodemv.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Explicit Imports
+****************************************************************************/
+extern void AddBitsToBuffer( BOOL_CODER *bc, UINT32 data, UINT32 bits );
+
+/****************************************************************************
+* Module statics.
+****************************************************************************/
+static const HNODE CodingMode[9] = // mode decision tree; encodeMode follows .left.value on a 0 decision and .right.value on a 1
+{
+ { // 0: root -> nodes 1 / 2 (first member of each pair is presumably a leaf flag -- confirm against HNODE)
+ { 0, 1 },
+ { 0, 2 },
+ },
+ { // 1 -> nodes 3 / 4
+ { 0, 3 },
+ { 0, 4 },
+ },
+ { // 2 -> nodes 5 / 6
+ { 0, 5 },
+ { 0, 6 },
+ },
+ { // 3 -> coding modes
+ { 1, CODE_INTER_NO_MV },
+ { 1, CODE_INTER_PLUS_MV },
+ },
+ { // 4 -> coding modes
+ { 1, CODE_INTER_NEAREST_MV },
+ { 1, CODE_INTER_NEAR_MV },
+ },
+ { // 5 -> coding modes
+ { 1, CODE_INTRA },
+ { 1, CODE_INTER_FOURMV },
+ },
+ { // 6 -> nodes 7 / 8
+ { 0, 7 },
+ { 0, 8 },
+ },
+ { // 7 -> coding modes
+ { 1, CODE_USING_GOLDEN },
+ { 1, CODE_GOLDEN_MV },
+ },
+ { // 8 -> coding modes
+ { 1, CODE_GOLD_NEAREST_MV},
+ { 1, CODE_GOLD_NEAR_MV },
+ },
+};
+
+// NOTE:
+// ModeCodeArray contains information required to traverse a binary
+// decision tree for coding the coding mode. The form of the tree is
+// documented in decodemode.c. Each entry corresponds to a decision
+// as to whether to take the 0 or one branch at a particular node.
+// An entry whose value is 9 indicates that we have reached a leaf node.
+// Each row corresponds to the mode being coded (the array is indexed by mode)
+// and each column to the successive node decisions.
+static const UINT32 ModeCodeArray[MAX_MODES][7] = // [mode being coded][decision index]; 0/1 = branch to take, 9 = end of list
+{
+ 0, 0, 0, 9, 9, 9, 9, // CODE_INTER_NO_MV
+ 1, 0, 0, 9, 9, 9, 9, // CODE_INTRA
+ 0, 0, 1, 9, 9, 9, 9, // CODE_INTER_PLUS_MV
+ 0, 1, 0, 9, 9, 9, 9, // CODE_INTER_NEAREST_MV
+ 0, 1, 1, 9, 9, 9, 9, // CODE_INTER_NEAR_MV
+ 1, 1, 0, 0, 9, 9, 9, // CODE_USING_GOLDEN
+ 1, 1, 0, 1, 9, 9, 9, // CODE_GOLDEN_MV
+ 1, 0, 1, 9, 9, 9, 9, // CODE_INTER_FOURMV
+ 1, 1, 1, 0, 9, 9, 9, // CODE_GOLD_NEAREST_MV
+ 1, 1, 1, 1, 9, 9, 9 // CODE_GOLD_NEAR_MV
+};
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeBlockMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE mode : Mode we are trying to encode.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a block mode into the bitstream using 2 bits.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void encodeBlockMode ( CP_INSTANCE *cpi, CODING_MODE mode )
+{
+ int choice = 0; // stays 0 for any mode not listed in the switch below
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MODE_SECTION;
+#endif
+
+ switch ( mode ) // no default case: only these four modes get a distinct code
+ {
+ case CODE_INTER_NO_MV: choice = 0; break;
+ case CODE_INTER_PLUS_MV: choice = 1; break;
+ case CODE_INTER_NEAREST_MV: choice = 2; break;
+ case CODE_INTER_NEAR_MV: choice = 3; break;
+ }
+ AddBitsToBuffer ( &cpi->bc, choice, 2 ); // write the choice as 2 bits
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeMode
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE lastmode : Mode of the last coded macroblock.
+ * CODING_MODE mode : Mode we are trying to encode.
+ * UINT32 type : MODE_TYPE (all modes available, nonearest
+ * no near macroblock)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes coding mode for MB into the bitstream using a tree
+ * traversal algorithm:
+ * -- First decision is whether mode==lastmode: code a 0 or 1
+ * using probability from probModeSame.
+ * -- If mode!=lastmode step down the tree using ModeCodeArray
+ * to decide whether to code a 0 or 1 decision at each node,
+ * and probMode to determine the probability of coding a 0
+ * decision (1 decision probability is then computed as
+ * (1 minus zero-decision-prob)).
+ *
+ * SPECIAL NOTES : Uses VP6_EncodeBool to encode the bits to the bitstream.
+ *
+ ****************************************************************************/
+void encodeMode ( CP_INSTANCE *cpi, CODING_MODE lastmode, CODING_MODE mode, UINT32 type )
+{
+ UINT8 Stat;
+ UINT8 i = 0; // column index into ModeCodeArray[mode]
+ UINT8 node = 0; // current node in the CodingMode tree, starting at the root
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MODE_SECTION;
+#endif
+
+ if ( mode == lastmode )
+ {
+ VP6_EncodeBool ( &cpi->bc, 1, cpi->pb.probModeSame[type][lastmode] ); // 1 = mode unchanged; nothing more is coded
+ }
+ else
+ {
+ VP6_EncodeBool( &cpi->bc, 0, cpi->pb.probModeSame[type][lastmode] ); // 0 = mode differs; tree decisions follow
+
+ while ( ModeCodeArray[mode][i] != 9 ) // 9 marks the end of this mode's decision list
+ {
+ Stat = cpi->pb.probMode[type][lastmode][node]; // probability used for the decision at this node
+
+ VP6_EncodeBool ( &cpi->bc, ModeCodeArray[mode][i], (int)Stat );
+
+ if ( ModeCodeArray[mode][i] == 0 )
+ node = CodingMode[node].left.value;
+ else
+ node = CodingMode[node].right.value;
+ i++;
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeModeTest
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE lastmode : Mode of the last coded macroblock.
+ * CODING_MODE mode : Mode we are trying to encode.
+ * UINT32 type : MODE_TYPE (all modes available, nonearest
+ * no near macroblock)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Exactly the same functionality as encodeMode above,
+ * _but_ rather than outputting bits to the bitstream,
+ * BitCounter in cpi->pb is incremented by an estimate
+ * of the number of bits required.
+ *
+ * SPECIAL NOTES : Uses VP6_EncodeBool2 to get an estimate of the number
+ * of bits that will be generated.
+ *
+ ****************************************************************************/
+void encodeModeTest ( CP_INSTANCE *cpi, CODING_MODE lastmode, CODING_MODE mode, UINT32 type )
+{
+ UINT8 Stat;
+ UINT8 i = 0; // column index into ModeCodeArray[mode]
+ UINT8 node = 0; // current node in the CodingMode tree, starting at the root
+
+ if ( mode==lastmode )
+ {
+ VP6_EncodeBool2 ( &cpi->bc, 1, cpi->pb.probModeSame[type][lastmode] ); // same decisions as encodeMode, but via VP6_EncodeBool2
+ }
+ else
+ {
+ VP6_EncodeBool2 ( &cpi->bc, 0, cpi->pb.probModeSame[type][lastmode] );
+
+ while ( ModeCodeArray[mode][i] != 9 ) // 9 marks the end of this mode's decision list
+ {
+ Stat = cpi->pb.probMode[type][lastmode][node]; // probability used for the decision at this node
+
+ VP6_EncodeBool2 ( &cpi->bc, ModeCodeArray[mode][i], (int)Stat );
+
+ if ( ModeCodeArray[mode][i] == 0 )
+ node = CodingMode[node].left.value;
+ else
+ node = CodingMode[node].right.value;
+ i++;
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeModeDiff
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * int diff : Probability difference value to encode.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Takes a differential probability value in the range
+ * -256 to +256 in steps of 4 and encodes it using a fixed
+ * tree with hard-coded probabilities.
+ *
+ * SPECIAL NOTES : The hard coded probabilities for the difference tree
+ * were calculated by taking the average number of times a
+ * branch was taken on some sample material i.e.
+ * (bond, bike, beautifulmind).
+ *
+ ****************************************************************************/
+void encodeModeDiff ( CP_INSTANCE *cpi, int diff )
+{
+ if ( diff==0 )
+ {
+ // 0 difference
+ VP6_EncodeBool ( &cpi->bc, 0, 205 );
+ }
+ else
+ {
+ // Non-0
+ VP6_EncodeBool ( &cpi->bc, 1, 205 );
+
+ // transmit sign of difference
+ VP6_EncodeBool ( &cpi->bc, diff<0, 128 ); // 1 = negative
+
+ // go to abs value
+ diff = abs(diff);
+
+ if ( diff<12 )
+ {
+ VP6_EncodeBool ( &cpi->bc, 0, 171 );
+ VP6_EncodeBool ( &cpi->bc, diff==4, 83 ); // 1 = 4; 0 = the other value below 12 (8, given steps of 4)
+ }
+ else
+ {
+ VP6_EncodeBool ( &cpi->bc, 1, 171 );
+
+ if ( diff<28 )
+ {
+ VP6_EncodeBool ( &cpi->bc, 0, 199 );
+ VP6_EncodeBool ( &cpi->bc, diff==12, 140 ); // cascade: 12, then 16, then 20; a final 0 leaves 24 (given steps of 4)
+ if ( diff>12 )
+ {
+ VP6_EncodeBool ( &cpi->bc, diff==16, 125 );
+ if ( diff>16 )
+ VP6_EncodeBool ( &cpi->bc, diff==20, 104 );
+ }
+ }
+ else
+ {
+ VP6_EncodeBool ( &cpi->bc, 1, 199 );
+ AddBitsToBuffer ( &cpi->bc, diff>>2, 7 ); // 28 and above: send |diff|/4 as 7 raw bits
+ }
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : estimateModeDiffCost
+ *
+ * INPUTS : int diff : Probability difference value to encode.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : int: Estimated cost of coding diff (a raw bit is costed at 64).
+ *
+ * FUNCTION : Same as encodeModeDiff above but rather than outputting
+ * bits to the bitstream it estimates the number of bits
+ * that will be generated.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int estimateModeDiffCost ( int diff )
+{
+    int cost = 0;
+    int magnitude;
+
+    // Zero difference: only the "0" branch of the first flag is paid for.
+    if ( diff==0 )
+    {
+        cost += (VP6_ProbCost[205]+128) >> 8;
+        return cost;
+    }
+
+    // Non-zero flag plus the sign, which is charged a flat 64 (one bit).
+    cost += (VP6_ProbCost[255-205]+128) >> 8;
+    cost += 64;
+    magnitude = abs(diff);
+
+    // Magnitudes below 12: range flag plus the "is it 4" flag.
+    if ( magnitude<12 )
+    {
+        cost += (VP6_ProbCost[171]+128) >> 8;
+        cost += (VP6_ProbCost[(magnitude==4) ? (255-83) : 83]+128) >> 8;
+        return cost;
+    }
+    cost += (VP6_ProbCost[255-171]+128) >> 8;
+
+    // Magnitudes of 28 and above: escape flag plus 7 raw bits at 64 each.
+    if ( magnitude>=28 )
+    {
+        cost += (VP6_ProbCost[255-199]+128) >> 8;
+        cost += 7*64;
+        return cost;
+    }
+    cost += (VP6_ProbCost[199]+128) >> 8;
+
+    // Remaining magnitudes: the walk stops at the first of 12, 16, 20 that matches.
+    if ( magnitude==12 )
+    {
+        cost += (VP6_ProbCost[255-140]+128) >> 8;
+        return cost;
+    }
+    cost += (VP6_ProbCost[140]+128) >> 8;
+
+    if ( magnitude==16 )
+    {
+        cost += (VP6_ProbCost[255-125]+128) >> 8;
+        return cost;
+    }
+    cost += (VP6_ProbCost[125]+128) >> 8;
+
+    cost += (VP6_ProbCost[(magnitude==20) ? (255-104) : 104]+128) >> 8;
+    return cost;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateModeProbs
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Determines which probabilities to transmit and
+ * use for encoding macroblock modes, and then
+ * transmits the information necessary to decode the
+ * probabilities.
+ *
+ * a) Pick the lowest cost vector we have available
+ * b) Compare it to what we used in the last frame
+ * c) Determine if it makes sense to update the vector
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void UpdateModeProbs ( CP_INSTANCE *cpi )
+{
+    int i, j, k;
+    int diff;
+    int costToIdeal;
+    unsigned int thisCost, bestCost;
+    unsigned int lowestCost = 0x7fffffff;
+    unsigned int lastCost = 0x7fffffff;    // stays at this sentinel in error resilient mode
+    unsigned int whichVector = 0;
+    UINT32 total, round;
+    UINT32 ideal;
+    UINT8 proposedProb[2][10];
+    PB_INSTANCE *pbi = &cpi->pb;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MODE_SECTION;
+#endif
+
+    // For each mode type (all modes available, no nearest, no near mode)
+    for ( j=0; j<MODETYPES; j++ )
+    {
+        // Sum the per-mode counts into the extra [MAX_MODES] slot of each table.
+        // All three totals are reset first: CountModeDiffFrLast feeds "total"
+        // below, so a value left over from an earlier call would skew every
+        // ideal probability computed for this mode type.
+        cpi->MBModeCount[j][MAX_MODES] = 0;
+        cpi->CountModeSameAsLast[j][MAX_MODES] = 0;
+        cpi->CountModeDiffFrLast[j][MAX_MODES] = 0;
+        for ( i=0; i<MAX_MODES; i++ )
+        {
+            cpi->MBModeCount[j][MAX_MODES] += cpi->MBModeCount[j][i];
+            cpi->CountModeSameAsLast[j][MAX_MODES] += cpi->CountModeSameAsLast[j][i];
+            cpi->CountModeDiffFrLast[j][MAX_MODES] += cpi->CountModeDiffFrLast[j][i];
+        }
+
+        // Estimate the cost of using the cheapest vector from our vq codebook.
+        // Even entries of a codebook vector pair with the same-as-last counts,
+        // odd entries with the different-from-last counts.
+        whichVector = 0;
+        lowestCost = 0x7fffffff;
+        for ( k=0; k<MODEVECTORS; k++ )
+        {
+            thisCost = 0;
+            for ( i=0; i<MAX_MODES; i++ )
+            {
+                thisCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[VP6_ModeVq[j][k][i*2]] +128)>>8);
+                thisCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[VP6_ModeVq[j][k][i*2+1]]+128)>>8);
+            }
+            if ( thisCost<lowestCost )
+            {
+                whichVector = k;
+                lowestCost = thisCost;
+            }
+        }
+
+        // In the error resilient mode / VC mode we discount the "last frame values" as
+        // a candidate vector in order to improve the resilience to dropped/corrupt frames.
+        if ( !cpi->ErrorResilliantMode )
+        {
+            // Estimate the cost of using the vector we have from the last frame
+            lastCost = 0;
+            for ( i=0; i<MAX_MODES; i++ )
+            {
+                lastCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[pbi->probXmitted[j][1][i]] + 128)>>8);
+                lastCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[pbi->probXmitted[j][0][i]] + 128)>>8);
+            }
+        }
+
+        // If the best from our vq book + the cost of transmitting the vector is cheaper
+        // than our current vector use it. OR... if we are running in error resilient mode.
+        if( cpi->ErrorResilliantMode ||
+            ( (lastCost / 64) > (((VP6_ProbCost[255-PROBVECTORXMIT]+128)>>8) + lowestCost) / 64 + 4 ) /* for the vector itself */ )
+        {
+            // Transmit that we are transmitting a new vector
+            VP6_EncodeBool ( &cpi->bc,1,PROBVECTORXMIT );
+
+            // Transmit which vector to use here
+            AddBitsToBuffer ( &cpi->bc, whichVector, 4 );
+
+            // Adopt the chosen codebook vector as the current probabilities
+            for ( i=0; i<MAX_MODES; i++ )
+            {
+                pbi->probXmitted[j][1][i] = VP6_ModeVq[j][whichVector][i*2];
+                pbi->probXmitted[j][0][i] = VP6_ModeVq[j][whichVector][i*2+1];
+            }
+        }
+        else
+        {
+            // The baseline for the "ideal" comparison below is the reused vector
+            lowestCost = lastCost;
+
+            // Transmit that we are reusing the last vector
+            VP6_EncodeBool ( &cpi->bc, 0, PROBVECTORXMIT );
+        }
+
+        // Calculate the ideal vector and how much it would cost to go to it.
+        bestCost = 0;
+        costToIdeal = 0;
+        total = 1 + cpi->CountModeSameAsLast[j][MAX_MODES]+cpi->CountModeDiffFrLast[j][MAX_MODES];
+        round = total/2;
+        for ( i=0; i<10; i++ )
+        {
+            // What's the ideal probability. With rounding the quotient can reach
+            // 256 when one count dominates; clamp it so that storing it in a
+            // UINT8 cannot wrap round to 0.
+            ideal = (round+256*cpi->CountModeSameAsLast[j][i]) / total;
+            if ( ideal>255 )
+                ideal = 255;
+            proposedProb[1][i] = (UINT8)ideal;
+
+            // Calculate the truncated (multiple of 4) difference between the ideal and where we are now
+            diff = 4*((proposedProb[1][i] - pbi->probXmitted[j][1][i]) / 4);
+            costToIdeal += estimateModeDiffCost(diff);
+            diff += pbi->probXmitted[j][1][i];
+            proposedProb[1][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+            // Update the cost of our ideal choice and of moving to our ideal values
+            bestCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[proposedProb[1][i]]+128)>>8);
+
+            // Same again for the different-from-last probability
+            ideal = (round+256*cpi->CountModeDiffFrLast[j][i]) / total;
+            if ( ideal>255 )
+                ideal = 255;
+            proposedProb[0][i] = (UINT8)ideal;
+
+            // Calculate the truncated (multiple of 4) difference between the ideal and where we are now
+            diff = 4*((proposedProb[0][i] - pbi->probXmitted[j][0][i]) / 4);
+            costToIdeal += estimateModeDiffCost(diff);
+            diff += pbi->probXmitted[j][0][i];
+            proposedProb[0][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+            // Update the cost of our ideal choice and of moving to our ideal values
+            bestCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[proposedProb[0][i]]+128)>>8);
+        }
+
+        // If updating our vector to be closer to the ideal is cheaper than going with what we have now
+        if ( (costToIdeal + bestCost + ((VP6_ProbCost[255-PROBIDEALXMIT]+128)>>8)) / 64 < lowestCost / 64 )
+        {
+            // Transmit that we are updating the mode probabilities
+            VP6_EncodeBool ( &cpi->bc, 1, PROBIDEALXMIT );
+
+            // Encode the differences and move the current values to the proposed ones
+            for ( i=0; i<10; i++ )
+            {
+                diff = proposedProb[1][i]-pbi->probXmitted[j][1][i];
+                encodeModeDiff(cpi,diff);
+                diff += pbi->probXmitted[j][1][i];
+                pbi->probXmitted[j][1][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+                diff = proposedProb[0][i]- pbi->probXmitted[j][0][i];
+                encodeModeDiff(cpi,diff);
+                diff += pbi->probXmitted[j][0][i];
+                pbi->probXmitted[j][0][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+            }
+        }
+        else
+        {
+            // Transmit that we are not updating the mode probabilities
+            VP6_EncodeBool ( &cpi->bc, 0, PROBIDEALXMIT );
+        }
+    }
+
+    // Rebuild the mode tree from the (possibly updated) probabilities
+    VP6_BuildModeTree ( &cpi->pb );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeModeAndMotionVector
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : MB row.
+ * UINT32 MBcol : MB column.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a macroblock's mode and motion vectors to
+ * the bitstream.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void encodeModeAndMotionVector ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol )
+{
+    UINT32 blk;
+    int type, type2;
+    CODING_MODE mode;
+    UINT32 Frags[4];
+    PB_INSTANCE *pbi = &cpi->pb;
+    int width = pbi->HFragments;
+    UINT32 FragIndex = (MBrow-BORDER_MBS)*width*2 + (MBcol-BORDER_MBS)*2;
+
+    // Both lookups are made before any coding; only the first result (type)
+    // is consumed below, by encodeMode.
+    VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 1, &type );
+    VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 2, &type2 );
+
+    // Code the macroblock mode relative to the previously coded one
+    mode = pbi->predictionMode[MBOffset(MBrow,MBcol)];
+    encodeMode ( cpi, pbi->LastMode, mode, type );
+    pbi->LastMode = mode;
+
+    // Some modes carry extra data: motion vectors and/or per-block sub modes
+    switch ( mode )
+    {
+    case CODE_INTER_PLUS_MV:
+    case CODE_GOLDEN_MV:
+        // A single vector, taken from the first block of the macroblock
+        encodeMotionVector ( cpi, pbi->FragInfo[FragIndex].MVectorX, pbi->FragInfo[FragIndex].MVectorY, mode );
+        break;
+
+    case CODE_INTER_FOURMV:
+        // The four blocks of the macroblock: two on this fragment row, two on the next
+        Frags[0] = FragIndex;
+        Frags[1] = FragIndex+1;
+        Frags[2] = FragIndex+pbi->HFragments;
+        Frags[3] = FragIndex+pbi->HFragments+1;
+
+        // First all four sub mode decisions...
+        for ( blk=0; blk<4; blk++ )
+            encodeBlockMode ( cpi, pbi->FragInfo[Frags[blk]].FragCodingMode );
+
+        // ...then a vector for each block that is coded with a new MV
+        for ( blk=0; blk<4; blk++ )
+        {
+            if ( pbi->FragInfo[Frags[blk]].FragCodingMode==CODE_INTER_PLUS_MV )
+                encodeMotionVector ( cpi, pbi->FragInfo[Frags[blk]].MVectorX, pbi->FragInfo[Frags[blk]].MVectorY, CODE_INTER_PLUS_MV );
+        }
+        break;
+
+    default:
+        // Every other mode needs nothing beyond the mode itself
+        break;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EstimateCost
+ *
+ * INPUTS : BOOL_CODER *bc : Pointer to a BoolCoder (UNUSED).
+ * HUFF_NODE *hn : Pointer to a Huffman tree.
+ * int value : Value to be encoded.
+ * int length : Length in bits of value.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Cost of coding value (in bits*64).
+ *
+ * FUNCTION : Computes the cost of coding value bit-by-bit using
+ * the Huffman tree specified.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 VP6_EstimateCost ( BOOL_CODER *bc, HUFF_NODE *hn, int value, int length )
+{
+    UINT32 cost = 0;
+    int cur = 0;        // index of the current tree node, starting at hn[0]
+    int bit = length;
+
+    (void)bc;           // not referenced by this routine
+
+    // Consume the bits of value from the most significant (length-1) down to 0
+    while ( bit-- > 0 )
+    {
+        if ( (value>>bit) & 1 )
+        {
+            // A set bit pays the complementary probability and follows the right link
+            cost += (VP6_ProbCost[255-hn[cur].freq]+128)>>8;
+            cur = hn[cur].rightunion.right.value;
+        }
+        else
+        {
+            // A clear bit pays the node probability and follows the left link
+            cost += (VP6_ProbCost[hn[cur].freq]+128)>>8;
+            cur = hn[cur].leftunion.left.value;
+        }
+    }
+    return cost;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : modeCost
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 MBrow : MB row.
+ * UINT32 MBcol : MB column.
+ * CODING_MODE mode : Mode to be costed.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Cost of coding mode (in bits*64).
+ *
+ * FUNCTION : Computes the cost of coding mode (in bits*64).
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 modeCost ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol, CODING_MODE mode )
+{
+    int type;
+    CODING_MODE lastmode = CODE_INTER_NO_MV;   // used for the very first macroblock
+    PB_INSTANCE *pbi = &cpi->pb;
+    int width = pbi->HFragments;               // NOTE(review): not referenced directly in this routine
+
+    // The result in type is not used by the cost lookup below
+    VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 1, &type );
+
+    // The predecessor is the MB to the left or, at the start of a row,
+    // the last MB of the row above.
+    if ( MBcol!=BORDER_MBS )
+        lastmode = pbi->predictionMode[MBOffset(MBrow,MBcol-1)];
+    else if ( MBrow!=BORDER_MBS )
+        lastmode = pbi->predictionMode[MBOffset(MBrow-1,pbi->MBCols - (BORDER_MBS+1))];
+
+    // Row 0 of the table is "same as last mode", row 1 is "different"
+    return cpi->EstModeCost[(lastmode!=mode) ? 1 : 0][mode];
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : blockModeCost
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance (NOT USED).
+ * UINT32 i : Undefined (NOT USED).
+ * UINT32 j : Undefined (NOT USED).
+ * CODING_MODE mode : Mode to be costed (NOT USED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Cost of coding mode (in bits*64).
+ *
+ * FUNCTION : Computes the cost of coding mode (in bits*64).
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 blockModeCost ( CP_INSTANCE *cpi, UINT32 i, UINT32 j, CODING_MODE mode )
+{
+    // Every block mode inside a four-MV macroblock costs 2 bits;
+    // costs are expressed in units of bits * 64, and no argument is consulted.
+    const UINT32 BlockModeBits = 2;
+
+    return BlockModeBits * 64;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : BuildModeCostEstimates
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Compute an estimate of the cost of encoding each mode.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ***************************************************************************/
+void BuildModeCostEstimates ( CP_INSTANCE *cpi )
+{
+    int mode;
+    int otherMode;
+
+    for ( mode=0; mode<MAX_MODES; mode++ )
+    {
+        // Case 1: the previous mode matches the mode being costed
+        cpi->bc.BitCounter = 0;
+        encodeModeTest ( cpi, mode, mode, MACROBLOCK );
+        cpi->EstModeCost[0][mode] = (cpi->bc.BitCounter) >> 2;
+
+        // Case 2: the previous mode differs. Mode 0 stands in for "some other
+        // mode", except when costing mode 0 itself, where mode 1 is used.
+        otherMode = ( mode==0 ) ? 1 : 0;
+        cpi->bc.BitCounter = 0;
+        encodeModeTest ( cpi, otherMode, mode, MACROBLOCK );
+        cpi->EstModeCost[1][mode] = (cpi->bc.BitCounter) >> 2;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h
new file mode 100644
index 00000000..a8a6fa65
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h
@@ -0,0 +1,24 @@
+/****************************************************************************
+*
+* Module Title : encodemode.h
+*
+* Description : Functions for encoding modes and Motion Vectors.
+*
+****************************************************************************/
+#ifndef __INC_ENCODEMODE_H
+#define __INC_ENCODEMODE_H
+
+#ifndef STRICT
+#define STRICT /* Strict type checking */
+#endif
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern void encodeModeAndMotionVector(CP_INSTANCE* cpi, UINT32 MBrow, UINT32 MBcol); /* Writes one macroblock's mode and any motion vectors to the bitstream. */
+extern void UpdateModeProbs(CP_INSTANCE *cpi); /* Chooses the mode probabilities for the frame and transmits what is needed to decode them. */
+extern UINT32 modeCost(CP_INSTANCE *cpi,UINT32 i,UINT32 j,CODING_MODE mode); /* Cost (bits*64) of coding mode; i and j are the MB row and column. */
+extern UINT32 blockModeCost(CP_INSTANCE *cpi,UINT32 i,UINT32 j,CODING_MODE mode); /* Cost (bits*64) of a block mode; the arguments are not used. */
+extern void BuildModeCostEstimates( CP_INSTANCE *cpi ); /* Fills cpi->EstModeCost with per-mode cost estimates. */
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c
new file mode 100644
index 00000000..080cf07f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c
@@ -0,0 +1,720 @@
+/****************************************************************************
+*
+* Module Title : encodemv.c
+*
+* Description : Functions for encoding modes and motion vectors
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "boolhuff.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+// Small correction: an update to an MV probability may also benefit subsequent
+// frames, not just the current one. Parenthesized so it expands safely in any expression.
+#define MV_PROB_UPDATE_CORECTION (-1)
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void AddBitsToBuffer ( BOOL_CODER *bc, UINT32 data, UINT32 bits );
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeMotionVectorComponent
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * int i : Selector as to what set of probs to use.
+ * INT32 Vector : MV component to be coded.
+ * INT32 MvOffset : Reference value to code Vector from.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a motion vector component either outputting
+ * bits to the bitstream _or_ updating BitCounter in
+ * cpi->bc with the estimated cost.
+ *
+ * SPECIAL NOTES : cpi->bc.MeasureCost determines whether bits are
+ * generated to the bitstream or not.
+ *
+ ****************************************************************************/
+void encodeMotionVectorComponent ( CP_INSTANCE *cpi, int i, INT32 Vector, INT32 MvOffset )
+{
+    INT32 Magnitude;
+    int Negative;
+    int Hi, Mid, Lo;
+    int Bit;
+    void (*CodeBool)( BOOL_CODER *, int, int );
+
+    // Write real bits by default; use the alternate coder when only measuring cost
+    CodeBool = VP6_EncodeBool;
+    if ( cpi->bc.MeasureCost )
+        CodeBool = VP6_EncodeBool2;
+
+    // The component is coded as a difference from MvOffset, split into sign and magnitude
+    Magnitude = Vector - MvOffset;
+    Negative = ( Magnitude < 0 ) ? 1 : 0;
+    if ( Negative )
+        Magnitude = -Magnitude;
+
+    if ( Magnitude > 7 )
+    {
+        // Indicate that we have a larger vector
+        CodeBool ( &cpi->bc, 1, cpi->pb.IsMvShortProb[i] );
+
+        // The three low order bits (QPel, HPel, Pel), lowest first
+        for ( Bit=0; Bit<3; Bit++ )
+            CodeBool ( &cpi->bc, ((Magnitude & (1<<Bit)) ? 1 : 0), cpi->pb.MvSizeProbs[i][Bit] );
+
+        // At least one of bits 3..7 must be set (or this would be a short vector).
+        // Bits 7 down to 4 are coded first, from least to most likely to be set.
+        for ( Bit=7; Bit>3; Bit-- )
+            CodeBool ( &cpi->bc, ((Magnitude & (1<<Bit)) ? 1 : 0), cpi->pb.MvSizeProbs[i][Bit] );
+
+        // Bit 3 is implicit when none of bits 4..7 was set, so code it only otherwise
+        if ( Magnitude & 0xF0 )
+            CodeBool ( &cpi->bc, ((Magnitude & 0x08) ? 1 : 0), cpi->pb.MvSizeProbs[i][3] );
+
+        // Code the sign bit
+        CodeBool ( &cpi->bc, Negative, cpi->pb.MvSignProbs[i] );
+        return;
+    }
+
+    // Small vector component: magnitude 0..7 coded with a three level binary tree
+    CodeBool ( &cpi->bc, 0, cpi->pb.IsMvShortProb[i] );
+
+    Hi  = ( Magnitude & 0x04 ) ? 1 : 0;
+    Mid = ( Magnitude & 0x02 ) ? 1 : 0;
+    Lo  = ( Magnitude & 0x01 ) ? 1 : 0;
+
+    // Node 0 is the root; the second level uses node 1 (Hi clear) or node 4 (Hi set);
+    // the third level uses node 2, 3, 5 or 6 depending on the path taken.
+    CodeBool ( &cpi->bc, Hi,  cpi->pb.MvShortProbs[i][0] );
+    CodeBool ( &cpi->bc, Mid, cpi->pb.MvShortProbs[i][1 + 3*Hi] );
+    CodeBool ( &cpi->bc, Lo,  cpi->pb.MvShortProbs[i][2 + Mid + 3*Hi] );
+
+    // A zero component has no sign
+    if ( Magnitude > 0 )
+        CodeBool ( &cpi->bc, Negative, cpi->pb.MvSignProbs[i] );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : encodeMotionVector
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * INT32 MVectorX : MV x-component to be coded.
+ * INT32 MVectorY : MV y-component to be coded.
+ * CODING_MODE Mode : Coding mode for corresponding MB/Block.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a motion vector to the bitstream.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void encodeMotionVector ( CP_INSTANCE *cpi, INT32 MVectorX, INT32 MVectorY, CODING_MODE Mode )
+{
+    INT32 OriginX = 0;
+    INT32 OriginY = 0;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MV_SECTION;
+#endif
+
+    // Pick the origin the vector is coded against. It stays at (0,0) unless
+    // the relevant nearest-vector index is below MAX_NEAREST_ADJ_INDEX.
+    if ( Mode == CODE_INTER_PLUS_MV && pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        // Normal Inter MV: offset by the nearest inter vector
+        OriginX = pbi->mbi.NearestInterMVect.x;
+        OriginY = pbi->mbi.NearestInterMVect.y;
+    }
+    else if ( Mode != CODE_INTER_PLUS_MV && pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        // Golden Frame MV: offset by the nearest golden vector
+        OriginX = pbi->mbi.NearestGoldMVect.x;
+        OriginY = pbi->mbi.NearestGoldMVect.y;
+    }
+
+    // X uses probability set 0, Y uses probability set 1
+    encodeMotionVectorComponent ( cpi, 0, MVectorX, OriginX );
+    encodeMotionVectorComponent ( cpi, 1, MVectorY, OriginY );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CalculateMvNodeProbabilities
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Build the MV entropy coding tree.
+ *
+ * SPECIAL NOTES : None.
+ *
+***************************************************************************/
+// Adds Count occurrences of a non-zero component magnitude AbsVector to the
+// short/long split and to either the short magnitude histogram (AbsVector < 8)
+// or the per-bit histograms for bits 0..7 of a long vector.
+static void TallyMvMagnitude ( UINT32 *ShortDist, UINT32 *ShortSizeDist, UINT32 (*SizeDist)[2], INT32 AbsVector, UINT32 Count )
+{
+    INT32 Bit;
+
+    if ( AbsVector < 8 )
+    {
+        ShortDist[0] += Count;                  // Short vector
+        ShortSizeDist[AbsVector] += Count;      // Magnitude distribution
+        return;
+    }
+
+    ShortDist[1] += Count;                      // Long vector
+    for ( Bit=0; Bit<8; Bit++ )
+        SizeDist[Bit][(AbsVector & (1 << Bit)) ? 1 : 0] += Count;
+}
+
+// Converts a zero-branch hit count out of a non-zero total into a node
+// probability: scaled to 255, forced even, and never less than 1.
+static INT32 MvNodeProbability ( UINT32 ZeroHits, UINT32 TotalHits )
+{
+    INT32 Prob = (ZeroHits * 255) / TotalHits;
+
+    Prob &= ~0x01;
+    if ( Prob < 1 )
+        Prob = 1;
+    return Prob;
+}
+
+void CalculateMvNodeProbabilities ( CP_INSTANCE *cpi )
+{
+    UINT32 Total;
+    UINT32 Count;
+    INT32 i;
+    INT32 j;
+
+    UINT32 NodeTotal[7];
+    UINT32 NodeZeros[7];
+    UINT32 MvShortDist[2][2];
+    UINT32 MvShortSizeDist[2][8];
+    UINT32 MvSignDist[2][2];
+    UINT32 MvSizeDist[2][LONG_MV_BITS][2];
+
+    memset( MvShortDist, 0, sizeof(MvShortDist) );
+    memset( MvShortSizeDist, 0, sizeof(MvShortSizeDist) );
+    memset( MvSizeDist, 0, sizeof(MvSizeDist) );
+    memset( MvSignDist, 0, sizeof(MvSignDist) );
+
+    // Gather the distributions for the MV nodes from the baseline histogram,
+    // whose centre entry (MV_ENTROPY_TOKENS >> 1) is the zero component.
+    cpi->FrameMvCount = 0;
+    for ( i=0; i<2; i++ )
+    {
+        Total = 0;
+
+        // -ve vectors
+        for ( j=-(MV_ENTROPY_TOKENS >> 1); j<0; j++ )
+        {
+            Count = cpi->MvBaselineDist[i][(MV_ENTROPY_TOKENS >> 1) + j];
+            MvSignDist[i][1] += Count;
+            TallyMvMagnitude ( MvShortDist[i], MvShortSizeDist[i], MvSizeDist[i], -j, Count );
+            Total += Count;
+        }
+
+        // Zero vector component: short, magnitude 0, no sign
+        Count = cpi->MvBaselineDist[i][(MV_ENTROPY_TOKENS >> 1)];
+        MvShortDist[i][0] += Count;
+        MvShortSizeDist[i][0] += Count;
+        Total += Count;
+
+        // +ve vectors
+        for ( j=1; j<(MV_ENTROPY_TOKENS >> 1); j++ )
+        {
+            Count = cpi->MvBaselineDist[i][(MV_ENTROPY_TOKENS >> 1) + j];
+            MvSignDist[i][0] += Count;
+            TallyMvMagnitude ( MvShortDist[i], MvShortSizeDist[i], MvSizeDist[i], j, Count );
+            Total += Count;
+        }
+    }
+    cpi->FrameMvCount = Total;  // Note that Total is reset to 0 for each "i" above
+
+    // Convert the distributions to node probabilities. A node with no hits at
+    // all is skipped, leaving its New* probability and hit counts untouched.
+    for ( i=0; i<2; i++ )       // X and Y
+    {
+        const UINT32 *S = MvShortSizeDist[i];
+
+        // Short versus long
+        Total = MvShortDist[i][0] + MvShortDist[i][1];
+        if ( Total>0 )
+        {
+            cpi->NewIsMvShortProb[i] = MvNodeProbability ( MvShortDist[i][0], Total );
+            cpi->NewIsMvShortHits[i][0] = MvShortDist[i][0];
+            cpi->NewIsMvShortHits[i][1] = Total - MvShortDist[i][0];
+        }
+
+        // Sign
+        Total = MvSignDist[i][0] + MvSignDist[i][1];
+        if ( Total>0 )
+        {
+            cpi->NewMvSignProbs[i] = MvNodeProbability ( MvSignDist[i][0], Total );
+            cpi->NewMvSignHits[i][0] = MvSignDist[i][0];
+            cpi->NewMvSignHits[i][1] = Total - MvSignDist[i][0];
+        }
+
+        // Tree nodes for short vectors: hits reaching each node and hits taking its zero branch
+        NodeZeros[0] = S[0] + S[1] + S[2] + S[3];                   // Node 0 Low
+        NodeTotal[0] = NodeZeros[0] + S[4] + S[5] + S[6] + S[7];
+        NodeZeros[1] = S[0] + S[1];                                 // Node 1 LowLow
+        NodeTotal[1] = NodeZeros[0];
+        NodeZeros[2] = S[0];                                        // Node 2 LowLowLow
+        NodeTotal[2] = NodeZeros[1];
+        NodeZeros[3] = S[2];                                        // Node 3 LowHighLow
+        NodeTotal[3] = S[2] + S[3];
+        NodeZeros[4] = S[4] + S[5];                                 // Node 4 HighLow
+        NodeTotal[4] = S[4] + S[5] + S[6] + S[7];
+        NodeZeros[5] = S[4];                                        // Node 5 HighLowLow
+        NodeTotal[5] = NodeZeros[4];
+        NodeZeros[6] = S[6];                                        // Node 6 HighLowHigh
+        NodeTotal[6] = S[6] + S[7];
+
+        for ( j=0; j<7; j++ )
+        {
+            if ( NodeTotal[j] )
+            {
+                cpi->NewMvShortProbs[i][j] = MvNodeProbability ( NodeZeros[j], NodeTotal[j] );
+                cpi->NewMvShortHits[i][j][0] = NodeZeros[j];
+                cpi->NewMvShortHits[i][j][1] = NodeTotal[j] - NodeZeros[j];
+            }
+        }
+
+        // Long vectors: one probability per magnitude bit
+        for ( j=0; j<LONG_MV_BITS; j++ )
+        {
+            Total = MvSizeDist[i][j][0] + MvSizeDist[i][j][1];
+            if ( Total )
+            {
+                cpi->NewMvSizeProbs[i][j] = MvNodeProbability ( MvSizeDist[i][j][0], Total );
+                cpi->NewMvSizeHits[i][j][0] = MvSizeDist[i][j][0];
+                cpi->NewMvSizeHits[i][j][1] = Total - MvSizeDist[i][j][0];
+            }
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : BuildandPackMvTree
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Build the MV entropy coding tree.
+ *
+ * SPECIAL NOTES : None.
+ *
+***************************************************************************/
+void BuildandPackMvTree ( CP_INSTANCE *cpi )
+{
+    INT32 i;
+    INT32 j;
+    INT32 Slot;         // Index into VP6_MvUpdateProbs[i][] for the node being considered
+    INT32 NewProb;
+    INT32 OldProb;
+    INT32 NewBits;
+    INT32 OldBits;
+    INT32 Threshold;    // Saving that an update must exceed to be worth sending
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MV_SECTION;
+#endif
+
+    // Work out candidate probabilities and hit counts for every MV node
+    CalculateMvNodeProbabilities ( cpi );
+
+    // Every node below follows the same pattern: cost this frame's hits with the
+    // old and the new probability, and send the new probability only when the
+    // saving exceeds the cost of the update. A flag is always sent per node.
+
+    // Short-vector flag and sign probabilities (update slots 0 and 1)
+    for ( i=0; i<2; i++ )
+    {
+        // Is-short probability
+        NewProb = cpi->NewIsMvShortProb[i];
+        OldProb = cpi->pb.IsMvShortProb[i];
+        OldBits = ((cpi->NewIsMvShortHits[i][0] * VP6_ProbCost[OldProb])/256) +
+                  ((cpi->NewIsMvShortHits[i][1] * VP6_ProbCost[255 - OldProb])/256);
+        NewBits = ((cpi->NewIsMvShortHits[i][0] * VP6_ProbCost[NewProb])/256) +
+                  ((cpi->NewIsMvShortHits[i][1] * VP6_ProbCost[255 - NewProb])/256);
+
+        Threshold = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+        Threshold += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][0]] + 128) / 256;
+        Threshold -= (VP6_ProbCost[VP6_MvUpdateProbs[i][0]] + 128) / 256;
+
+        if ( (OldBits - NewBits) <= Threshold )
+        {
+            VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[i][0] );
+        }
+        else
+        {
+            cpi->pb.IsMvShortProb[i] = NewProb;
+            VP6_EncodeBool ( &cpi->bc, 1, VP6_MvUpdateProbs[i][0] );
+            AddBitsToBuffer ( &cpi->bc, NewProb>>1, PROB_UPDATE_BASELINE_COST );
+        }
+
+        // Sign probability
+        NewProb = cpi->NewMvSignProbs[i];
+        OldProb = cpi->pb.MvSignProbs[i];
+        OldBits = ((cpi->NewMvSignHits[i][0] * VP6_ProbCost[OldProb])/256) +
+                  ((cpi->NewMvSignHits[i][1] * VP6_ProbCost[255 - OldProb])/256);
+        NewBits = ((cpi->NewMvSignHits[i][0] * VP6_ProbCost[NewProb])/256) +
+                  ((cpi->NewMvSignHits[i][1] * VP6_ProbCost[255 - NewProb])/256);
+
+        Threshold = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+        Threshold += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][1]] + 128) / 256;
+        Threshold -= (VP6_ProbCost[VP6_MvUpdateProbs[i][1]] + 128) / 256;
+
+        if ( (OldBits - NewBits) <= Threshold )
+        {
+            VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[i][1] );
+        }
+        else
+        {
+            cpi->pb.MvSignProbs[i] = NewProb;
+            VP6_EncodeBool ( &cpi->bc, 1, VP6_MvUpdateProbs[i][1] );
+            AddBitsToBuffer ( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+        }
+    }
+
+    // Short vector tree nodes: update slots 2..8, X then Y
+    for ( i=0; i<2; i++ )
+    {
+        for ( j=0; j<7; j++ )
+        {
+            Slot = 2 + j;
+
+            NewProb = cpi->NewMvShortProbs[i][j];
+            OldProb = cpi->pb.MvShortProbs[i][j];
+            OldBits = ((cpi->NewMvShortHits[i][j][0] * VP6_ProbCost[OldProb])/256) +
+                      ((cpi->NewMvShortHits[i][j][1] * VP6_ProbCost[255 - OldProb])/256);
+            NewBits = ((cpi->NewMvShortHits[i][j][0] * VP6_ProbCost[NewProb])/256) +
+                      ((cpi->NewMvShortHits[i][j][1] * VP6_ProbCost[255 - NewProb])/256);
+
+            Threshold = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+            Threshold += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][Slot]] + 128) / 256;
+            Threshold -= (VP6_ProbCost[VP6_MvUpdateProbs[i][Slot]] + 128) / 256;
+
+            if ( (OldBits - NewBits) <= Threshold )
+            {
+                VP6_EncodeBool(&cpi->bc, 0, VP6_MvUpdateProbs[i][Slot] );
+            }
+            else
+            {
+                cpi->pb.MvShortProbs[i][j] = NewProb;
+                VP6_EncodeBool(&cpi->bc, 1, VP6_MvUpdateProbs[i][Slot] );
+                AddBitsToBuffer( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+            }
+        }
+    }
+
+    // Long vector bit probabilities: update slots from 2+7 onwards, X then Y
+    for ( i=0; i<2; i++ )
+    {
+        for ( j=0; j<LONG_MV_BITS; j++ )
+        {
+            Slot = 2 + 7 + j;
+
+            NewProb = cpi->NewMvSizeProbs[i][j];
+            OldProb = cpi->pb.MvSizeProbs[i][j];
+            OldBits = ((cpi->NewMvSizeHits[i][j][0] * VP6_ProbCost[OldProb])/256) +
+                      ((cpi->NewMvSizeHits[i][j][1] * VP6_ProbCost[255 - OldProb])/256);
+            NewBits = ((cpi->NewMvSizeHits[i][j][0] * VP6_ProbCost[NewProb])/256) +
+                      ((cpi->NewMvSizeHits[i][j][1] * VP6_ProbCost[255 - NewProb])/256);
+
+            Threshold = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+            Threshold += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][Slot]] + 128) / 256;
+            Threshold -= (VP6_ProbCost[VP6_MvUpdateProbs[i][Slot]] + 128) / 256;
+
+            if ( (OldBits - NewBits) <= Threshold )
+            {
+                VP6_EncodeBool(&cpi->bc, 0, VP6_MvUpdateProbs[i][Slot] );
+            }
+            else
+            {
+                cpi->pb.MvSizeProbs[i][j] = NewProb;
+                VP6_EncodeBool(&cpi->bc, 1, VP6_MvUpdateProbs[i][Slot] );
+                AddBitsToBuffer( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+            }
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : BuildandPackMvTree2
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Recomputes the MV node probabilities and writes the MV
+ *                  probability update section. For each component the
+ *                  "is short" and sign probabilities are always replaced by
+ *                  the newly computed values and signalled as updated; every
+ *                  short-vector tree node and long-vector bit is signalled
+ *                  as "not updated". This variant is used in unbuffered / VC
+ *                  mode to improve tolerance to dropped frames.
+ *
+ *  SPECIAL NOTES : Slots 0 and 1 of VP6_MvUpdateProbs[][] are used for the
+ *                  "is short" and sign flags, slots 2..8 for the seven short
+ *                  tree nodes and slots 9..16 for the eight long-vector bits.
+ *
+ ***************************************************************************/
+void BuildandPackMvTree2 ( CP_INSTANCE *cpi )
+{
+    INT32 comp;     // Vector component index (two components are coded)
+    INT32 slot;     // Index into VP6_MvUpdateProbs[comp][]
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+    ActiveSection = MV_SECTION;
+#endif
+
+    // Work out the new MV node probabilities
+    CalculateMvNodeProbabilities ( cpi );
+
+    // "Is short" and sign probabilities: always adopt and transmit the new values
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        cpi->pb.IsMvShortProb[comp] = cpi->NewIsMvShortProb[comp];
+        VP6_EncodeBool ( &cpi->bc, 1, VP6_MvUpdateProbs[comp][0] );
+        AddBitsToBuffer ( &cpi->bc, cpi->pb.IsMvShortProb[comp] >> 1, PROB_UPDATE_BASELINE_COST );
+
+        cpi->pb.MvSignProbs[comp] = cpi->NewMvSignProbs[comp];
+        VP6_EncodeBool ( &cpi->bc, 1, VP6_MvUpdateProbs[comp][1] );
+        AddBitsToBuffer ( &cpi->bc, cpi->pb.MvSignProbs[comp] >> 1, PROB_UPDATE_BASELINE_COST );
+    }
+
+    // Short vector tree nodes 0..6 (slots 2..8): flag each as "no update"
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        for ( slot = 2; slot <= 8; slot++ )
+            VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[comp][slot] );
+    }
+
+    // Long vector bits QPel, HPel, Bit1..Bit6 (slots 9..16): flag each as "no update"
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        for ( slot = 9; slot <= 16; slot++ )
+            VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[comp][slot] );
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : BuildMVCostEstimates
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (fills cpi->EstMvCostPtrX[] and cpi->EstMvCostPtrY[]).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Builds per-component cost tables for motion vectors by
+ *                  running the MV component encoder in cost-measuring mode
+ *                  for every value in the range
+ *                  -(MV_ENTROPY_TOKENS/2) .. MV_ENTROPY_TOKENS-1-(MV_ENTROPY_TOKENS/2).
+ *
+ *  SPECIAL NOTES : The tables are indexed with negative values, so the two
+ *                  EstMvCostPtr pointers must address the middle of their
+ *                  underlying storage.
+ *
+ ***************************************************************************/
+void BuildMVCostEstimates ( CP_INSTANCE *cpi )
+{
+    int token;
+    int mvValue;
+
+    // Switch the bool coder into cost measuring mode
+    cpi->bc.MeasureCost = TRUE;
+
+    for ( token = 0; token < MV_ENTROPY_TOKENS; token++ )
+    {
+        // Map the token index onto a signed vector component value
+        mvValue = token - (MV_ENTROPY_TOKENS/2);
+
+        // Component 0: stored cost is the measured counter divided by 4
+        // (original note: this keeps costs at 64 * the actual number of bits)
+        cpi->bc.BitCounter = 0;
+        encodeMotionVectorComponent ( cpi, 0, mvValue, 0 );
+        cpi->EstMvCostPtrX[mvValue] = (cpi->bc.BitCounter) >> 2;
+
+        // Component 1: same scaling
+        cpi->bc.BitCounter = 0;
+        encodeMotionVectorComponent ( cpi, 1, mvValue, 0 );
+        cpi->EstMvCostPtrY[mvValue] = (cpi->bc.BitCounter) >> 2;
+    }
+
+    // Back to normal encoding
+    cpi->bc.MeasureCost = FALSE;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h
new file mode 100644
index 00000000..41422d70
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h
@@ -0,0 +1,23 @@
+/****************************************************************************
+*
+* Module Title : encodemv.h
+*
+* Description : Declarations of the functions used for encoding motion
+*               vectors and packing their probability tables.
+*
+****************************************************************************/
+#ifndef __INC_ENCODEMV_H
+#define __INC_ENCODEMV_H
+
+#ifndef STRICT
+#define STRICT /* Strict type checking */
+#endif
+
+/****************************************************************************
+* Exports
+*
+* This header includes nothing itself: CP_INSTANCE, INT32 and CODING_MODE
+* must already be declared by the including file.
+* BuildandPackMvTree2 is the variant used in unbuffered / VC mode.
+****************************************************************************/
+extern void BuildandPackMvTree( CP_INSTANCE *cpi );
+extern void BuildandPackMvTree2( CP_INSTANCE *cpi );
+extern void BuildMVCostEstimates( CP_INSTANCE *cpi );
+extern void encodeMotionVector ( CP_INSTANCE *cpi, INT32 MVectorX, INT32 MVectorY, CODING_MODE Mode );
+
+#endif /* __INC_ENCODEMV_H */
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c
new file mode 100644
index 00000000..20d023fd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c
@@ -0,0 +1,24 @@
+/****************************************************************************
+*
+* Module Title : FullFrameFDCT.c
+*
+* Description : Compressor functions for block order transmittal
+*
+* AUTHOR : Paul Wilkins
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "misc_common.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "quantize.h"
+extern void PredictBlock ( CP_INSTANCE *cpi, BLOCK_POSITION bp, UINT32 MBrow, UINT32 MBcol );
+extern void PredictDCE( CP_INSTANCE *cpi, BLOCK_POSITION bp);
+
+#if defined FULLFRAMEFDCT
+#endif \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c
new file mode 100644
index 00000000..70fd1072
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c
@@ -0,0 +1,1906 @@
+/****************************************************************************
+*
+* Module Title : mcomp.c
+*
+* Description : Motion compensation functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For Abs()
+#include "mcomp.h"
+#include "compdll.h"
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern INT32 *XX_LUT;
+
+extern void VP6_PredictFiltered(PB_INSTANCE *pbi,UINT8 *SrcPtr,INT32 mx,INT32 my,INT32 MvShift) ;
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define HP_THRESH 0
+
+// The bias towards cheaper motion vectors should be tied to cpi->MVErrorPerBit,
+// but it is not (at least not yet). Setting this to 0 disables the bias altogether.
+#define MVEPBSAD_MULT 1
+#define MVEPBSAD_RSHIFT 2
+#define MVEPBSAD_RSHIFT2 14
+
+/****************************************************************************
+* Exports.
+****************************************************************************/
+UINT32 TotError = 0;
+UINT32 ErrCount = 0;
+
+UINT8 FilteredBlock[256];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitDSMotionCompensation
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (fills cpi->DSMVOffsetX[], cpi->DSMVOffsetY[],
+ *                  cpi->DSMVPixelOffsetY[] and cpi->DSMVSearchSteps).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Builds the search site tables used by the diamond search.
+ *                  Each stage contributes four sites at distance Radius from
+ *                  the centre; Radius starts at (MAX_MV_EXTENT + 1)/4 and is
+ *                  halved per stage until it reaches zero.
+ *
+ *  SPECIAL NOTES : Pixel offsets are computed with the Y plane stride taken
+ *                  from cpi->pb.Configuration.YStride.
+ *
+ ****************************************************************************/
+void InitDSMotionCompensation ( CP_INSTANCE *cpi )
+{
+    // Unit displacements of the four sites in a stage: (0,-1) (-1,0) (1,0) (0,1)
+    static const int UnitX[4] = {  0, -1, 1, 0 };
+    static const int UnitY[4] = { -1,  0, 0, 1 };
+
+    int k;
+    int Radius;
+    int Site   = 0;
+    int Stride = (INT32)cpi->pb.Configuration.YStride;
+
+    // No stages counted yet
+    cpi->DSMVSearchSteps = 0;
+
+    // One stage per radius, contracting by a factor of two each time
+    for ( Radius = (MAX_MV_EXTENT + 1)/4; Radius > 0; Radius /= 2 )
+    {
+        cpi->DSMVSearchSteps += 1;
+
+        for ( k = 0; k < 4; k++ )
+        {
+            cpi->DSMVOffsetX[Site] = UnitX[k] * Radius;
+            cpi->DSMVOffsetY[Site] = UnitY[k] * Radius;
+            Site++;
+        }
+    }
+
+    // Convert each (x,y) site offset into a linear pixel offset
+    for ( k = 0; k < Site; k++ )
+        cpi->DSMVPixelOffsetY[k] = (cpi->DSMVOffsetY[k]*Stride) + cpi->DSMVOffsetX[k];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitMotionCompensation
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (fills the MV search tables and search function
+ *                  pointers held in *cpi).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets up the fractional pel offset tables, the 8-site per
+ *                  stage search tables, the diamond search tables (via
+ *                  InitDSMotionCompensation) and the function pointers used
+ *                  for block and fractional pixel motion search.
+ *
+ *  SPECIAL NOTES : The first stage radius is (MAX_MV_EXTENT + 1)/4 and is
+ *                  halved per stage until it reaches zero.
+ *
+ ****************************************************************************/
+void InitMotionCompensation ( CP_INSTANCE *cpi )
+{
+    // Fractional pel search offsets: centre first, then the 8 neighbours
+    static const int SubPelX[9] = { 0,  0, -1, 1, 0, -1,  1, -1, 1 };
+    static const int SubPelY[9] = { 0, -1,  0, 0, 1, -1, -1,  1, 1 };
+
+    // Unit displacements of the 8 sites in a stage, in raster order
+    static const int UnitX[8] = { -1,  0,  1, -1, 1, -1, 0, 1 };
+    static const int UnitY[8] = { -1, -1, -1,  0, 0,  1, 1, 1 };
+
+    int k;
+    int Radius;
+    int Site   = 0;
+    int Stride = (INT32)cpi->pb.Configuration.YStride;
+
+    // No stages counted yet
+    cpi->MVSearchSteps = 0;
+
+    // Offsets arrays used in fractional pel searches
+    for ( k = 0; k < 9; k++ )
+    {
+        cpi->SubPixelXOffset[k] = SubPelX[k];
+        cpi->SubPixelYOffset[k] = SubPelY[k];
+    }
+
+    // 8 search sites per stage, contracting by a factor of two each time
+    for ( Radius = (MAX_MV_EXTENT + 1)/4; Radius > 0; Radius /= 2 )
+    {
+        cpi->MVSearchSteps += 1;
+
+        for ( k = 0; k < 8; k++ )
+        {
+            cpi->MVOffsetX[Site] = UnitX[k] * Radius;
+            cpi->MVOffsetY[Site] = UnitY[k] * Radius;
+            Site++;
+        }
+    }
+
+    // Convert each (x,y) site offset into a linear pixel offset
+    for ( k = 0; k < Site; k++ )
+        cpi->MVPixelOffsetY[k] = (cpi->MVOffsetY[k]*Stride) + cpi->MVOffsetX[k];
+
+    // Search sites for the diamond search
+    InitDSMotionCompensation(cpi);
+
+    // Function pointers for block motion search and fractional pixel search
+    cpi->FindMvViaSearch        = FindMvVia3StepSearch;
+    cpi->FindBestHalfPixelMv    = FindBestFractionalPixelStep;
+    cpi->FindBestQuarterPixelMv = FindBestFractionalPixelStep;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetMBFrameVerticalVariance
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: vertical variance measure for a macroblock.
+ *
+ *  FUNCTION      : Sums the squared differences between vertically adjacent
+ *                  pixels over 16 columns of the macroblock whose first
+ *                  block starts at pbi->mbi.blockDxInfo[0].Source.
+ *
+ *  SPECIAL NOTES : Rows 0..14 are visited (seven steps of two rows), so the
+ *                  difference between the last two rows of the macroblock
+ *                  is not accounted for. The row pitch used is
+ *                  Configuration.VideoFrameWidth.
+ *
+ ****************************************************************************/
+UINT32 GetMBFrameVerticalVariance ( CP_INSTANCE *cpi )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    INT32  Stride    = pbi->Configuration.VideoFrameWidth;
+    INT32  TwoRows   = Stride*2;
+    UINT8 *Row       = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT32 Total     = 0;
+    UINT32 upper, middle, lower;
+    int    step, col;
+
+    for ( step = 0; step < 7; step++ )
+    {
+        for ( col = 0; col < 16; col++ )
+        {
+            // Three vertically consecutive pixels in this column
+            upper  = Row[col];
+            middle = Row[col+Stride];
+            lower  = Row[col+TwoRows];
+
+            // Unsigned arithmetic: the squares are still exact modulo 2^32
+            Total += (upper-middle)*(upper-middle) + (middle-lower)*(middle-lower);
+        }
+
+        // Advance two rows
+        Row += TwoRows;
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetMBFieldVerticalVariance
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: field vertical variance measure for a macroblock.
+ *
+ *  FUNCTION      : Sums the squared differences between pixels two rows
+ *                  apart (i.e. rows belonging to the same field) over 16
+ *                  columns of the macroblock whose first block starts at
+ *                  pbi->mbi.blockDxInfo[0].Source.
+ *
+ *  SPECIAL NOTES : Rows 0..15 are read in seven steps of two rows. The row
+ *                  pitch used is Configuration.VideoFrameWidth.
+ *
+ ****************************************************************************/
+UINT32 GetMBFieldVerticalVariance ( CP_INSTANCE *cpi )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    INT32  Stride    = pbi->Configuration.VideoFrameWidth;
+    INT32  TwoRows   = Stride*2;
+    UINT8 *Row       = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT32 Total     = 0;
+    UINT32 r0, r1, r2, r3;
+    int    step, col;
+
+    for ( step = 0; step < 7; step++ )
+    {
+        for ( col = 0; col < 16; col++ )
+        {
+            // Four vertically consecutive pixels in this column
+            r0 = Row[col];
+            r1 = Row[col+Stride];
+            r2 = Row[col+TwoRows];
+            r3 = Row[col+TwoRows + Stride];
+
+            // Compare rows of equal parity: 0 with 2 and 1 with 3
+            Total += (r0-r2)*(r0-r2) + (r1-r3)*(r1-r3);
+        }
+
+        // Advance two rows
+        Row += TwoRows;
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetReconReferencePoints
+ *
+ *  INPUTS        : PB_INSTANCE *pbi      : Pointer to decoder instance.
+ *                  UINT8 *BufferPointer  : Pointer to reference point in reference image.
+ *                  MOTION_VECTOR *MV     : Motion vector to be used.
+ *
+ *  OUTPUTS       : UINT8 **ReconPtr1 : Receives pointer to first block in ref frame.
+ *                  UINT8 **ReconPtr2 : Receives pointer to second block in ref frame.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Computes pointers to two blocks in the reference frame
+ *                  that bracket the fractional pixel position specified by
+ *                  MV. The first is at the whole pixel position (MV rounded
+ *                  towards zero); the second is moved one pixel and/or one
+ *                  row further in the direction of the vector for each
+ *                  component that has fractional bits set.
+ *
+ *  SPECIAL NOTES : The first pointer is stepped with FrameReconStride while
+ *                  the row step of the second uses CurrentReconStride, both
+ *                  from blockDxInfo[0].
+ *
+ ****************************************************************************/
+void GetReconReferencePoints
+(
+    PB_INSTANCE *pbi,
+    UINT8 *BufferPointer,
+    UINT8 **ReconPtr1,
+    UINT8 **ReconPtr2,
+    MOTION_VECTOR *MV
+)
+{
+    INT32  WholeX, WholeY;
+    UINT8 *Near;
+    UINT8 *Far;
+
+    // Whole pixel part of each component, rounding towards zero
+    WholeX = ( MV->x >= 0 ) ? (MV->x >> Y_MVSHIFT) : -((-MV->x) >> Y_MVSHIFT);
+    WholeY = ( MV->y >= 0 ) ? (MV->y >> Y_MVSHIFT) : -((-MV->y) >> Y_MVSHIFT);
+
+    // First reference: the whole pixel position
+    Near = BufferPointer + (pbi->mbi.blockDxInfo[0].FrameReconStride * WholeY) + WholeX;
+
+    // Second reference: starts at the first and moves for each fractional component
+    Far = Near;
+
+    if ( MV->x & Y_MVMODMASK )
+    {
+        if ( MV->x > 0 )
+            Far += 1;
+        else
+            Far -= 1;
+    }
+
+    if ( MV->y & Y_MVMODMASK )
+    {
+        if ( MV->y > 0 )
+            Far += pbi->mbi.blockDxInfo[0].CurrentReconStride;
+        else
+            Far -= pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+
+    *ReconPtr1 = Near;
+    *ReconPtr2 = Far;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterErrQPel
+ *
+ *  INPUTS        : PB_INSTANCE *pbi   : Pointer to decoder instance (not used).
+ *                  UINT8 *NewDataPtr  : Pointer to source block.
+ *                  INT32 SourceStride : Stride for NewDataPtr.
+ *                  UINT8 *RefDataPtr1 : Pointer to first block position in reference frame.
+ *                  UINT8 *RefDataPtr2 : Pointer to second block position in reference frame.
+ *                  INT32 ReconStride  : Stride for RefDataPtr1 & RefDataPtr2.
+ *                  MOTION_VECTOR *MV  : Motion vector whose fractional bits select the filter.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error score returned by GetInterError for the
+ *                  filtered prediction.
+ *
+ *  FUNCTION      : Builds a fractional pel prediction block with
+ *                  FilterBlockBil_8 and scores it against the source block.
+ *
+ *  SPECIAL NOTES : The prediction is written to the file scope buffer
+ *                  FilteredBlock[], so the routine is not re-entrant.
+ *
+ ****************************************************************************/
+UINT32 GetInterErrQPel
+(
+    PB_INSTANCE *pbi,
+    UINT8 *NewDataPtr,
+    INT32 SourceStride,
+    UINT8 *RefDataPtr1,
+    UINT8 *RefDataPtr2,
+    INT32 ReconStride,
+    MOTION_VECTOR *MV
+)
+{
+    // Fractional bits of each component, doubled (original note: FilterBlock
+    // expects offsets at 1/8 point precision)
+    INT32 FracX = (MV->x & Y_MVMODMASK) << 1;
+    INT32 FracY = (MV->y & Y_MVMODMASK) << 1;
+
+    // Produce the filtered prediction block in FilteredBlock[]
+    FilterBlockBil_8 ( RefDataPtr1, RefDataPtr2, FilteredBlock, ReconStride, FracX, FracY );
+
+    // Score the source against the filtered block; it is passed as both
+    // references with a row stride of 8
+    return GetInterError ( NewDataPtr, SourceStride, FilteredBlock, FilteredBlock, 8 );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterError2
+ *
+ *  INPUTS        : PB_INSTANCE *pbi    : Pointer to decoder instance.
+ *                  UINT8 *NewDataPtr   : Pointer to current block.
+ *                  UINT8 *RefDataPtr1  : Pointer to reference block.
+ *                  MOTION_VECTOR *MV   : Pointer to motion vector.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error variance.
+ *
+ *  FUNCTION      : Scores the prediction of the current block given by MV.
+ *                  Whole pixel vectors are scored with GetInterError;
+ *                  vectors with fractional bits are scored with
+ *                  GetInterErrQPel.
+ *
+ *  SPECIAL NOTES : Strides are taken from pbi->mbi.blockDxInfo[0].
+ *
+ ****************************************************************************/
+UINT32 GetInterError2 ( PB_INSTANCE *pbi, UINT8 *NewDataPtr, UINT8 *RefDataPtr1, MOTION_VECTOR *MV )
+{
+    UINT8 *FirstRef;
+    UINT8 *SecondRef;
+
+    // Locate the two reference blocks bracketing the vector
+    GetReconReferencePoints( pbi, RefDataPtr1, &FirstRef, &SecondRef, MV );
+
+    // Whole pixel vector: plain error score
+    if ( !((MV->x & Y_MVMODMASK) || (MV->y & Y_MVMODMASK)) )
+    {
+        return GetInterError ( NewDataPtr, pbi->mbi.blockDxInfo[0].CurrentSourceStride, FirstRef, SecondRef, pbi->mbi.blockDxInfo[0].CurrentReconStride );
+    }
+
+    // Fractional vector: score against the filtered prediction
+    return GetInterErrQPel ( pbi, NewDataPtr, pbi->mbi.blockDxInfo[0].CurrentSourceStride, FirstRef, SecondRef, pbi->mbi.blockDxInfo[0].CurrentReconStride, MV);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterError2_slow
+ *
+ *  INPUTS        : PB_INSTANCE *pbi    : Pointer to decoder instance.
+ *                  UINT8 *NewDataPtr   : Pointer to current block.
+ *                  UINT8 *RefDataPtr1  : Pointer to reference block.
+ *                  MOTION_VECTOR *MV   : Pointer to motion vector.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error variance.
+ *
+ *  FUNCTION      : Scores the prediction of the current block given by MV,
+ *                  working from a loop filtered copy of the reference data.
+ *
+ *  SPECIAL NOTES : VP6_PredictFiltered leaves the filtered copy in
+ *                  pbi->LoopFilteredBlock[], which is addressed here with a
+ *                  row stride of 16 starting at offset (2*16+2).
+ *
+ ****************************************************************************/
+UINT32 GetInterError2_slow ( PB_INSTANCE *pbi, UINT8 *NewDataPtr, UINT8 *RefDataPtr1, MOTION_VECTOR *MV )
+{
+    UINT8 *FirstRef  = pbi->LoopFilteredBlock + (2*16+2);
+    UINT8 *SecondRef = FirstRef;
+    INT32  FracX, FracY;
+
+    // Build the filtered copy of the relevant part of the reconstruction
+    // buffer in pbi->LoopFilteredBlock[].
+    VP6_PredictFiltered ( pbi, RefDataPtr1, MV->x, MV->y, Y_MVSHIFT );
+
+    // Fractional bits of each component
+    FracX = (MV->x & Y_MVMODMASK);
+    FracY = (MV->y & Y_MVMODMASK);
+
+    // Move the second reference one pixel in the direction of a fractional X
+    if ( FracX )
+    {
+        if ( MV->x >= 0 )
+            SecondRef += 1;
+        else
+            SecondRef -= 1;
+    }
+
+    // Move the second reference one row in the direction of a fractional Y
+    if ( FracY )
+    {
+        if ( MV->y > 0 )
+            SecondRef += 16;
+        else
+            SecondRef -= 16;
+    }
+
+    // Whole pixel vector: plain error score
+    if ( !(FracX || FracY) )
+        return GetInterError(NewDataPtr,pbi->mbi.blockDxInfo[0].CurrentSourceStride, FirstRef,SecondRef,16);
+
+    // Fractional vector: score against the filtered prediction
+    return GetInterErrQPel(pbi,NewDataPtr,pbi->mbi.blockDxInfo[0].CurrentSourceStride, FirstRef,SecondRef,16, MV );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterErr
+ *
+ *  INPUTS        : UINT8 *NewDataPtr  : Pointer to current block.
+ *                  INT32 SourceStride : Stride for NewDataPtr block.
+ *                  UINT8 *RefDataPtr1 : Pointer to first reference block.
+ *                  UINT8 *RefDataPtr2 : Pointer to second reference block.
+ *                  INT32 ReconStride  : Stride for RefDataPtr1 & RefDataPtr2.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : UINT32: Error variance (scaled by 2^12).
+ *
+ *  FUNCTION      : Calculates the variance of the difference between the
+ *                  NewDataPtr block and the prediction. When both reference
+ *                  pointers are the same the prediction is that block;
+ *                  otherwise it is the (truncating) average of the two
+ *                  reference blocks.
+ *
+ *  SPECIAL NOTES : Each row contributes 8 pixels over BLOCK_HEIGHT_WIDTH
+ *                  rows. The value returned is 64*Sum(d^2) - Sum(d)^2,
+ *                  i.e. the population variance of 64 samples times 2^12.
+ *                  XX_LUT is indexed with negative differences, so it must
+ *                  address the middle of its underlying table.
+ *
+ ****************************************************************************/
+UINT32 GetInterErr
+(
+    UINT8 * NewDataPtr,
+    INT32 SourceStride,
+    UINT8 * RefDataPtr1,
+    UINT8 * RefDataPtr2,
+    INT32 ReconStride
+)
+{
+    UINT32 row;
+    UINT32 col;
+    INT32 XSum=0;
+    INT32 XXSum=0;
+    INT32 DiffVal;
+
+    // Interpolation mode depends only on whether the two references coincide.
+    // Compare the pointers directly: no pointer difference is narrowed to int
+    // and no abs() is needed.
+    if ( RefDataPtr1 == RefDataPtr2 )
+    {
+        // Single reference: difference against the reference pixel itself
+        for ( row=0; row<BLOCK_HEIGHT_WIDTH; row++ )
+        {
+            for ( col=0; col<8; col++ )
+            {
+                DiffVal = ((int)NewDataPtr[col]) - (int)RefDataPtr1[col];
+                XSum += DiffVal;
+                XXSum += XX_LUT[DiffVal];
+            }
+
+            // Step to next row of block.
+            NewDataPtr += SourceStride;
+            RefDataPtr1 += ReconStride;
+        }
+    }
+    else
+    {
+        // Simple two reference interpolation
+        for ( row=0; row<BLOCK_HEIGHT_WIDTH; row++ )
+        {
+            for ( col=0; col<8; col++ )
+            {
+                DiffVal = ((int)NewDataPtr[col]) - (((int)RefDataPtr1[col] + (int)RefDataPtr2[col]) / 2);
+                XSum += DiffVal;
+                XXSum += XX_LUT[DiffVal];
+            }
+
+            // Step to next row of block.
+            NewDataPtr += SourceStride;
+            RefDataPtr1 += ReconStride;
+            RefDataPtr2 += ReconStride;
+        }
+    }
+
+    // Compute and return population variance as mis-match metric.
+    return (UINT32)( (XXSum<<6) - XSum*XSum );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetSumAbsDiffs
+ *
+ *  INPUTS        : UINT8 *NewDataPtr  : Pointer to current block.
+ *                  INT32 SourceStride : Stride for NewDataPtr block.
+ *                  UINT8 *RefDataPtr  : Pointer to reference block.
+ *                  INT32 ReconStride  : Stride for RefDataPtr.
+ *                  UINT32 ErrorSoFar  : Error for MB so far.
+ *                  UINT32 BestSoFar   : Best error found so far.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: ErrorSoFar plus the sum of absolute differences
+ *                  accumulated before completion or bail-out.
+ *
+ *  FUNCTION      : Accumulates the sum of absolute differences between the
+ *                  two blocks, 8 pixels per row over BLOCK_HEIGHT_WIDTH rows.
+ *
+ *  SPECIAL NOTES : The running total starts at ErrorSoFar. It is tested
+ *                  after each row and the routine stops early as soon as it
+ *                  exceeds BestSoFar, returning the partial total.
+ *
+ ****************************************************************************/
+UINT32 GetSumAbsDiffs
+(
+    UINT8 * NewDataPtr,
+    INT32 SourceStride,
+    UINT8 * RefDataPtr,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 row;
+    UINT32 col;
+    UINT32 Sad = ErrorSoFar;
+
+    for ( row=0; row < BLOCK_HEIGHT_WIDTH; row++ )
+    {
+        // One row of 8 pixels
+        for ( col=0; col < 8; col++ )
+            Sad += AbsX_LUT[ ((int)NewDataPtr[col]) - ((int)RefDataPtr[col]) ];
+
+        // Already worse than the best candidate: give up on this block
+        if ( Sad > BestSoFar )
+            break;
+
+        // Step to next row of block.
+        NewDataPtr += SourceStride;
+        RefDataPtr += ReconStride;
+    }
+    return Sad;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetHalfPixelSumAbsDiffs
+ *
+ *  INPUTS        : UINT8 *SrcData     : Pointer to current block.
+ *                  INT32 SourceStride : Stride for SrcData block.
+ *                  UINT8 *RefDataPtr1 : Pointer to first reference block.
+ *                  UINT8 *RefDataPtr2 : Pointer to second reference block.
+ *                  INT32 ReconStride  : Stride for RefDataPtr1 & RefDataPtr2.
+ *                  UINT32 ErrorSoFar  : Error for MB so far.
+ *                  UINT32 BestSoFar   : Best error found so far.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: ErrorSoFar plus the sum of absolute differences
+ *                  accumulated before completion or bail-out.
+ *
+ *  FUNCTION      : Accumulates the sum of absolute differences between the
+ *                  current block and the (truncating) average of the two
+ *                  reference blocks, 8 pixels per row over
+ *                  BLOCK_HEIGHT_WIDTH rows.
+ *
+ *  SPECIAL NOTES : The running total starts at ErrorSoFar. It is tested
+ *                  after each row and the routine stops early as soon as it
+ *                  exceeds BestSoFar, returning the partial total.
+ *
+ ****************************************************************************/
+UINT32 GetHalfPixelSumAbsDiffs
+(
+    UINT8 * SrcData,
+    INT32 SourceStride,
+    UINT8 * RefDataPtr1,
+    UINT8 * RefDataPtr2,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 row;
+    UINT32 col;
+    UINT32 Sad = ErrorSoFar;
+
+    for ( row=0; row < BLOCK_HEIGHT_WIDTH; row++ )
+    {
+        // One row of 8 pixels against the averaged reference
+        for ( col=0; col < 8; col++ )
+            Sad += AbsX_LUT[ ((int)SrcData[col]) - (((int)RefDataPtr1[col] + (int)RefDataPtr2[col]) / 2) ];
+
+        // Already worse than the best candidate: give up on this block
+        if ( Sad > BestSoFar )
+            break;
+
+        // Step to next row of block.
+        SrcData += SourceStride;
+        RefDataPtr1 += ReconStride;
+        RefDataPtr2 += ReconStride;
+    }
+    return Sad;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : GetIntraErrorC
+ *
+ *  INPUTS        : UINT8 *DataPtr     : Pointer to intra block.
+ *                  INT32 SourceStride : Block stride.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Intra frame block variance (scaled by 2^12)
+ *
+ *  FUNCTION      : Calculates a variance score for the block from the sum
+ *                  and sum of squares of its pixels.
+ *
+ *  SPECIAL NOTES : Every pixel is used: 8 per row over BLOCK_HEIGHT_WIDTH
+ *                  rows. The value returned is 64*Sum(p^2) - Sum(p)^2, i.e.
+ *                  the population variance of 64 samples times 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetIntraErrorC ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32 row;
+    UINT32 col;
+    UINT32 Sum   = 0;
+    UINT32 SumSq = 0;
+    UINT8 *RowPtr = DataPtr;
+
+    for ( row=0; row<BLOCK_HEIGHT_WIDTH; row++ )
+    {
+        // Accumulate all 8 pixels of this row; squares come from XX_LUT
+        for ( col=0; col<8; col++ )
+        {
+            Sum   += RowPtr[col];
+            SumSq += XX_LUT[RowPtr[col]];
+        }
+
+        // Step to next row of block.
+        RowPtr += SourceStride;
+    }
+
+    // Compute population variance as mis-match metric.
+    return ((SumSq<<6) - Sum*Sum);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetSumAbsDiffs16
+ *
+ *  INPUTS        : UINT8 *SrcPtr      : Pointer to current block.
+ *                  INT32 SourceStride : Stride for SrcPtr block.
+ *                  UINT8 *RefPtr      : Pointer to reference block.
+ *                  INT32 ReconStride  : Stride for RefPtr.
+ *                  UINT32 ErrorSoFar  : Error for MB so far (NOT USED).
+ *                  UINT32 BestSoFar   : Best error found so far (NOT USED).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD for the 16x16 block
+ *
+ *  FUNCTION      : Calculates the sum of the absolute differences for the
+ *                  16x16 block by chaining GetSAD over its four 8x8
+ *                  quadrants (top-left, top-right, bottom-left,
+ *                  bottom-right).
+ *
+ *  SPECIAL NOTES : The total starts from zero and HUGE_ERROR is passed as
+ *                  the best-so-far value for every quadrant.
+ *
+ ****************************************************************************/
+UINT32 GetSumAbsDiffs16
+(
+    UINT8 *SrcPtr,
+    INT32 SourceStride,
+    UINT8 *RefPtr,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 Total = 0;
+    INT32  qRow;    // Quadrant row: 0 = upper half, 1 = lower half
+    INT32  qCol;    // Quadrant column: 0 = left half, 1 = right half
+
+    for ( qRow=0; qRow<2; qRow++ )
+    {
+        for ( qCol=0; qCol<2; qCol++ )
+        {
+            // Each call continues from the total accumulated so far
+            Total = GetSAD ( SrcPtr + qRow*8*SourceStride + qCol*8, SourceStride,
+                             RefPtr + qRow*8*ReconStride + qCol*8, ReconStride,
+                             Total, HUGE_ERROR );
+        }
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetHalfPixelSumAbsDiffs16
+ *
+ *  INPUTS        : UINT8 *SrcPtr      : Pointer to current block.
+ *                  INT32 SourceStride : Stride for SrcPtr block.
+ *                  UINT8 *RefPtr      : Pointer to first reference block.
+ *                  UINT8 *RefPtr2     : Pointer to second reference block.
+ *                  INT32 ReconStride  : Stride for RefPtr & RefPtr2.
+ *                  UINT32 ErrorSoFar  : Error for MB so far (NOT USED).
+ *                  UINT32 BestSoFar   : Best error found so far (NOT USED).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      : Calculates the sum of the absolute differences between
+ *                  the 16x16 block at SrcPtr and the half pixel
+ *                  interpolation of RefPtr & RefPtr2, by chaining
+ *                  GetSadHalfPixel over the four 8x8 quadrants (top-left,
+ *                  top-right, bottom-left, bottom-right).
+ *
+ *  SPECIAL NOTES : The total starts from zero and HUGE_ERROR is passed as
+ *                  the best-so-far value for every quadrant.
+ *
+ ****************************************************************************/
+UINT32 GetHalfPixelSumAbsDiffs16
+(
+    UINT8 *SrcPtr,
+    INT32 SourceStride,
+    UINT8 *RefPtr,
+    UINT8 *RefPtr2,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 Total = 0;
+    INT32  qRow;    // Quadrant row: 0 = upper half, 1 = lower half
+    INT32  qCol;    // Quadrant column: 0 = left half, 1 = right half
+
+    for ( qRow=0; qRow<2; qRow++ )
+    {
+        for ( qCol=0; qCol<2; qCol++ )
+        {
+            // Each call continues from the total accumulated so far
+            Total = GetSadHalfPixel ( SrcPtr + qRow*8*SourceStride + qCol*8, SourceStride,
+                                      RefPtr + qRow*8*ReconStride + qCol*8,
+                                      RefPtr2 + qRow*8*ReconStride + qCol*8,
+                                      ReconStride, Total, HUGE_ERROR );
+        }
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetMBIntraError
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Sum of GetIntraError() over the four Y blocks
+ * of the current MB.
+ *
+ * FUNCTION : Calculates the intra-frame variance for the MB.
+ *
+ * SPECIAL NOTES : Only blocks 0..3 (the Y blocks) are considered; chroma
+ * is ignored.
+ *
+ ****************************************************************************/
+UINT32 GetMBIntraError ( CP_INSTANCE *cpi )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 Total = 0;
+    UINT32 Block = 0;
+
+    // Accumulate the intra error of each luma block in turn.
+    while ( Block < 4 )
+    {
+        Total += GetIntraError( &cpi->yuv1ptr[pbi->mbi.blockDxInfo[Block].Source],
+                                pbi->mbi.blockDxInfo[Block].CurrentSourceStride );
+        Block++;
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetMBInterError
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 *SrcPtr : Pointer to base of source image data.
+ * UINT8 *RefPtr : Pointer to base of reference image data.
+ * MOTION_VECTOR *MV : Motion vector to be used.
+ *
+ * OUTPUTS : UINT32 *BlockError : Array (4 entries) receiving the
+ * individual Y block variances.
+ *
+ * RETURNS : UINT32: Inter-frame variance for the MB (scaled by 2^12).
+ *
+ * FUNCTION : Calculates the variance of the difference between
+ * the MB pointed to by SrcPtr & the MB found by
+ * applying MV to RefPtr.
+ *
+ * SPECIAL NOTES : Variance is scaled by 2^12 (4096). Chroma is ignored
+ * when computing the variance.
+ *
+ ****************************************************************************/
+UINT32 GetMBInterError
+(
+    CP_INSTANCE *cpi,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 *BlockError
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 Total = 0;
+    int Block;
+
+    for ( Block=0; Block<4; Block++ )
+    {
+        UINT8 *BlockSrc = &SrcPtr[pbi->mbi.blockDxInfo[Block].Source];
+        UINT8 *BlockRef = &RefPtr[pbi->mbi.blockDxInfo[Block].thisRecon];
+
+        // The loop-filter aware (slow) estimator is used only when the loop
+        // filter is enabled, Speed is 8 or less and the profile is not
+        // SIMPLE_PROFILE; otherwise the plain estimator is used.
+        if ( (pbi->UseLoopFilter != NO_LOOP_FILTER) &&
+             (cpi->Speed <= 8) &&
+             (pbi->VpProfile != SIMPLE_PROFILE) )
+        {
+            BlockError[Block] = GetInterError2_slow ( pbi, BlockSrc, BlockRef, MV );
+        }
+        else
+        {
+            BlockError[Block] = GetInterError2 ( pbi, BlockSrc, BlockRef, MV );
+        }
+
+        Total += BlockError[Block];
+    }
+    return Total;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FindMvVia3StepSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *SrcPtr : Pointer to source block.
+ * UINT8 *RefPtr : Pointer to block position in reference frame.
+ * UINT32 BlockSize : Size of the block (8 selects the 8x8 SAD,
+ * anything else the 16x16 SAD).
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ * UINT8 **BestBlockPtr : Pointer-to-pointer to best block in ref frame.
+ *
+ * RETURNS : UINT32: Cost (SAD plus MV bit-cost penalty) of the best
+ * matching block.
+ *
+ * FUNCTION : Finds block in reference frame that best matches the SrcPtr
+ * block using a hierarchical search over 8 neighbours per step.
+ *
+ * SPECIAL NOTES : The number of steps taken depends on the frame width and
+ * on the size of the nearest MV. Motion vectors are
+ * returned in 1/4 pixel units. Updates the file-scope
+ * statistics TotError and ErrCount.
+ *
+ ****************************************************************************/
+UINT32 FindMvVia3StepSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*SadFn)( UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32 );
+    MOTION_VECTOR Diff;
+    UINT8 *Candidate = NULL;
+    INT32 SrcStride;
+    INT32 RefStride;
+    INT32 StartStep;
+    INT32 Step;
+    INT32 Site;
+    INT32 SiteEnd;
+    INT32 NearestLarge;
+    INT32 NearestHuge;
+    INT32 BestX = 0;
+    INT32 BestY = 0;
+    INT32 NearX = 0;
+    INT32 NearY = 0;
+    UINT32 MvBits;      // Actually bits * 64
+    UINT32 Cost;
+    UINT32 BestCost;
+
+    // The vector is coded relative to the nearest MV when one is available
+    // for the relevant reference frame, otherwise relative to 0,0.
+    if ( Mode == CODE_INTER_PLUS_MV )
+    {
+        if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            NearX = pbi->mbi.NearestInterMVect.x;
+            NearY = pbi->mbi.NearestInterMVect.y;
+        }
+    }
+    else if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        // Golden frame
+        NearX = pbi->mbi.NearestGoldMVect.x;
+        NearY = pbi->mbi.NearestGoldMVect.y;
+    }
+
+    // Pick the SAD routine and strides for the block size.
+    if ( BlockSize != 8 )
+    {
+        // The 16x16 SAD works on a whole (possibly interlaced) macroblock
+        // only when the strides are the frame-wise ones.
+        SadFn     = GetSAD16;
+        RefStride = pbi->Configuration.YStride;
+        SrcStride = pbi->Configuration.VideoFrameWidth;
+    }
+    else
+    {
+        // NOTE: block 0 strides are used, i.e. the Y plane is always assumed.
+        SadFn     = GetSAD;
+        SrcStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        RefStride = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+
+    // The 0,0 candidate is the initial best match.
+    BestCost = SadFn( SrcPtr, SrcStride, RefPtr, RefStride, 0, HUGE_ERROR );
+    *BestBlockPtr = RefPtr;
+    MV->x = 0;
+    MV->y = 0;
+
+    // Choose the first step: the larger the nearest MV, the earlier (and
+    // hence longer) the first step. Step 0 is only available for frames
+    // at least 480 pixels wide.
+    NearestLarge = (NearX >= 16) || (NearX <= -16) || (NearY >= 16) || (NearY <= -16);
+    NearestHuge  = (NearX >= 48) || (NearX <= -48) || (NearY >= 48) || (NearY <= -48);
+
+    if ( (cpi->pb.Configuration.VideoFrameWidth >= 480) && NearestHuge )
+        StartStep = 0;
+    else if ( NearestLarge )
+        StartStep = 1;
+    else
+        StartStep = 2;
+
+    // Each step owns 8 consecutive entries in the offset tables.
+    Site = StartStep * 8;
+
+    for ( Step=StartStep; Step<cpi->MVSearchSteps; Step++ )
+    {
+        // Examine the 8 neighbours belonging to this step.
+        for ( SiteEnd=Site+8; Site<SiteEnd; Site++ )
+        {
+            Candidate = RefPtr + cpi->MVPixelOffsetY[Site];
+
+            // Raw block error, with the best cost so far passed to the SAD routine.
+            Cost = SadFn( SrcPtr, SrcStride, Candidate, RefStride, 0, BestCost );
+
+            // Differential vector in 1/4 pel units.
+            Diff.x = (4 * (MV->x + cpi->MVOffsetX[Site])) - NearX;
+            Diff.y = (4 * (MV->y + cpi->MVOffsetY[Site])) - NearY;
+
+            MvBits = cpi->EstMvCostPtrX[Diff.x]
+                   + cpi->EstMvCostPtrY[Diff.y];
+
+            // Two-stage MV cost penalty; the second term uses the already
+            // penalised error.
+            Cost += (MvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+            Cost += (MvBits * Cost)>>MVEPBSAD_RSHIFT2;
+
+            if ( Cost < BestCost )
+            {
+                BestCost      = Cost;
+                *BestBlockPtr = Candidate;
+                BestX = MV->x + cpi->MVOffsetX[Site];
+                BestY = MV->y + cpi->MVOffsetY[Site];
+            }
+        }
+
+        // Re-centre on the best location found so far.
+        RefPtr = *BestBlockPtr;
+        MV->x  = BestX;
+        MV->y  = BestY;
+    }
+
+    // Convert whole-pixel vector to 1/4 pixel resolution.
+    MV->x = (MV->x * 4);
+    MV->y = (MV->y * 4);
+
+    TotError += BestCost;
+    ErrCount++;
+
+    return BestCost;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FindMvViaExhaustSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *SrcPtr : Pointer to source block.
+ * UINT8 *RefPtr : Pointer to block position in reference frame.
+ * UINT32 BlockSize : Size of the block (8 selects the 8x8 SAD,
+ * anything else the 16x16 SAD).
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ * UINT8 **BestBlockPtr : Pointer-to-pointer to best block in ref frame.
+ *
+ * RETURNS : UINT32: Cost (SAD plus MV bit-cost penalty) of the best
+ * matching block.
+ *
+ * FUNCTION : Finds block in reference frame that best matches the SrcPtr
+ * block using an exhaustive search of every whole-pixel site
+ * in a square window centred on RefPtr.
+ *
+ * SPECIAL NOTES : Motion vectors are stored in 1/4 pixel units. The window
+ * is 63x63 sites for frames at least 480 wide, else 31x31.
+ * MV and *BestBlockPtr are only written when a candidate
+ * scores below HUGE_ERROR.
+ *
+ ****************************************************************************/
+UINT32 FindMvViaExhaustSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*SadFn)( UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32 );
+    MOTION_VECTOR Trial;
+    MOTION_VECTOR Diff;
+    UINT8 *Candidate = NULL;
+    INT32 Row;
+    INT32 Col;
+    INT32 Extent;
+    INT32 HalfExtent;
+    INT32 SrcStride;
+    INT32 RefStride;
+    INT32 NearX = 0;
+    INT32 NearY = 0;
+    UINT32 MvBits;      // Actually bits * 64
+    UINT32 Cost;
+    UINT32 BestCost = HUGE_ERROR;
+
+    // The vector is coded relative to the nearest MV when one is available
+    // for the relevant reference frame, otherwise relative to 0,0.
+    // NearX/NearY are in 1/4 pel units.
+    if ( Mode == CODE_INTER_PLUS_MV )
+    {
+        if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            NearX = pbi->mbi.NearestInterMVect.x;
+            NearY = pbi->mbi.NearestInterMVect.y;
+        }
+    }
+    else if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        // Golden frame
+        NearX = pbi->mbi.NearestGoldMVect.x;
+        NearY = pbi->mbi.NearestGoldMVect.y;
+    }
+
+    // Larger images get a longer search.
+    Extent     = ( cpi->pb.Configuration.VideoFrameWidth >= 480 ) ? 63 : 31;
+    HalfExtent = Extent/2;
+
+    // Pick the SAD routine and strides for the block size.
+    if ( BlockSize != 8 )
+    {
+        // The 16x16 SAD works on a whole (possibly interlaced) macroblock
+        // only when the strides are the frame-wise ones.
+        SadFn     = GetSAD16;
+        RefStride = pbi->Configuration.YStride;
+        SrcStride = pbi->Configuration.VideoFrameWidth;
+    }
+    else
+    {
+        // NOTE: block 0 strides are used, i.e. the Y plane is always assumed.
+        SadFn     = GetSAD;
+        SrcStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        RefStride = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+
+    // Step back to the top-left site of the search window.
+    RefPtr = RefPtr - (HalfExtent * pbi->Configuration.YStride) - HalfExtent;
+
+    // Visit every pixel-aligned site, row by row.
+    for ( Row=0; Row<Extent; Row++ )
+    {
+        Candidate = RefPtr;
+
+        for ( Col=0; Col<Extent; Col++ )
+        {
+            // *4 converts to 1/4 pixel resolution
+            Trial.x = 4 * (Col - HalfExtent);
+            Trial.y = 4 * (Row - HalfExtent);
+
+            Cost = SadFn( SrcPtr, SrcStride, Candidate, RefStride, 0, HUGE_ERROR );
+
+            // Vector to be coded, relative to 0,0 or nearest.
+            Diff.x = Trial.x - NearX;
+            Diff.y = Trial.y - NearY;
+
+            MvBits = cpi->EstMvCostPtrX[Diff.x]
+                   + cpi->EstMvCostPtrY[Diff.y];
+
+            // Two-stage MV cost penalty; the second term uses the already
+            // penalised error.
+            Cost += (MvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+            Cost += (MvBits * Cost)>>MVEPBSAD_RSHIFT2;
+
+            if ( Cost < BestCost )
+            {
+                BestCost      = Cost;
+                *BestBlockPtr = Candidate;
+                MV->x = Trial.x;
+                MV->y = Trial.y;
+            }
+
+            // Next site in this row.
+            Candidate++;
+        }
+
+        // Next row of the window.
+        RefPtr += pbi->Configuration.YStride;
+    }
+    return BestCost;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FindBestFractionalPixelStep
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *SrcPtr : Pointer to source block.
+ * UINT8 *RefPtr : Pointer to block position in reference frame.
+ * UINT32 BlockSize : Size of the block (8 = one 8x8 block,
+ * otherwise four 8x8 blocks of a MB).
+ * UINT32 *MinError : Pointer to best error found to date;
+ * updated when a better position is found.
+ * UINT8 BitShift : Number of bits to shift the sub-pixel
+ * offsets by (depending whether 1/2 or 1/4 pel search)
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Finds the best fractional (1/2 or 1/4) pixel MV that
+ * gives the best matching block in the reference frame.
+ *
+ * SPECIAL NOTES : Sub-pixel offsets are scaled by multiplication with
+ * (1 << BitShift) rather than by shifting the offset
+ * itself, because left-shifting a negative value is
+ * undefined behaviour in C.
+ *
+ ****************************************************************************/
+void FindBestFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+    UINT8 BitShift
+)
+{
+    UINT32 i, j;
+    UINT32 nBlocks;
+    INT32 ModX, ModY;
+    UINT32 EstMvBits;   // bits * 64
+    INT32 SourceStride;
+    INT32 ReconStride;
+    INT32 RowStep;
+    INT32 BlockOffset[4];
+    UINT8 *SourceBlock[4];
+    UINT8 *RefDataPtr1;
+    UINT8 *RefDataPtr2;
+    MOTION_VECTOR DifferentialVector;
+
+    UINT32 Error = 0;
+    UINT8 BestOffset = 0;
+    INT32 MvOffsetX = 0;
+    INT32 MvOffsetY = 0;
+    INT32 OffsetScale = 1 << BitShift;  // multiplier equivalent to "<< BitShift"
+    MOTION_VECTOR TmpVector = {0, 0};
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    // Work out if we will code the vector relative to 0,0 or nearest
+    if ( Mode == CODE_INTER_PLUS_MV )
+    {
+        if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            MvOffsetX = pbi->mbi.NearestInterMVect.x;
+            MvOffsetY = pbi->mbi.NearestInterMVect.y;
+        }
+    }
+    else // Golden frame
+    {
+        if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+            MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+        }
+    }
+
+    // NOTE: block 0 strides are used, i.e. the Y plane is always assumed.
+    SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+    ReconStride  = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+
+    if ( BlockSize == 8 )
+    {
+        // Only 1 block to process
+        nBlocks = 1;
+        BlockOffset[0] = 0;
+        SourceBlock[0] = SrcPtr;
+    }
+    else
+    {
+        // 4 8x8s to process--may be interlaced!
+        // In an interlaced MB the lower pair of blocks starts one line
+        // below the upper pair; otherwise it starts 8 lines below.
+        nBlocks = 4;
+        RowStep = ( pbi->mbi.Interlaced == 1 ) ? 1 : 8;
+
+        SourceBlock[0] = SrcPtr;
+        SourceBlock[1] = SrcPtr + 8;
+        SourceBlock[2] = SrcPtr + (RowStep * pbi->Configuration.VideoFrameWidth);
+        SourceBlock[3] = SourceBlock[2] + 8;
+
+        // Reference offsets are cumulative: each entry is relative to the
+        // previous block's position (hence the "- 8" to undo block 1's step).
+        BlockOffset[0] = 0;
+        BlockOffset[1] = 8;
+        BlockOffset[2] = (RowStep * pbi->Configuration.YStride) - 8;
+        BlockOffset[3] = 8;
+    }
+
+    // Examine eight positions around a central position
+    for ( i = 1; i < 9; i++ )
+    {
+        // MV holds best mv in 1/4 pixel units
+        TmpVector.x = MV->x + (cpi->SubPixelXOffset[i] * OffsetScale);
+        TmpVector.y = MV->y + (cpi->SubPixelYOffset[i] * OffsetScale);
+
+        // Get the two reference pointers for the motion vector
+        GetReconReferencePoints( pbi, RefPtr, &RefDataPtr1, &RefDataPtr2, &TmpVector );
+
+        // Filter number is based on 1/8th pixel positions
+        ModX = (TmpVector.x & Y_MVMODMASK) << 1;
+        ModY = (TmpVector.y & Y_MVMODMASK) << 1;
+
+        // Sum the filtered-prediction SAD over every 8x8 block
+        Error = 0;
+
+        for ( j=0; j<nBlocks; j++ )
+        {
+            RefDataPtr1 += BlockOffset[j];
+            RefDataPtr2 += BlockOffset[j];
+
+            Error += FiltBlockBilGetSad(SourceBlock[j], SourceStride, RefDataPtr1, RefDataPtr2, ReconStride, ModX, ModY,HUGE_ERROR);
+        }
+
+        // Should we code relative to 0,0 or nearest
+        DifferentialVector.x = TmpVector.x - MvOffsetX;
+        DifferentialVector.y = TmpVector.y - MvOffsetY;
+
+        EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]
+                  + cpi->EstMvCostPtrY[DifferentialVector.y];
+
+        // Two-stage MV cost penalty; the second term uses the already
+        // penalised error.
+        Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+        Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;
+
+        if ( Error < *MinError )
+        {
+            BestOffset = (UINT8)i;
+            *MinError = Error;
+        }
+    }
+
+    // Set the returned vector. BestOffset stays 0 when no position improved
+    // on *MinError -- presumably entry 0 of the offset tables is the
+    // zero offset (TODO confirm against the table initialisation).
+    MV->x += (cpi->SubPixelXOffset[BestOffset] * OffsetScale);
+    MV->y += (cpi->SubPixelYOffset[BestOffset] * OffsetScale);
+
+    return;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetMBMVInterError
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *RefFramePtr : Pointer to base of reference frame data.
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ * UINT32 *TempErrors : Array to hold variances of individual Y-blocks.
+ *
+ * RETURNS : UINT32: Prediction error variance for best matching block.
+ *
+ * FUNCTION : Calculates a MB MV using the search routine installed in
+ * cpi->FindMvViaSearch, refined by the installed half and
+ * quarter pixel steps while the error exceeds HP_THRESH.
+ *
+ * SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetMBMVInterError
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *RefFramePtr,
+    MOTION_VECTOR *MV,
+    UINT32 *TempErrors
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT8 *BestBlockPtr = NULL;
+    UINT32 BestSad;
+
+    // NOTE: block 0 is used, i.e. the Y plane is always assumed.
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT8 *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[0].thisRecon];
+
+    // Whole-pixel search over the 16x16 macroblock.
+    BestSad = cpi->FindMvViaSearch ( cpi, Mode, SrcPtr, RefPtr, MV, &BestBlockPtr, 16 );
+
+    // Fractional refinement; each stage may lower BestSad, so the threshold
+    // is re-tested before the quarter pixel stage.
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestHalfPixelMv ( cpi, Mode, SrcPtr, RefPtr, MV, 16, &BestSad, 1 );
+
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestQuarterPixelMv( cpi, Mode, SrcPtr, RefPtr, MV, 16, &BestSad, 0 );
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Score the chosen vector as a variance and return it.
+    return GetMBInterError( cpi, cpi->yuv1ptr, RefFramePtr, MV, TempErrors );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetMBMVExhaustiveSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *RefFramePtr : Pointer to base of reference frame data.
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ * UINT32 *TempErrors : Array to hold variances of individual Y-blocks.
+ *
+ * RETURNS : UINT32: Prediction error variance for best matching block.
+ *
+ * FUNCTION : Calculates a MB MV using an exhaustive search, refined by
+ * the installed half and quarter pixel steps while the error
+ * exceeds HP_THRESH.
+ *
+ * SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetMBMVExhaustiveSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *RefFramePtr,
+    MOTION_VECTOR *MV,
+    UINT32 *TempErrors
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT8 *BestBlockPtr = NULL;
+    UINT32 BestSad;
+
+    // NOTE: block 0 is used, i.e. the Y plane is always assumed.
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT8 *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[0].thisRecon];
+
+    // Whole-pixel exhaustive search over the 16x16 macroblock.
+    BestSad = FindMvViaExhaustSearch( cpi, Mode, SrcPtr, RefPtr, MV, &BestBlockPtr, 16 );
+
+    // Fractional refinement; each stage may lower BestSad, so the threshold
+    // is re-tested before the quarter pixel stage.
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestHalfPixelMv ( cpi, Mode, SrcPtr, RefPtr, MV, 16, &BestSad, 1 );
+
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestQuarterPixelMv( cpi, Mode, SrcPtr, RefPtr, MV, 16, &BestSad, 0 );
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Score the chosen vector as a variance and return it.
+    return GetMBInterError( cpi, cpi->yuv1ptr, RefFramePtr, MV, TempErrors );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetBMVExhaustiveSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 *RefFramePtr : Pointer to base of reference frame data.
+ * UINT32 bp : Index of the block within the MB
+ * (indexes pbi->mbi.blockDxInfo).
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ *
+ * RETURNS : UINT32: Prediction error variance for best matching block.
+ *
+ * FUNCTION : Calculates a MV for an 8x8 Y block using an exhaustive search.
+ *
+ * SPECIAL NOTES : Returned variance is scaled by 2^12 (4096). The block is
+ * always searched as CODE_INTER_PLUS_MV.
+ *
+ ****************************************************************************/
+UINT32 GetBMVExhaustiveSearch ( CP_INSTANCE *cpi, UINT8 *RefFramePtr, MOTION_VECTOR *MV, UINT32 bp )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT8 *BestBlockPtr = NULL;
+    UINT32 BestSad;
+
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[bp].Source];
+    UINT8 *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[bp].thisRecon];
+
+    // Whole-pixel exhaustive search over the 8x8 block.
+    BestSad = FindMvViaExhaustSearch( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, &BestBlockPtr, 8 );
+
+    // Fractional refinement; each stage may lower BestSad, so the threshold
+    // is re-tested before the quarter pixel stage.
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestHalfPixelMv ( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, 8, &BestSad, 1 );
+
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestQuarterPixelMv( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, 8, &BestSad, 0 );
+
+    // Score the chosen vector as a variance and return it.
+    return GetInterError2( pbi, SrcPtr, RefPtr, MV );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetBMVSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT8 *RefFramePtr : Pointer to base of reference frame data.
+ * UINT32 bp : Index of the block within the MB
+ * (indexes pbi->mbi.blockDxInfo).
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ *
+ * RETURNS : UINT32: Prediction error variance for best matching block.
+ *
+ * FUNCTION : Calculates a MV for an 8x8 Y block using the search routine
+ * installed in cpi->FindMvViaSearch.
+ *
+ * SPECIAL NOTES : Returned variance is scaled by 2^12 (4096). The block is
+ * always searched as CODE_INTER_PLUS_MV.
+ *
+ ****************************************************************************/
+UINT32 GetBMVSearch ( CP_INSTANCE *cpi, UINT8 *RefFramePtr, MOTION_VECTOR *MV, UINT32 bp )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT8 *BestBlockPtr = NULL;
+    UINT32 BestSad;
+
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[bp].Source];
+    UINT8 *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[bp].thisRecon];
+
+    // Whole-pixel search over the 8x8 block.
+    BestSad = cpi->FindMvViaSearch( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, &BestBlockPtr, 8 );
+
+    // Fractional refinement; each stage may lower BestSad, so the threshold
+    // is re-tested before the quarter pixel stage.
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestHalfPixelMv ( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, 8, &BestSad, 1 );
+
+    if ( BestSad > HP_THRESH )
+        cpi->FindBestQuarterPixelMv( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr, MV, 8, &BestSad, 0 );
+
+    // Score the chosen vector as a variance and return it.
+    return GetInterError2( pbi, SrcPtr, RefPtr, MV );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FindMvViaDiamondSearch
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * CODING_MODE Mode : Coding mode for the block.
+ * UINT8 *SrcPtr : Pointer to block in source image.
+ * UINT8 *RefPtr : Pointer to block in reference image.
+ * UINT32 BlockSize : Size of block (8 selects the 8x8 SAD,
+ * anything else the 16x16 SAD).
+ *
+ * OUTPUTS : MOTION_VECTOR *MV : Motion vector of best block found.
+ * UINT8 **BestBlockPtr : Pointer-to-pointer of best block found.
+ *
+ * RETURNS : UINT32: Cost (SAD plus MV bit-cost penalty) of the best
+ * matching block found.
+ *
+ * FUNCTION : Calculates a MV using a diamond search (4 neighbours
+ * per step).
+ *
+ * SPECIAL NOTES : Motion vectors are returned in 1/4 pixel units. Updates
+ * the file-scope statistics TotError and ErrCount.
+ * The MV bit-cost is estimated from the diamond tables
+ * (DSMVOffsetX/Y), i.e. for the same displacement that is
+ * recorded when a candidate wins.
+ *
+ ****************************************************************************/
+UINT32 FindMvViaDiamondSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    INT32 i;
+    INT32 step;
+    UINT32 EstMvBits;   // Actually bits * 64
+    INT32 SourceStride;
+    INT32 ReconStride;
+    INT32 FirstStepOffset;
+    INT32 NearestLarge;
+    INT32 NearestHuge;
+    MOTION_VECTOR DifferentialVector;
+
+    INT32 x=0, y=0;
+    UINT32 Error = 0;
+    UINT32 MinError = HUGE_ERROR;
+    INT32 MvOffsetX = 0;
+    INT32 MvOffsetY = 0;
+    INT32 SearchSite = 0;
+    UINT8 *CandidateBlockPtr = NULL;
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*GetSad)( UINT8 * SrcPtr, INT32 SourceStride, UINT8 * RefPtr, INT32 ReconStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+
+    // Work out if we will code the vector relative to 0,0 or nearest
+    if ( Mode == CODE_INTER_PLUS_MV )
+    {
+        if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            MvOffsetX = pbi->mbi.NearestInterMVect.x;
+            MvOffsetY = pbi->mbi.NearestInterMVect.y;
+        }
+    }
+    else // Golden frame
+    {
+        if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+            MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+        }
+    }
+
+    if ( BlockSize == 8 )
+    {
+        GetSad = GetSAD;
+        // NOTE: block 0 strides are used, i.e. the Y plane is always assumed.
+        SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        ReconStride = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+    else
+    {
+        // The 16x16 SAD works on a whole (possibly interlaced) macroblock
+        // only when the strides are the frame-wise ones.
+        GetSad = GetSAD16;
+        ReconStride = pbi->Configuration.YStride;
+        SourceStride = pbi->Configuration.VideoFrameWidth;
+    }
+
+    // Check the 0,0 candidate.
+    Error = GetSad( SrcPtr, SourceStride, RefPtr, ReconStride, 0, HUGE_ERROR );
+
+    MinError = Error;
+    *BestBlockPtr = RefPtr;
+    MV->x = 0;
+    MV->y = 0;
+
+    // Set up control of how many steps to take and size of first step.
+    // The larger the nearest MV, the earlier (and hence longer) the first
+    // step. Step 0 is only available for frames at least 480 pixels wide.
+    NearestLarge = (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16);
+    NearestHuge  = (MvOffsetX >= 48) || (MvOffsetX <= -48) || (MvOffsetY >= 48) || (MvOffsetY <= -48);
+
+    if ( (cpi->pb.Configuration.VideoFrameWidth >= 480) && NearestHuge )
+        FirstStepOffset = 0;
+    else if ( NearestLarge )
+        FirstStepOffset = 1;
+    else
+        FirstStepOffset = 2;
+
+    // Each step owns 4 consecutive entries in the diamond offset tables.
+    SearchSite = FirstStepOffset * 4;
+
+    // Proceed through N-steps.
+    for ( step=FirstStepOffset; step<cpi->DSMVSearchSteps; step++ )
+    {
+        // Search the 4-neighbours at distance pertinent to current step.
+        for ( i=0; i<4; i++ )
+        {
+            // Set pointer to next candidate matching block.
+            CandidateBlockPtr = RefPtr + cpi->DSMVPixelOffsetY[SearchSite];
+
+            // Get the block error score.
+            Error = GetSad( SrcPtr, SourceStride, CandidateBlockPtr,ReconStride,0, MinError );
+
+            // Calculate differential vector in Qpel units. The diamond
+            // tables must be used here: SearchSite indexes the 4-per-step
+            // DSMV tables, not the 8-per-step MVOffsetX/Y tables of the
+            // 3-step search.
+            DifferentialVector.x = (4 * (MV->x + cpi->DSMVOffsetX[SearchSite])) - MvOffsetX;
+            DifferentialVector.y = (4 * (MV->y + cpi->DSMVOffsetY[SearchSite])) - MvOffsetY;
+
+            EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]
+                      + cpi->EstMvCostPtrY[DifferentialVector.y];
+
+            // Two-stage MV cost penalty; the second term uses the already
+            // penalised error.
+            Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+            Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;
+
+            if ( Error < MinError )
+            {
+                // Remember best match.
+                MinError = Error;
+                *BestBlockPtr = CandidateBlockPtr;
+
+                // Where is it.
+                x = MV->x + cpi->DSMVOffsetX[SearchSite];
+                y = MV->y + cpi->DSMVOffsetY[SearchSite];
+            }
+
+            // Move to next search location.
+            SearchSite += 1;
+        }
+
+        // Move to best location this step.
+        RefPtr = *BestBlockPtr;
+        MV->x = x;
+        MV->y = y;
+    }
+
+    // Factor vectors to 1/4 pixel resolution.
+    MV->x = (MV->x * 4);
+    MV->y = (MV->y * 4);
+
+    TotError += MinError;
+    ErrCount++;
+
+    return MinError;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SkipFractionalPixelStep
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance (NOT USED).
+ * CODING_MODE Mode : Coding mode for the block (NOT USED).
+ * UINT8 *SrcPtr : Pointer to source block (NOT USED).
+ * UINT8 *RefPtr : Pointer to block position in reference frame (NOT USED).
+ * MOTION_VECTOR *MV : Motion vector (NOT USED, left unchanged).
+ * UINT32 BlockSize : Size of the block (NOT USED).
+ * UINT32 *MinError : Pointer to best error found to date (NOT USED).
+ * UINT8 BitShift : Number of bits to shift the MV components
+ * by (depending whether 1/2 or 1/4 pel search)(NOT USED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Stub with the same signature as FindBestFractionalPixelStep,
+ * used to avoid the fractional pixel MV search.
+ *
+ * SPECIAL NOTES : Does nothing; MV and *MinError are not modified.
+ *
+ ****************************************************************************/
+void SkipFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+    UINT8 BitShift
+)
+{
+    // Intentionally empty: every argument is ignored.
+    (void)cpi;
+    (void)Mode;
+    (void)SrcPtr;
+    (void)RefPtr;
+    (void)MV;
+    (void)BlockSize;
+    (void)MinError;
+    (void)BitShift;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FiltBlockBilGetSad_C
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * INT32 SrcStride : Stride of source image.
+ * UINT8 *ReconPtr1 : Pointer to first block position in reference frame.
+ * UINT8 *ReconPtr2 : Pointer to second block position in reference frame.
+ * INT32 PixelsPerLine : Pixels in line of frame containing ReconPtr1/2.
+ * INT32 ModX : Fractional part of MV x-component.
+ * INT32 ModY : Fractional part of MV y-component.
+ * UINT32 BestSoFar : Best error found to date.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD of the filtered block prediction error.
+ *
+ * FUNCTION : Produces a filtered fractional pel prediction block
+ * using bi-linear filters and calculates the SAD of
+ * the prediction error.
+ *
+ * SPECIAL NOTES : The prediction is built in a local scratch buffer and
+ * compared against the source using a stride of 8.
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_C
+(
+    UINT8 *SrcPtr,
+    INT32 SrcStride,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT32 PixelsPerLine,
+    INT32 ModX,
+    INT32 ModY,
+    UINT32 BestSoFar
+)
+{
+    // Local scratch buffer for the filtered prediction. It is deliberately
+    // given its own name so it does not mask the file-scope FilteredBlock.
+    UINT8 Prediction[256];
+
+    // Build the bi-linearly filtered prediction, then score it.
+    FilterBlockBil_8 ( ReconPtr1, ReconPtr2, Prediction, PixelsPerLine, ModX, ModY );
+
+    return GetSAD ( SrcPtr, SrcStride, Prediction, 8, 0, BestSoFar );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h
new file mode 100644
index 00000000..74b7be7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h
@@ -0,0 +1,84 @@
+/****************************************************************************
+*
+* Module Title : MComp.h
+*
+* Description : Video CODEC: motion compensation module header .
+*
+****************************************************************************/
+#ifndef __INC_MCOMP_H
+#define __INC_MCOMP_H
+
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "codec_common.h"
+#include "compdll.h"
+
+/****************************************************************************
+* Exports (data defined elsewhere and shared through this header)
+****************************************************************************/
+extern INT32 *AbsX_LUT; // NOTE(review): presumably an absolute-value lookup table; its definition is not in this header - confirm
+
+/****************************************************************************
+* Functions (prototypes only; the definitions live in the motion search sources)
+****************************************************************************/
+extern void InitMotionCompensation ( CP_INSTANCE *cpi);
+extern UINT32 GetIntraErrorC ( UINT8 * DataPtr, INT32 SourceStride );
+extern UINT32 GetMBIntraError ( CP_INSTANCE *cpi );
+extern UINT32 GetMBInterError ( CP_INSTANCE *cpi, UINT8 * SrcPtr, UINT8 * RefPtr, MOTION_VECTOR *MV, UINT32 * );
+extern UINT32 GetMBMVInterError ( CP_INSTANCE *cpi, CODING_MODE Mode, UINT8 * RefFramePtr, MOTION_VECTOR *MV, UINT32 *TempErrors );
+extern UINT32 GetMBMVExhaustiveSearch ( CP_INSTANCE *cpi, CODING_MODE Mode, UINT8 * RefFramePtr, MOTION_VECTOR *MV, UINT32 * );
+
+extern UINT32 GetBMVExhaustiveSearch ( CP_INSTANCE* cpi, UINT8* RefFramePtr, MOTION_VECTOR* MV, UINT32);
+extern UINT32 GetBMVSearch ( CP_INSTANCE* cpi, UINT8* RefFramePtr, MOTION_VECTOR* MV, UINT32 );
+
+extern UINT32 GetMBFrameVerticalVariance ( CP_INSTANCE* cpi );
+extern UINT32 GetMBFieldVerticalVariance ( CP_INSTANCE* cpi );
+extern UINT32 FindMvViaDiamondSearch // Same parameter list as FindMvVia3StepSearch
+(
+ CP_INSTANCE *cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT8 **BestBlockPtr,
+ UINT32 BlockSize
+);
+extern UINT32 FindMvVia3StepSearch // Same parameter list as FindMvViaDiamondSearch
+(
+ CP_INSTANCE *cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT8 **BestBlockPtr,
+ UINT32 BlockSize
+);
+
+extern void FindBestFractionalPixelStep // Same parameter list as SkipFractionalPixelStep
+(
+ CP_INSTANCE *cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT32 BlockSize,
+ UINT32 *MinError,
+ UINT8 BitShift
+);
+extern void SkipFractionalPixelStep // Defined as an empty stub: does nothing with its arguments
+(
+ CP_INSTANCE *cpi,
+ CODING_MODE Mode,
+ UINT8 *SrcPtr,
+ UINT8 *RefPtr,
+ MOTION_VECTOR *MV,
+ UINT32 BlockSize,
+ UINT32 *MinError,
+ UINT8 BitShift
+);
+
+#endif /* __INC_MCOMP_H */
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c
new file mode 100644
index 00000000..4420e248
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c
@@ -0,0 +1,482 @@
+/****************************************************************************
+*
+* Module Title : MiscCommon.c
+*
+* Description : Miscellaneous common routines
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define KF_WORST_Q_INDEX 20
+
+/****************************************************************************
+* Module Static
+****************************************************************************/
+// Provisional data for interpolated positions (x.xx00): one {M, C} line per Q index, evaluated as M*Complexity + C by GetEstimatedBpb() for non-key frames.
+static const LINE_EQ2 InterBpBEquations[Q_TABLE_SIZE] =
+{
+ { 0.00115, 445.98890}, { 0.00132, 406.83041}, { 0.00148, 400.18762}, { 0.00160, 363.68569},
+ { 0.00174, 378.33470}, { 0.00199, 377.42412}, { 0.00237, 300.00652}, { 0.00262, 266.74763},
+ { 0.00280, 252.69107}, { 0.00312, 205.72084}, { 0.00351, 183.14721}, { 0.00386, 155.88815},
+ { 0.00432, 95.74501}, { 0.00447, 91.53841}, { 0.00469, 69.65309}, { 0.00481, 80.08054},
+ { 0.00496, 63.44023}, { 0.00520, 110.00485}, { 0.00542, 108.04172}, { 0.00558, 165.23727},
+ { 0.00585, 154.10530}, { 0.00600, 176.84087}, { 0.00621, 169.06892}, { 0.00641, 157.49036},
+ { 0.00664, 148.93471}, { 0.00713, 199.24375}, { 0.00752, 210.01239}, { 0.00816, 195.86514},
+ { 0.00883, 352.16439}, { 0.00920, 354.57230}, { 0.00958, 393.60319}, { 0.00999, 420.30206},
+ { 0.01063, 529.24195}, { 0.01118, 538.52879}, { 0.01170, 651.23813}, { 0.01218, 713.79800},
+ { 0.01263, 788.52303}, { 0.01321, 871.46329}, { 0.01393, 1078.68114}, { 0.01459, 1180.46989},
+ { 0.01529, 1309.93961}, { 0.01597, 1366.39052}, { 0.01677, 1627.17452}, { 0.01762, 1826.38865},
+ { 0.01859, 2010.00287}, { 0.01963, 2388.91757}, { 0.02070, 2683.36530}, { 0.02178, 2875.49060},
+ { 0.02260, 3178.16923}, { 0.02418, 3572.88801}, { 0.02531, 4062.37227}, { 0.02709, 4921.59728},
+ { 0.02918, 5592.29649}, { 0.03107, 6186.93245}, { 0.03372, 7376.13311}, { 0.03768, 9534.78915},
+ { 0.04197, 11906.09757}, { 0.04691, 15241.79652}, { 0.05157, 18904.29545}, { 0.05953, 27091.47553},
+ { 0.07025, 41522.27709}, { 0.08343, 67789.86180}, { 0.11547, 124265.97640}, { 0.13380, 210301.81305},
+};
+
+static const LINE_EQ2 IntraBpBEquations[Q_TABLE_SIZE] = // One {M, C} line per Q index, evaluated as M*Complexity + C by GetEstimatedBpb() for key (BASE_FRAME) frames.
+{
+ { 0.00106, 2288.83435}, { 0.00111, 2381.24321}, { 0.00116, 2484.21594}, { 0.00120, 2536.01662},
+ { 0.00127, 2674.68182}, { 0.00136, 2835.12286}, { 0.00146, 2946.60819}, { 0.00154, 3034.48115},
+ { 0.00163, 3117.20084}, { 0.00172, 3233.89966}, { 0.00184, 3407.24634}, { 0.00195, 3543.03650},
+ { 0.00210, 3699.64900}, { 0.00215, 3793.02049}, { 0.00220, 3854.74475}, { 0.00224, 3915.99566},
+ { 0.00227, 3959.82316}, { 0.00233, 4204.84699}, { 0.00237, 4276.08365}, { 0.00242, 4387.12774},
+ { 0.00246, 4452.87571}, { 0.00251, 4578.78112}, { 0.00256, 4642.65467}, { 0.00261, 4710.56167},
+ { 0.00267, 4780.30368}, { 0.00279, 5030.71570}, { 0.00288, 5170.75293}, { 0.00303, 5374.83851},
+ { 0.00315, 5872.91562}, { 0.00324, 6002.40178}, { 0.00331, 6163.13111}, { 0.00341, 6330.88665},
+ { 0.00356, 6638.13056}, { 0.00367, 6813.20389}, { 0.00378, 7073.27347}, { 0.00391, 7264.41977},
+ { 0.00401, 7464.35187}, { 0.00414, 7686.68885}, { 0.00427, 8222.38307}, { 0.00442, 8469.27069},
+ { 0.00459, 8750.44432}, { 0.00472, 8961.97754}, { 0.00492, 9406.63273}, { 0.00513, 9784.70928},
+ { 0.00531, 10199.58953}, { 0.00556, 10786.82064}, { 0.00582, 11271.52430}, { 0.00606, 11694.10222},
+ { 0.00631, 12147.95242}, { 0.00664, 12808.92178}, { 0.00695, 13528.07213}, { 0.00732, 14860.00245},
+ { 0.00779, 15815.03822}, { 0.00822, 16685.69714}, { 0.00884, 18214.89132}, { 0.00972, 20431.29266},
+ { 0.01063, 22995.09970}, { 0.01169, 26309.59450}, { 0.01275, 29857.49766}, { 0.01436, 37027.81351},
+ { 0.01637, 49621.40625}, { 0.01873, 72068.47846}, { 0.02150, 123873.67566}, { 0.02488, 208511.43171},
+};
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+
+// For fixed-Q encoding: helps choose an appropriate key frame quality. Indexed by cpi->FixedQ; ClampAndUpdateQ() adds the entry (or half of it) to FixedQ to get the key frame Q index.
+const UINT8 FixedQKfBoostTable[64] =
+{
+ 22, 24, 26, 26, 26, 26, 27, 28,
+ 28, 27, 27, 26, 26, 25, 25, 24,
+ 24, 23, 23, 22, 22, 21, 21, 20,
+ 20, 19, 18, 18, 17, 16, 16, 15,
+ 15, 14, 14, 13, 13, 13, 12, 12,
+ 12, 12, 12, 11, 11, 11, 10, 9,
+ 8, 7, 7, 6, 5, 4, 3, 2,
+ 1, 1, 0, 0, 0, 0, 0, 0
+};
+
+const UINT8 GfFixedQKfBoostTable[64] = // Indexed by cpi->FixedQ; ClampAndUpdateQ() adds the entry to FixedQ when it decides to refresh the golden frame in fixed-Q mode.
+{
+ 20, 22, 23, 23, 23, 24, 25, 26,
+ 27, 27, 28, 28, 29, 29, 28, 28,
+ 28, 27, 27, 27, 26, 26, 26, 26,
+ 25, 25, 25, 25, 24, 24, 23, 23,
+ 22, 21, 21, 20, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 6, 5, 5, 4, 4,
+ 4, 3, 3, 2, 1, 0, 0, 0
+};
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetEstimatedBpb
+ *
+ *  INPUTS        : CP_INSTANCE *cpi    : Pointer to encoder instance.
+ *                  UINT32 TargetQIndex : Q Index to estimate for; used
+ *                                        directly as an index into the
+ *                                        Q_TABLE_SIZE-entry equation tables.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : double: The current estimate for the number of bits per
+ *                  block at the specified Q.
+ *
+ *  FUNCTION      : Evaluates the straight line M * cpi->InterError + C for
+ *                  the requested Q index (intra table for key frames, inter
+ *                  table otherwise), divides by cpi->pb.UnitFragments and
+ *                  scales by the currently active correction factor.
+ *
+ *  SPECIAL NOTES : Inter and Intra error are the same for key frames.
+ *                  It may prove necessary to clip the complexity value.
+ *
+ ****************************************************************************/
+double GetEstimatedBpb ( CP_INSTANCE *cpi, UINT32 TargetQIndex )
+{
+    double Complexity = (double)cpi->InterError;
+    int IsKeyFrame = ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME );
+    const LINE_EQ2 *Eq = IsKeyFrame ? &IntraBpBEquations[TargetQIndex]
+                                    : &InterBpBEquations[TargetQIndex];
+
+    // Primary prediction from the line equation for this Q index.
+    double BitsPerBlock = ((Eq->M * Complexity) + Eq->C) / (double)cpi->pb.UnitFragments;
+
+    // Apply the correction factor that is based upon recent observations of
+    // overshoot and undershoot for the kind of frame being coded.
+    if ( IsKeyFrame )
+    {
+        BitsPerBlock = BitsPerBlock * cpi->KeyFrameBpbCorrectionFactor;
+    }
+    else if ( cpi->pb.RefreshGoldenFrame )
+    {
+        // A GF update frame is expected to overshoot because it jumps to a
+        // higher quality from a lower one (the tables were calculated using
+        // fixed Q), hence the additional correction.
+        BitsPerBlock = BitsPerBlock * (cpi->BpbCorrectionFactor * cpi->GfuBpbCorrectionFactor);
+    }
+    else
+    {
+        BitsPerBlock = BitsPerBlock * cpi->BpbCorrectionFactor;
+    }
+
+    return BitsPerBlock;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : UpdateBpbCorrectionFactor2
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 FrameSize : Size of coded frame.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Scales the Bits Per Block correction factor that applies
+ *                  to the current frame (key frame, golden frame update or
+ *                  ordinary inter frame) by
+ *                  (3 + 2 * FrameSize / cpi->ThisFrameTarget) / 5.
+ *
+ *  SPECIAL NOTES : Does nothing when cpi->ThisFrameTarget is not positive,
+ *                  matching the guard in UpdateBpbCorrectionFactor; dividing
+ *                  by a zero target would otherwise store an infinite or
+ *                  NaN correction factor.
+ *
+ ****************************************************************************/
+void UpdateBpbCorrectionFactor2 ( CP_INSTANCE *cpi, UINT32 FrameSize )
+{
+    double BpbCorrectionFactor;
+    int IsKeyFrame;
+    int IsGfUpdate;
+
+#if defined(_MSC_VER)
+    // NOTE: This function uses floating point
+    ClearSysState();
+#endif
+
+    // No usable target to compare the frame size against: leave the factors alone.
+    if ( !(cpi->ThisFrameTarget > 0) )
+        return;
+
+    IsKeyFrame = ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME );
+    IsGfUpdate = ( !IsKeyFrame && cpi->pb.RefreshGoldenFrame );
+
+    // Fetch the factor that applies to this kind of frame.
+    if ( IsKeyFrame )
+        BpbCorrectionFactor = cpi->KeyFrameBpbCorrectionFactor;
+    else if ( IsGfUpdate )
+        BpbCorrectionFactor = cpi->GfuBpbCorrectionFactor;
+    else
+        BpbCorrectionFactor = cpi->BpbCorrectionFactor;
+
+    // Work out a size correction factor.
+    BpbCorrectionFactor *= (3+(2.0 * FrameSize) / cpi->ThisFrameTarget) /5;
+
+    // Store it back where it came from.
+    if ( IsKeyFrame )
+        cpi->KeyFrameBpbCorrectionFactor = BpbCorrectionFactor;
+    else if ( IsGfUpdate )
+        cpi->GfuBpbCorrectionFactor = BpbCorrectionFactor;
+    else
+        cpi->BpbCorrectionFactor = BpbCorrectionFactor;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : UpdateBpbCorrectionFactor
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 FrameSize : Size of coded frame.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Adjusts the Bits Per Block correction factor used for
+ *                  rate prediction. The factor for the current frame kind
+ *                  (key frame, golden frame update or ordinary inter frame)
+ *                  is nudged by a quarter of the percentage over/undershoot
+ *                  of FrameSize against cpi->ThisFrameTarget, limited to
+ *                  +25% / -20% per call and to MAX_BPB_FACTOR /
+ *                  MIN_BPB_FACTOR overall.
+ *
+ *  SPECIAL NOTES : With a non-positive cpi->ThisFrameTarget the size ratio
+ *                  stays at 100% and the factor is written back unchanged.
+ *
+ ****************************************************************************/
+void UpdateBpbCorrectionFactor ( CP_INSTANCE *cpi, UINT32 FrameSize )
+{
+    INT32 SizePercent = 100;   // Coded size as a percentage of the target
+    double Factor;
+
+#if defined(_MSC_VER)
+    // NOTE: This function uses floating point
+    ClearSysState();
+#endif
+
+    // Pick up the factor that applies to this kind of frame.
+    if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+        Factor = cpi->KeyFrameBpbCorrectionFactor;
+    else if ( cpi->pb.RefreshGoldenFrame )
+        Factor = cpi->GfuBpbCorrectionFactor;
+    else
+        Factor = cpi->BpbCorrectionFactor;
+
+    // Work out a size correction factor.
+    if ( cpi->ThisFrameTarget > 0 )
+        SizePercent = (100 * FrameSize) / cpi->ThisFrameTarget;
+
+    if ( (SizePercent > 101) &&
+         (cpi->pb.quantizer->FrameQIndex > cpi->Configuration.ActiveWorstQuality ) )
+    {
+        // Overshoot, and we are not already at the worst allowable quality:
+        // apply a quarter of the excess, damped to at most +25%.
+        SizePercent = 100 + ((SizePercent - 100)/4);
+        if ( SizePercent > 125 )
+            SizePercent = 125;
+
+        Factor = (Factor * SizePercent) / 100;
+
+        // Keep the factor within limits
+        if ( Factor > MAX_BPB_FACTOR )
+            Factor = MAX_BPB_FACTOR;
+    }
+    else if ( (SizePercent < 99) &&
+              (cpi->pb.quantizer->FrameQIndex < cpi->Configuration.ActiveBestQuality ) )
+    {
+        // Undershoot, and we are not already at the best allowable quality:
+        // apply a quarter of the shortfall, damped to at most -20%.
+        SizePercent = 100 - ((100 - SizePercent)/4);
+        if ( SizePercent < 80 )
+            SizePercent = 80;
+
+        Factor = (Factor * SizePercent) / 100;
+
+        // Keep the factor within limits
+        if ( Factor < MIN_BPB_FACTOR )
+            Factor = MIN_BPB_FACTOR;
+    }
+
+    // Write the (possibly adjusted) factor back to the slot it came from.
+    if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+        cpi->KeyFrameBpbCorrectionFactor = Factor;
+    else if ( cpi->pb.RefreshGoldenFrame )
+        cpi->GfuBpbCorrectionFactor = Factor;
+    else
+        cpi->BpbCorrectionFactor = Factor;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ClampAndUpdateQ
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 QIndex : Current Q Index.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Ensures that the specified Q index is within current
+ * active range and applies other constraints, then stores the
+ * result in cpi->pb.quantizer->FrameQIndex and calls
+ * VP6_UpdateQC().
+ *
+ * SPECIAL NOTES : In fixed-Q mode (cpi->FixedQ >= 0) the QIndex argument
+ * is not used; FrameQIndex is derived from cpi->FixedQ and
+ * the boost tables instead.
+ * On key frames the golden frame update state is reset
+ * (GfUpdateInterval, FramesTillGfUpdateDue, GfuMotionSpeed,
+ * GfuMotionComplexity, GfuBoost).
+ * On fixed-Q inter frames the routine may set
+ * cpi->pb.RefreshGoldenFrame to TRUE.
+ *
+ ****************************************************************************/
+void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex )
+{
+ // Apply limits to the value of QIndex: clamp it into [ActiveWorstQuality, ActiveBestQuality]
+ // NOTE: Bigger QIndex ==> Higher Quality (Lower Quantizer)!!!!
+ if ( QIndex > cpi->Configuration.ActiveBestQuality )
+ QIndex = cpi->Configuration.ActiveBestQuality;
+ else if ( QIndex < cpi->Configuration.ActiveWorstQuality )
+ QIndex = cpi->Configuration.ActiveWorstQuality;
+
+ // Apply range restrictions for key frames.
+ if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+ {
+ // Fixed Q Stuff for key frames
+ if ( cpi->FixedQ >= 0 )
+ {
+ UINT8 Q;
+
+ // Set an appropriate Key frame Q to match the recent ambient quality: full table boost normally, half of it once LastKeyFrame has reached ForceKeyFrameEvery
+ if ( (cpi->LastKeyFrame >= cpi->ForceKeyFrameEvery) )
+ Q = cpi->FixedQ + (FixedQKfBoostTable[cpi->FixedQ]/2); // NOTE(review): FixedQ indexes a 64-entry table; presumably limited to 0..63 by the caller - confirm
+ else
+ Q = cpi->FixedQ + FixedQKfBoostTable[cpi->FixedQ];
+
+ cpi->pb.quantizer->FrameQIndex = Q;
+ }
+ else
+ {
+ // Additional QIndex limits for Key frames: [KF_WORST_Q_INDEX, 60], skipped when cpi->pass == 2
+ if( cpi->pass != 2)
+ {
+ if ( QIndex < KF_WORST_Q_INDEX )
+ QIndex = KF_WORST_Q_INDEX;
+ else if ( QIndex > 60 )
+ QIndex = 60;
+ }
+
+ cpi->pb.quantizer->FrameQIndex = QIndex;
+ }
+
+ // We are going to update GF this frame so reset counter till next update due.
+ if(cpi->pass < 2)
+ cpi->GfUpdateInterval = DEFAULT_GF_UPDATE_INTERVAL;
+ else
+ cpi->GfUpdateInterval = DEFAULT_2PASS_GF_UPDATE_INTERVAL;
+
+ cpi->FramesTillGfUpdateDue = cpi->GfUpdateInterval;
+
+ if ( cpi->GfUpdateInterval ) // guards the division below against a zero interval
+ cpi->GfuMotionSpeed = GF_UPDATE_MOTION_INTERVAL / cpi->GfUpdateInterval;
+ else
+ cpi->GfuMotionSpeed = 0;
+
+ cpi->GfuMotionComplexity = GF_DEFAULT_MOTION_CMPLX;
+ cpi->GfuBoost = 0;
+ }
+ else
+ {
+ if(cpi->FixedQ >= 0)
+ {
+ // We want KFs to count as GF updates
+ cpi->pb.quantizer->FrameQIndex = cpi->FixedQ; // default for fixed-Q inter frames; only raised below when a GF update is chosen
+
+ if(!cpi->DisableGolden)
+ {
+ if ( cpi->FramesTillGfUpdateDue == 0 )
+ {
+ UINT32 Sum = 0;
+ UINT32 Sum2 = 0;
+ UINT32 Sum3 = 0;
+ UINT32 i;
+ UINT32 VarianceX = 0;
+ UINT32 VarianceY = 0;
+ UINT32 MaxVariance = 0;
+
+ // Check the level of MV reuse as a measure of how valuable a GF update is likely to be.
+ for ( i = 0; i < MAX_MODES; i++ )
+ Sum += cpi->ModeDist[i];
+
+ if ( Sum )
+ {
+ Sum2 = Sum - (cpi->ModeDist[CODE_INTRA] + cpi->ModeDist[CODE_INTER_PLUS_MV] + cpi->ModeDist[CODE_INTER_FOURMV]); // modes other than intra / new MV / four MV
+ Sum3 = Sum2 - cpi->ModeDist[CODE_INTER_NO_MV]; // ...and additionally excluding inter with no MV
+
+ // Convert Sum2 and Sum3 to % of all counted modes
+ Sum2 = (Sum2 * 100 / Sum);
+ Sum3 = (Sum3 * 100 / Sum);
+ }
+
+ // Calculate various motion metrics: speed = larger of mean |x| and mean |y|; variance = (N*SumSq - Sum*Sum) / (N*N) per axis
+ if ( cpi->FrameMvStats.NumMvs )
+ {
+ cpi->GfuMotionSpeed = (cpi->FrameMvStats.SumAbsX > cpi->FrameMvStats.SumAbsY) ? (cpi->FrameMvStats.SumAbsX/cpi->FrameMvStats.NumMvs) : (cpi->FrameMvStats.SumAbsY/cpi->FrameMvStats.NumMvs);
+ VarianceX = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumXSq) - (cpi->FrameMvStats.SumX*cpi->FrameMvStats.SumX)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+ VarianceY = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumYSq) - (cpi->FrameMvStats.SumY*cpi->FrameMvStats.SumY)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+ MaxVariance = (VarianceX > VarianceY) ? VarianceX : VarianceY;
+ cpi->GfuMotionComplexity = cpi->GfuMotionSpeed + ((VarianceX)/4) + ((VarianceY)/4);
+ if ( cpi->GfuMotionComplexity > 31 ) // cap at 31
+ cpi->GfuMotionComplexity = 31;
+ }
+ else
+ {
+ cpi->GfuMotionSpeed = 0;
+ cpi->GfuMotionComplexity = 0;
+ }
+
+ // Should we even consider a GF update or is there no point: all four thresholds must be met
+ if ( (Sum2 > GF_MODE_DIST_THRESH1) && (Sum3 > GF_MODE_DIST_THRESH2) &&
+ (cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION) &&
+ (MaxVariance <= GF_MAX_VAR_THRESH) )
+ {
+ cpi->pb.quantizer->FrameQIndex = cpi->FixedQ + GfFixedQKfBoostTable[cpi->FixedQ]; // boosted Q for the golden frame update
+
+ cpi->pb.RefreshGoldenFrame = TRUE;
+ }
+ else
+ {
+ cpi->pb.quantizer->FrameQIndex = cpi->FixedQ;
+ }
+ }
+ else
+ {
+
+ cpi->pb.quantizer->FrameQIndex = cpi->FixedQ;
+ }
+ }
+ }
+ else
+ {
+ cpi->pb.quantizer->FrameQIndex = QIndex; // rate-controlled inter frame: use the clamped QIndex
+ }
+ }
+
+ // If necessary re-initialise the quantiser
+ VP6_UpdateQC( cpi->pb.quantizer, cpi->pb.Vp3VersionNo );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : RegulateQ
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  INT32 TargetBits : Target number of bits for frame.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : This function tries to regulate quantizer level
+ *                  to produce the specified target number of bits.
+ *                  It walks the Q indices upwards, stops at the first one
+ *                  whose predicted bits per block exceed the target, and
+ *                  picks either that index or the one below it, whichever
+ *                  prediction lies closer to the target. The choice is
+ *                  then passed to ClampAndUpdateQ().
+ *
+ *  SPECIAL NOTES : If no index overshoots, Q_TABLE_SIZE - 1 is used.
+ *                  If even index 0 overshoots, index 0 is used: there is
+ *                  no lower index to fall back to, and computing 0 - 1 in
+ *                  UINT32 would wrap to the largest value and be clamped
+ *                  to the best quality instead of the worst.
+ *
+ ****************************************************************************/
+void RegulateQ ( CP_INSTANCE *cpi, INT32 TargetBits )
+{
+    UINT32 i;
+    UINT32 QIndex = Q_TABLE_SIZE - 1;
+    double Targetbpb = (double)TargetBits / (double)cpi->pb.UnitFragments;
+    double LastBitError = 10000.0;   // Infeasibly high number to initialize
+
+    // Search for the best Q for the target bitrate.
+    for ( i = 0; i < Q_TABLE_SIZE; i++ )
+    {
+        double Predbpb = GetEstimatedBpb( cpi, i );
+
+        if ( Predbpb > Targetbpb )
+        {
+            // First overshooting index: keep it if it is at least as close
+            // to the target as the previous (undershooting) one, or if
+            // there is no previous index at all.
+            if ( (i == 0) || ((Predbpb - Targetbpb) <= LastBitError) )
+                QIndex = i;
+            else
+                QIndex = i - 1;
+            break;
+        }
+
+        // Still at or under target: remember by how much.
+        LastBitError = Targetbpb - Predbpb;
+    }
+
+    ClampAndUpdateQ ( cpi, QIndex );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ConfigureQuality
+ *
+ *  INPUTS        : CP_INSTANCE *cpi    : Pointer to encoder instance.
+ *                  UINT32 QualityValue : Quality value; values above 63
+ *                                        are treated as 63.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets the worst-case operating Q index
+ *                  (cpi->Configuration.WorstQuality = 63 - QualityValue)
+ *                  and makes it the active worst quality as well.
+ *
+ *  SPECIAL NOTES : QualityValue is unsigned, so without the limit a value
+ *                  above 63 would wrap around in the subtraction.
+ *
+ ****************************************************************************/
+void ConfigureQuality ( CP_INSTANCE *cpi, UINT32 QualityValue )
+{
+    // Keep the subtraction below from wrapping around.
+    if ( QualityValue > 63 )
+        QualityValue = 63;
+
+    // Set the worst case quality value.
+    // Note that the actual quality is determined by lookup into the quantiser table QThreshTable[]
+    cpi->Configuration.WorstQuality = 63 - QualityValue;
+
+    // Set the default Active WorstQuality.
+    cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h
new file mode 100644
index 00000000..06c51167
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h
@@ -0,0 +1,97 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by compdll.rc
+//
+#define IDD_SYNC_SENTINEL 109
+#define IDD_STATS_DIALOG 113
+#define IDM_SHOW_STATS 115
+#define ID_OPTIONS_LIVEVIDEO 116
+#define IDM_STOP 117
+#define IDM_SHOW_DIFFERENCES 130
+#define ID_OPTIONS_SETUPGRABBER 131
+#define IDD_DIALOG1 209
+#define IDC_CK_ENABLE 1023
+#define IDC_ED_HSEARCH 1024
+#define IDC_ED_HINSERT 1028
+#define IDC_ED_VSEARCH 1029
+#define IDC_ED_VINSERT 1030
+#define IDC_MFILTER 1039
+#define IDC_STATS_FRAME_NO_EDIT 1040
+#define IDC_BAR_ENHANCE_EDIT 1041
+#define IDC_STATS_LAST_FRAME_EDIT 1041
+#define IDC_STATS_AV_EDIT 1042
+#define IDC_STATS_PEAK_EDIT 1043
+#define IDC_STATS_DROPPED_FRAMES_EDIT 1044
+#define IDC_STATS_AVFPS_EDIT 1045
+#define IDC_STATS_IFPS_EDIT 1046
+#define IDC_STATS_TIME_EDIT 1047
+#define IDC_STATS_CURR_EDIT 1048
+#define IDC_STATS_CFA_EDIT 1049
+#define IDC_STATS_CFAAV_EDIT 1050
+#define IDC_EDIT_CAT_A 2000
+#define IDC_EDIT_SRC_FR 2001
+#define IDC_EDIT_PIX_DIFF_THRESH 2002
+#define IDC_EDIT_LOCALS_LOSSY 2003
+#define IDC_EDIT_CAT_C 2004
+#define IDC_EDIT_CAT_D 2005
+#define IDC_EDIT_CAT_B 2006
+#define IDC_EDIT_CAT_A_FR 2007
+#define IDC_EDIT_CAT_C_FR 2008
+#define IDC_EDIT_CAT_D_FR 2009
+#define IDC_EDIT_CAT_B_FR 2010
+#define IDC_EDIT_NUM_FRAMES 2011
+#define IDC_EDIT_NOISE_SUP 2012
+#define IDC_EDIT_NOISE_SUP2 2013
+#define IDC_PIXEL_LOSSY_CHECK 2014
+#define IDC_SCORE_LOSSY 2015
+#define IDC_LOCALS_LOSSY 2016
+#define IDC_SING_LOSSY 2017
+#define IDC_EDIT_SCORE_LOSSY 2018
+#define IDC_EDIT_OUT_FRAME_RATE 2019
+#define IDC_EDIT_TARGET_DATA_RATE 2020
+#define IDC_EDIT_PIX_GREY_THRESH 2021
+#define IDC_DCT_THRESH 2023
+#define IDC_GREY_DCT_VARIABLE 2024
+#define IDC_DCT_THRESH_TOP 2025
+#define IDC_CONS_SEMI 2026
+#define IDC_CONS_NORM_FRAMES 2027
+#define IDC_NUM_CONS_SEMI 2028
+#define IDC_NUM_CONS_NORM 2029
+#define IDC_NUM_CONS_NORM_MAX 2030
+#define IDC_CONS_NORM_FRAMES_MAX 2031
+#define IDC_DCT_GREY 2033
+#define IDC_GREY 2034
+#define IDC_COLOUR 2035
+#define IDC_EDIT_MAX_DATA_RATE 2036
+#define IDC_CWASH_CHECK 2037
+#define IDC_PWASH_CHECK 2038
+#define IDC_FF_DCT_EDIT 2039
+#define IDM_RUN 40001
+#define IDM_COMPRESS 40002
+#define IDM_STEP 40003
+#define IDM_REPLAY 40004
+#define IDM_PARAMS 40005
+#define IDM_CAPTURE_RAW 40006
+#define IDM_SHOW_ZERO_DIFFERENCES 40007
+#define IDM_SHOW_EDGES 40008
+#define ID_OPTIONS_DISKSTATS 40008
+#define IDM_SHOW_NORMAL 40009
+#define IDM_SHOW_SCORE 40010
+#define ID_OPTIONS_OUTPUTFILTEREDBITMAPS 40011
+#define ID_OPTIONS_DISABLEENCODER 40012
+#define RGM_SLOW_FDCT 40013
+#define RGM_FAST_IDCT 40014
+#define RGM_FAST_FDCT 40015
+#define RGM_SLOW_IDCT 40016
+#define RGM_DEBLOCK 40017
+
+// Next default values for new objects
+//
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE 104
+#define _APS_NEXT_COMMAND_VALUE 40018
+#define _APS_NEXT_CONTROL_VALUE 1000
+#define _APS_NEXT_SYMED_VALUE 101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c
new file mode 100644
index 00000000..218c9cea
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c
@@ -0,0 +1,810 @@
+/****************************************************************************
+*
+* Module Title : twopass.c
+*
+* Description : Two-pass encoding support: first-pass statistics handling and second-pass set-up/control.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+#include "twopass.h"
+#include <math.h>
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern const UINT32 GfuDataRateBoost[64];
+extern const UINT32 GfuMotionCorrection[32];
+extern const UINT32 GfUsageCorrection[64];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ZeroStats
+ *
+ *  INPUTS        : FIRSTPASS_STATS *section : Accumulator to reset.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Resets the sample count, the key/golden tallies and
+ *                  every accumulated measurement of a first-pass
+ *                  statistics record to zero.
+ *
+ *  SPECIAL NOTES : The record's frame member is not modified.
+ *
+ ****************************************************************************/
+void ZeroStats( FIRSTPASS_STATS *section)
+{
+    // Counters and flags tallies
+    section->count = 0;
+    section->isKey = 0;
+    section->isGolden = 0;
+
+    // Motion measurements
+    section->MotionSpeed = 0;
+    section->VarianceX = 0;
+    section->VarianceY = 0;
+
+    // Mode usage percentages
+    section->PercentGolden = 0;
+    section->PercentMotionY = 0;
+    section->PercentMotion = 0;
+    section->PercentNewMotion = 0;
+
+    // Error, size and quality measurements
+    section->MeanInterError = 0;
+    section->MeanIntraError = 0;
+    section->BitsPerMacroblock = 0;
+    section->SqBitsPerMacroblock = 0;
+    section->PSNR = 0;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : AccumulateStats
+ *
+ *  INPUTS        : FIRSTPASS_STATS *section : Stats to accumulate into.
+ *                  FIRSTPASS_STATS *frame   : Stats to add to the
+ *                                             accumulated values.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Accumulates firstpass statistics: bumps the sample count
+ *                  of the section by one and adds every measurement and
+ *                  the key/golden flags of the frame record to it.
+ *
+ *  SPECIAL NOTES : The frame record is only read.
+ *
+ ****************************************************************************/
+void AccumulateStats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
+{
+    // One more sample in this section
+    section->count += 1;
+
+    // Key / golden frame tallies
+    section->isKey = section->isKey + frame->isKey;
+    section->isGolden = section->isGolden + frame->isGolden;
+
+    // Motion measurements
+    section->MotionSpeed = section->MotionSpeed + frame->MotionSpeed;
+    section->VarianceX = section->VarianceX + frame->VarianceX;
+    section->VarianceY = section->VarianceY + frame->VarianceY;
+
+    // Mode usage percentages
+    section->PercentGolden = section->PercentGolden + frame->PercentGolden;
+    section->PercentMotionY = section->PercentMotionY + frame->PercentMotionY;
+    section->PercentMotion = section->PercentMotion + frame->PercentMotion;
+    section->PercentNewMotion = section->PercentNewMotion + frame->PercentNewMotion;
+
+    // Error, size and quality measurements
+    section->MeanInterError = section->MeanInterError + frame->MeanInterError;
+    section->MeanIntraError = section->MeanIntraError + frame->MeanIntraError;
+    section->BitsPerMacroblock = section->BitsPerMacroblock + frame->BitsPerMacroblock;
+    section->SqBitsPerMacroblock = section->SqBitsPerMacroblock + frame->SqBitsPerMacroblock;
+    section->PSNR = section->PSNR + frame->PSNR;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : AvgStats
+ *
+ *  INPUTS        : FIRSTPASS_STATS *section : Accumulated stats to convert
+ *                                             to averages using count.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Divides every accumulated measurement by the sample
+ *                  count held in the record.
+ *
+ *  SPECIAL NOTES : A record with a zero count is left untouched.
+ *                  count, isKey and isGolden are not divided.
+ *
+ ****************************************************************************/
+void AvgStats ( FIRSTPASS_STATS *section)
+{
+    if ( section->count )
+    {
+        // Motion measurements
+        section->MotionSpeed = section->MotionSpeed / section->count;
+        section->VarianceX = section->VarianceX / section->count;
+        section->VarianceY = section->VarianceY / section->count;
+
+        // Mode usage percentages
+        section->PercentGolden = section->PercentGolden / section->count;
+        section->PercentMotionY = section->PercentMotionY / section->count;
+        section->PercentMotion = section->PercentMotion / section->count;
+        section->PercentNewMotion = section->PercentNewMotion / section->count;
+
+        // Error, size and quality measurements
+        section->MeanInterError = section->MeanInterError / section->count;
+        section->MeanIntraError = section->MeanIntraError / section->count;
+        section->BitsPerMacroblock = section->BitsPerMacroblock / section->count;
+        section->SqBitsPerMacroblock = section->SqBitsPerMacroblock / section->count;
+        section->PSNR = section->PSNR / section->count;
+    }
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : OutputStats
+ *
+ *  INPUTS        : FILE *f                : File to output the stats to.
+ *                  FIRSTPASS_STATS *stats : Stats record to write.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes one text line holding four integer columns
+ *                  (frame, count, key, golden) followed by ten
+ *                  floating point columns, in the column order that
+ *                  InputStats reads back.
+ *
+ *  SPECIAL NOTES : PercentMotionY and PSNR are not written.
+ *                  The result of fprintf is not checked.
+ *
+ ****************************************************************************/
+void OutputStats( FILE *f, FIRSTPASS_STATS *stats)
+{
+    fprintf(f,
+        "%8d %8d %8d %8d %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f \n",
+        // Integer columns
+        stats->frame, stats->count, stats->isKey, stats->isGolden,
+        // Size and error columns
+        stats->BitsPerMacroblock, stats->SqBitsPerMacroblock,
+        stats->MeanInterError, stats->MeanIntraError,
+        // Motion columns
+        stats->MotionSpeed, stats->VarianceX, stats->VarianceY,
+        // Mode usage columns
+        stats->PercentMotion, stats->PercentNewMotion, stats->PercentGolden);
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : InputStats
+ *
+ *  INPUTS        : FILE *f                : File to read the stats from.
+ *
+ *  OUTPUTS       : FIRSTPASS_STATS *stats : Stats record to fill in.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads one record of four integer columns (frame, count,
+ *                  key, golden) followed by ten floating point columns, in
+ *                  the column order that OutputStats writes.
+ *
+ *  SPECIAL NOTES : PercentMotionY and PSNR are not read.
+ *                  The result of fscanf is not checked, so on a short or
+ *                  failed read some or all members keep their previous
+ *                  contents.
+ *
+ ****************************************************************************/
+void InputStats( FILE *f, FIRSTPASS_STATS *stats)
+{
+    fscanf(f,
+        "%d %d %d %d %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg \n",
+        // Integer columns
+        &stats->frame, &stats->count, &stats->isKey, &stats->isGolden,
+        // Size and error columns
+        &stats->BitsPerMacroblock, &stats->SqBitsPerMacroblock,
+        &stats->MeanInterError, &stats->MeanIntraError,
+        // Motion columns
+        &stats->MotionSpeed, &stats->VarianceX, &stats->VarianceY,
+        // Mode usage columns
+        &stats->PercentMotion, &stats->PercentNewMotion, &stats->PercentGolden);
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : OpenSectionStatsFile
+ *
+ *  INPUTS        : const char *FirstPassFile : Name of the first pass file.
+ *                  const char *Mode          : fopen mode string.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : FILE *: Stream for "<FirstPassFile>.sst", or NULL if the
+ *                  name is missing, too long for the local buffer, or the
+ *                  file cannot be opened.
+ *
+ *  FUNCTION      : Builds the companion ".sst" file name and opens it.
+ *
+ *  SPECIAL NOTES : The length is checked before copying so the name buffer
+ *                  can neither overflow nor be left unterminated.
+ *
+ ****************************************************************************/
+static FILE *OpenSectionStatsFile ( const char *FirstPassFile, const char *Mode )
+{
+    static const char Suffix[] = ".sst";
+    char Name[1024];
+
+    // sizeof(Suffix) includes the terminating NUL.
+    if ( !FirstPassFile || (strlen(FirstPassFile) + sizeof(Suffix)) > sizeof(Name) )
+        return NULL;
+
+    strcpy(Name, FirstPassFile);
+    strcat(Name, Suffix);
+
+    return fopen(Name, Mode);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WriteStatsHeader
+ *
+ *  INPUTS        : FILE *f : Stream to write to (may be NULL).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes the column heading line of a stats file.
+ *
+ *  SPECIAL NOTES : Does nothing when f is NULL.
+ *
+ ****************************************************************************/
+static void WriteStatsHeader ( FILE *f )
+{
+    if ( !f )
+        return;
+
+    fprintf(f,
+        "%8s %8s %8s %8s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s \n",
+        "","#","key","golden","bits/mb","sq bits/mb","Inter","Intra","Motion","VarX","VarY",
+        "%Motion","%NewMotion","%Golden");
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass2Initialize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Initialize 1st or 2nd pass of the compressor.
+ *                  Pass 1: resets cpi->fpmss, creates the first pass file
+ *                  and its ".sst" companion and writes a heading line to
+ *                  each.
+ *                  Pass 2: opens both files for reading, skips their
+ *                  heading lines, loads the section record from the ".sst"
+ *                  file into cpi->fpmss and derives the worst quality
+ *                  setting and the clip bit budget from it.
+ *
+ *  SPECIAL NOTES : The streams are stored in cpi->fs and cpi->ss and are
+ *                  not closed here.
+ *                  In pass 2, if either file cannot be opened the routine
+ *                  returns without reading or changing the configuration.
+ *                  In pass 1, a file that cannot be created gets no heading.
+ *                  Any other value of cpi->pass does nothing.
+ *
+ ****************************************************************************/
+void CCONV Pass2Initialize ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    if ( cpi->pass == 2 )
+    {
+        int actualMBS = (cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+        double fpBitRate;           // first pass bitrate
+        double target;              // target bitrate
+        double NewQ;
+        double RoomForVariation;
+        char dummy[1024];
+
+        ClearSysState();
+
+        cpi->fs = fopen(CompConfig->FirstPassFile,"r");
+        cpi->ss = OpenSectionStatsFile(CompConfig->FirstPassFile,"r");
+
+        // Nothing can be read without both files; reading from a NULL
+        // stream is undefined.
+        if ( !cpi->fs || !cpi->ss )
+            return;
+
+        // Skip the heading line of each file.
+        fgets(dummy,sizeof(dummy),cpi->fs);
+        fgets(dummy,sizeof(dummy),cpi->ss);
+
+        InputStats(cpi->ss,&cpi->fpmss);
+
+        // Fixed allowance; values derived from the spread of bits per
+        // macroblock ((Sigma+2)/3.5 and (Sigma+2)/15) were always
+        // overridden by this constant, so they are no longer computed.
+        RoomForVariation = 1;
+
+        fpBitRate = cpi->fpmss.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+        target = (double) cpi->Configuration.TargetBandwidth;
+
+        // Move down from the first pass Q by the number of 4% steps that
+        // separate the first pass bitrate from the target.
+        NewQ = (INT32) FIRSTPASS_Q - ( RoomForVariation + .5 + log(fpBitRate/target) / log(1.04));
+
+        if ( NewQ < cpi->Configuration.WorstQuality )
+            NewQ = cpi->Configuration.WorstQuality;
+
+        if ( NewQ > cpi->Configuration.ActiveBestQuality )
+            NewQ = cpi->Configuration.ActiveBestQuality;
+
+        if ( NewQ > 50 )
+            NewQ = 50;
+
+        cpi->PassedInWorstQ = cpi->Configuration.WorstQuality;
+        cpi->Configuration.WorstQuality = (INT32) NewQ;
+        cpi->CalculatedWorstQ = (INT32) NewQ;
+
+        cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+
+        cpi->TotalBitsLeftInClip = 1.0 * cpi->ActualTargetBitRate * cpi->fpmss.count / cpi->Configuration.OutputFrameRate;
+        cpi->FramesYetToEncode = cpi->fpmss.count;
+        cpi->TotalBitsPerMB = cpi->fpmss.MeanInterError * cpi->fpmss.count;
+    }
+    else if ( cpi->pass == 1 )
+    {
+        ZeroStats( &cpi->fpmss);
+
+        cpi->fs = fopen(CompConfig->FirstPassFile,"w");
+        WriteStatsHeader(cpi->fs);
+
+        cpi->ss = OpenSectionStatsFile(CompConfig->FirstPassFile,"w");
+        WriteStatsHeader(cpi->ss);
+    }
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass2Control
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (all results are written into *cpi).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Determines Section info, and does datarate control
+ *                  that is only possible in 2nd pass.
+ *
+ *                  One first pass stats record is read from cpi->fs for the
+ *                  current frame, then one of three things happens:
+ *                   - cpi->FramesToKey == 0 : the frame is a key frame. The
+ *                     following records are scanned to find the next key
+ *                     frame, a key frame boost is derived, the stats of the
+ *                     section are averaged and either a section bitrate
+ *                     (TwoPassVBREnabled) or a section worst Q is chosen.
+ *                   - cpi->FramesTillGfUpdateDue == 0 : the following records
+ *                     are used to decide on a golden frame update and boost.
+ *                   - otherwise no look-ahead is done.
+ *                  After any look-ahead the stream is put back to just after
+ *                  the current frame's record.
+ *
+ *  SPECIAL NOTES : The stream position is restored with fsetpos(); fpos_t is
+ *                  an opaque type so no arithmetic is done on it.
+ *
+ ****************************************************************************/
+void CCONV Pass2Control( CP_INSTANCE *cpi)
+{
+    INT32 i;
+    FIRSTPASS_STATS sectionStats;
+    FIRSTPASS_STATS thisFrame;
+    FIRSTPASS_STATS nextFrame;
+    FIRSTPASS_STATS lastFrame;
+    double NewBitsPerMB;
+    double total = 0;
+    double avg = 0;
+    int actualMBS;
+    fpos_t pos1;    // position in cpi->fs just after this frame's record
+
+    InputStats(cpi->fs,&thisFrame);
+    fgetpos(cpi->fs,&pos1);
+
+    // Remaining clip total once this frame has been accounted for; stored at the end of the function.
+    NewBitsPerMB = cpi->TotalBitsPerMB - thisFrame.MeanInterError;
+
+    // keyframe and section processing !
+    if ( cpi->FramesToKey == 0 )
+    {
+        cpi->KFForced = cpi->NextKFForced;
+        cpi->NextKFForced = 0;
+
+        cpi->ThisIsKeyFrame = TRUE;
+        ZeroStats( &sectionStats);
+
+        cpi->FramesToKey = 1;
+        InputStats(cpi->fs,&nextFrame);
+
+        // find the next keyframe
+        while ( !feof(cpi->fs) )
+        {
+            memcpy(&lastFrame,&thisFrame,sizeof(thisFrame));
+            memcpy(&thisFrame,&nextFrame,sizeof(thisFrame));
+
+            InputStats(cpi->fs,&nextFrame);
+
+            // Stop here if the first pass marked this frame as a key frame and either
+            //  - we are past the minimum key frame distance and the inter or intra error moved by
+            //    more than 40% from the previous frame, or intra error is below 1.2 x inter error, or
+            //  - the next frame gets a big benefit from it being a keyframe.
+            if ( thisFrame.isKey
+                 && ( ( cpi->FramesToKey > cpi->MinimumDistanceToKeyFrame
+                        && ( fabs(lastFrame.MeanInterError - thisFrame.MeanInterError) / thisFrame.MeanInterError > .40
+                             || fabs(lastFrame.MeanIntraError - thisFrame.MeanIntraError) / thisFrame.MeanIntraError > .40
+                             || thisFrame.MeanIntraError * 5 < thisFrame.MeanInterError * 6 ) )
+                      || nextFrame.MeanIntraError > nextFrame.MeanInterError + 2000 ) )
+            {
+                break;
+            }
+
+            cpi->FramesToKey ++;
+
+            // since we don't have a key frame within the next two forcekeyframeevery intervals
+            // set the next keyframe to be forcekeyframe every
+            if ( cpi->FramesToKey > 2 * cpi->ForceKeyFrameEvery )
+            {
+                cpi->FramesToKey = cpi->ForceKeyFrameEvery;
+                cpi->NextKFForced = 1;
+                break;
+            }
+        }
+        if ( feof(cpi->fs) )
+            cpi->FramesToKey ++;
+
+        // distance to keyframe is not 2 times our max distance but it is greater than our max distance
+        // since we need a keyframe put it in the center between this key frame and the next
+        if ( cpi->FramesToKey > cpi->ForceKeyFrameEvery )
+        {
+            cpi->FramesToKey /= 2;
+            cpi->NextKFForced = 1;
+        }
+
+        // back to the record that follows the current frame
+        fsetpos(cpi->fs,&pos1);
+
+        // determine how big to make this keyframe based on how well the subsequent frames use inter blocks
+        total = 1.0;
+        for ( i = 0; i < 4 && i < cpi->FramesToKey; i++ )
+        {
+            InputStats(cpi->fs,&nextFrame);
+            total *= ( nextFrame.MeanIntraError - nextFrame.MeanInterError ) / nextFrame.MeanIntraError;
+            avg += total * ( nextFrame.MeanIntraError - nextFrame.MeanInterError );
+
+            // this break out is to insure we handle the situation that is really different from
+            // our last frame but similar to our next frame doesn't get counted in our metric, which
+            // is trying to estimate the average amount of data retained from the keyframe.
+            if ( total < .1 || nextFrame.MeanIntraError < 200 )
+                break;
+        }
+
+        // avg is truncated to INT32 first, then integer-divided by 180
+        cpi->KFBoost = (INT32) avg / 180;
+
+        if ( cpi->FramesToKey < 4 )
+            cpi->KFBoost = 0;
+
+        fsetpos(cpi->fs,&pos1);
+
+        // read first pass file up until next keyframe and generate avg section stats.
+        for ( i = 0; i < cpi->FramesToKey-1; i++ )
+        {
+            InputStats(cpi->fs,&thisFrame);
+            AccumulateStats(&sectionStats, &thisFrame);
+        }
+        AvgStats(&sectionStats);
+
+        fsetpos(cpi->fs,&pos1);
+
+        actualMBS = (cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+        {
+            // Share of the bits left in the clip given to this section, in proportion to its
+            // share of the remaining inter error, expressed as a bitrate.
+            double SectionErrorPerMB = sectionStats.MeanInterError * sectionStats.count;
+            double Pctg = SectionErrorPerMB / cpi->TotalBitsPerMB;
+            double DesiredSectionSize = cpi->TotalBitsLeftInClip * Pctg;
+            double DesiredSectionBitRate = cpi->Configuration.OutputFrameRate * DesiredSectionSize / sectionStats.count;
+
+            if ( sectionStats.count < 2 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate;
+
+            // blend between the flat target rate and the section rate (bias is a percentage)
+            if ( cpi->TwoPassVBRBias )
+            {
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * (100 - cpi->TwoPassVBRBias) / 100 + DesiredSectionBitRate * cpi->TwoPassVBRBias / 100;
+            }
+
+            // keep the section rate inside the configured min / max percentages of the target rate
+            if ( DesiredSectionBitRate < cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection / 100;
+
+            if ( DesiredSectionBitRate > (double) cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100;
+
+            if ( cpi->TwoPassVBREnabled )
+            {
+                // determine bitrate to shoot for for this section
+                cpi->Configuration.TargetBandwidth = (INT32) DesiredSectionBitRate;
+                cpi->InterFrameTarget = (INT32)((cpi->Configuration.TargetBandwidth -
+                    ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+
+                cpi->PerFrameBandwidth = (cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate);
+            }
+            else
+            {
+                // determine q to use for this section
+                double target;  // target bitrate
+                double NewQ;
+                double FirstPassBitRate = sectionStats.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+
+                // Clamp the Section Datarate between what will fill up the buffer and what will empty it to .25 of the optimal
+                double ActualPerFrameBandWidth = cpi->ActualTargetBitRate / cpi->Configuration.OutputFrameRate;
+                double UnusedSectionEndBufferLevel = cpi->BufferLevel + (((cpi->MaxAllowedDatarate * ActualPerFrameBandWidth) / 100) * sectionStats.count);
+                double QuarterOptimalBufferLevel = cpi->OptimalBufferLevel / 4.0;
+                double MaxBitRate = cpi->Configuration.OutputFrameRate * (UnusedSectionEndBufferLevel - QuarterOptimalBufferLevel) / (sectionStats.count + cpi->KFBoost / 16);
+                double MinBitRate = cpi->Configuration.OutputFrameRate * (UnusedSectionEndBufferLevel - cpi->MaxBufferLevel ) / (sectionStats.count + cpi->KFBoost / 16);
+
+                if ( MaxBitRate < cpi->ActualTargetBitRate / 3 )
+                    MaxBitRate = cpi->ActualTargetBitRate / 3;
+                if ( MinBitRate < cpi->ActualTargetBitRate / 3 )
+                    MinBitRate = cpi->ActualTargetBitRate / 3;
+
+                if ( DesiredSectionBitRate > MaxBitRate )
+                    DesiredSectionBitRate = MaxBitRate;
+
+                if ( DesiredSectionBitRate < MinBitRate )
+                    DesiredSectionBitRate = MinBitRate;
+
+                cpi->Configuration.TargetBandwidth = (INT32) DesiredSectionBitRate;
+
+                cpi->InterFrameTarget = (INT32)((cpi->Configuration.TargetBandwidth -
+                    ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+
+                target = (double) cpi->Configuration.TargetBandwidth;
+
+                // if q is worse than we estimated for the entire clip use it ( this must be a tough section )!!
+                // otherwise use the one we estimated.
+                NewQ = (INT32) FIRSTPASS_Q - ( .5 + log(FirstPassBitRate/target) / log(1.040));
+                if ( NewQ < cpi->CalculatedWorstQ )
+                {
+                    // never go below the worst Q the caller originally passed in
+                    if ( NewQ < cpi->PassedInWorstQ )
+                        NewQ = cpi->PassedInWorstQ;
+
+                    cpi->Configuration.ActiveWorstQuality = (INT32) NewQ;
+                    cpi->Configuration.WorstQuality = (INT32) NewQ;
+                }
+                else
+                {
+                    cpi->Configuration.ActiveWorstQuality = cpi->CalculatedWorstQ;
+                    cpi->Configuration.WorstQuality = cpi->CalculatedWorstQ;
+                }
+            }
+        }
+    }
+
+    // its not a keyframe check if its time to update our golden frame?
+    else if ( cpi->FramesTillGfUpdateDue == 0 )
+    {
+        FIRSTPASS_STATS GfStats;
+        int IntraToInterRatio;
+        int GfUsage;
+
+        ZeroStats( &GfStats);
+
+        // ignore the next frame ( it will have this frame as reference no matter what)
+        InputStats(cpi->fs,&nextFrame);
+
+        // check next frames
+        for ( i = 0; i < 4; i++ )
+        {
+            InputStats(cpi->fs,&nextFrame);
+            AccumulateStats(&GfStats, &nextFrame);
+
+            if ( nextFrame.isGolden )
+            {
+                // throwout the next frame after this one
+                InputStats(cpi->fs,&lastFrame);
+            }
+        }
+        AvgStats(&GfStats);
+
+        // Intra / inter error ratio as a percentage, scaled by the percentage of new motion
+        IntraToInterRatio = (int) (100 * GfStats.MeanIntraError / (GfStats.MeanInterError ));
+        IntraToInterRatio = (int) (IntraToInterRatio * GfStats.PercentNewMotion / 100);
+        GfUsage = (int) (GfStats.PercentGolden * 8);
+
+        cpi->GfuBoost = IntraToInterRatio;
+
+        // Correct boost to take account of recent observed level of GF usage
+        if ( (GfUsage >> 3) < 64 )
+            cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[(GfUsage >> 3)]) / 16;
+        else
+            cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[63]) / 16;
+
+        // Scale by the Q dependent table entry (table values are applied as value / 1000)
+        cpi->GfuBoost = cpi->GfuBoost* GfuDataRateBoost[cpi->pb.AvgFrameQIndex] / 1000;
+
+        // Should we even consider a GF update or is there no point
+        if ( ( GfStats.PercentNewMotion > GF_MODE_DIST_THRESH2 ) &&
+             ( GfStats.MotionSpeed <= MAX_GF_UPDATE_MOTION ) )
+        {
+            cpi->ThisFrameTarget = (cpi->InterFrameTarget * (100 * cpi->GfUpdateInterval)) /
+                                   ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+
+            cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);
+
+            if ( cpi->FramesToKey > 3 )
+            {
+                cpi->pb.RefreshGoldenFrame = TRUE;
+            }
+
+            // Select the interval before the next GF update
+            // To find the interval we find the max of AvX and AvY and work out how many frames
+            // it will take to move X pels (GF_UPDATE_MOTION_INTERVAL in 1/4 pel) assuming the motion
+            // level does not change. The value is then capped to the range MIN_GF_UPDATE_INTERVAL to MAX_GF_UPDATE_INTERVAL
+            if ( cpi->GfuMotionSpeed > 0 )
+            {
+                cpi->GfUpdateInterval = GF_UPDATE_MOTION_INTERVAL / cpi->GfuMotionSpeed;
+
+                if ( cpi->GfUpdateInterval < MIN_GF_UPDATE_INTERVAL )
+                    cpi->GfUpdateInterval = MIN_GF_UPDATE_INTERVAL;
+                else if ( cpi->GfUpdateInterval > MAX_GF_UPDATE_INTERVAL )
+                    cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+            }
+            else
+                cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+        }
+
+        fsetpos(cpi->fs,&pos1);
+    }
+
+    // Any other frame: nothing is adjusted here based on its neighbors.
+
+    cpi->FramesYetToEncode --;
+    cpi->FramesToKey --;
+    cpi->TotalBitsPerMB = NewBitsPerMB;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass1Output
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (cpi->fps and cpi->fpmss are updated).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : output to external file the 1st pass results.
+ *                  Fills cpi->fps with the stats of the frame just coded,
+ *                  adds them to the running clip stats in cpi->fpmss and
+ *                  passes them to OutputStats() with the file cpi->fs.
+ *
+ *  SPECIAL NOTES : Per macroblock figures are divided by the macroblock
+ *                  count with the BORDER_MBS border removed on each side.
+ *
+ ****************************************************************************/
+void CCONV Pass1Output( CP_INSTANCE *cpi)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    int actualMBS = (pbi->MBRows - (BORDER_MBS*2)) * (pbi->MBCols - (BORDER_MBS*2));
+
+    // Called before any of the floating point arithmetic below
+    ClearSysState();
+
+    cpi->fps.MeanInterError = 1.0 * cpi->InterErrorb / actualMBS;
+    cpi->fps.MeanIntraError = 1.0 * cpi->IntraError / actualMBS;
+
+    cpi->fps.isKey = pbi->FrameType == BASE_FRAME;
+    cpi->fps.isGolden = pbi->RefreshGoldenFrame;
+    cpi->fps.PSNR = 60;     // constant; no PSNR is measured here
+    cpi->fps.BitsPerMacroblock = 1.0 * cpi->ThisFrameSize / actualMBS;
+    cpi->fps.SqBitsPerMacroblock = cpi->fps.BitsPerMacroblock*cpi->fps.BitsPerMacroblock;
+    cpi->fps.QValue = pbi->quantizer->FrameQIndex;
+    cpi->fps.frame = (UINT32) (cpi->CurrentFrame-1);
+
+    AccumulateStats( &cpi->fpmss, &cpi->fps);
+    OutputStats(cpi->fs,&cpi->fps);
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h
new file mode 100644
index 00000000..4bcdbfea
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h
@@ -0,0 +1,29 @@
+/****************************************************************************
+*
+* Module Title : twopass.h
+*
+* Description : Functions for handling two-pass data rate control
+*
+* Notes : This header includes nothing itself. FILE, FIRSTPASS_STATS,
+* CP_INSTANCE, COMP_CONFIG_VP6 and CCONV must already be declared
+* by the including file.
+*
+****************************************************************************/
+#ifndef __INC_TWOPASS_H
+#define __INC_TWOPASS_H
+
+#ifndef STRICT
+#define STRICT /* Strict type checking */
+#endif
+
+/****************************************************************************
+* Exported functions
+*
+* ZeroStats : called on a stats structure before frames are
+* accumulated into it (presumably clears it - confirm
+* against the definition).
+* AccumulateStats : called once per frame to fold that frame's stats
+* into a section.
+* AvgStats : called after accumulation, before the section's
+* per-frame figures are read (presumably divides the
+* sums by the frame count - confirm).
+* OutputStats : passes one stats record to file f; used by
+* Pass1Output.
+* InputStats : fetches the next stats record from file f.
+* Pass2Initialize : two-pass set up for an encoder instance.
+* Pass2Control : per-frame second pass key frame, golden frame and
+* data rate decisions.
+* Pass1Output : records the first pass stats of the frame just coded.
+****************************************************************************/
+
+
+extern void ZeroStats( FIRSTPASS_STATS *section);
+extern void AccumulateStats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame);
+extern void AvgStats ( FIRSTPASS_STATS *section);
+extern void OutputStats( FILE *f, FIRSTPASS_STATS *stats);
+extern void InputStats( FILE *f, FIRSTPASS_STATS *stats);
+extern void CCONV Pass2Initialize ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig );
+extern void CCONV Pass2Control( CP_INSTANCE *cpi);
+extern void CCONV Pass1Output( CP_INSTANCE *cpi);
+
+#endif \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c
new file mode 100644
index 00000000..965f8e26
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c
@@ -0,0 +1,87 @@
+/****************************************************************************
+*
+* Module Title : VFW_COMP_MAIN.c
+*
+* Description : Main for video codec demo compression dll
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#define INC_WIN_HEADER 1
+#include <windows.h>
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+
+unsigned long cProcessesAttached = 0; /* Incremented by DllMain on DLL_PROCESS_ATTACH, decremented on DLL_PROCESS_DETACH. */
+
+HINSTANCE hInstance; /* Application instance handle; set by DllMain on DLL_PROCESS_ATTACH. */
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports
+*
+* VPEInitLibrary : called by DllMain on the first process attach.
+* VPEDeInitLibrary : called by DllMain when the attach count drops to zero.
+*****************************************************************************
+*/
+extern void VPEInitLibrary(void);
+extern void VPEDeInitLibrary(void);
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DllMain
+ *
+ *  INPUTS        : HANDLE hInst      : Instance handle of the dll.
+ *                  DWORD  fdwReason  : Reason for the call.
+ *                  LPVOID lpReserved : Not used.
+ *
+ *  RETURNS       : TRUE for process attach / detach, FALSE for any other
+ *                  reason code.
+ *
+ *  FUNCTION      : Initializes the library on the first process attach and
+ *                  de-initializes it when the attach count returns to zero.
+ *
+ ****************************************************************************/
+BOOL WINAPI DllMain(HANDLE hInst, DWORD fdwReason, LPVOID lpReserved)
+{
+    switch ( fdwReason )
+    {
+    case DLL_PROCESS_ATTACH:
+        hInstance = hInst;
+
+        // Only the first attach initializes all the global variables in the dll
+        if ( cProcessesAttached++ == 0 )
+            VPEInitLibrary();
+
+        return TRUE;
+
+    case DLL_PROCESS_DETACH:
+        // Only the detach that brings the count back to zero tears the library down
+        if ( --cProcessesAttached == 0 )
+            VPEDeInitLibrary();
+
+        return TRUE;
+
+    default:
+        return FALSE;
+    }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c
new file mode 100644
index 00000000..27099dd1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c
@@ -0,0 +1,1687 @@
+/****************************************************************************
+*
+* Module Title : vfwcomp.c
+*
+* Description : Video for Windows Compressor interface definition.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <math.h>
+#include "compdll.h"
+#include "misc_common.h"
+#include "decodemode.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define MAX_PSNR 60.0
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+static const UINT8 EndpointLookup[SCAN_ORDER_BANDS] =
+ { 1, 4, 10, 12, 15, 19, 21, 26, 28, 34, 36, 42, 48, 53, 57, 63 };
+
+static const UINT32 PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1, 2, 3, 4, 5 };
+
+static UINT32 TotDropFrameCount = 0;
+
+// Boost to data rate for GF update frames, one entry per Q index; Pass2Control (twopass.c) applies it as GfuBoost * entry / 1000.
+// This extra spend is recovered from the next few frames
+const UINT32 GfuDataRateBoost[64] =
+{
+ 1150, 1150, 1150, 1150, 1200, 1200, 1200, 1200,
+ 1250, 1250, 1250, 1250, 1350, 1350, 1350, 1350,
+ 1250, 1250, 1250, 1250, 1100, 1100, 1050, 1050,
+ 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
+ 950, 950, 950, 950, 950, 950, 950, 950,
+ 900, 900, 900, 900, 900, 900, 850, 850,
+ 800, 800, 750, 600, 500, 400, 350, 300,
+ 250, 200, 150, 125, 100, 75, 50, 0
+};
+
+// Reduce GFU boost as motion level increases
+const UINT32 GfuMotionCorrection[32] =
+{
+ 100, 95, 90, 85, 80, 75, 70, 65,
+ 60, 55, 50, 45, 40, 35, 30, 25,
+ 20, 15, 10, 5, 5, 4, 4, 3,
+ 3, 2, 2, 1, 1, 0, 0, 0,
+};
+
+// Correction to boost value that depends on recent observed GF usage
+// These are 1% steps. > 15% gets max boost.
+// Boost is multiplied by table value then divided by 128.
+const UINT32 GfUsageCorrection2[16] =
+{
+ 8, 16, 32, 64, 80, 96, 112, 120,
+ 128, 128, 128, 128, 128, 128, 128, 128
+};
+
+const UINT32 GfUsageCorrection[64] = // Pass2Control (twopass.c) indexes this with GfUsage >> 3 (capped at 63) and applies it as GfuBoost * entry / 16.
+{
+ 12,12,12,12,12,12,12,12,
+ 12,12,12,12,12,13,14,15,
+ 16,17,18,19,20,21,22,23,
+ 24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,
+ 40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,
+ 56,57,58,59,60,61,62,80
+};
+
+// Threshold and alpha limits for bicubic filtering
+const UINT8 BicubicMaxAlpha[64] =
+{
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 9, 9, 9, 9,
+ 10,10,10,10,10,10,10,10,
+ 11,11,11,11,11,11,11,11,
+};
+const UINT8 BicubicMinThresh[64] =
+{
+ 31,31,31,31,31,31,31,31,
+ 16,16,16,16,16,16,16,16,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT8 FixedQKfBoostTable[64];
+
+#if defined PSNR_ON
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CalcPlaneSqError
+ *
+ *  INPUTS        : const UINT8 *RawPtr   : Top left sample of the raw plane.
+ *                  UINT32 RawStride      : Distance between raw lines.
+ *                  const UINT8 *ReconPtr : Top left sample of the recon plane.
+ *                  UINT32 ReconStride    : Distance between recon lines.
+ *                  UINT32 Width          : Samples per line to compare.
+ *                  UINT32 Height         : Number of lines to compare.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : Sum of squared differences over the plane.
+ *
+ *  FUNCTION      : Sums (raw - recon)^2 over a Width x Height area.
+ *
+ *  SPECIAL NOTES : The sum is kept in a double so that a large or badly
+ *                  mismatched plane cannot overflow a 32 bit integer.
+ *
+ ****************************************************************************/
+static double CalcPlaneSqError ( const UINT8 *RawPtr, UINT32 RawStride,
+                                 const UINT8 *ReconPtr, UINT32 ReconStride,
+                                 UINT32 Width, UINT32 Height )
+{
+    UINT32 i, j;
+    INT32  Diff;
+    double SqError = 0.0;
+
+    for ( i=0; i<Height; i++ )
+    {
+        for ( j=0; j<Width; j++ )
+        {
+            Diff = (INT32)(RawPtr[j]) - (INT32)(ReconPtr[j]);
+            SqError += (double)(Diff*Diff);
+        }
+        RawPtr   += RawStride;
+        ReconPtr += ReconStride;
+    }
+
+    return SqError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SqErrorToPsnr
+ *
+ *  INPUTS        : double SqError     : Sum of squared differences.
+ *                  UINT32 SampleCount : Number of samples the sum is scaled by.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : PSNR (in dB), never more than MAX_PSNR.
+ *
+ *  FUNCTION      : Converts a squared error sum into a PSNR figure.
+ *
+ *  SPECIAL NOTES : A zero error returns MAX_PSNR rather than dividing by 0.
+ *                  The cap limits the effect of any one frame on averages.
+ *
+ ****************************************************************************/
+static double SqErrorToPsnr ( double SqError, UINT32 SampleCount )
+{
+    double Psnr;
+
+    if ( SqError > 0.0 )
+        Psnr = 10.0 * log10((255.0 * 255.0 * SampleCount) / SqError);
+    else
+        Psnr = MAX_PSNR;
+
+    if ( Psnr > MAX_PSNR )
+        Psnr = MAX_PSNR;
+
+    return Psnr;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CalcPSNR
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : PSNR value for frame (in dB).
+ *
+ *  FUNCTION      : Calculate frame PSNR for diagnostic and tuning purposes.
+ *                  Also updates the running total, minimum and maximum PSNR
+ *                  kept in cpi for Y, U, V and the whole frame, and adds the
+ *                  frame's squared error to cpi->TotalSqError.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+double CalcPSNR ( CP_INSTANCE *cpi )
+{
+    UINT32 LineLength;
+    UINT32 PlaneHeight;
+    UINT32 FrameSize;
+    double FramePsnr;
+    double PlaneSqError;
+    double GrandTotal = 0.0;
+    UINT8 *RawDataBuffer;
+    UINT8 *ReconBuffer;
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // choose the Raw data buffer to include or exclude the effect of pre-processing
+    // cpi->yuv1ptr (or yuv0ptr to exclude the effect of pre-processing)
+    if ( cpi->PreProcFilterLevel == 0 )
+        RawDataBuffer = cpi->yuv1ptr;
+    else
+        RawDataBuffer = cpi->yuv0ptr;
+
+    // Choose the reconstruction buffer according to whether or not post processing is on.
+    if ( cpi->pb.PostProcessingLevel )
+        ReconBuffer = cpi->pb.PostProcessBuffer;
+    else
+        ReconBuffer = cpi->pb.LastFrameRecon;
+
+    // Y plane: the recon pointer skips the UMV border
+    LineLength  = cpi->pb.Configuration.VideoFrameWidth;
+    PlaneHeight = cpi->pb.Configuration.VideoFrameHeight;
+    PlaneSqError = CalcPlaneSqError ( &RawDataBuffer[cpi->pb.YDataOffset], LineLength,
+                                      &ReconBuffer[cpi->pb.ReconYDataOffset+(UMV_BORDER*cpi->pb.Configuration.YStride)+UMV_BORDER],
+                                      cpi->pb.Configuration.YStride, LineLength, PlaneHeight );
+    GrandTotal += PlaneSqError;
+
+    FramePsnr = SqErrorToPsnr ( PlaneSqError, cpi->pb.YPlaneSize );
+    cpi->TotYPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinYPsnr )
+        cpi->MinYPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxYPsnr )
+        cpi->MaxYPsnr = FramePsnr;
+
+    // U plane: half the width and height, half the border
+    LineLength  = cpi->pb.Configuration.VideoFrameWidth/2;
+    PlaneHeight = cpi->pb.Configuration.VideoFrameHeight/2;
+    PlaneSqError = CalcPlaneSqError ( &RawDataBuffer[cpi->pb.UDataOffset], LineLength,
+                                      &ReconBuffer[cpi->pb.ReconUDataOffset+(UMV_BORDER>>1)*cpi->pb.Configuration.UVStride+(UMV_BORDER>>1)],
+                                      cpi->pb.Configuration.UVStride, LineLength, PlaneHeight );
+    GrandTotal += PlaneSqError;
+
+    FramePsnr = SqErrorToPsnr ( PlaneSqError, cpi->pb.UVPlaneSize );
+    cpi->TotUPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinUPsnr )
+        cpi->MinUPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxUPsnr )
+        cpi->MaxUPsnr = FramePsnr;
+
+    // V plane: same geometry as U
+    PlaneSqError = CalcPlaneSqError ( &RawDataBuffer[cpi->pb.VDataOffset], LineLength,
+                                      &ReconBuffer[cpi->pb.ReconVDataOffset+(UMV_BORDER>>1)*cpi->pb.Configuration.UVStride+(UMV_BORDER>>1)],
+                                      cpi->pb.Configuration.UVStride, LineLength, PlaneHeight );
+    GrandTotal += PlaneSqError;
+
+    FramePsnr = SqErrorToPsnr ( PlaneSqError, cpi->pb.UVPlaneSize );
+    cpi->TotVPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinVPsnr )
+        cpi->MinVPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxVPsnr )
+        cpi->MaxVPsnr = FramePsnr;
+
+    // Now work out the average across Y, U and V
+    FrameSize = cpi->pb.YPlaneSize + cpi->pb.UVPlaneSize + cpi->pb.UVPlaneSize;
+    FramePsnr = SqErrorToPsnr ( GrandTotal, FrameSize );
+
+    cpi->TotalSqError += GrandTotal;
+
+    cpi->TotPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinPsnr )
+        cpi->MinPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxPsnr )
+        cpi->MaxPsnr = FramePsnr;
+
+    return FramePsnr;
+}
+#endif
+/****************************************************************************
+ *
+ *  ROUTINE       : SetupKeyFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Marks the frame about to be coded as a key frame by
+ *                  passing BASE_FRAME to VP6_SetFrameType.
+ *
+ *  SPECIAL NOTES : Candidate for removal: callers could instead set
+ *                  cpi->pb.FrameType = BASE_FRAME;
+ *
+ ****************************************************************************/
+void SetupKeyFrame ( CP_INSTANCE *cpi )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    VP6_SetFrameType ( pbi, BASE_FRAME );
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : AdjustKeyFrameContext
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Records the key frame just coded (cpi->ThisFrameSize and
+ *            cpi->LastKeyFrame) as the newest entry of the
+ *            PriorKeyFrameSize / PriorKeyFrameDistance history, then
+ *            re-derives cpi->InterFrameTarget from the weighted average
+ *            key frame size and spacing. Finally resets cpi->LastKeyFrame
+ *            to 1 and stores cpi->LastKeyFrameSize.
+ *
+ * SPECIAL NOTES : The long-term averages (total / KeyFrameCount) seed the
+ *                 weighted sums before they are divided by TotalWeight;
+ *                 this matches the original behaviour and is retained.
+ *                 All divisors are guarded against zero so a degenerate
+ *                 history (e.g. distances of 0) cannot fault.
+ *
+ ****************************************************************************/
+void AdjustKeyFrameContext ( CP_INSTANCE *cpi )
+{
+    UINT32 i;
+    INT32 AvKeyFramesPerSecond;
+    INT32 MinFrameTargetRate;
+
+    UINT32 AvKeyFrameFrequency = 0;
+    UINT32 AvKeyFrameBytes = 0;
+    UINT32 TotalWeight = 0;
+
+    // Long-term average key frame frequency and size. These are sampled
+    // before this frame's bytes are added to the running total below.
+    if ( cpi->KeyFrameCount != 0 )
+    {
+        AvKeyFrameFrequency = (UINT32) (cpi->CurrentFrame / cpi->KeyFrameCount);
+        AvKeyFrameBytes = (UINT32) (cpi->TotKeyFrameBytes / cpi->KeyFrameCount);
+    }
+
+    // Update the frame carry over (ThisFrameSize is divided by 8 wherever it
+    // feeds a byte total, so it is presumably a bit count).
+    cpi->TotKeyFrameBytes += (cpi->ThisFrameSize/8);
+
+    // Shift the key frame history down by one slot, append the current key frame
+    // in the last slot, and accumulate the weighted sums of size and distance.
+    for ( i=0; i<KEY_FRAME_CONTEXT; i++ )
+    {
+        if ( i < KEY_FRAME_CONTEXT-1 )
+        {
+            cpi->PriorKeyFrameSize[i] = cpi->PriorKeyFrameSize[i+1];
+            cpi->PriorKeyFrameDistance[i] = cpi->PriorKeyFrameDistance[i+1];
+        }
+        else
+        {
+            cpi->PriorKeyFrameSize[KEY_FRAME_CONTEXT - 1] = cpi->ThisFrameSize;
+            cpi->PriorKeyFrameDistance[KEY_FRAME_CONTEXT - 1] = cpi->LastKeyFrame;
+        }
+
+        AvKeyFrameBytes += PriorKeyFrameWeight[i] * cpi->PriorKeyFrameSize[i] / 8;
+        AvKeyFrameFrequency += PriorKeyFrameWeight[i] * cpi->PriorKeyFrameDistance[i];
+        TotalWeight += PriorKeyFrameWeight[i];
+    }
+
+    if ( TotalWeight != 0 )
+    {
+        AvKeyFrameBytes /= TotalWeight;
+        AvKeyFrameFrequency /= TotalWeight;
+    }
+
+    // A key frame spacing of less than one frame is meaningless and would
+    // cause a divide-by-zero below.
+    if ( AvKeyFrameFrequency == 0 )
+        AvKeyFrameFrequency = 1;
+
+    // Note: this value is scaled by 100 (key frames per 100 seconds).
+    AvKeyFramesPerSecond = 100 * cpi->Configuration.OutputFrameRate / AvKeyFrameFrequency ;
+
+    /* Calculate a new target rate per frame allowing for average key frame frequency over newest frames.
+       The second operand of && also guards the division against a zero divisor. */
+    if ( (100 * cpi->Configuration.TargetBandwidth > AvKeyFrameBytes * AvKeyFramesPerSecond) &&
+         (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond ))
+    {
+        cpi->InterFrameTarget =
+            (INT32)(100* cpi->Configuration.TargetBandwidth - AvKeyFrameBytes * AvKeyFramesPerSecond )
+            / ( (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond ) );
+    }
+    else // don't let this number get too small!!!
+    {
+        cpi->InterFrameTarget = 1;
+    }
+
+    // minimum allowable frame_target_rate
+    MinFrameTargetRate = cpi->PerFrameBandwidth / 3;
+
+    if ( cpi->InterFrameTarget < MinFrameTargetRate )
+        cpi->InterFrameTarget = MinFrameTargetRate;
+
+    // Restart the "frames since last key frame" counter and remember this key frame's size.
+    cpi->LastKeyFrame = 1;
+    cpi->LastKeyFrameSize = cpi->ThisFrameSize;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ResizeFrameTo
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *          int hscale : Horizontal scale factor numerator.
+ *          int hratio : Horizontal scale factor denominator.
+ *          int vscale : Vertical scale factor numerator.
+ *          int vratio : Vertical scale factor denominator.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Stores the requested scale factors in cpi->pb.Configuration,
+ *            derives the scaled frame dimensions (input size * ratio /
+ *            scale, rounded up, then rounded up to a multiple of 16),
+ *            updates cpi->InputConfig, resizes the encoder, rescales the
+ *            source frame and scales the key frame data target to match.
+ *
+ * SPECIAL NOTES : The key frame data target is capped at half of the
+ *                 configured target bandwidth.
+ *
+ ****************************************************************************/
+void ResizeFrameTo ( CP_INSTANCE *cpi, int hscale, int hratio, int vscale, int vratio )
+{
+    int SrcWidth = cpi->YuvInputData.YWidth;
+    int SrcHeight = cpi->YuvInputData.YHeight;
+    int NewWidth;
+    int NewHeight;
+    int HalfTargetBandwidth;
+
+    // Record the scaling factors in the playback configuration.
+    cpi->pb.Configuration.HScale = hscale;
+    cpi->pb.Configuration.HRatio = hratio;
+    cpi->pb.Configuration.VScale = vscale;
+    cpi->pb.Configuration.VRatio = vratio;
+
+    // Scaled size = ceil(source * ratio / scale), using the values as stored in the configuration.
+    NewWidth = (cpi->pb.Configuration.HScale - 1 + SrcWidth * cpi->pb.Configuration.HRatio) / cpi->pb.Configuration.HScale;
+    NewHeight = (cpi->pb.Configuration.VScale - 1 + SrcHeight * cpi->pb.Configuration.VRatio) / cpi->pb.Configuration.VScale;
+
+    // Round both dimensions up to a whole number of 16 pixel units.
+    NewWidth = ((NewWidth + 15) / 16) * 16;
+    NewHeight = ((NewHeight + 15) / 16) * 16;
+
+    // Luma plane geometry.
+    cpi->InputConfig.YWidth = NewWidth;
+    cpi->InputConfig.YHeight = NewHeight;
+    cpi->InputConfig.YStride = NewWidth;
+
+    // Chroma planes are half size in each direction.
+    cpi->InputConfig.UVWidth = NewWidth/2;
+    cpi->InputConfig.UVHeight = NewHeight/2;
+    cpi->InputConfig.UVStride = NewWidth/2;
+
+    ChangeEncoderSize ( cpi, NewWidth, NewHeight );
+
+    CopyOrResize ( cpi, TRUE );
+
+    // Scale the key frame data target by the ratio of the new and old (width + height).
+    cpi->KeyFrameDataTarget = (int)cpi->KeyFrameDataTargetOrig * (NewWidth + NewHeight) / (SrcWidth + SrcHeight);
+
+    // Never allow a key frame target above half of the target bandwidth.
+    HalfTargetBandwidth = (int)cpi->Configuration.TargetBandwidth/2;
+    if ( cpi->KeyFrameDataTarget > HalfTargetBandwidth )
+        cpi->KeyFrameDataTarget = HalfTargetBandwidth;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ResizeFrame
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Chooses the horizontal and vertical scale factors for the
+ *            current frame and applies them through ResizeFrameTo().
+ *            If cpi->ForceInternalSize is set the cpi->Force* factors are
+ *            used; otherwise the factors are looked up from cpi->SizeStep
+ *            (1..5), with separate tables for interlaced and progressive
+ *            material.
+ *
+ * SPECIAL NOTES : Any SizeStep outside 1..5 selects no scaling (1/1).
+ *
+ ****************************************************************************/
+void ResizeFrame ( CP_INSTANCE *cpi )
+{
+    // Each row is { HScale, HRatio, VScale, VRatio }; row [n-1] serves SizeStep n.
+    static const int InterlacedSteps[5][4] =
+    {
+        { 5, 4, 1, 1 },
+        { 5, 3, 1, 1 },
+        { 2, 1, 1, 1 },
+        { 5, 3, 2, 1 },
+        { 2, 1, 2, 1 }
+    };
+    static const int ProgressiveSteps[5][4] =
+    {
+        { 5, 4, 1, 1 },
+        { 5, 4, 5, 4 },
+        { 5, 3, 5, 4 },
+        { 5, 3, 5, 3 },
+        { 2, 1, 2, 1 }
+    };
+    static const int NoScaling[4] = { 1, 1, 1, 1 };
+
+    const int *Factors = NoScaling;
+
+    // A forced internal size overrides the step tables altogether.
+    if ( cpi->ForceInternalSize )
+    {
+        ResizeFrameTo ( cpi,
+                        cpi->ForceHScale,
+                        cpi->ForceHRatio,
+                        cpi->ForceVScale,
+                        cpi->ForceVRatio );
+        return;
+    }
+
+    if ( cpi->SizeStep >= 1 && cpi->SizeStep <= 5 )
+    {
+        if ( cpi->pb.Configuration.Interlaced )
+            Factors = InterlacedSteps[cpi->SizeStep - 1];
+        else
+            Factors = ProgressiveSteps[cpi->SizeStep - 1];
+    }
+
+    ResizeFrameTo ( cpi, Factors[0], Factors[1], Factors[2], Factors[3] );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CompressFirstFrame
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Compresses the first frame of a clip as a key frame. Seeds
+ *            the key frame size/distance history and the key frame
+ *            counters, resizes or copies the source frame, derives the
+ *            initial cpi->InterFrameTarget, picks intra modes, selects a
+ *            Q for cpi->KeyFrameDataTarget and then codes the frame via
+ *            UpdateFrame().
+ *
+ * SPECIAL NOTES : The first frame always uses cpi->KeyFrameDataTarget as
+ *                 its size target. An experimental two-pass key frame
+ *                 boost used to sit here behind a constant-false test; it
+ *                 could never execute and has been removed. The live
+ *                 version of that logic is in CompressKeyFrame().
+ *
+ ****************************************************************************/
+void CompressFirstFrame ( CP_INSTANCE *cpi )
+{
+    UINT32 i;
+
+    cpi->ErrorPerBit = 80;
+
+    // MV and mode counters used in assessing new MV frequency.
+    cpi->FrameNewMvCounter = 0;
+    cpi->FrameModeCounter = 0;
+
+    // If not auto key framing, the configured key frame frequency is the forced interval.
+    if ( !cpi->AutoKeyFrameEnabled )
+        cpi->ForceKeyFrameEvery = cpi->KeyFrameFrequency;
+
+    /* set up context of key frame sizes and distances for more local datarate control */
+    for ( i=0; i<KEY_FRAME_CONTEXT; i++ )
+    {
+        cpi->PriorKeyFrameSize[i] = cpi->KeyFrameDataTarget;
+        cpi->PriorKeyFrameDistance[i] = cpi->ForceKeyFrameEvery;
+    }
+
+    // Keep track of the total number of Key Frames Coded.
+    cpi->KeyFrameCount = 1;
+    cpi->LastKeyFrame = 1;
+    cpi->TotKeyFrameBytes = 0;
+
+    // Resize if spatial resampling has selected a non-zero step, or if an internal size is forced.
+    // (Parentheses make the original && / || precedence explicit.)
+    if ( (cpi->AllowSpatialResampling && cpi->SizeStep != 0) || cpi->ForceInternalSize )
+        ResizeFrame ( cpi );
+    else
+        CopyOrResize ( cpi, TRUE );
+
+    // Use scan order updates for larger images.
+    if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+        cpi->AllowScanOrderUpdates = TRUE;
+    else
+        cpi->AllowScanOrderUpdates = FALSE;
+
+    SetupKeyFrame ( cpi );
+
+    // Calculate a new target rate per frame allowing for average key frame frequency and size thus far.
+    // NOTE(review): cpi->KeyFrameFrequency and cpi->Configuration.OutputFrameRate are used as
+    // divisors here; presumably both are validated as non-zero at configuration time - confirm.
+    if ( cpi->Configuration.TargetBandwidth > ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency) )
+    {
+        cpi->InterFrameTarget = (INT32)((cpi->Configuration.TargetBandwidth -
+            ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+    }
+    else
+        cpi->InterFrameTarget = 1;
+
+    // Reset the drop frame flags
+    cpi->DropCount = 0;
+    cpi->MaxDropCount = 0;
+
+    // Select Intra mode for all MBs and calculate the total error score
+    cpi->IntraError = PickIntra ( cpi );
+    cpi->InterError = cpi->IntraError;
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Set a target size for this key frame based upon the baseline target and frequency
+    cpi->ThisFrameTarget = cpi->KeyFrameDataTarget;
+    RegulateQ ( cpi, cpi->ThisFrameTarget);
+
+    /* Compress and output the first frame */
+    UpdateFrame ( cpi );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CompressKeyFrame
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Compresses a key frame. Optionally steps the spatial
+ *            resampling level up or down according to buffer fullness,
+ *            resizes or copies the source frame, picks intra modes,
+ *            chooses the key frame Q and codes the frame via
+ *            UpdateFrame().
+ *
+ * SPECIAL NOTES : Q selection: a forced key frame (cpi->KFForced) gets half
+ *                 of the FixedQKfBoostTable boost over the average Q; in
+ *                 pass 2 an unforced key frame gets a boost derived from
+ *                 two RegulateQ() calls (capped by the table); otherwise
+ *                 the full table boost is applied.
+ *
+ ****************************************************************************/
+void CompressKeyFrame ( CP_INSTANCE *cpi )
+{
+    // Reset the active worst quality to the baseline value for key frames.
+    cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+
+    // Auto-spatial re-sampling is only considered in buffered mode, and never in pass 2.
+    if ( cpi->BufferedMode && cpi->pass != 2 )
+    {
+        if ( (cpi->BufferLevel < cpi->ResampleDownWaterMark) &&
+             (cpi->BufferLevel <= cpi->LastKeyFrameBufferLevel) )
+        {
+            // Buffer is below the down-sample water mark and has not risen since
+            // the last key frame: move to a smaller image size (at most step 5).
+            if ( cpi->SizeStep < 5 )
+                cpi->SizeStep++;
+        }
+        else if ( (cpi->BufferLevel > ((cpi->OptimalBufferLevel * 110)/100) ) ||
+                  ((cpi->BufferLevel > cpi->ResampleUpWaterMark) &&
+                   (cpi->BufferLevel >= cpi->LastKeyFrameBufferLevel) ) )
+        {
+            // Buffer is well above optimal, or above the up-sample water mark and
+            // not falling: move back towards the full image size.
+            if ( cpi->SizeStep > 0 )
+                cpi->SizeStep--;
+        }
+    }
+
+    // Implement any resize that has been chosen (or forced), otherwise just copy the frame.
+    if ( (cpi->AllowSpatialResampling && (cpi->SizeStep != cpi->LastSizeStep)) || cpi->ForceInternalSize )
+        ResizeFrame ( cpi );
+    else
+        CopyOrResize ( cpi, TRUE );
+
+    // Use scan order updates for larger images.
+    cpi->AllowScanOrderUpdates = ( cpi->pb.Configuration.VideoFrameWidth >= 480 ) ? TRUE : FALSE;
+
+    // Keep track of the total number of Key Frames Coded
+    cpi->KeyFrameCount += 1;
+
+    // Reset the drop frame flags
+    cpi->DropCount = 0;
+    cpi->MaxDropCount = 0;
+
+    SetupKeyFrame ( cpi );
+
+    // Set the key frame size constraints
+    cpi->ThisFrameTarget = cpi->KeyFrameDataTarget;
+
+    // Select Intra mode for all MBs and calculate the total error score
+    cpi->IntraError = PickIntra ( cpi );
+    cpi->InterError = cpi->IntraError;
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Reset the KeyFrameBpbCorrectionFactor to 1.0
+    cpi->KeyFrameBpbCorrectionFactor = 1;
+
+    // Set an appropriate key frame Q to match the recent ambient quality.
+    if ( cpi->KFForced == TRUE )
+    {
+        // Forced key frame (either pass): half of the tabulated boost.
+        ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + (FixedQKfBoostTable[cpi->pb.AvgFrameQIndex]/2) );
+    }
+    else if ( cpi->pass == 2 )
+    {
+        int BaseQ;
+        int QBoost;
+
+        // Q for an ordinary inter frame target...
+        cpi->ThisFrameTarget = cpi->InterFrameTarget;
+        RegulateQ ( cpi, cpi->ThisFrameTarget);
+        BaseQ = cpi->pb.quantizer->FrameQIndex;
+
+        // ...versus Q for the inter frame target boosted by KFBoost/16.
+        cpi->ThisFrameTarget = cpi->InterFrameTarget + ((cpi->InterFrameTarget * cpi->KFBoost) >> 4) ;
+        RegulateQ ( cpi, cpi->ThisFrameTarget);
+
+        // The boost is the difference, capped by the fixed table entry.
+        QBoost = cpi->pb.quantizer->FrameQIndex - BaseQ;
+        if ( QBoost > FixedQKfBoostTable[BaseQ] )
+            QBoost = FixedQKfBoostTable[BaseQ];
+
+        ClampAndUpdateQ ( cpi, BaseQ + QBoost);
+    }
+    else
+    {
+        // Unforced key frame outside pass 2: full tabulated boost.
+        ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + FixedQKfBoostTable[cpi->pb.AvgFrameQIndex] );
+    }
+
+    /* Compress and output the key frame */
+    UpdateFrame ( cpi );
+    cpi->LastSizeStep = cpi->SizeStep;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CompressFrame
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 FrameNumber : Frame number (NOT USED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Compresses a frame. Works out the per-frame data target and
+ *            active quality limits, decides whether the frame is dropped,
+ *            and otherwise picks modes, chooses Q and prediction filter
+ *            settings and codes the frame via UpdateFrame().
+ *
+ * SPECIAL NOTES : The function returns early, having coded a key frame via
+ *                 CompressKeyFrame(), when the forced key frame interval
+ *                 has elapsed or the automatic key frame test fires.
+ *                 When the frame is dropped nothing is coded; only the
+ *                 buffer model and drop counters are updated.
+ *
+ ****************************************************************************/
+void CompressFrame ( CP_INSTANCE *cpi, UINT32 FrameNumber )
+{
+ UINT32 i;
+ UINT32 KFIndicator;
+ // NOTE(review): DropedFrame is written in the drop path at the end of the function but never read here.
+ BOOL DropedFrame = FALSE;
+ int actualMBS;
+
+ cpi->KFForced=0;
+ CopyOrResize ( cpi, FALSE );
+
+ /* Clear down the macro block level mode and MV arrays. */
+ for ( i=0; i<cpi->pb.UnitFragments; i++ )
+ {
+ cpi->pb.FragInfo[i].FragCodingMode = CODE_INTER_NO_MV; // Default coding mode
+ cpi->pb.FragInfo[i].MVectorX = 0;
+ cpi->pb.FragInfo[i].MVectorY = 0;
+ }
+
+ /* Default to normal frames. */
+ VP6_SetFrameType ( &cpi->pb, NORMAL_FRAME );
+
+ // Calculate the target bytes for this frame.
+ cpi->ThisFrameTarget = cpi->InterFrameTarget;
+
+ /* */
+/*
+ cpi->pb.mbi.blockDxInfo[0].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[0]];
+ cpi->pb.mbi.blockDxInfo[1].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[1]];
+ cpi->pb.mbi.blockDxInfo[2].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[2]];
+ cpi->pb.mbi.blockDxInfo[3].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[3]];
+ cpi->pb.mbi.blockDxInfo[4].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[4]];
+ cpi->pb.mbi.blockDxInfo[5].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[5]];
+
+ cpi->pb.mbi.blockDxInfo[0].MvShift =
+ cpi->pb.mbi.blockDxInfo[1].MvShift =
+ cpi->pb.mbi.blockDxInfo[2].MvShift =
+ cpi->pb.mbi.blockDxInfo[3].MvShift = Y_MVSHIFT;
+ cpi->pb.mbi.blockDxInfo[4].MvShift =
+ cpi->pb.mbi.blockDxInfo[5].MvShift = UV_MVSHIFT;
+
+ cpi->pb.mbi.blockDxInfo[0].MvModMask =
+ cpi->pb.mbi.blockDxInfo[1].MvModMask =
+ cpi->pb.mbi.blockDxInfo[2].MvModMask =
+ cpi->pb.mbi.blockDxInfo[3].MvModMask = Y_MVMODMASK;
+ cpi->pb.mbi.blockDxInfo[4].MvModMask =
+ cpi->pb.mbi.blockDxInfo[5].MvModMask = UV_MVMODMASK;
+
+ cpi->pb.mbi.blockDxInfo[0].Plane =
+ cpi->pb.mbi.blockDxInfo[1].Plane =
+ cpi->pb.mbi.blockDxInfo[2].Plane =
+ cpi->pb.mbi.blockDxInfo[3].Plane = 0;
+ cpi->pb.mbi.blockDxInfo[4].Plane =
+ cpi->pb.mbi.blockDxInfo[5].Plane = 1;
+
+ cpi->pb.mbi.blockDxInfo[0].LastDc =
+ cpi->pb.mbi.blockDxInfo[1].LastDc =
+ cpi->pb.mbi.blockDxInfo[2].LastDc =
+ cpi->pb.mbi.blockDxInfo[3].LastDc = cpi->pb.fc.LastDcY;
+ cpi->pb.mbi.blockDxInfo[4].LastDc = cpi->pb.fc.LastDcU;
+ cpi->pb.mbi.blockDxInfo[5].LastDc = cpi->pb.fc.LastDcV;
+
+ cpi->pb.mbi.blockDxInfo[0].Left = &cpi->pb.fc.LeftY[0];
+ cpi->pb.mbi.blockDxInfo[1].Left = &cpi->pb.fc.LeftY[0];
+ cpi->pb.mbi.blockDxInfo[2].Left = &cpi->pb.fc.LeftY[1];
+ cpi->pb.mbi.blockDxInfo[3].Left = &cpi->pb.fc.LeftY[1];
+ cpi->pb.mbi.blockDxInfo[4].Left = &cpi->pb.fc.LeftU;
+ cpi->pb.mbi.blockDxInfo[5].Left = &cpi->pb.fc.LeftV;
+*/
+
+ // For Buffered mode make data rate and Q range adjustments based on buffer fullness.
+ if ( cpi->BufferedMode )
+ {
+ // One percent of the optimal buffer level; the +1 keeps it non-zero when used as a divisor below.
+ INT32 OnePercentBits = 1 + cpi->OptimalBufferLevel/100;
+
+ //if ( cpi->BufferLevel < cpi->OptimalBufferLevel || cpi->BytesOffTarget < 0 )
+ if ( ( cpi->BufferLevel < cpi->OptimalBufferLevel ) ||
+ ( cpi->BytesOffTarget < cpi->OptimalBufferLevel ) )
+ {
+ INT32 PercentLow = 0;
+
+ // Decide whether or not we need to adjust the frame data rate target.
+ //
+ // If we are below the optimal buffer fullness level and adherence
+ // to buffering constraints is important to the end usage then adjust
+ // the per frame target.
+ if ( (cpi->EndUsage == USAGE_STREAM_FROM_SERVER) && ( cpi->BufferLevel < cpi->OptimalBufferLevel ) )
+ {
+ PercentLow = (cpi->OptimalBufferLevel - cpi->BufferLevel) / OnePercentBits;
+ if ( PercentLow > 100 )
+ PercentLow = 100;
+ else if ( PercentLow < 0 )
+ PercentLow = 0;
+ }
+ // Are we overshooting the long term clip data rate...
+ else if ( cpi->BytesOffTarget < 0 )
+ {
+ // Adjust per frame data target downwards to compensate.
+ // NOTE(review): divides by (cpi->TotalByteCount * 8); presumably non-zero once any frame has been coded - confirm.
+ PercentLow = (INT32) (100 * -cpi->BytesOffTarget / (cpi->TotalByteCount * 8));
+ if ( PercentLow > 100 )
+ PercentLow = 100;
+ else if ( PercentLow < 0 )
+ PercentLow = 0;
+ }
+
+ // Lower the target bandwidth for this frame by half of PercentLow percent.
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 - (PercentLow/2)) )/100;
+
+ // Set a reduced data rate target for our initial Q calculation.
+ // This should provide a slight upward pressure on buffer fullness
+ // during easier sections.
+ if ( (cpi->UnderShootPct > 0) && (cpi->UnderShootPct <= 100) )
+ {
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * cpi->UnderShootPct)/100;
+ }
+
+ // Are we allowing control of ActiveWorstQuality according to buffer level.
+ if ( cpi->AutoWorstQ )
+ {
+ INT32 CriticalBufferLevel;
+
+ // For streaming applications the most important factor is cpi->BufferLevel as this takes
+ // into account the specified short term buffering constraints. However, hitting the long
+ // term clip data rate target is also important.
+ if ( cpi->EndUsage == USAGE_STREAM_FROM_SERVER )
+ {
+ // Take the smaller of cpi->BufferLevel and cpi->BytesOffTarget
+ CriticalBufferLevel = (cpi->BufferLevel < cpi->BytesOffTarget) ? cpi->BufferLevel : cpi->BytesOffTarget;
+ }
+ // For local file playback short term buffering constraints are less of an issue
+ else
+ {
+ // Consider only how we are doing for the clip as a whole
+ CriticalBufferLevel = cpi->BytesOffTarget;
+ }
+
+ // Set the active worst quality based upon the selected buffer fullness number.
+ if ( CriticalBufferLevel < cpi->OptimalBufferLevel )
+ {
+ if ( CriticalBufferLevel > (cpi->OptimalBufferLevel/4) )
+ {
+ // NOTE(review): both values are held in UINT32; if cpi->NiAvQi can be below WorstQuality the subtraction wraps - confirm the invariant.
+ UINT32 QAdjustmentRange = cpi->NiAvQi - cpi->Configuration.WorstQuality;
+ UINT32 AboveBase = (CriticalBufferLevel - (cpi->OptimalBufferLevel/4));
+
+ // Step active worst quality down from cpi->NiAvQi when (CriticalBufferLevel == cpi->OptimalBufferLevel)
+ // to cpi->Configuration.WorstQuality when (CriticalBufferLevel == cpi->OptimalBufferLevel/4)
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality +
+ ( (QAdjustmentRange * AboveBase) / (cpi->OptimalBufferLevel*3/4) );
+
+ //cpi->Configuration.ActiveWorstQuality = (cpi->NiAvQi * CriticalBufferLevel) / cpi->OptimalBufferLevel;
+ if ( cpi->Configuration.ActiveWorstQuality < cpi->Configuration.WorstQuality )
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ }
+ else
+ {
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ }
+ }
+ else
+ {
+ cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+ }
+
+/* // Problems with this for local file mode because
+ // cpi->NiAvQi set to lower of average and last frame so as soon as cpi->BytesOffTarget
+ // goes negative we tend to race down to worst quality so this does not behave as one might expect.
+ else
+ {
+ if ( cpi->BytesOffTarget < 0 )
+ {
+ INT32 PercentOvershoot;
+
+ // Work out the overshoot as a percentage of the total file size
+ // Base cpi->Configuration.ActiveWorstQuality on this amount.
+ PercentOvershoot = (100 * -cpi->BytesOffTarget / (cpi->TotalByteCount * 8));
+
+ if ( PercentOvershoot > (cpi->NiAvQi - cpi->Configuration.WorstQuality) )
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ else
+ cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi - PercentOvershoot;
+ }
+ else
+ cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+ }
+*/
+ }
+ else
+ {
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ }
+ }
+ else
+ {
+ // Both BufferLevel and BytesOffTarget are at or above the optimal buffer level.
+ INT32 PercentHigh;
+
+ if(cpi->BytesOffTarget > cpi->OptimalBufferLevel)
+ {
+ // Raise the frame target by half of the percentage by which we are over the optimal level.
+ // NOTE(review): divides by (cpi->TotalByteCount * 8); see the matching note above.
+ PercentHigh = (INT32) (100 * (cpi->BytesOffTarget - cpi->OptimalBufferLevel) / (cpi->TotalByteCount * 8));
+ if ( PercentHigh > 100 )
+ PercentHigh = 100;
+ else if ( PercentHigh < 0 )
+ PercentHigh = 0;
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 + (PercentHigh/2)) )/100;
+ }
+
+ // Are we allowing control of ActiveWorstQuality according to buffer level.
+ if ( cpi->AutoWorstQ )
+ {
+ // Buffer is healthy: use cpi->NiAvQi as the active worst quality.
+ cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+ }
+ else
+ {
+ // Otherwise stick to the configured worst quality.
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ }
+
+ }
+
+ // Set ActiveBestQuality to prevent quality rising too high
+ cpi->Configuration.ActiveBestQuality = Q_TABLE_SIZE - cpi->BestAllowedQ;
+
+ // Worst quality obviously must not be better than best quality
+ if ( cpi->Configuration.ActiveWorstQuality > cpi->Configuration.ActiveBestQuality )
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.ActiveBestQuality - 1;
+
+ }
+ // Unbuffered mode (eg. video conferencing)
+ else
+ {
+ // Set the active worst quality
+ cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+ }
+
+ // The auto-drop frame code is only used in buffered mode.
+ // In unbuffered mode (eg video conferencing) the decision to
+ // code or drop a frame is made outside the codec in response to real
+ // world comms or buffer considerations.
+ if ( cpi->DropFramesAllowed && cpi->BufferedMode )
+ {
+ // Check for a buffer underrun-crisis in which case we have to drop a frame
+ if ( cpi->BufferLevel < cpi->PerFrameBandwidth )
+ cpi->DropFrame = TRUE;
+ // Check for drop frame criteria
+ else if ( cpi->BufferLevel < cpi->DropFramesWaterMark )
+ {
+ if ( cpi->DropCount < cpi->MaxDropCount )
+ cpi->DropFrame = TRUE;
+ }
+ }
+
+ // cpi->DropFrame is only ever set to TRUE above within this function; it is cleared again in the drop path below.
+ if ( !cpi->DropFrame )
+ {
+ // pick all the macroblock modes and motion vectors
+ UINT32 InterError;
+ UINT32 IntraError;
+
+ /*********************** Q PREDICTION STAGE 1 *****************************/
+
+ /* Select modes and motion vectors for each of the blocks : return an error score for inter and intra */
+
+ // Forced key frame: the number of frames since the last key frame has reached ForceKeyFrameEvery.
+ if ( cpi->AutoKeyFrameEnabled && (cpi->LastKeyFrame >= cpi->ForceKeyFrameEvery) )
+ {
+ cpi->KFForced=1;
+ CompressKeyFrame(cpi); // Code a key frame
+ return;
+ }
+
+
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+
+ // Update data rate to allow for GF updates.
+ // Note that we come in here even for fixed. In order to set the next update interval.
+ // Also note that we do not make a correction for the frames between a kf and the first GF update after a KF.
+ // NOTE(review): PickModes() has not yet run for this frame, so cpi->ModeDist and cpi->FrameMvStats
+ // presumably still hold the previous frame's statistics - confirm.
+ if ( (!cpi->DisableGolden) && cpi->BufferedMode && (cpi->pb.quantizer->FrameQIndex < 60) && (cpi->LastKeyFrame >= cpi->GfUpdateInterval))
+ {
+
+ UINT32 MaxVariance = 0;
+ UINT32 Sum2 = 0;
+ UINT32 Sum3 = 0;
+ int Sum = // number of macroblocks
+ (cpi->pb.MBRows - (BORDER_MBS*2))
+ * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+ if ( Sum )
+ {
+ // Sum2: macroblocks not coded as intra, inter-plus-MV or four-MV.
+ // Sum3: Sum2 less those coded as inter-no-MV or using-golden.
+ Sum2 = Sum - (cpi->ModeDist[CODE_INTRA] + cpi->ModeDist[CODE_INTER_PLUS_MV] + cpi->ModeDist[CODE_INTER_FOURMV]);
+ Sum3 = Sum2 - cpi->ModeDist[CODE_INTER_NO_MV] - cpi->ModeDist[CODE_USING_GOLDEN];
+
+ // Convert Sum2 and Sum3 to %
+ Sum2 = (Sum2 * 100 / Sum);
+ Sum3 = (Sum3 * 100 / Sum);
+
+ cpi->fps.PercentMotion = Sum2;
+ cpi->fps.PercentNewMotion = Sum3;
+ }
+
+ // Calculate various motion metrics
+ if ( cpi->FrameMvStats.NumMvs )
+ {
+ // Motion speed is the larger of the mean absolute X and Y components.
+ cpi->GfuMotionSpeed = (cpi->FrameMvStats.SumAbsX > cpi->FrameMvStats.SumAbsY) ? (cpi->FrameMvStats.SumAbsX/cpi->FrameMvStats.NumMvs) : (cpi->FrameMvStats.SumAbsY/cpi->FrameMvStats.NumMvs);
+ cpi->fps.MotionSpeed = cpi->GfuMotionSpeed;
+ // Variance = (N * sum(x^2) - sum(x)^2) / N^2
+ cpi->fps.VarianceX = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumXSq) - (cpi->FrameMvStats.SumX*cpi->FrameMvStats.SumX)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+ cpi->fps.VarianceY = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumYSq) - (cpi->FrameMvStats.SumY*cpi->FrameMvStats.SumY)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+ MaxVariance = (UINT32) ((cpi->fps.VarianceX > cpi->fps.VarianceY) ? cpi->fps.VarianceX : cpi->fps.VarianceY);
+ // Complexity is capped at 31.
+ cpi->GfuMotionComplexity = (UINT32) (cpi->GfuMotionSpeed + ((cpi->fps.VarianceX)/4) + ((cpi->fps.VarianceY)/4));
+ if ( cpi->GfuMotionComplexity > 31 )
+ cpi->GfuMotionComplexity = 31;
+ }
+ else
+ {
+ cpi->GfuMotionSpeed = 0;
+ cpi->GfuMotionComplexity = 0;
+ }
+
+
+ if( cpi->pass == 2 )
+ {
+ if(cpi->pb.RefreshGoldenFrame == TRUE)
+ {
+ // Golden frame update: first scale the target down as for non-GFU frames, then add GfuBoost percent.
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+ ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+
+ cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);
+ }
+ else if ( cpi->FramesTillGfUpdateDue > 0 )
+ {
+ // Non GFU frames reduced in bandwidth to account for
+ // + GfuBoost % on GFU frames
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+ ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+ }
+ }
+ else
+ {
+ if ( cpi->FramesTillGfUpdateDue > 0 )
+ {
+ // Non GFU frames reduced in bandwidth to account for
+ // + GfuBoost % on GFU frames
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+ ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+ }
+ else if (cpi->DisableGolden == 0)
+ {
+ int IntraToInterRatio;
+// NEWWAY is defined immediately before it is tested, so the first branch of the #ifdef below is
+// always the one compiled. The macro stays defined for the rest of the translation unit.
+#define NEWWAY
+#ifdef NEWWAY
+ // NOTE(review): divides by cpi->InterError, which is not checked for zero here - confirm it cannot be 0.
+ IntraToInterRatio = 100 * cpi->IntraError / (cpi->InterError );
+ IntraToInterRatio = IntraToInterRatio * Sum3 / 100;
+
+ cpi->GfuBoost = IntraToInterRatio;
+
+ // Correct boost to take account of recent observed level of GF usage
+ if ( (cpi->GfUsage >> 3) < 64)
+ cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[(cpi->GfUsage >> 3)]) / 16;
+ else
+ cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[63]) / 16;
+
+ cpi->GfuBoost = cpi->GfuBoost* GfuDataRateBoost[cpi->pb.AvgFrameQIndex] / 1000;
+
+
+ // Should we even consider a GF update or is there no point
+ if ( ( Sum3 > GF_MODE_DIST_THRESH2) &&
+ ( cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION)
+ )
+ {
+
+
+#else
+
+ // Calculate the %extra for GFU frames
+ cpi->GfuBoost = (GfuDataRateBoost[cpi->pb.quantizer->FrameQIndex] * GfuMotionCorrection[cpi->GfuMotionComplexity]) / 100;
+
+ // Correct boost to take account of recent observed level of GF usage
+ if ( (cpi->GfUsage >> 3) <= 15 )
+ cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection2[(cpi->GfUsage >> 3)]) / 128;
+ else
+ cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection2[15]) / 128;
+
+ // Should we even consider a GF update or is there no point
+ if ( (Sum2 > GF_MODE_DIST_THRESH1) && (Sum3 > GF_MODE_DIST_THRESH2) &&
+ (cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION) &&
+ (cpi->GfuBoost >= 80) &&
+ (MaxVariance <= GF_MAX_VAR_THRESH) )
+ {
+
+
+#endif
+
+ // Golden frame update chosen: scale the target as for non-GFU frames, then add GfuBoost percent.
+ cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+ ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+
+ cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);
+
+ cpi->pb.RefreshGoldenFrame = TRUE;
+
+ // Disabled debug dump of golden frame statistics (never executed).
+ if(0)
+ {
+ FILE *gfstats= fopen("gf.stt","a");
+ fprintf(gfstats,"Frame : %8d boost:%d, sp:%d,base:%d,ratio:%d,motion:%d,Gf:%d \n",
+ - 1 + (INT32) cpi->CurrentFrame ,
+ cpi->GfuBoost,
+ cpi->GfuMotionSpeed,
+ GfuDataRateBoost[cpi->pb.AvgFrameQIndex],
+ 100 * cpi->IntraError / (cpi->InterError),
+ Sum3,
+ cpi->GfUsage
+ );
+ fclose(gfstats);
+ }
+
+
+
+ // Select the interval before the next GF update
+ // To find the interval we find the max of AvX and AvY and work out how many frames
+ // it will take to move X pels (GF_UPDATE_MOTION_INTERVAL in 1/4 pel) assuming the motion
+ // level does not change. The value is then capped to the range MIN_GF_UPDATE_INTERVAL to MAX_GF_UPDATE_INTERVAL
+ if ( cpi->GfuMotionSpeed > 0 )
+ {
+ cpi->GfUpdateInterval = GF_UPDATE_MOTION_INTERVAL / cpi->GfuMotionSpeed;
+
+ if ( cpi->GfUpdateInterval < MIN_GF_UPDATE_INTERVAL )
+ cpi->GfUpdateInterval = MIN_GF_UPDATE_INTERVAL;
+
+ else if ( cpi->GfUpdateInterval > MAX_GF_UPDATE_INTERVAL )
+ cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+
+ }
+ else
+ cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+
+
+
+ }
+ }
+ }
+ }
+
+ // If we have a mode where RD opt is to be used re-do pickmodes with rdopt enabled
+ // (QuickCompress values 0 and 3 both select RdOpt level 2; other values leave RdOpt unchanged.)
+ if (cpi->QuickCompress == 0)
+ cpi->RdOpt = 2;
+ else if (cpi->QuickCompress == 3)
+ cpi->RdOpt = 2;
+ //cpi->RdOpt = 1;
+
+ // Get a cost estimate for the sake of RD opt.
+ // As we have not yet done pick modes for this frame this is by necessity
+ // based upon stats from the last frame.
+ // (The guard below is commented out, so this RegulateQ call is unconditional.)
+ //if ( cpi->RdOpt )
+ {
+ RegulateQ ( cpi, (cpi->ThisFrameTarget - (cpi->ModeMvCostEstimate/64)) );
+ }
+
+ // Select the optimal modes
+ PickModes ( cpi, &InterError, &IntraError );
+
+ // Normalize the key frame indicator to the range 0-100
+ // NOTE(review): the divisor ((actualMBS * 2)/3) is zero when actualMBS < 2 - confirm the minimum frame size rules this out.
+ actualMBS = (cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+ KFIndicator = (cpi->MotionScore * 100)/((actualMBS * 2)/3);
+
+ cpi->InterErrorb = InterError;
+ cpi->InterError = InterError;
+ cpi->IntraError = IntraError;
+
+ // Test for auto key frame.
+ if( cpi->AutoKeyFrameEnabled )
+ {
+
+ // Not used in pass 2. A key frame is coded when the motion indicator exceeds the threshold, the minimum
+ // key frame spacing has passed, intra coding is not much more expensive than inter coding, and either
+ // error score has changed by more than 40% from the stored Last* values or intra is within 6/5 of inter.
+ // NOTE(review): divides by cpi->LastInterError and cpi->LastIntraError - confirm they cannot be zero.
+ if( cpi->pass < 2
+ && KFIndicator > (UINT32) cpi->AutoKeyFrameThreshold
+ && cpi->LastKeyFrame > cpi->MinimumDistanceToKeyFrame
+ && ( cpi->IntraError < 2 * cpi->InterError
+ && cpi->IntraError < cpi->InterError + 2000 * actualMBS
+ )
+ && ( 100 * abs(cpi->InterError - cpi->LastInterError ) / cpi->LastInterError > 40
+ || 100 * abs(cpi->LastIntraError - cpi->IntraError) / cpi->LastIntraError > 40
+ || cpi->IntraError * 5 < cpi->InterError * 6
+ )
+ )
+ {
+
+ CompressKeyFrame(cpi); // Code a key frame
+ return;
+ }
+
+ }
+
+ // Increment the frames since last key frame count
+ cpi->LastKeyFrame++;
+
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+
+ // Maintain a record of GF usage over the last few frames
+ // Each frame reduce value by 1/8 then add in usage (0-100) for the current frame
+ {
+
+ UINT32 ThisFrameGolden;
+
+ // Count of macroblocks coded with any of the four golden frame modes...
+ ThisFrameGolden = cpi->ModeDist[CODE_USING_GOLDEN] + cpi->ModeDist[CODE_GOLDEN_MV] +
+ cpi->ModeDist[CODE_GOLD_NEAREST_MV] + cpi->ModeDist[CODE_GOLD_NEAR_MV];
+
+ // ...expressed as a percentage of the non-border macroblocks.
+ ThisFrameGolden = (ThisFrameGolden * 100) / ((cpi->pb.MBRows-2*BORDER_MBS )*(cpi->pb.MBCols-2*BORDER_MBS));
+ cpi->fps.PercentGolden = ThisFrameGolden;
+
+ cpi->GfUsage = ((cpi->GfUsage * 7) + 4) / 8;
+ cpi->GfUsage += ThisFrameGolden;
+ }
+
+ // Get an estimate of the Q that we should code at.
+ RegulateQ ( cpi, (cpi->ThisFrameTarget - (cpi->ModeMvCostEstimate/64)) );
+
+ cpi->DropCount = 0;
+
+
+ // This code is experimental and needs further refinement.
+ if ( cpi->pb.Vp3VersionNo > 7 )
+ {
+ INT32 IIRatio;
+ UINT8 MaxAplha;
+ UINT8 MinThresh;
+
+ // Calculate an intra inter ratio (scaled by 10) for blocks that use motion prediction.
+ if ( cpi->MotionInterErr > 0 )
+ IIRatio = (cpi->MotionIntraErr * 10)/cpi->MotionInterErr;
+ else
+ IIRatio = 10;
+
+ // Set Bicubic alpha and apply Q related limits
+ cpi->pb.PredictionFilterAlpha = cpi->BaselineAlpha;
+
+ // If a golden frame was thrown recently use its q for deciding alpha and threshold limits else the current frame Q.
+ if ( cpi->FramesTillGfUpdateDue > 0 )
+ {
+ MaxAplha = BicubicMaxAlpha[cpi->LastGfOrKFrameQ];
+ MinThresh = BicubicMinThresh[cpi->LastGfOrKFrameQ];
+ }
+ else
+ {
+ MaxAplha = BicubicMaxAlpha[cpi->pb.quantizer->FrameQIndex];
+ MinThresh = BicubicMinThresh[cpi->pb.quantizer->FrameQIndex];
+ }
+
+ cpi->pb.PredictionFilterMode = AUTO_SELECT_PM;
+
+ // Select the filtering parameters based upon the inter intra ratio:
+ // the higher the ratio, the smaller the offset added to the baseline variance threshold,
+ // and from a ratio of 80 upwards alpha is also raised by one.
+ if ( IIRatio < 15 )
+ {
+ cpi->pb.PredictionFilterVarThresh = 31;
+ }
+ else if ( IIRatio < 20 )
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh + 16;
+ }
+ else if ( IIRatio < 40 )
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh + 8;
+ }
+ else if ( IIRatio < 60 )
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh + 4;
+ }
+ else if ( IIRatio < 80 )
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh + 2;
+ }
+ else if ( IIRatio < 100 )
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh + 1;
+ cpi->pb.PredictionFilterAlpha += 1;
+ }
+ else
+ {
+ cpi->pb.PredictionFilterVarThresh = cpi->BaselineBicThresh;
+ cpi->pb.PredictionFilterAlpha += 1;
+ }
+
+ // Limit check alpha
+ if ( cpi->pb.PredictionFilterAlpha > MaxAplha )
+ cpi->pb.PredictionFilterAlpha = MaxAplha;
+
+ // Limit check variance threshold
+ if ( cpi->pb.PredictionFilterVarThresh > 31 )
+ cpi->pb.PredictionFilterVarThresh = 31;
+ else if ( cpi->pb.PredictionFilterVarThresh < MinThresh )
+ cpi->pb.PredictionFilterVarThresh = MinThresh;
+ }
+
+ /* Proceed with the frame update. */
+ UpdateFrame ( cpi );
+ }
+ else
+ {
+ // Frame dropped: nothing is coded, so credit one frame's worth of bandwidth.
+ // Update the buffer level variable.
+ cpi->BytesOffTarget += cpi->PerFrameBandwidth;
+
+ // Are we using the secondary buffer limit constraints
+ if ( cpi->MaxAllowedDatarate )
+ {
+ // MaxAllowedDatarate is applied as a percentage of the per-frame bandwidth.
+ cpi->BufferLevel += ((cpi->MaxAllowedDatarate * cpi->PerFrameBandwidth) / 100);
+ if ( cpi->BufferLevel > cpi->MaxBufferLevel )
+ cpi->BufferLevel = cpi->MaxBufferLevel;
+ }
+ // else update the secondary buffer level in line with the current buffer level
+ else
+ {
+ cpi->BufferLevel = cpi->BytesOffTarget;
+ }
+
+ // Update the drop frame flag etc.
+ cpi->DropFrame = FALSE;
+ cpi->DropCount++;
+ TotDropFrameCount++;
+ DropedFrame = TRUE;
+ }
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SwapScanEntries
+ *
+ *  INPUTS        : UINT32 *a : First  {ratio, zig-zag position} pair.
+ *                  UINT32 *b : Second {ratio, zig-zag position} pair.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Exchanges the two-element entries pointed to by a and b.
+ *
+ *  SPECIAL NOTES : Private helper for PredictScanOrder.
+ *
+ ****************************************************************************/
+static void SwapScanEntries ( UINT32 *a, UINT32 *b )
+{
+    UINT32 Ratio    = a[0];
+    UINT32 Position = a[1];
+
+    a[0] = b[0];
+    a[1] = b[1];
+
+    b[0] = Ratio;
+    b[1] = Position;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : PredictScanOrder
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : cpi->NewScanOrderBands[] : Band number for each coefficient
+ *                  position 1..BLOCK_SIZE-1 (entry 0 is not written here).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Ranks the coefficients by a 0-255 ratio derived from
+ *                  cpi->FrameNzCount, cuts the ranked list into
+ *                  SCAN_ORDER_BANDS groups at the positions given by
+ *                  EndpointLookup[], and records the group (band) number
+ *                  of every coefficient.
+ *
+ *  SPECIAL NOTES : Both sorts are simple insertion-style passes; the exact
+ *                  comparison order decides how ties are resolved.
+ *
+ ****************************************************************************/
+void PredictScanOrder ( CP_INSTANCE *cpi )
+{
+    UINT32 Coef, Pass, Slot, Band;
+    UINT32 First;
+    UINT32 Last = 0;
+    UINT32 Entry[BLOCK_SIZE][2];    // [n][0] = ratio value, [n][1] = zig-zag position
+
+    // Build one {ratio, position} entry per coefficient (index 0 is skipped).
+    // Ratio is count[1] scaled to 0-255 against count[0]+count[1]
+    // (presumably [1] = non-zero count, [0] = zero count -- confirm).
+    for ( Coef = 1; Coef < BLOCK_SIZE; Coef++ )
+    {
+        UINT32 Total = cpi->FrameNzCount[Coef][0] + cpi->FrameNzCount[Coef][1];
+
+        Entry[Coef][0] = Total ? (cpi->FrameNzCount[Coef][1]*255)/Total : 0;
+        Entry[Coef][1] = Coef;
+    }
+
+    // Order entries 1..BLOCK_SIZE-1 by descending ratio.
+    for ( Pass = 1; Pass < BLOCK_SIZE-1; Pass++ )
+    {
+        for ( Slot = Pass+1; Slot > 1; Slot-- )
+        {
+            if ( Entry[Slot][0] > Entry[Slot-1][0] )
+                SwapScanEntries ( Entry[Slot], Entry[Slot-1] );
+        }
+    }
+
+    // Walk the bands: each band covers slots First..Last of the ranked list.
+    for ( Band = 0; Band < SCAN_ORDER_BANDS; Band++ )
+    {
+        First = Last + 1;
+        Last  = EndpointLookup[Band];
+
+        // Within the band, restore ascending zig-zag position order.
+        for ( Pass = First; Pass < Last; Pass++ )
+        {
+            for ( Slot = Pass+1; Slot > First; Slot-- )
+            {
+                if ( Entry[Slot][1] < Entry[Slot-1][1] )
+                    SwapScanEntries ( Entry[Slot], Entry[Slot-1] );
+            }
+        }
+
+        // Entry[Slot][1] is the coefficient's traditional zig-zag position,
+        // Slot its position in the new order and Band the band it falls in.
+        for ( Slot = First; Slot <= Last; Slot++ )
+            cpi->NewScanOrderBands[Entry[Slot][1]] = Band;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateFrame
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Selects the scan order bands for the frame, builds the scan
+ * order, encodes the frame via EncodeData() and then updates
+ * the rate control state held in cpi (Bpb correction factor,
+ * key frame context, total byte count, drop frame flag and
+ * maximum drop count, buffer levels) and the running average
+ * Q index held in cpi->pb.
+ *
+ * SPECIAL NOTES : The frame type is re-queried with VP6_GetFrameType() at each
+ * decision point rather than cached across the calls made here.
+ * The PSNR_ON and FILE_PSNR sections are compiled independently;
+ * if both are defined PostProcess() can be called twice.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void UpdateFrame ( CP_INSTANCE *cpi )
+{
+ double FramePSNR = 0.0; // Only assigned when PSNR_ON is defined.
+ PB_INSTANCE *pbi = &cpi->pb;
+
+ // Key frames cannot have a backwards dependency so set up defaults for pbi->ScanBands.
+ if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+ {
+ // Set starting point for key frames... These cannot rely on what went before
+ if ( pbi->Configuration.Interlaced )
+ memcpy ( pbi->ScanBands, DefaultInterlacedScanBands, sizeof(pbi->ScanBands) );
+ else
+ memcpy ( pbi->ScanBands, DefaultNonInterlacedScanBands, sizeof(pbi->ScanBands) );
+ }
+
+ // Based upon the previous coded frame work out a predicted best
+ // scan order banding for coding this frame
+ if ( (cpi->CurrentFrame > 1) && (!cpi->ErrorResilliantMode) &&
+ ((pbi->Configuration.Interlaced) || (cpi->AllowScanOrderUpdates)) )
+ {
+ PredictScanOrder( cpi );
+ }
+ else
+ {
+ // Choose between default interlaced and non-interlaced sets.
+ if ( pbi->Configuration.Interlaced )
+ memcpy ( cpi->NewScanOrderBands, DefaultInterlacedScanBands, sizeof(cpi->NewScanOrderBands) );
+ else
+ memcpy ( cpi->NewScanOrderBands, DefaultNonInterlacedScanBands, sizeof(cpi->NewScanOrderBands) );
+ }
+
+ // Build the scan order
+ BuildScanOrder ( pbi, cpi->NewScanOrderBands );
+
+ // Encode the frame.
+ EncodeData ( cpi );
+
+ /* Update the BpbCorrectionFactor variable according to whether or not we were
+ * close enough with our selection of DCT quantiser.
+ */
+ if ( VP6_GetFrameType( pbi ) != BASE_FRAME ) // Body is the UpdateBpbCorrectionFactor call after the blank line.
+
+ UpdateBpbCorrectionFactor ( cpi, cpi->ThisFrameSize );
+
+ // Adjust carry over and or key frame context.
+ if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+ AdjustKeyFrameContext ( cpi );
+
+ cpi->TotalByteCount += (cpi->ThisFrameSize/8); // Divide by 8: presumably ThisFrameSize is in bits -- confirm.
+
+ // The auto-drop frame code is only used in buffered mode.
+ // In unbuffered mode (eg video conferencing) the decision to
+ // code or drop a frame is made outside the codec in response to real
+ // world comms or buffer considerations.
+ if ( cpi->BufferedMode )
+ {
+ // If the frame was massively oversize and we are below optimal buffer level drop next frame
+ if ( (cpi->DropFramesAllowed) &&
+ (cpi->BufferLevel < cpi->OptimalBufferLevel) &&
+ ((int)cpi->ThisFrameSize > (4 * cpi->ThisFrameTarget)) )
+ {
+ cpi->DropFrame = TRUE;
+ }
+
+ // Set the count for maximum consecutive dropped frames based upon ratio of
+ // this frame size to target size for this frame.
+
+ if(cpi->ThisFrameTarget > 0) // Guards the division below.
+ {
+ cpi->MaxDropCount = (cpi->ThisFrameSize / cpi->ThisFrameTarget);
+ if ( cpi->MaxDropCount > cpi->MaxConsecDroppedFrames )
+ cpi->MaxDropCount = cpi->MaxConsecDroppedFrames;
+ }
+ }
+
+ // PSNR_ON builds: optionally post-process the reconstruction, then call the frame PSNR function.
+#if defined PSNR_ON
+ if ( !cpi->AllowSpatialResampling )
+ {
+ if ( cpi->pb.quantizer->FrameQIndex < PPROC_QTHRESH )
+ {
+ cpi->pb.PostProcessingLevel = 4;
+
+ PostProcess ( cpi->pb.postproc,
+ cpi->pb.Vp3VersionNo,
+ cpi->pb.FrameType,
+ cpi->pb.PostProcessingLevel,
+ cpi->pb.quantizer->FrameQIndex,
+ cpi->pb.LastFrameRecon,
+ cpi->pb.PostProcessBuffer,
+ (unsigned char *) cpi->pb.FragInfo,
+ sizeof(FRAG_INFO),
+ 0x0001 );
+ }
+ else
+ cpi->pb.PostProcessingLevel = 0;
+
+ FramePSNR = CalcPSNR ( cpi );
+ }
+#endif
+
+ // FILE_PSNR builds: same post-processing step as above, but CalcPSNR is not called in this section.
+#if defined FILE_PSNR
+ if ( !cpi->AllowSpatialResampling )
+ {
+ if ( cpi->pb.quantizer->FrameQIndex < PPROC_QTHRESH )
+ {
+ cpi->pb.PostProcessingLevel=4;
+
+ PostProcess
+ (
+ cpi->pb.postproc,
+ cpi->pb.Vp3VersionNo,
+ cpi->pb.FrameType,
+ cpi->pb.PostProcessingLevel,
+ cpi->pb.quantizer->FrameQIndex,
+ cpi->pb.LastFrameRecon,
+ cpi->pb.PostProcessBuffer,
+ (unsigned char *) cpi->pb.FragInfo,
+ sizeof(FRAG_INFO),
+ 0x0001
+ );
+ }
+ else
+ cpi->pb.PostProcessingLevel=0;
+ }
+#endif
+
+
+ // Update the buffer level variable: credit one frame of bandwidth, debit the size of this frame.
+ cpi->BytesOffTarget += (cpi->PerFrameBandwidth - cpi->ThisFrameSize);
+
+ // Are we using the secondary buffer limit constraints
+ if ( cpi->MaxAllowedDatarate )
+ {
+ cpi->BufferLevel += (((cpi->MaxAllowedDatarate * cpi->PerFrameBandwidth) / 100) - cpi->ThisFrameSize);
+ if ( cpi->BufferLevel > cpi->MaxBufferLevel )
+ cpi->BufferLevel = cpi->MaxBufferLevel;
+ }
+ // else update the secondary buffer level in line with the current buffer level
+ else
+ {
+ cpi->BufferLevel = cpi->BytesOffTarget;
+ }
+
+ // If appropriate update the "last key frame buffer level" value.
+ if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+ cpi->LastKeyFrameBufferLevel = cpi->BufferLevel;
+
+ // Keep a record of ambient average Q: reset on key frames, otherwise a rounded 3:1 blend of old average and this frame.
+ if ( pbi->FrameType == BASE_FRAME )
+ pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+ else
+ pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c
new file mode 100644
index 00000000..804d1d6b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c
@@ -0,0 +1,1564 @@
+/****************************************************************************
+*
+* Module Title : vfwcomp_if.c
+*
+* Description : Compressor interface definition.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+#include "mcomp.h"
+#include "misc_common.h"
+#include "vp60eversion.h"
+#include "twopass.h"
+#include <math.h>
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define CommentString "\nON2.COM VERSION VP60E " VP60EVERSION "\n"
+
+#ifdef _MSC_VER
+#pragma comment(exestr,CommentString)
+#endif
+
+/****************************************************************************
+* Typedefs
+*
+* COMPRESSOR_STATE groups key frame history, frame/byte counters, the active
+* maximum Q and the Bpb correction factor.
+* NOTE(review): no use of this type is visible in this part of the file; the
+* field names match encoder instance members such as CurrentFrame, DropCount,
+* TotalByteCount and BpbCorrectionFactor, so it is presumably a snapshot of
+* rate control state -- confirm against its users.
+****************************************************************************/
+
+typedef struct _COMPRESSOR_STATE
+{
+ UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];
+ UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
+ INT64 CurrentFrame;
+ UINT32 LastFrameSize;
+ INT32 DropCount;
+ INT64 KeyFrameCount;
+ INT64 TotKeyFrameBytes;
+ UINT32 LastKeyFrameSize;
+ UINT32 LastKeyFrame;
+ INT64 TotalByteCount;
+ UINT32 ActiveMaxQ;
+ double BpbCorrectionFactor;
+} COMPRESSOR_STATE;
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+static const char vp60eVersion[] = VP60EVERSION;
+static INT32 ClipBytes;
+
+//#define TIMING
+#ifdef TIMING
+#include "mmsystem.h"
+static long ITotalTime=0;
+static long ITime1, ITime2;
+#endif
+
+#if defined MEASURE_SECTION_COSTS
+UINT32 ClipSectionBits[10] = {0,0,0,0,0,0,0,0,0,0};
+#endif
+
+static const UINT8 BicThreshTable[11] = { 31, 31, 31, 16, 8, 4, 3, 2, 1, 1, 1}; // Source of cpi->BaselineBicThresh; indexed by CompConfig->Sharpness in ChangeEncoderConfig (valid indices 0..10).
+static const UINT8 BicAlphaTable[11] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // Source of cpi->BaselineAlpha; indexed by CompConfig->Sharpness in ChangeEncoderConfig (valid indices 0..10).
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 scanupdates[64][2];
+
+extern void ScaleFrame (
+ YUV_BUFFER_CONFIG *src,
+ YUV_BUFFER_CONFIG *dst,
+ unsigned char *tempArea,
+ unsigned char tempHeight,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+ );
+
+extern void CompressFirstFrame ( CP_INSTANCE *cpi );
+extern void CompressKeyFrame ( CP_INSTANCE *cpi );
+extern void CompressFrame ( CP_INSTANCE *cpi, UINT32 FrameNumber );
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP60E_GetVersionNumber
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : const char *CCONV : Address of the module's static
+ *                  version string (initialised from VP60EVERSION).
+ *
+ *  FUNCTION      : Exposes the encoder version string to callers.
+ *
+ *  SPECIAL NOTES : The returned storage is a file-scope constant array.
+ *
+ ****************************************************************************/
+const char *CCONV VP60E_GetVersionNumber ( void )
+{
+    const char *Version = vp60eVersion;
+
+    return Version;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeEncoderSize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 Width     : Requested frame width in pels.
+ *                  UINT32 Height    : Requested frame height in pels.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Stores the new frame dimensions (rounded up to a multiple
+ *                  of 16), re-initialises the frame details, allocates the
+ *                  encoder fragment and frame buffers and initialises
+ *                  motion compensation.
+ *
+ *  SPECIAL NOTES : Failure is silent. If an allocation fails the buffers
+ *                  obtained so far are released and motion compensation
+ *                  is not initialised.
+ *
+ ****************************************************************************/
+void CCONV ChangeEncoderSize ( CP_INSTANCE *cpi, UINT32 Width, UINT32 Height )
+{
+    // Frame size __MUST__ be a multiple of 16 pels in each dimension: round up.
+    cpi->pb.Configuration.VideoFrameHeight = ((Height+15)&0xFFFFFFF0);
+    cpi->pb.Configuration.VideoFrameWidth  = ((Width +15)&0xFFFFFFF0);
+
+    // NOTE(review): presumably a sentinel so VP6_InitFrameDetails sees the size as changed -- confirm.
+    cpi->pb.YPlaneSize = 0xFFF;
+
+    // Initialise image format details; nothing further is done if this fails.
+    if ( VP6_InitFrameDetails( &cpi->pb ) )
+    {
+        if ( !EAllocateFragmentInfo ( cpi ) )
+        {
+            // Fragment allocation failed: release the decoder-side buffers.
+            VP6_DeleteFragmentInfo ( &cpi->pb );
+            VP6_DeleteFrameInfo ( &cpi->pb );
+        }
+        else if ( !EAllocateFrameInfo ( cpi ) )
+        {
+            // Frame allocation failed: release decoder-side buffers and the
+            // encoder fragment info allocated just above.
+            VP6_DeleteFragmentInfo ( &cpi->pb );
+            VP6_DeleteFrameInfo ( &cpi->pb );
+            EDeleteFragmentInfo ( cpi );
+        }
+        else
+        {
+            // Everything allocated: initialise motion compensation.
+            InitMotionCompensation ( cpi );
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : pickSizeStep
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : cpi->FrameRateInput, cpi->FrameRateDropFrames,
+ *                  cpi->FrameRateDropCount, cpi->Configuration.OutputFrameRate
+ *                  and cpi->SizeStep are set.
+ *                  CompConfig->FrameRate is overwritten with 30 if it was 0.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Derives the output frame rate and the size step from the
+ *                  number of bits available per pixel at the target bit rate.
+ *
+ *  SPECIAL NOTES : The output frame rate is never left at 0, because it is
+ *                  used as a divisor both here and by the caller.
+ *
+ ****************************************************************************/
+void pickSizeStep ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    double bitsPerPixel;
+
+    // FrameSize packs the width in the upper 16 bits and the height in the lower 16.
+    int Width  = ((CompConfig->FrameSize & 0xFFFF0000) >> 16);
+    int Height = CompConfig->FrameSize & 0x0000FFFF;
+
+    // Substitute defaults for unspecified values so the divisions below are safe.
+    if ( CompConfig->FrameRate == 0 )
+        CompConfig->FrameRate = 30;
+
+    if ( Width==0 )
+        Width = 320;
+
+    if ( Height== 0 )
+        Height = 240;
+
+    bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                   (CompConfig->FrameRate * Width * Height);
+
+    // drop size to 4/5 before dropping frame rate to 1/2 or 1/3
+    // (the size step itself is decided at the end of this routine, so only
+    // the bits-per-pixel figure used for the frame rate decision changes here)
+    if ( bitsPerPixel < 0.03 ) // VP4 was 0.043
+    {
+        bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                       (CompConfig->FrameRate * Width * Height * 4/5 * 4/5);
+    }
+
+    cpi->FrameRateInput      = CompConfig->FrameRate;
+    cpi->FrameRateDropFrames = 0;
+
+    if ( cpi->DropFramesAllowed )
+    {
+        // figure out output frame rate
+        if ( bitsPerPixel > 0.025 )
+            cpi->FrameRateDropFrames = 0;
+        else if ( bitsPerPixel > 0.015 )
+            cpi->FrameRateDropFrames = 1;
+        else
+            cpi->FrameRateDropFrames = 2;
+    }
+
+    cpi->FrameRateDropCount = 0;
+    cpi->Configuration.OutputFrameRate = CompConfig->FrameRate / (cpi->FrameRateDropFrames+1);
+
+    // A low input rate divided by (drop frames + 1) can truncate to 0 (e.g. 2/3).
+    // Keep at least 1 so neither the division below nor the caller's
+    // per-frame bandwidth calculation divides by zero.
+    if ( cpi->Configuration.OutputFrameRate == 0 )
+        cpi->Configuration.OutputFrameRate = 1;
+
+    bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                   (cpi->Configuration.OutputFrameRate * Width * Height);
+
+    // categorize the cpi->SizeStep of the clip by the number of
+    // bits we are allowing per pixel!
+    if( bitsPerPixel > 0.090 )
+        cpi->SizeStep = 0;
+    else if( bitsPerPixel > 0.060 ) // VP4 was 0.09
+        cpi->SizeStep = 1;
+    else if ( bitsPerPixel > .040 ) // VP4 was 0.070
+        cpi->SizeStep = 2;
+    else if ( bitsPerPixel > .030 ) // VP4 was 0.06
+        cpi->SizeStep = 3;
+    else if ( bitsPerPixel > .015 ) // VP4 was 0.043
+        cpi->SizeStep = 4;
+    else
+        cpi->SizeStep = 5;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeEncoderConfig
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies the supplied configuration into the encoder
+ *                  instance, applies the compression mode and end usage
+ *                  overrides, resizes the encoder if the frame size changed
+ *                  and derives the per frame bandwidth targets.
+ *
+ *  SPECIAL NOTES : Statement order matters: BufferedMode is set first because
+ *                  later settings depend on it, and the mode / end usage
+ *                  sections deliberately overwrite values copied earlier.
+ *                  Out of range Sharpness values are clamped to the table
+ *                  range, and a zero output frame rate or key frame frequency
+ *                  no longer causes a division by zero.
+ *
+ ****************************************************************************/
+void CCONV ChangeEncoderConfig ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    INT32  Quality     = CompConfig->Quality;
+    UINT32 SharpIdx    = 0;
+    UINT32 MaxSharpIdx = (UINT32)(sizeof(BicAlphaTable)/sizeof(BicAlphaTable[0])) - 1;
+
+    cpi->BufferedMode               = (CompConfig->OptimalBufferLevel > 0) ? TRUE : FALSE;
+    cpi->AutoKeyFrameEnabled        = CompConfig->AutoKeyFrameEnabled;
+    cpi->MinimumDistanceToKeyFrame  = CompConfig->MinimumDistanceToKeyFrame;
+    cpi->ForceKeyFrameEvery         = CompConfig->ForceKeyFrameEvery;
+    cpi->PreProcFilterLevel         = CompConfig->NoiseSensitivity;
+    cpi->AllowSpatialResampling     = CompConfig->AllowSpatialResampling && cpi->BufferedMode; // NOTE: disallow if mode is unbuffered.
+    cpi->AutoKeyFrameThreshold      = CompConfig->AutoKeyFrameThreshold;
+    cpi->CPUUsed                    = CompConfig->Speed;
+    cpi->Configuration.TargetBandwidth = CompConfig->TargetBitRate * 1024;
+    cpi->ActualTargetBitRate        = cpi->Configuration.TargetBandwidth;
+
+    // Buffer levels are configured as multiples of the target bandwidth.
+    cpi->OptimalBufferLevel         = CompConfig->OptimalBufferLevel * cpi->Configuration.TargetBandwidth;
+    cpi->StartingBufferLevel        = CompConfig->StartingBufferLevel * cpi->Configuration.TargetBandwidth;
+    cpi->MaxBufferLevel             = CompConfig->MaximumBufferSize * cpi->Configuration.TargetBandwidth;
+
+    // Water marks are configured as percentages of the optimal buffer level.
+    cpi->DropFramesWaterMark        = (cpi->OptimalBufferLevel * CompConfig->DropFramesWaterMark) / 100;
+    cpi->ResampleDownWaterMark      = (cpi->OptimalBufferLevel * CompConfig->ResampleDownWaterMark) / 100;
+    cpi->ResampleUpWaterMark        = (cpi->OptimalBufferLevel * CompConfig->ResampleUpWaterMark) / 100;
+
+    cpi->DisableGolden              = CompConfig->DisableGolden ;
+    cpi->VBMode                     = CompConfig->VBMode ;
+    cpi->BestAllowedQ               = CompConfig->BestAllowedQ ;
+    cpi->UnderShootPct              = CompConfig->UnderShootPct ;
+
+    cpi->MaxAllowedDatarate         = CompConfig->MaxAllowedDatarate ;
+    cpi->MaximumBufferSize          = CompConfig->MaximumBufferSize ;
+
+    cpi->TwoPassVBREnabled          = CompConfig->TwoPassVBREnabled ;
+    cpi->TwoPassVBRBias             = CompConfig->TwoPassVBRBias ;
+    cpi->TwoPassVBRMaxSection       = CompConfig->TwoPassVBRMaxSection;
+    cpi->TwoPassVBRMinSection       = CompConfig->TwoPassVBRMinSection;
+    cpi->Pass                       = CompConfig->Pass ;
+    cpi->ErrorResilliantMode        = CompConfig->ErrorResilientMode;
+
+    // Error resilient mode always runs without golden frames.
+    if(cpi->ErrorResilliantMode)
+        cpi->DisableGolden =1;
+
+    cpi->DropFramesAllowed          = CompConfig->AllowDF && cpi->BufferedMode; // NOTE: disallow if mode is unbuffered.
+    cpi->MaxConsecDroppedFrames     = 4; // TBD
+    cpi->QuickCompress              = CompConfig->QuickCompress;
+
+    // Sharpness indexes the two 11-entry Bic tables. Clamp it so that an
+    // out-of-range value cannot read beyond the end of either table
+    // (values below 1 map to entry 0, values above the last entry map to the last).
+    if ( CompConfig->Sharpness > 0 )
+    {
+        SharpIdx = (UINT32)CompConfig->Sharpness;
+        if ( SharpIdx > MaxSharpIdx )
+            SharpIdx = MaxSharpIdx;
+    }
+    cpi->BaselineAlpha              = BicAlphaTable[SharpIdx];
+    cpi->BaselineBicThresh          = BicThreshTable[SharpIdx];
+
+    if(CompConfig->TwoPassVBRMaxSection == DEFAULT_VALUE)
+        cpi->TwoPassVBRMaxSection = CompConfig->MaxAllowedDatarate;
+
+    // FixedQ acts as an on/off switch here; the Q itself comes from Quality.
+    if( CompConfig->FixedQ > 0 )
+        cpi->FixedQ = 63 - CompConfig->Quality;
+    else
+        cpi->FixedQ = -1;
+
+    // compression mode dependant
+    switch(CompConfig->Mode)
+    {
+    case MODE_REALTIME:
+        cpi->Speed = 4;
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 2;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;
+        break;
+    case MODE_GOODQUALITY:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;
+        break;
+    case MODE_BESTQUALITY:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 0;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;
+
+        break;
+    case MODE_FIRSTPASS:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 1;
+        // The first pass runs at a fixed Q with pre-processing, resampling
+        // and frame dropping all switched off.
+        cpi->PreProcFilterLevel = 0;
+        cpi->FixedQ = FIRSTPASS_Q;
+        cpi->ForceKeyFrameEvery = 99999;
+        cpi->AutoKeyFrameThreshold = 50;
+        cpi->MinimumDistanceToKeyFrame = 0;
+        cpi->AllowSpatialResampling = 0;
+        cpi->DropFramesAllowed = 0;
+        break;
+
+    case MODE_SECONDPASS:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 2;
+        break;
+    case MODE_SECONDPASS_BEST:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 0;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 2;
+        break;
+
+    }
+
+    // Are we planning local file playback or streamed
+    cpi->EndUsage = CompConfig->EndUsage;
+
+    // We auto-adjust worst quality for 1 pass modes only and
+    // disable when coding real time.
+    if ( (CompConfig->Mode < MODE_SECONDPASS) && (cpi->QuickCompress != 2) )
+        cpi->AutoWorstQ = TRUE;
+    else
+        cpi->AutoWorstQ = FALSE;
+
+    // endusage dependent
+    // 1 pass + local file playback
+    if(CompConfig->EndUsage == USAGE_LOCAL_FILE_PLAYBACK && CompConfig->Mode < MODE_SECONDPASS)
+    {
+
+        cpi->MaxAllowedDatarate = 200;
+        cpi->StartingBufferLevel = 4 * cpi->Configuration.TargetBandwidth;
+        cpi->OptimalBufferLevel = 4 * cpi->Configuration.TargetBandwidth;
+        cpi->MaxBufferLevel = 5 * cpi->Configuration.TargetBandwidth;
+        cpi->VBMode = 1;
+        cpi->TwoPassVBREnabled = 0;
+    }
+    // 2 pass local file playback
+    else if(CompConfig->EndUsage == USAGE_LOCAL_FILE_PLAYBACK && CompConfig->Mode >= MODE_SECONDPASS)
+    {
+        cpi->MaxAllowedDatarate = 400;
+        cpi->StartingBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->OptimalBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->MaxBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->VBMode = 1;
+        cpi->TwoPassVBREnabled = 1;
+    }
+    // 1 or 2 pass streaming playback
+    else
+    {
+        cpi->VBMode = 0;
+        cpi->TwoPassVBREnabled = 0;
+    }
+
+
+    //if(cpi->QuickCompress == 0)
+    //cpi->QuickCompress = 3;
+    //if(cpi->QuickCompress == 3)
+    // cpi->QuickCompress = 0;
+
+
+    // Set the output frame rate.
+    cpi->Configuration.OutputFrameRate = CompConfig->FrameRate;
+    if ( cpi->Configuration.OutputFrameRate < 1 )
+        cpi->Configuration.OutputFrameRate = CompConfig->OutputFrameRate;
+    else if ( cpi->Configuration.OutputFrameRate > 1000 )
+        cpi->Configuration.OutputFrameRate = 1000;
+
+    // Set key frame data rate target and frequency
+    cpi->KeyFrameDataTargetOrig = (CompConfig->KeyFrameDataTarget * 1024);
+    cpi->KeyFrameDataTarget = cpi->KeyFrameDataTargetOrig;
+    if(cpi->KeyFrameDataTarget > (int) cpi->Configuration.TargetBandwidth / 2)
+        cpi->KeyFrameDataTarget = (int) cpi->Configuration.TargetBandwidth / 2;
+
+    cpi->KeyFrameFrequency = CompConfig->KeyFrameFrequency;
+
+    cpi->BytesOffTarget = cpi->StartingBufferLevel; // Set the current buffer level
+    cpi->BufferLevel = cpi->StartingBufferLevel; // Set the current buffer level
+
+    cpi->LastKeyFrameBufferLevel = cpi->StartingBufferLevel; // Used to monitor changes in buffer level when considering re-sampling.
+
+    cpi->pb.Configuration.Interlaced = CompConfig->Interlaced;
+    cpi->pb.Configuration.HScale = CompConfig->HScale;
+    cpi->pb.Configuration.HRatio = CompConfig->HRatio;
+    cpi->pb.Configuration.VScale = CompConfig->VScale;
+    cpi->pb.Configuration.VRatio = CompConfig->VRatio;
+    cpi->pb.Configuration.ScalingMode = CompConfig->ScalingMode;
+
+    // Set the quality settings.
+    ConfigureQuality ( cpi, Quality );
+
+    /* Set the video frame size (FrameSize packs width << 16 | height). */
+    if ( CompConfig->FrameSize !=
+         (unsigned int) ((cpi->YuvInputData.YWidth << 16) | cpi->YuvInputData.YHeight) )
+    {
+        ChangeEncoderSize ( cpi, ((CompConfig->FrameSize & 0xFFFF0000) >> 16),
+                            CompConfig->FrameSize & 0x0000FFFF);
+
+        cpi->InputConfig.YWidth = ((CompConfig->FrameSize & 0xFFFF0000) >> 16);
+        cpi->InputConfig.YHeight = CompConfig->FrameSize & 0x0000FFFF;
+        cpi->InputConfig.YStride = cpi->InputConfig.YWidth;
+        cpi->InputConfig.UVWidth = cpi->InputConfig.YWidth /2 ;
+        cpi->InputConfig.UVHeight = (CompConfig->FrameSize & 0x0000FFFF) /2;
+        cpi->InputConfig.UVStride = cpi->InputConfig.YWidth/2;
+
+        cpi->SizeStep = 0;
+    }
+
+    if(cpi->BufferedMode )
+        pickSizeStep ( cpi, CompConfig );
+
+    // The output frame rate is a divisor below. Both configured rates may be 0
+    // and pickSizeStep derives its value by integer division, so enforce a
+    // minimum of 1 as a last resort.
+    if ( cpi->Configuration.OutputFrameRate < 1 )
+        cpi->Configuration.OutputFrameRate = 1;
+
+    cpi->InterFrameTarget = cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate;
+    cpi->PerFrameBandwidth = (cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate);
+
+    // Calculate a new target bytes per frame allowing for predicted key frame frequency and size.
+    // With a key frame frequency of 0 the key frame share cannot be computed,
+    // so the plain per frame target set above is kept.
+    if ( cpi->KeyFrameFrequency != 0 )
+    {
+        if ( (INT32)cpi->Configuration.TargetBandwidth > ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency) )
+            cpi->InterFrameTarget = (INT32)((cpi->Configuration.TargetBandwidth - ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+        else
+            cpi->InterFrameTarget = 1;
+    }
+
+
+    cpi->pass = cpi->Pass;
+    if(cpi->pass)
+        Pass2Initialize(cpi,CompConfig);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : StartEncoder
+ *
+ *  INPUTS        : COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : CP_INSTANCE **cpi : Receives the new encoder instance
+ *                  (set to the CreateCPInstance() result, NULL on failure).
+ *
+ *  RETURNS       : BOOL: TRUE=success, FALSE=failure (NULL argument or the
+ *                  encoder instance could not be created).
+ *
+ *  FUNCTION      : Creates a new encoder instance, applies the supplied
+ *                  configuration and resets the per clip state.
+ *
+ *  SPECIAL NOTES : pb.Vp3VersionNo must be set before ChangeEncoderConfig()
+ *                  is called, and NiAvQi is read from the configuration
+ *                  after it.
+ *                  NOTE(review): the results of CreatePostProcInstance() and
+ *                  VP6_CreateQuantizer() are not checked here -- confirm
+ *                  whether they can fail.
+ *
+ ****************************************************************************/
+BOOL CCONV StartEncoder ( CP_INSTANCE **cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    // Both pointers are dereferenced unconditionally below.
+    if ( !cpi || !CompConfig )
+        return FALSE;
+
+#ifdef TIMING
+    // DEBUG CODE
+    ITime1=timeGetTime();
+    {
+        FILE *fp = fopen( "d:\\Times.txt", "at" );
+        if ( fp )   // The log file is optional: skip logging if it cannot be opened.
+        {
+            fprintf(fp, "StartEncoder: %ld\n",ITime1);  // ITime1 is a long.
+            fclose(fp);
+        }
+    }
+#endif
+
+    // Create an instance of the encoder
+    *cpi = CreateCPInstance();
+    if ( !*cpi )
+        return FALSE;
+
+    // Initialisation default config.
+    (*cpi)->pb.Configuration.HFragPixels = 8;
+    (*cpi)->pb.Configuration.VFragPixels = 8;
+    (*cpi)->pb.postproc = CreatePostProcInstance ( &((*cpi)->pb.Configuration) );
+    (*cpi)->pb.quantizer = VP6_CreateQuantizer();
+
+
+    // profile 4 is actually encode version 8
+    if(CompConfig->Profile == 4)
+    {
+        (*cpi)->pb.VpProfile = 3;
+        (*cpi)->pb.Vp3VersionNo = 8;
+    }
+    else
+    {
+        (*cpi)->pb.VpProfile = CompConfig->Profile;
+        (*cpi)->pb.Vp3VersionNo = 6;
+    }
+
+    ChangeEncoderConfig ( *cpi, CompConfig );
+
+    /* Initialise the compression process. */
+    (*cpi)->CurrentFrame = 1;
+    (*cpi)->BpbCorrectionFactor = 1.0;
+    (*cpi)->KeyFrameBpbCorrectionFactor = 0.4;
+    (*cpi)->GfuBpbCorrectionFactor = 2.0;
+    (*cpi)->TotalByteCount = 0;
+    (*cpi)->TotalMotionScore = 0;
+
+    (*cpi)->NiTotQi = 0;
+    (*cpi)->NiFrames = 0;
+    (*cpi)->NiAvQi = (*cpi)->Configuration.WorstQuality;
+
+    // This makes sure encoder version specific tables are initialised
+    VP6_InitQTables ( (*cpi)->pb.quantizer, (*cpi)->pb.Vp3VersionNo );
+
+    // Indicate that the next frame to be compressed is the first in the current clip.
+    (*cpi)->ThisIsFirstFrame = TRUE;
+
+    // Initialize the drop frame flags
+    (*cpi)->DropFrame = FALSE;
+    (*cpi)->MaxConsecDroppedFrames = 4;
+
+#if defined PSNR_ON
+    // DEBUG: Clear down PSNR variables
+    (*cpi)->TotalSqError =0.0;
+    (*cpi)->TotPsnr = 0.0;
+    (*cpi)->TotYPsnr = 0.0;
+    (*cpi)->TotUPsnr = 0.0;
+    (*cpi)->TotVPsnr = 0.0;
+    (*cpi)->MinPsnr = 999.00;
+    (*cpi)->MinYPsnr = 999.00;
+    (*cpi)->MinUPsnr = 999.00;
+    (*cpi)->MinVPsnr = 999.00;
+    (*cpi)->MaxPsnr = 0.0;
+    (*cpi)->MaxYPsnr = 0.0;
+    (*cpi)->MaxUPsnr = 0.0;
+    (*cpi)->MaxVPsnr = 0.0;
+#endif
+
+#ifdef MAPCA
+    InitMERefDs();
+#endif
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : AbandonPassFiles
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Closes whichever of the two stats files is open, clears
+ *                  both handles and switches multi-pass coding off.
+ *
+ *  SPECIAL NOTES : Private helper for ChangeCompressorSetting, used when a
+ *                  stats file cannot be opened.
+ *                  NOTE(review): falling back to pass 0 avoids later use of
+ *                  a NULL stats handle -- confirm this is the desired policy.
+ *
+ ****************************************************************************/
+static void AbandonPassFiles ( CP_INSTANCE *cpi )
+{
+    if ( cpi->fs )
+        fclose ( cpi->fs );
+    if ( cpi->ss )
+        fclose ( cpi->ss );
+
+    cpi->fs = NULL;
+    cpi->ss = NULL;
+
+    cpi->Pass = 0;
+    cpi->pass = 0;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WriteFirstPassHeader
+ *
+ *  INPUTS        : FILE *fp : Open stats file.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes the column heading line of a first pass stats file.
+ *
+ *  SPECIAL NOTES : Private helper for ChangeCompressorSetting.
+ *
+ ****************************************************************************/
+static void WriteFirstPassHeader ( FILE *fp )
+{
+    fprintf(fp,
+        "%8s %8s %8s %8s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s \n",
+        "","#","key","golden","bits/mb","sq bits/mb","Inter","Intra","Motion","VarX","VarY",
+        "%Motion","%NewMotion","%Golden");
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : BeginFirstPass
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Creates the two first pass stats files and writes their
+ *                  heading lines.
+ *
+ *  SPECIAL NOTES : Private helper for ChangeCompressorSetting.
+ *
+ ****************************************************************************/
+static void BeginFirstPass ( CP_INSTANCE *cpi )
+{
+    cpi->fs = fopen("firstpass.fst","w");
+    cpi->ss = fopen("firstpass.sst","w");
+
+    // Writing to a file that failed to open would crash.
+    if ( !cpi->fs || !cpi->ss )
+    {
+        AbandonPassFiles ( cpi );
+        return;
+    }
+
+    WriteFirstPassHeader ( cpi->fs );
+    WriteFirstPassHeader ( cpi->ss );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : BeginSecondPass
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Opens the stats files written by the first pass, skips
+ *                  their heading lines, reads the summary stats and derives
+ *                  the worst quality setting from the first pass bit rate.
+ *
+ *  SPECIAL NOTES : Private helper for ChangeCompressorSetting.
+ *
+ ****************************************************************************/
+static void BeginSecondPass ( CP_INSTANCE *cpi )
+{
+    char dummy[1024];
+    int actualMBS;                          // number of macroblocks
+    double fpBitRate;                       // first pass bitrate
+    double target;                          // target bitrate
+    double NewQ;
+    const double RoomForVariation = 5;      // 5 q steps above
+    const double FirstPassQ = 32;
+
+    cpi->fs = fopen("firstpass.fst","r");
+    cpi->ss = fopen("firstpass.sst","r");
+
+    // Reading from a file that failed to open would crash.
+    if ( !cpi->fs || !cpi->ss )
+    {
+        AbandonPassFiles ( cpi );
+        return;
+    }
+
+    // Skip the heading line of each file.
+    fgets(dummy,sizeof(dummy),cpi->fs);
+    fgets(dummy,sizeof(dummy),cpi->ss);
+
+    // calculate a q value to use
+    actualMBS = (cpi->pb.MBRows - (BORDER_MBS*2))
+              * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+    InputStats(cpi->ss,&cpi->fpmss);
+
+    fpBitRate = cpi->fpmss.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+    target = (double) cpi->Configuration.TargetBandwidth;
+
+    NewQ = 63 - ( RoomForVariation + FirstPassQ + .5 + log(fpBitRate/target) / log(1.05));
+
+    // A negative or 0/0 bit rate ratio yields NaN, which passes both limit
+    // checks below and must not be converted to an integer; leave the
+    // quality settings untouched in that case.
+    if ( NewQ != NewQ )
+        return;
+
+    if(NewQ < cpi->Configuration.WorstQuality )
+        NewQ = cpi->Configuration.WorstQuality;
+
+    if(NewQ > cpi->Configuration.ActiveBestQuality)
+        NewQ = cpi->Configuration.ActiveBestQuality;
+
+    cpi->Configuration.WorstQuality = (INT32) NewQ;
+    cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeCompressorSetting
+ *
+ *  INPUTS        : CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                  C_SETTING Setting : Compressor setting to change.
+ *                  int Value         : Value to set setting to.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets the specified compressor setting to the
+ *                  specified value.
+ *
+ *  SPECIAL NOTES : For C_SET_REFERENCEFRAME and C_SET_INTERNAL_SIZE Value
+ *                  carries a pointer; a zero Value is ignored.
+ *                  NOTE(review): passing a pointer through an int truncates
+ *                  on platforms where pointers are wider than int.
+ *                  C_SET_INTERNAL_SIZE only takes effect when all four
+ *                  numbers are present in the string.
+ *
+ ****************************************************************************/
+void CCONV ChangeCompressorSetting ( CP_INSTANCE *cpi, C_SETTING Setting, int Value )
+{
+    switch ( Setting )
+    {
+    case C_SET_RECOVERY_FRAME:
+        cpi->GfRecoveryFrame = TRUE;
+        break;
+
+    case C_SET_GOLDENFRAME:
+        cpi->pb.RefreshGoldenFrame = TRUE;
+        break;
+
+    case C_SET_REFERENCEFRAME:
+        // Value is the address of the frame to copy into both reference buffers.
+        if ( Value != 0 )
+        {
+            CopyFrame ( cpi->pb.postproc, (YUV_BUFFER_CONFIG *) Value, cpi->pb.LastFrameRecon );
+            CopyFrame ( cpi->pb.postproc, (YUV_BUFFER_CONFIG *) Value, cpi->pb.GoldenFrame );
+        }
+        break;
+
+    case C_SET_INTERNAL_SIZE:
+        // Value is the address of a string holding "HRatio HScale VRatio VScale".
+        if ( Value != 0 )
+        {
+            int HRatio, HScale, VRatio, VScale;
+
+            // Parse into locals first so a short or malformed string leaves
+            // the current forced size untouched.
+            if ( sscanf ( (const char *)Value, "%d %d %d %d", &HRatio, &HScale, &VRatio, &VScale ) == 4 )
+            {
+                cpi->ForceHRatio = HRatio;
+                cpi->ForceHScale = HScale;
+                cpi->ForceVRatio = VRatio;
+                cpi->ForceVScale = VScale;
+                cpi->ForceInternalSize = 1;
+                cpi->ThisIsKeyFrame = TRUE;
+            }
+        }
+        break;
+
+    case C_SET_KEY_FRAME:
+        cpi->ThisIsKeyFrame = TRUE;
+        break;
+
+    case C_SET_FIXED_Q:
+        // Values outside 0..63 are ignored.
+        if ( (Value >= 0) && (Value < 64) )
+            cpi->FixedQ = 63 - Value;
+        break;
+
+    case C_SET_FIRSTPASS_FILE:
+        break;
+
+    case C_SET_TESTMODE:
+        cpi->pb.testMode = Value;
+        break;
+
+    default:
+        if ( (Setting >= C_SET_EXPERIMENTAL_MIN) && (Setting <= C_SET_EXPERIMENTAL_MAX) )
+        {
+            INT32 nExperimental = Setting - C_SET_EXPERIMENTAL_MIN;
+
+            // Track the highest experimental slot used so far and record the raw value.
+            if (nExperimental >= (INT32)cpi->nExperimentals)
+                cpi->nExperimentals = nExperimental + 1;
+
+            cpi->Experimental[nExperimental] = Value;
+
+            // Some experimental slots are wired to real encoder settings.
+            switch(nExperimental)
+            {
+            case 0:
+                cpi->DisableGolden = Value;
+                break;
+            case 1:
+                cpi->VBMode = Value;
+                break;
+            case 2:
+                cpi->BestAllowedQ = Value;
+                break;
+            case 3:
+                cpi->UnderShootPct = Value;
+                break;
+            case 4:
+                cpi->MaxAllowedDatarate = Value;
+                break;
+            case 5:
+                cpi->MaximumBufferSize = Value;
+                cpi->MaxBufferLevel = cpi->OptimalBufferLevel + ((cpi->MaximumBufferSize * cpi->Configuration.TargetBandwidth) / 100);
+                break;
+            case 250:
+                cpi->TwoPassVBREnabled = Value;
+                break;
+            case 251:
+                cpi->TwoPassVBRBias = Value;
+                break;
+            case 252:
+                cpi->TwoPassVBRMaxSection = Value;
+                break;
+            case 253:
+                cpi->TwoPassVBRMinSection = Value;
+                break;
+            case 255:
+                cpi->Pass = Value;
+                cpi->pass = Value;
+                if ( cpi->pass == 2 )
+                    BeginSecondPass ( cpi );
+                else if ( cpi->pass == 1 )
+                    BeginFirstPass ( cpi );
+                break;
+            }
+        }
+
+        break;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CopyOrResize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                  BOOL ResetPreproc : When TRUE (and pre-processing is
+ *                                      enabled) cpi->preproc.frame is
+ *                                      reset to 0, e.g. for a key frame.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Brings the frame described by cpi->YuvInputData into the
+ *                  working buffer cpi->yuv1ptr. If its luma dimensions
+ *                  differ from cpi->InputConfig the frame is scaled with
+ *                  ScaleFrame; otherwise the Y, U and V planes are copied
+ *                  row by row so that the destination rows are packed
+ *                  (width bytes apart) regardless of the source stride.
+ *                  When cpi->PreProcFilterLevel is non-zero, tempFilter is
+ *                  then applied in place to the buffer.
+ *
+ *  SPECIAL NOTES : cpi->pb.ThisFrameRecon is passed to ScaleFrame as
+ *                  temporary workspace for the scaler.
+ *
+ ****************************************************************************/
+void CopyOrResize ( CP_INSTANCE *cpi, BOOL ResetPreproc )
+{
+    INT32 Row;
+    unsigned char *Dst;
+    unsigned char *Src;
+    YUV_BUFFER_CONFIG FilterCfg = cpi->InputConfig;    // Local copy used to set up tempFilter
+
+    if ( cpi->InputConfig.YWidth  == cpi->YuvInputData.YWidth &&
+         cpi->InputConfig.YHeight == cpi->YuvInputData.YHeight )
+    {
+        // Same dimensions: copy the three planes into the internal buffer.
+
+        // Y plane
+        Dst = cpi->yuv1ptr;
+        Src = (unsigned char *)cpi->YuvInputData.YBuffer;
+        Row = 0;
+        while ( Row < cpi->YuvInputData.YHeight )
+        {
+            memcpy ( Dst, Src, cpi->YuvInputData.YWidth );
+            Dst += cpi->YuvInputData.YWidth;
+            Src += cpi->YuvInputData.YStride;
+            Row++;
+        }
+
+        // U plane (starts directly after the Y plane)
+        Dst = &cpi->yuv1ptr[(cpi->YuvInputData.YHeight * cpi->YuvInputData.YWidth)];
+        Src = (unsigned char *)cpi->YuvInputData.UBuffer;
+        Row = 0;
+        while ( Row < cpi->YuvInputData.UVHeight )
+        {
+            memcpy ( Dst, Src, cpi->YuvInputData.UVWidth );
+            Dst += cpi->YuvInputData.UVWidth;
+            Src += cpi->YuvInputData.UVStride;
+            Row++;
+        }
+
+        // V plane (starts at 5/4 of the Y plane size)
+        Dst = &cpi->yuv1ptr[((cpi->YuvInputData.YHeight * cpi->YuvInputData.YWidth) * 5) / 4];
+        Src = (unsigned char *)cpi->YuvInputData.VBuffer;
+        Row = 0;
+        while ( Row < cpi->YuvInputData.UVHeight )
+        {
+            memcpy ( Dst, Src, cpi->YuvInputData.UVWidth );
+            Dst += cpi->YuvInputData.UVWidth;
+            Src += cpi->YuvInputData.UVStride;
+            Row++;
+        }
+    }
+    else
+    {
+        // Dimensions differ: scale the input into the internal buffer.
+        // 9 is passed when the input is exactly twice the target height, 11 otherwise.
+        UINT8 ScalerTmpHeight = ( cpi->InputConfig.YHeight*2 == cpi->YuvInputData.YHeight ) ? 9 : 11;
+
+        cpi->InputConfig.YBuffer = (char *) cpi->yuv1ptr;
+        cpi->InputConfig.UBuffer = (char *) &cpi->yuv1ptr[(cpi->pb.Configuration.VideoFrameHeight*cpi->pb.Configuration.VideoFrameWidth)];
+        cpi->InputConfig.VBuffer = (char *) &cpi->yuv1ptr[((cpi->pb.Configuration.VideoFrameHeight*cpi->pb.Configuration.VideoFrameWidth)*5)/4];
+
+        ScaleFrame ( &cpi->YuvInputData, &cpi->InputConfig, cpi->pb.ThisFrameRecon, ScalerTmpHeight,
+                     cpi->pb.Configuration.HScale, cpi->pb.Configuration.HRatio,
+                     cpi->pb.Configuration.VScale, cpi->pb.Configuration.VRatio,
+                     cpi->pb.Configuration.Interlaced );
+    }
+
+    // Everything below is pre-processing; nothing more to do when it is off.
+    if ( cpi->PreProcFilterLevel == 0 )
+        return;
+
+    // Keep a copy of the frame as it was before pre-processing
+#if defined FILE_PSNR
+    memcpy(cpi->yuv0ptr, cpi->yuv1ptr, (cpi->pb.YPlaneSize + (2 * cpi->pb.UVPlaneSize)));
+#endif
+
+#if defined PSNR_ON
+    memcpy(cpi->yuv0ptr, cpi->yuv1ptr, (cpi->pb.YPlaneSize + (2 * cpi->pb.UVPlaneSize)));
+#endif
+
+    // Restart the pre-processor frame counter if requested.
+    if ( ResetPreproc )
+        cpi->preproc.frame = 0;
+
+    if ( FilterCfg.YStride >= 0 )
+    {
+        // Positive stride: planes start at the beginning of their regions.
+        FilterCfg.YBuffer = cpi->yuv1ptr;
+        FilterCfg.UBuffer = &cpi->yuv1ptr[FilterCfg.YHeight * FilterCfg.YWidth];
+        FilterCfg.VBuffer = &cpi->yuv1ptr[FilterCfg.YHeight * FilterCfg.YWidth * 5 / 4];
+        tempFilter ( &cpi->preproc, FilterCfg.YBuffer, FilterCfg.YBuffer,
+                     FilterCfg.YHeight * FilterCfg.YWidth * 3 / 2, cpi->PreProcFilterLevel );
+    }
+    else
+    {
+        // Negative stride: plane pointers address the last row of each region,
+        // and the filter is handed YBuffer stepped by (YHeight - 1) strides.
+        FilterCfg.YBuffer = &cpi->yuv1ptr[(FilterCfg.YHeight - 1) * FilterCfg.YWidth];
+        FilterCfg.UBuffer = &cpi->yuv1ptr[FilterCfg.YHeight * FilterCfg.YWidth * 5 / 4 - FilterCfg.YWidth / 2];
+        FilterCfg.VBuffer = &cpi->yuv1ptr[FilterCfg.YHeight * FilterCfg.YWidth * 3 / 2 - FilterCfg.YWidth / 2];
+        tempFilter ( &cpi->preproc,
+                     FilterCfg.YBuffer + (FilterCfg.YHeight - 1) * FilterCfg.YStride ,
+                     FilterCfg.YBuffer + (FilterCfg.YHeight - 1) * FilterCfg.YStride ,
+                     FilterCfg.YHeight * FilterCfg.YWidth * 3 / 2 , cpi->PreProcFilterLevel);
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeFrameYuv
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * YUV_INPUT_BUFFER_CONFIG *YuvInputData : Pointer to input frame (YUV).
+ * unsigned char *OutPutPtr : Output buffer.
+ *
+ * OUTPUTS : unsigned int *is_key : Flag whether frame coded
+ * as intra-frame or not. Set to 1 when VP6_GetFrameType
+ * returns 0 and to 0 otherwise. NOT written on the two
+ * early-return paths described under RETURNS.
+ *
+ * RETURNS : UINT32: Number of bytes written to output buffer
+ * (cpi->ThisFrameSize >> 3).
+ * 0 is returned without encoding while cpi->FrameRateDropCount
+ * is non-zero (the frame is dropped and the count decremented).
+ * FALSE is returned, after calling EDeleteFrameInfo, if the
+ * frame size changed and InitPreProc fails.
+ *
+ * FUNCTION : Encodes the specified frame creating an output buffer
+ * containing the compressed bitstream for the frame.
+ * Sequence: record the input geometry, re-initialise the
+ * pre-processor if the frame size changed, select the
+ * prediction/loop filter settings from cpi->pb.VpProfile,
+ * run Pass2Control when cpi->pass == 2, compress the frame as
+ * first frame / requested key frame / ordinary frame, then
+ * update the running Q average, clip statistics and golden
+ * frame counters. Pass1Output is called when cpi->pass == 1.
+ *
+ * SPECIAL NOTES : The format of the input image is planar YUV 4:2:0.
+ * The descriptor *YuvInputData is copied into cpi->YuvInputData;
+ * the pixel data itself is not copied here.
+ * NOTE(review): in the non-SIMPLE_PROFILE branch UseLoopFilter
+ * and PredictionFilterMode are assigned twice; the later
+ * assignments (NO_LOOP_FILTER / BICUBIC_ONLY_PM) are the ones
+ * that take effect - confirm this is intended.
+ *
+ ****************************************************************************/
+UINT32 CCONV EncodeFrameYuv ( CP_INSTANCE *cpi, YUV_INPUT_BUFFER_CONFIG *YuvInputData, unsigned char *OutPutPtr, unsigned int *is_key )
+{
+ UINT8 iskey;
+ UINT32 ret_val;
+
+ if ( cpi->FrameRateDropCount )
+ {
+ --cpi->FrameRateDropCount;
+ return 0; // Frame dropped: nothing encoded, *is_key left untouched
+ }
+
+ cpi->FrameRateDropCount = cpi->FrameRateDropFrames; // Reload the drop countdown for the frames that follow
+ cpi->pb.Configuration.ExpandedFrameWidth = YuvInputData->YWidth;
+ cpi->pb.Configuration.ExpandedFrameHeight = YuvInputData->YHeight;
+ cpi->pb.OutputWidth = YuvInputData->YWidth;
+ cpi->pb.OutputHeight = YuvInputData->YHeight;
+
+ if ( cpi->PreProcFilterLevel )
+ {
+ int OldFrameSize = cpi->YuvInputData.YHeight * cpi->YuvInputData.YWidth * 3/2; // Size of the previously stored input frame
+ int FrameSize = YuvInputData->YHeight * YuvInputData->YWidth * 3/2; // Size of the frame supplied to this call
+
+ if ( OldFrameSize != FrameSize )
+ {
+ if ( !InitPreProc ( &cpi->preproc, FrameSize ) )
+ {
+ EDeleteFrameInfo ( cpi );
+ return FALSE; // Pre-processor re-initialisation failed; FALSE is returned through the UINT32 return type
+ }
+ }
+ }
+
+ // remember our input buffer (in case we want to do something to it later!)
+ memcpy ( &cpi->YuvInputData, YuvInputData, sizeof(YUV_INPUT_BUFFER_CONFIG) );
+
+ cpi->ThisFrameSize = 0; // Reset the frame size monitor variable
+
+ cpi->DataOutputBuffer = OutPutPtr;
+ cpi->pb.DataOutputInPtr = cpi->DataOutputBuffer;
+
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+
+ // Decide whether to allow selective bicubic filtered prediction
+ if ( cpi->pb.VpProfile == SIMPLE_PROFILE )
+ {
+ // NOTE: Use huffman only allowed if using multiple data streams
+ cpi->pb.MultiStream = TRUE;
+ cpi->pb.UseHuffman = TRUE;
+ cpi->pb.UseLoopFilter = NO_LOOP_FILTER;
+ cpi->pb.PredictionFilterMode = BILINEAR_ONLY_PM;
+ }
+ else
+ {
+ // NOTE: Use huffman only allowed if using multiple data streams
+ cpi->pb.MultiStream = FALSE;
+ cpi->pb.UseHuffman = FALSE;
+ cpi->pb.UseLoopFilter = LOOP_FILTER_BASIC; // NOTE(review): overwritten unconditionally later in this branch
+ cpi->pb.PredictionFilterMode = AUTO_SELECT_PM; // NOTE(review): overwritten unconditionally later in this branch
+
+ // Vp6.2 and later specific
+ if ( cpi->pb.Vp3VersionNo > 7 )
+ {
+ cpi->pb.PredictionFilterVarThresh = 31; // Default bicubic variance threshold
+ cpi->pb.PredictionFilterAlpha = cpi->BaselineAlpha; // Default Alpha Index for bicubic filter.
+ }
+ else
+ {
+ cpi->pb.PredictionFilterVarThresh = (2 << 5); // Variance threshold for using bicubic (range 0 to 32) << 5. (note however 0 = no threshold)
+ cpi->pb.PredictionFilterAlpha = 16; // Filter Alpha index 32 provides for backwards compatibility with VP61. NOTE(review): comment says 32 but the value assigned is 16 - confirm.
+ }
+
+ // Size of frame influences default limit on motion length for use of bicubic.
+ if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+ cpi->pb.PredictionFilterMvSizeThresh = 4; // Restrict bicubic to mvs of < +/- (1 << (X-1)) pels. 0 Indicates unrestricted.
+ else
+ cpi->pb.PredictionFilterMvSizeThresh = 3; // Restrict bicubic to mvs of < +/- (1 << (X-1)) pels. 0 Indicates unrestricted.
+
+ cpi->pb.UseLoopFilter = NO_LOOP_FILTER; // Replaces the LOOP_FILTER_BASIC value assigned above
+ cpi->pb.PredictionFilterMode = BICUBIC_ONLY_PM; // Replaces the AUTO_SELECT_PM value assigned above
+
+ }
+
+ // Variables used to track inter vs intra prediction error for mbs that use motion
+ cpi->MotionIntraErr = 0;
+ cpi->MotionInterErr = 0;
+
+ // Set default KF boost
+ cpi->KFBoost = 4;
+
+ // 2nd pass datarate control
+ if(cpi->pass == 2)
+ {
+ Pass2Control(cpi);
+ }
+
+ // Special case for first frame
+ if ( cpi->ThisIsFirstFrame )
+ {
+ cpi->pb.RefreshGoldenFrame = TRUE; // KF is also GF update
+
+ // Stats and other first frame initialisation
+ ClipBytes = 0;
+ cpi->NiAvQi = cpi->Configuration.WorstQuality;
+
+ // Now code the first frame
+ CompressFirstFrame ( cpi );
+ cpi->ThisIsFirstFrame = FALSE;
+ cpi->ThisIsKeyFrame = FALSE; // Any pending key frame request is cleared by the first frame
+ }
+ // A key frame explicitly requested by the calling application
+ else if ( cpi->ThisIsKeyFrame )
+ {
+ cpi->pb.RefreshGoldenFrame = TRUE; // KF is also GF update
+ CompressKeyFrame ( cpi );
+ cpi->ThisIsKeyFrame = FALSE;
+ }
+ else
+ {
+ /* Compress the frame. */
+ CompressFrame ( cpi, (unsigned int) cpi->CurrentFrame );
+ }
+
+
+ // Keep a record from which we can calculate the average Q excluding GF updates and key frames
+ if ( (cpi->pb.FrameType != BASE_FRAME) && !cpi->pb.RefreshGoldenFrame )
+ {
+ cpi->NiFrames++;
+
+ // Calculate the average Q for normal inter frames (not key or GFU frames)
+ // This is used as a basis for setting active worst quality.
+ if ( cpi->NiFrames > 150 )
+ {
+ cpi->NiTotQi += cpi->pb.quantizer->FrameQIndex;
+ cpi->NiAvQi = (cpi->NiTotQi/cpi->NiFrames);
+ }
+ // Early in the clip ... average the current frame Q value with the default
+ // entered by the user as a dampening measure (often there are very easy intro credits).
+ else
+ {
+ cpi->NiTotQi += ((cpi->Configuration.WorstQuality + cpi->pb.quantizer->FrameQIndex + 1) / 2);
+ cpi->NiAvQi = (cpi->NiTotQi/cpi->NiFrames);
+ }
+
+ // If the average is higher than what was used in the last frame
+ // (after going through the recode loop to keep the frame size within range)
+ // then use the last frame value + 1.
+ // The +1 is designed to stop Q and hence the data rate, from progressively
+ // falling away during difficult sections.
+ if ( cpi->pb.quantizer->FrameQIndex < cpi->NiAvQi )
+ cpi->NiAvQi = cpi->pb.quantizer->FrameQIndex + 1;
+ }
+
+ // Clip size stats
+ ClipBytes += (cpi->ThisFrameSize >> 3); // Same >> 3 scaling as the return value (presumably bits to bytes)
+
+ // Update stats variables.
+ cpi->LastFrameSize = (UINT32)cpi->ThisFrameSize;
+ cpi->CurrentFrame++;
+
+ // If we have had a GF update then reset the counter till next one due.
+ if ( cpi->pb.RefreshGoldenFrame )
+ {
+ cpi->FramesTillGfUpdateDue = cpi->GfUpdateInterval;
+ cpi->LastGfOrKFrameQ = cpi->pb.quantizer->FrameQIndex;
+ cpi->pb.RefreshGoldenFrame = FALSE;
+ }
+
+ // Decrement count till next GF update due
+ if ( cpi->FramesTillGfUpdateDue > 0 )
+ cpi->FramesTillGfUpdateDue--;
+
+ // return whether or not we are a key frame
+ iskey = VP6_GetFrameType ( &cpi->pb );
+ if ( iskey == 0 ) // A frame type of 0 is reported to the caller as a key frame
+ *is_key = 1;
+ else
+ *is_key = 0;
+
+#if defined(_MSC_VER)
+ ClearSysState();
+#endif
+ if(cpi->pass==1)
+ {
+ Pass1Output(cpi);
+ }
+#if defined(_MSC_VER)
+ if ( cpi->pb.testMode )
+ vp6_appendframe ( &cpi->pb );
+#endif
+ cpi->GfRecoveryFrame = FALSE;
+ cpi->TotalBitsLeftInClip -= cpi->ThisFrameSize ;
+ // Set the output bytes buffered count and reset the buffer input pointer.
+ cpi->pb.DataOutputInPtr = cpi->DataOutputBuffer;
+ ret_val = (cpi->ThisFrameSize >> 3);
+
+ cpi->LastInterError = cpi->InterError;
+ cpi->LastIntraError = cpi->IntraError;
+
+//TEMP STATS
+// DEBUG Code
+if ( FALSE ) // Disabled: the statistics block below never executes
+{
+ FILE *StatsFilePtr;
+
+ // Open stats file and write out data
+ StatsFilePtr = fopen( "buffers.stt", "a" );
+ if ( StatsFilePtr )
+ {
+ fprintf( StatsFilePtr, "%12ld ", (UINT32)cpi->CurrentFrame );
+ fprintf( StatsFilePtr, "%12ld ", (cpi->BufferLevel * 100)/cpi->OptimalBufferLevel );
+ fprintf( StatsFilePtr, "%12ld ", (100 * cpi->BytesOffTarget / (cpi->TotalByteCount * 8)));
+ fprintf( StatsFilePtr, "%12ld ", cpi->NiAvQi );
+ fprintf( StatsFilePtr, "%12ld ", cpi->Configuration.ActiveWorstQuality );
+ fprintf( StatsFilePtr, "%12ld\n", ((cpi->ThisFrameSize * 100)/cpi->ThisFrameTarget) );
+ fclose ( StatsFilePtr );
+ }
+}
+
+#if defined MEASURE_SECTION_COSTS
+ {
+ UINT32 i;
+
+ // Temps Stats for section data rate analysis
+ for ( i = 0; i < 10; i++ )
+ {
+ ClipSectionBits[i] += (Sectionbits[i] / 256);
+ Sectionbits[i] = 0;
+ }
+ }
+#endif
+
+ return ret_val;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : StopEncoder
+ *
+ *  INPUTS        : CP_INSTANCE **cpi : Address of the caller's pointer to the
+ *                                      encoder instance. *cpi may be NULL, in
+ *                                      which case nothing is torn down. cpi
+ *                                      itself must not be NULL.
+ *
+ *  OUTPUTS       : CP_INSTANCE **cpi : Passed on to DeleteCPInstance once the
+ *                                      encoder's sub-objects have been
+ *                                      released.
+ *
+ *  RETURNS       : BOOL: Always TRUE.
+ *
+ *  FUNCTION      : Stops the encoder and de-allocates memory used for
+ *                  encoder data structures. If first pass statistics were
+ *                  gathered (fpmss.count non-zero) they are averaged and
+ *                  written to the open stats stream; the fs and ss streams
+ *                  are then closed if open.
+ *
+ *  SPECIAL NOTES : Also includes lots of debug/test code for outputting
+ *                  timing and run statistics to file. Those sections are
+ *                  compiled only under TIMING, MEASURE_SECTION_COSTS,
+ *                  PSNR_ON or FILE_PSNR, or are disabled via "#if 0" and
+ *                  "if(0)".
+ *
+ ****************************************************************************/
+BOOL CCONV StopEncoder ( CP_INSTANCE **cpi )
+{
+#ifdef TIMING
+    ITime2 = timeGetTime();
+    ITotalTime = ITime2-ITime1;
+    {
+        FILE *fp = fopen( "d:\\Times.txt", "at" );
+
+        // The log file may not be creatable (no such drive, read-only media);
+        // skip the report in that case rather than write through a NULL stream.
+        if ( fp )
+        {
+            fprintf ( fp, "StopEncoder: %d\n", ITime2 );
+            fprintf ( fp, "The total time spent is %d\n", ITotalTime );
+            fprintf ( fp, "------------------------------------\n" );
+            fclose ( fp );
+        }
+    }
+#endif
+
+#if defined MEASURE_SECTION_COSTS
+    // DEBUG Code
+    if ( TRUE && *cpi )
+    {
+        UINT32 i;
+        UINT32 Sum = 0;
+        FILE *StatsFilePtr;
+
+        for ( i = 0; i < 6; i++ )
+        {
+            Sum += ClipSectionBits[i];
+        }
+
+        if ( Sum )
+        {
+            // Open stats file and write out each section as a rounded percentage of the total
+            StatsFilePtr = fopen( "Section_bits.stt", "a" );
+            if ( StatsFilePtr )
+            {
+                fprintf( StatsFilePtr, "Header %4ld ", ((ClipSectionBits[0]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "Mode %4ld ", ((ClipSectionBits[1]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "Mv %4ld ", ((ClipSectionBits[2]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "Context %4ld ", ((ClipSectionBits[3]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "DC %4ld ", ((ClipSectionBits[4]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "AC %4ld ", ((ClipSectionBits[5]+(Sum/200)) * 100)/Sum );
+                fprintf( StatsFilePtr, "\n" );
+                fclose ( StatsFilePtr );
+            }
+        }
+    }
+#endif
+
+#if defined PSNR_ON
+    if ( *cpi )
+    {
+        // TEST Code
+        if ( (*cpi)->CurrentFrame && !(*cpi)->AllowSpatialResampling )
+        {
+            FILE *StatsFilePtr;
+            UINT32 FrameCount = ((UINT32)(*cpi)->CurrentFrame) -1;
+            double FrameSize = 1.5 * (*cpi)->pb.YPlaneSize;
+            double OverallPSNR = 10.0 * log10((255.0 * 255.0 * FrameSize * (*cpi)->CurrentFrame) / (*cpi)->TotalSqError);
+
+            // Open stats file and write out data
+            StatsFilePtr = fopen( "psnr.stt", "a" );
+            if ( StatsFilePtr )
+            {
+                // Fudge to deal with 29.97 fps material
+                if ( (*cpi)->Configuration.OutputFrameRate == 30 )
+                {
+                    fprintf( StatsFilePtr, "%6.3f %10.2f %6.3f\n",
+                             (*cpi)->TotPsnr / (double)(FrameCount),
+                             (((double)ClipBytes/1024) * 8 * 29.97) / ((UINT32)(*cpi)->CurrentFrame - 1) ,
+                             OverallPSNR);
+                }
+                else
+                {
+                    fprintf( StatsFilePtr, "%6.3f %10.2f %6.3f\n",
+                             (*cpi)->TotPsnr / (double)(FrameCount),
+                             (((double)ClipBytes/1024) * 8 * (*cpi)->Configuration.OutputFrameRate) / ((UINT32)(*cpi)->CurrentFrame - 1),
+                             OverallPSNR);
+                }
+
+                fclose( StatsFilePtr );
+            }
+
+        }
+    }
+#endif
+
+
+#if 0
+    // DEBUG Code
+    if ( FALSE )
+    {
+        UINT32 i;
+        FILE *StatsFilePtr;
+
+        // Open stats file and write out data
+        StatsFilePtr = fopen( "tmp.stt", "a" );
+        if ( StatsFilePtr )
+        {
+            fprintf( StatsFilePtr, "%12ld %12ld\n", BcCount, TotTokens );
+            fclose ( StatsFilePtr );
+        }
+
+        StatsFilePtr = fopen( "tmp2.stt", "a" );
+        if ( StatsFilePtr && NzCount[1][0] )
+        {
+            memcpy ( (*cpi)->FrameNzCount, NzCount, sizeof((*cpi)->FrameNzCount) );
+            PredictScanOrder( (*cpi) );
+
+            for ( i=0; i<64; i++ )
+            {
+                fprintf ( StatsFilePtr, "%2ld,", (*cpi)->NewScanOrderBands[i] );
+                if ( (i%8) == 7 )
+                    fprintf ( StatsFilePtr, "\n" );
+            }
+            fprintf ( StatsFilePtr, "\n" );
+            fclose ( StatsFilePtr );
+        }
+
+        if ( scanupdates[1][0] > 0 )
+        {
+            FILE *StatsFilePtr;
+            UINT32 i, Sum, Sum2, Prob;
+
+            StatsFilePtr = fopen( "scanupdates.stt", "a" );
+            if ( StatsFilePtr )
+            {
+                for ( i=0; i<64; i++ )
+                {
+                    Sum = scanupdates[i][0] + scanupdates[i][1];
+                    Sum2 = scanupdates[i][0];
+
+                    if ( Sum > 0 )
+                    {
+                        Prob = (Sum2 * 255)/Sum;
+                        if ( Prob == 0 )
+                            Prob = 1;
+                        fprintf( StatsFilePtr, "%3ld, ", Prob );
+                    }
+                    else
+                        fprintf( StatsFilePtr, "%3ld, ", 255 );
+
+                    if ( (i % 8) == 7 )
+                        fprintf( StatsFilePtr, "\n");
+                }
+                fprintf ( StatsFilePtr, "\n" );
+                fclose ( StatsFilePtr );
+            }
+        }
+    }
+#endif
+
+    if ( *cpi )
+    {
+#if defined FILE_PSNR
+        // TEST Code
+        if ( (*cpi)->CurrentFrame && !(*cpi)->AllowSpatialResampling )
+        {
+            FILE *StatsFilePtr;
+            UINT32 FrameCount = ((UINT32)(*cpi)->CurrentFrame) -1;
+            double PSNR = (*cpi)->TotPsnr / (double)(FrameCount);
+            double KBS = ((double)ClipBytes * 8 * (*cpi)->Configuration.OutputFrameRate ) / ((double) FrameCount);
+            double LGKBS = log10(KBS);
+            // cpi is a pointer to the instance pointer, so members must be
+            // reached through (*cpi); "cpi->pb" does not compile.
+            double FrameSize = 1.5 * (*cpi)->pb.YPlaneSize;
+            double OverallPSNR = 10.0 * log10((255.0 * 255.0 * FrameSize * (*cpi)->CurrentFrame) / (double)Total);
+
+            // Open stats file and write out data
+            StatsFilePtr = fopen( "psnr.stt", "a" );
+            if ( StatsFilePtr )
+            {
+
+                // Fudge to deal with 29.97 fps material
+                if ( (*cpi)->Configuration.OutputFrameRate == 30 )
+                {
+                    fprintf( StatsFilePtr, "%6.3f %10.2f %10.6f\n",
+                             PSNR,
+                             (((double)ClipBytes/1024) * 8 * 29.97) / (FrameCount),
+
+                             PSNR/
+                             log10((((double)ClipBytes/1024) * 8 * 29.97) / (FrameCount))
+                             );
+                }
+                else
+                {
+                    fprintf( StatsFilePtr, "%6.3f %10.2f %10.6f\n",
+                             PSNR,
+                             KBS/1024,
+                             PSNR / LGKBS );
+                }
+
+                fclose( StatsFilePtr );
+            }
+
+        }
+#endif
+
+
+        // Average the accumulated first pass statistics and, if any were
+        // gathered, write the summary. The stream is checked first because
+        // it is only tested for NULL at the close below, i.e. it may never
+        // have been opened.
+        AvgStats ( &(*cpi)->fpmss);
+        if ( (*cpi)->fpmss.count && (*cpi)->ss )
+            OutputStats((*cpi)->ss,&(*cpi)->fpmss);
+
+        if((*cpi)->fs)
+            fclose((*cpi)->fs);
+
+        if((*cpi)->ss)
+            fclose((*cpi)->ss);
+
+
+        // Release decoder-side and encoder-side allocations, then the instance itself.
+        VP6_DeleteFragmentInfo ( &(*cpi)->pb );
+        VP6_DeleteFrameInfo ( &(*cpi)->pb );
+        EDeleteFragmentInfo ( (*cpi) );
+        EDeleteFrameInfo ( (*cpi) );
+        VP6_DeleteQuantizer ( &(*cpi)->pb.quantizer );
+        DeletePostProcInstance ( &(*cpi)->pb.postproc );
+        DeleteCPInstance ( cpi );
+    }
+
+    // test output code for filter taps (disabled)
+    if(0)
+    {
+        UINT32 i,j,k;
+        FILE *StatsFilePtr;
+        double dval;
+        double aval = -0.05;
+        int y1,y2,y3,y4;
+        double d2, d3;
+        int sum;
+
+        // Open stats file and write out data
+        StatsFilePtr = fopen( "filters.stt", "a" );
+        if ( StatsFilePtr )
+        {
+            fprintf( StatsFilePtr, " **** \n" );
+            for ( i = 0; i < 32; i++ )
+            {
+
+                fprintf( StatsFilePtr, " {\n" );
+                dval = 0.0;
+                for ( j = 0; j < 8; j++ )
+                {
+                    d2 = dval * dval;
+                    d3 = dval * dval * dval;
+
+                    // Four taps for offset dval and parameter aval, scaled by 128 and rounded
+                    y1 = (int)floor(0.5 + ( ((aval*dval) - (2.0*aval*d2) + (aval*d3)) * 128));
+                    y2 = (int)floor(0.5 + ( (1.0 - ((aval+3.0)*d2) + ((aval+2.0)*d3)) * 128));
+                    y3 = (int)floor(0.5 + ( (-(aval*dval) + ((2.0*aval+3.0)*d2) - ((aval+2.0)*d3)) * 128));
+                    y4 = (int)floor(0.5 + ( ( (aval*d2) - (aval*d3)) * 128));
+
+                    // Nudge the two centre taps so the set sums to 128 after rounding
+                    sum = y1 + y2 + y3 + y4;
+                    if ( sum < 128 )
+                    {
+                        if ( sum < 127 )
+                        {
+                            y2++;
+                            y3++;
+                        }
+                        else
+                        {
+                            if ( y2 >= y3 )
+                                y2++;
+                            else
+                                y3++;
+                        }
+                    }
+                    else if ( sum > 128 )
+                    {
+                        if ( sum > 129 )
+                        {
+                            y2--;
+                            y3--;
+                        }
+                        else
+                        {
+                            if ( y2 >= y3 )
+                                y2--;
+                            else
+                                y3--;
+                        }
+                    }
+                    fprintf( StatsFilePtr, " { ");
+                    for(k=0;k<8;k++)
+                        fprintf(StatsFilePtr,"%3ld,",y1);
+                    fprintf( StatsFilePtr, " ");
+                    for(k=0;k<8;k++)
+                        fprintf(StatsFilePtr,"%3ld,",y2);
+                    fprintf( StatsFilePtr, " ");
+                    for(k=0;k<8;k++)
+                        fprintf(StatsFilePtr,"%3ld,",y3);
+                    fprintf( StatsFilePtr, " ");
+                    for(k=0;k<8;k++)
+                        fprintf(StatsFilePtr,"%3ld,",y4);
+                    fprintf( StatsFilePtr, " }");
+
+
+                    if (y1 + y2 + y3 + y4 != 128)
+                    {
+                        fprintf( StatsFilePtr, " **** %ld %ld", (y1 + y2 + y3 + y4), sum );
+                    }
+
+                    fprintf( StatsFilePtr, "\n" );
+
+                    dval += 0.125;
+                }
+                aval -= 0.05;
+                fprintf( StatsFilePtr, " },\n" );
+                fprintf( StatsFilePtr, "\n" );
+            }
+
+
+            fprintf( StatsFilePtr, "%ld\n", i );
+            fclose( StatsFilePtr );
+        }
+    }
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VPGetState
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  void *ret        : Buffer that receives a COMPRESSOR_STATE
+ *                                     snapshot, or NULL to query the size only.
+ *
+ *  OUTPUTS       : *ret is filled in when ret is not NULL.
+ *
+ *  RETURNS       : UINT32: sizeof(COMPRESSOR_STATE), whether or not a
+ *                  snapshot was written.
+ *
+ *  FUNCTION      : Copies the key frame history arrays and the rate control
+ *                  counters from the encoder instance into the supplied
+ *                  COMPRESSOR_STATE object.
+ *
+ *  SPECIAL NOTES : The buffer supplied by the caller (ret) should
+ *                  be large enough to hold a COMPRESSOR_STATE object.
+ *
+ ****************************************************************************/
+UINT32 CCONV VPGetState ( CP_INSTANCE *cpi, void *ret )
+{
+    COMPRESSOR_STATE *State = (COMPRESSOR_STATE *) ret;
+    INT32 Slot;
+
+    // A NULL buffer means the caller only wants to know the size.
+    if ( State )
+    {
+        // Key frame history
+        Slot = 0;
+        while ( Slot < KEY_FRAME_CONTEXT )
+        {
+            State->PriorKeyFrameSize[Slot]     = cpi->PriorKeyFrameSize[Slot];
+            State->PriorKeyFrameDistance[Slot] = cpi->PriorKeyFrameDistance[Slot];
+            Slot++;
+        }
+
+        // Frame counters and sizes
+        State->CurrentFrame        = cpi->CurrentFrame;
+        State->LastFrameSize       = cpi->LastFrameSize;
+        State->DropCount           = cpi->DropCount;
+
+        // Key frame bookkeeping
+        State->KeyFrameCount       = cpi->KeyFrameCount;
+        State->TotKeyFrameBytes    = cpi->TotKeyFrameBytes;
+        State->LastKeyFrameSize    = cpi->LastKeyFrameSize;
+        State->LastKeyFrame        = cpi->LastKeyFrame;
+
+        // Rate control
+        State->TotalByteCount      = cpi->TotalByteCount;
+        State->ActiveMaxQ          = cpi->Configuration.ActiveWorstQuality;
+        State->BpbCorrectionFactor = cpi->BpbCorrectionFactor;
+    }
+
+    return sizeof ( COMPRESSOR_STATE );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VPSetState
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  void *arg        : Pointer to COMPRESSOR_STATE object
+ *                                     representing encoder state. If NULL
+ *                                     the call is a no-op.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets the compressor state to that specified by the
+ *                  supplied COMPRESSOR_STATE object: the key frame history
+ *                  arrays, frame/key frame counters, total byte count,
+ *                  BpbCorrectionFactor and ActiveWorstQuality.
+ *
+ *  SPECIAL NOTES : arg should point to the COMPRESSOR_STATE object that
+ *                  contains the required state of the compressor, i.e. the
+ *                  same fields that VPGetState writes.
+ *
+ ****************************************************************************/
+void CCONV VPSetState ( CP_INSTANCE *cpi, void *arg )
+{
+    INT32 i;
+    COMPRESSOR_STATE *cs = (COMPRESSOR_STATE *) arg;
+
+    // VPGetState accepts a NULL buffer; accept one here too instead of
+    // dereferencing it. With nothing to restore the encoder is left untouched.
+    if ( !cs )
+        return;
+
+    for ( i=0; i<KEY_FRAME_CONTEXT; i++ )
+    {
+        cpi->PriorKeyFrameSize[i] = cs->PriorKeyFrameSize[i];
+        cpi->PriorKeyFrameDistance[i] = cs->PriorKeyFrameDistance[i];
+    }
+
+    cpi->CurrentFrame = cs->CurrentFrame;
+    cpi->LastFrameSize = cs->LastFrameSize;
+
+    cpi->DropCount = cs->DropCount;
+    cpi->KeyFrameCount = cs->KeyFrameCount;
+    cpi->TotKeyFrameBytes = cs->TotKeyFrameBytes;
+    cpi->LastKeyFrameSize = cs->LastKeyFrameSize;
+    cpi->LastKeyFrame = cs->LastKeyFrame;
+    cpi->TotalByteCount = cs->TotalByteCount;
+    cpi->BpbCorrectionFactor = cs->BpbCorrectionFactor;
+    cpi->Configuration.ActiveWorstQuality = cs->ActiveMaxQ;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VPGetPB
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : int: Pointer to the compressor's decoder object (cast to int)
+ *
+ * FUNCTION : Returns pointer to the compressor's decoder object as
+ * an int. The object is the pb member embedded in *cpi, so
+ * the value is only meaningful while the instance exists.
+ *
+ * SPECIAL NOTES : The address is converted with a plain (int) cast. On any
+ * target where int is narrower than a pointer the returned
+ * value cannot be converted back to a valid pointer.
+ * NOTE(review): confirm callers only use this on targets
+ * where int and pointers have the same width.
+ *
+ ****************************************************************************/
+int CCONV VPGetPB ( CP_INSTANCE *cpi )
+{
+ return (int) &cpi->pb;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def
new file mode 100644
index 00000000..dc006b38
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def
@@ -0,0 +1,20 @@
+LIBRARY vp31e
+
+EXPORTS
+ StartEncoder
+ ChangeCompressorSetting
+ ChangeEncoderConfig
+ EncodeFrame
+ EncodeFrameYuv
+ StopEncoder
+ StartDecoder
+ SetPbParam
+ GetYUVConfig
+ DecodeFrame
+ DecodeFrameToYUV
+ DrawFrame
+ StopDecoder
+ wilkDXrefCreate
+ wilkDXrefDestroy
+ wilkDXrefKeyFrame
+ wilkDXrefInterFrame
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c
new file mode 100644
index 00000000..ec98776d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c
@@ -0,0 +1,1967 @@
+/****************************************************************************
+*
+* Module Title : COptFunctions.c
+*
+* Description : Encoder system-dependent functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "math.h"
+/****************************************************************************
+* Macros
+****************************************************************************/
+#pragma warning(disable:4799)
+
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT 7
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+static __declspec(align(16)) short rd[] = { 64, 64, 64, 64, 64, 64, 64, 64 };
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern INT16 BilinearFilters_mmx[8][16];
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetSAD
+ *
+ * INPUTS : UINT8 *NewDataPtr : Pointer to first input data array.
+ * INT32 PixelsPerLine : Length of line for NewDataPtr.
+ * UINT8 *RefDataPtr : Pointer to second input data array.
+ * INT32 RefPixelsPerLine : Length of line for RefDataPtr.
+ * INT32 ErrorSoFar : Error accumulated before this call.
+ * INT32 BestSoFar : (NOT USED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: SAD for the two blocks, added to ErrorSoFar.
+ *
+ * FUNCTION : Calculates the sum of the absolute differences for
+ * the two blocks.
+ * The code is fully unrolled over 8 rows; each row reads 8
+ * bytes from each pointer, so the blocks compared are 8x8.
+ * PixelsPerLine and RefPixelsPerLine are added to the
+ * respective pointers between rows.
+ * Per row: |A-B| is formed per byte as the OR of the two
+ * unsigned saturating subtractions (A-B) and (B-A), then the
+ * low and high four bytes are widened to words and added
+ * into mm7.
+ *
+ * SPECIAL NOTES : Each of the four 16-bit lanes of mm7 receives 16 byte
+ * differences (two per row), at most 16 * 255 = 4080, so the
+ * INT16 accumulator DiffAcc cannot overflow.
+ * The emms instruction is commented out, so the MMX state is
+ * NOT cleared on return. NOTE(review): presumably callers
+ * clear it before using floating point - confirm.
+ * The asm uses eax, ebx, ecx, edx, mm0-mm2, mm6 and mm7.
+ *
+ ****************************************************************************/
+INT32 MmxGetSAD
+(
+ UINT8 *NewDataPtr,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr,
+ INT32 RefPixelsPerLine,
+ INT32 ErrorSoFar,
+ INT32 BestSoFar
+)
+{
+ INT32 DiffVal = ErrorSoFar; // Result starts from the error carried in by the caller
+ INT16 DiffAcc[4] = { 0, 0, 0, 0}; // MMX accumulator.
+
+ // MMX code for SAD.
+__asm
+ {
+ pxor mm6, mm6 ; Blank mmx6
+ pxor mm7, mm7 ; Blank mmx7
+
+ mov eax,dword ptr [NewDataPtr] ; Load base addresses
+ mov ebx,dword ptr [RefDataPtr]
+ mov ecx,dword ptr [PixelsPerLine]
+ mov edx,dword ptr [RefPixelsPerLine]
+
+ // Row 1
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 2
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 3
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 4
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 5
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 6
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 7
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,ecx ; Inc pointer into the new data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,edx ; Inc pointer into ref data
+
+ // Row 8
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ paddw mm7, mm1 ; accumulate difference...
+
+ movq DWORD PTR [DiffAcc], mm7 ; copy back accumulated results into normal memory
+// emms ; Clear the MMX state.
+ }
+
+ // Accumulate the 4 resulting word values.
+ DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+
+ return DiffVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetHalfPixelSAD
+ *
+ * INPUTS : UINT8 *SrcData : Pointer to first input data array.
+ * INT32 PixelsPerLine : Length of line for SrcData (added to the
+ * source pointer after each row).
+ * UINT8 *RefDataPtr1 : Pointer to first reference data array.
+ * UINT8 *RefDataPtr2 : Pointer to second reference data array.
+ * INT32 RefPixelsPerLine : Length of line for RefDataPtr1/2.
+ * INT32 ErrorSoFar : Error accumulated before this call.
+ * INT32 BestSoFar : Only forwarded to MmxGetSAD when the two
+ * reference pointers are equal; not used
+ * otherwise.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: ErrorSoFar plus the SAD at 1/2 pixel accuracy
+ * (when the reference pointers are equal: ErrorSoFar
+ * plus the value returned by MmxGetSAD).
+ *
+ * FUNCTION : Calculates the sum of the absolute differences against
+ * half pixel interpolated references.
+ * Eight rows of eight bytes are processed. For each byte the
+ * two references are averaged as (ref1 + ref2) >> 1 (no
+ * rounding term is added) and the absolute difference to the
+ * source byte is accumulated.
+ *
+ * SPECIAL NOTES : Register use in the inline assembly: eax = source pointer,
+ * ebx = ref1 pointer, ecx = ref2 pointer, edx = source line
+ * length, esi = reference line length, mm6 = zero (used for
+ * unpacking), mm7 = four 16-bit running sums.
+ * No emms instruction is executed in this routine.
+ * NOTE(review): in the equal-pointer case ErrorSoFar is both
+ * the initial value of DiffVal and an argument to MmxGetSAD;
+ * confirm against MmxGetSAD that it is not counted twice.
+ *
+ ****************************************************************************/
+INT32 MmxGetHalfPixelSAD
+(
+ UINT8 *SrcData,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr1,
+ UINT8 *RefDataPtr2,
+ INT32 RefPixelsPerLine,
+ INT32 ErrorSoFar,
+ INT32 BestSoFar
+)
+{
+ INT32 DiffVal = ErrorSoFar;
+ INT32 RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
+ INT16 DiffAcc[4] = { 0, 0, 0, 0 }; // MMX accumulator: receives the four 16-bit lane sums stored from mm7.
+
+ if ( RefOffset == 0 )
+ {
+ /* Simple case as for non 0.5 pixel: both references are the same block.
+  * NOTE(review): DiffVal already holds ErrorSoFar and ErrorSoFar is passed to
+  * MmxGetSAD again; if MmxGetSAD folds ErrorSoFar into its return value it is
+  * counted twice here -- confirm against MmxGetSAD. */
+ DiffVal += MmxGetSAD ( SrcData, PixelsPerLine, RefDataPtr1, RefPixelsPerLine, ErrorSoFar, BestSoFar );
+ }
+ else
+ {
+__asm
+ // MMX code for SAD against the per-byte average of the two references.
+ {
+ pxor mm6, mm6 ; Blank mmx6
+ pxor mm7, mm7 ; Blank mmx7
+
+ mov eax,dword ptr [SrcData] ; Load base addresses and line increment
+ mov ebx,dword ptr [RefDataPtr1]
+ mov ecx,dword ptr [RefDataPtr2]
+ mov edx,dword ptr [PixelsPerLine]
+ mov esi,dword ptr [RefPixelsPerLine]
+
+ // Row 1: build (ref1 + ref2) >> 1 per byte, then add |src - average| per byte into the word lanes of mm7
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 2 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 3 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 4 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 5 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 6 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 7 (same instruction sequence as row 1)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ add eax,edx ; Inc pointer into the src data
+ paddw mm7, mm1 ; accumulate difference...
+ add ebx,esi ; Inc pointer into ref1
+ add ecx,esi ; Inc pointer into ref2
+
+ // Row 8 (same sequence as row 1, without the trailing pointer increments)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ paddw mm1, mm2 ; Add word values together.
+ punpckhbw mm4, mm6
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ paddw mm3, mm4 ; Add word values together.
+ movq mm0, [eax] ; Copy eight of src data to mm0
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+ movq mm2, mm0 ; Take copy of MM0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data for SAD
+ psubusb mm0, mm1 ; A-B to MM0
+ psubusb mm1, mm2 ; B-A to MM1
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+ punpcklbw mm0, mm6 ; unpack to higher precision for accumulation
+ paddw mm7, mm0 ; accumulate difference...
+ punpckhbw mm1, mm6 ; unpack high four bytes to higher precision
+ paddw mm7, mm1 ; accumulate difference...
+
+ movq DWORD PTR [DiffAcc], mm7 ; copy back accumulated results into normal memory
+ }
+
+ // Accumulate the 4 word values in DiffAcc (the four 16-bit partial sums) into the running total
+ DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+ }
+ return DiffVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetInterErr
+ *
+ * INPUTS : UINT8 *NewDataPtr : Pointer to first input data array.
+ * INT32 PixelsPerLine : Length of line for NewDataPtr.
+ * UINT8 *RefDataPtr1 : Pointer to first reference data array.
+ * UINT8 *RefDataPtr2 : Pointer to second reference data array.
+ * INT32 RefPixelsPerLine : Length of line for RefDataPtr1/2.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: (XXSum << 6) - XSum*XSum, where XSum is the sum
+ * and XXSum the sum of squares of the 64 per-byte
+ * differences. This equals the population variance of
+ * the differences multiplied by 64*64.
+ *
+ * FUNCTION : Calculates a difference error score for two blocks.
+ * Eight rows of eight bytes are processed. When RefDataPtr1
+ * equals RefDataPtr2 the difference is new - ref1; otherwise
+ * the reference is the per-byte average (ref1 + ref2) >> 1
+ * (no rounding term is added) and the difference is
+ * new - average.
+ *
+ * SPECIAL NOTES : Register use in the inline assembly: mm5 = four 16-bit
+ * sums of differences (XSum), mm6 = zero (used for
+ * unpacking), mm7 = two 32-bit sums of squared differences
+ * (XXSum). The assembly comments on the three pxor lines
+ * name the wrong registers.
+ * MmxXSum and MmxXXSum are stored repeatedly while the rows
+ * are processed; each store overwrites the previous one and
+ * only the values written by the last pair of stores are
+ * read by the C code.
+ * No emms instruction is executed in this routine.
+ *
+ ****************************************************************************/
+UINT32 MmxGetInterErr
+(
+ UINT8 *NewDataPtr,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr1,
+ UINT8 *RefDataPtr2,
+ INT32 RefPixelsPerLine
+)
+{
+ UINT32 XSum = 0;
+ UINT32 XXSum = 0;
+ INT16 MmxXSum[4] = { 0, 0, 0, 0 }; // XSum accumulators (four 16-bit lanes stored from mm5)
+ INT32 MmxXXSum[2] = { 0, 0 }; // XXSum accumulators (two 32-bit lanes stored from mm7)
+
+ INT32 AbsRefOffset = abs( (int)(RefDataPtr1 - RefDataPtr2) );
+
+ // Mode of interpolation chosen based on the offset of the second reference pointer (only zero versus non-zero is tested)
+ if ( AbsRefOffset == 0 )
+ {
+ __asm
+ {
+ pxor mm5, mm5 ; Blank mmx6
+ pxor mm6, mm6 ; Blank mmx7
+ pxor mm7, mm7 ; Blank mmx7
+
+ mov eax,dword ptr [NewDataPtr] ; Load base addresses
+ mov ebx,dword ptr [RefDataPtr1]
+ mov ecx,dword ptr [PixelsPerLine]
+ mov edx,dword ptr [RefPixelsPerLine]
+
+ // Row 1: d = new - ref per byte; mm5 += d (word lanes), mm7 += d*d (dword lanes); the next reference row is preloaded into mm1
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 2 (reference bytes are already in mm1 from the load at the end of the previous row)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 3 (same instruction sequence as row 2)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 4 (same instruction sequence as row 2)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 5 (same instruction sequence as row 2)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 6 (same instruction sequence as row 2)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 7 (same instruction sequence as row 2)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 8 (same sequence as row 2, but no further reference row is loaded afterwards)
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ punpcklbw mm1, mm6
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ }
+
+ // Now accumulate the final results: add up the lanes written by the last pair of stores.
+ XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+ XXSum = MmxXXSum[0] + MmxXXSum[1];
+ }
+ // Simple half pixel reference data: the two references differ, so their per-byte average is used as the reference
+ else
+ {
+__asm
+ {
+ pxor mm5, mm5 ; Blank mmx6
+ pxor mm6, mm6 ; Blank mmx7
+ pxor mm7, mm7 ; Blank mmx7
+
+ mov eax,dword ptr [NewDataPtr] ; Load base addresses
+ mov ebx,dword ptr [RefDataPtr1]
+ mov ecx,dword ptr [RefDataPtr2]
+ mov edx,dword ptr [PixelsPerLine]
+ mov esi,dword ptr [RefPixelsPerLine]
+
+ // Row 1: build (ref1 + ref2) >> 1 per byte, then d = new - average; mm5 += d (word lanes), mm7 += d*d (dword lanes)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 2 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 3 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 4 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 5 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 6 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 7 (same instruction sequence as row 1 of this branch)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+
+ add eax,edx ; Inc pointer into the new data
+ add ebx,esi ; Inc pointer into ref data
+ add ecx,esi ; Inc pointer into ref2 data
+
+ // Row 8 (same sequence as row 1 of this branch, without the trailing pointer increments)
+ movq mm1, [ebx] ; Copy eight bytes from each of ref 1 and ref 2.
+ movq mm2, [ecx]
+ movq mm3, mm1 ; Take copies.
+ movq mm4, mm2
+
+ punpcklbw mm1, mm6 ; unpack low four bytes to higher precision
+ punpcklbw mm2, mm6
+ paddw mm1, mm2 ; Add word values together.
+ psrlw mm1, 1 ; Devide by two (shift right 1)
+ punpckhbw mm3, mm6 ; unpack high four bytes to higher precision
+ punpckhbw mm4, mm6
+ paddw mm3, mm4 ; Add word values together.
+ psrlw mm3, 1 ; Devide by two (shift right 1)
+
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ packuswb mm1, mm3 ; Repack to give 1/2 pixel averaged reference data
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ movq DWORD PTR [MmxXSum], mm5 ; copy back accumulated results into normal memory
+ movq DWORD PTR [MmxXXSum], mm7 ; copy back accumulated results into normal memory
+ }
+
+ // Now accumulate the final results: add up the lanes written by the last pair of stores.
+ XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+ XXSum = MmxXXSum[0] + MmxXXSum[1];
+ }
+
+ // Return 64*XXSum - XSum*XSum as the mis-match metric: the population variance of the 64 differences scaled by 64*64.
+ return ( ((XXSum << 6) - XSum*XSum ) );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetIntraError
+ *
+ * INPUTS : UINT8 *DataPtr : Pointer to the first byte of the input block.
+ * Eight rows of eight bytes are read.
+ * INT32 PixelsPerLine : Byte offset added to the pointer to step from
+ * one row of the input block to the next.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: (XXSum << 6) - XSum * XSum, where XSum is the sum of
+ * the 64 bytes read and XXSum is the sum of their squares.
+ * This is 64 * 64 times the population variance of the block.
+ *
+ * FUNCTION : Calculates a variance score for the block.
+ *
+ * SPECIAL NOTES : Register roles: mm5 accumulates the byte sum in four 16-bit
+ * lanes, mm6 is held at zero and used for unpacking, mm7
+ * accumulates the sum of squares in two 32-bit lanes. (The
+ * "Blank mmx6" / first "Blank mmx7" remarks on the pxor lines
+ * refer to mm5 and mm6 respectively.)
+ * The eight rows are unrolled by hand; the tail of the asm
+ * block folds the lanes into the scalars XSum and XXSum.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+UINT32 MmxGetIntraError ( UINT8 *DataPtr, INT32 PixelsPerLine )
+{
+ UINT8 *DiffPtr;
+ UINT32 XSum = 0;
+ UINT32 XXSum = 0;
+
+ // Loop expanded out for speed.
+ DiffPtr = DataPtr;
+
+ __asm
+ {
+ pxor mm5, mm5 ; Blank mmx6
+ pxor mm6, mm6 ; Blank mmx7
+ pxor mm7, mm7 ; Blank mmx7
+
+ mov eax,dword ptr [DiffPtr] ; Load base addresses
+ mov ecx,dword ptr [PixelsPerLine]
+
+ // Row 1
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into the new data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+
+ // Row 2
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 3
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+
+ // Row 4
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 5
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 6
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 7
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ // Row 8
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add eax,ecx ; Inc pointer into ref data
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+ // Reduction: widen the four word sums in mm5 against zero (mm6) and add them; add the two dword sums in mm7.
+ movq mm4, mm5 ;
+ punpcklwd mm5, mm6
+ punpckhwd mm4, mm6
+ movq mm0, mm7
+ paddw mm5, mm4
+
+ punpckhdq mm0, mm6
+ punpckldq mm7, mm6
+ movq mm4, mm5
+ paddd mm0, mm7
+ punpckhdq mm4, mm6
+ punpckldq mm5, mm6
+ movd DWORD PTR [XXSum], mm0
+ paddw mm4, mm5
+ movd DWORD ptr [XSum], mm4
+ }
+
+ // Compute population variance as mis-match metric.
+ return ( (XXSum<<6) - XSum*XSum );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetMBFrameVertVar
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance. The routine reads
+ * cpi->pb.Configuration.VideoFrameWidth (used as
+ * the row stride), cpi->yuv1ptr and
+ * cpi->pb.mbi.blockDxInfo[0].Source (offset of
+ * the first byte read within yuv1ptr).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Vertical variance for frame, i.e. the sum of squared
+ * differences between vertically adjacent bytes.
+ *
+ * FUNCTION : Computes the vertical variance for a macroblock based
+ * upon the sum of the local 2 pixel variances within
+ * the entire frame.
+ *
+ * The loop runs 7 times. Each pass loads rows r, r+1 and r+2
+ * (16 bytes per row, handled as two 8-byte halves), adds the
+ * squared differences (row r - row r+1) and (row r+2 - row r+1)
+ * to mm6, then advances r by two rows. Rows 0..14 are read.
+ *
+ * SPECIAL NOTES : The difference between the last two rows in a MB
+ * are not accounted for!
+ * In the asm, mm0/mm3 hold the middle row ([eax+ecx]); several
+ * inline remarks label that row as "0" and the "0, 2"
+ * differences are taken against that middle row.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+UINT32 MmxGetMBFrameVertVar ( CP_INSTANCE *cpi )
+{
+ UINT32 FrameError;
+ INT32 Stride = cpi->pb.Configuration.VideoFrameWidth;
+// UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+ UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+ __asm
+ {
+ mov ecx, [Stride]
+ mov eax, DWORD PTR [SrcPtr]
+
+ pxor mm7, mm7 ; clear mm7
+ pxor mm6, mm6 ; clear mm6
+
+ mov edx, 7 ;
+
+MmxGetMBFrameVertVarLoop:
+
+ movq mm1, [eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, [eax+ecx] ; 10 11 12 13 14 15 16 17
+
+ movq mm3, mm0 ; copy of 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ punpckhbw mm3, mm7 ; xx 04 xx 05 xx 06 xx 07
+ movq mm2, [eax+ecx*2] ; 20 21 22 23 24 25 26 27
+
+ movq mm4, mm1 ; 10 11 12 13 14 15 16 17
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+
+ punpckhbw mm4, mm7 ; xx 14 xx 15 xx 16 xx 17
+ movq mm5, mm2 ; 20 21 22 23 24 25 26 27
+
+ punpcklbw mm2, mm7 ; xx 20 xx 21 xx 22 xx 23
+ psubw mm1, mm0 ; difference between 0, 1 low four
+
+ pmaddwd mm1, mm1 ; SD between 0, 1 low four
+ psubw mm4, mm3 ; difference bwtween 0, 1 high four
+
+ pmaddwd mm4, mm4 ; SD between 0, 1 high foru
+ punpckhbw mm5, mm7 ; xx 24 xx 25 xx 26 xx 27
+
+ psubw mm2, mm0 ; difference between 0, 2 low four
+ pmaddwd mm2, mm2 ; sd between 0, 2 low four
+
+ psubw mm5, mm3 ; difference between 0, 2 high four
+ pmaddwd mm5, mm5 ; sd between 0, 2 high four
+
+ paddd mm1, mm4 ;
+ paddd mm2, mm5 ;
+
+ paddd mm6, mm1 ;
+ paddd mm6, mm2 ; accumlated in mm6
+
+ // done with the low eight
+
+ movq mm1, 8[eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, 8[eax+ecx] ; 10 11 12 13 14 15 16 17
+
+ movq mm3, mm0 ; copy of 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ punpckhbw mm3, mm7 ; xx 04 xx 05 xx 06 xx 07
+ movq mm2, 8[eax+ecx*2] ; 20 21 22 23 24 25 26 27
+
+ movq mm4, mm1 ; 10 11 12 13 14 15 16 17
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+
+ punpckhbw mm4, mm7 ; xx 14 xx 15 xx 16 xx 17
+ movq mm5, mm2 ; 20 21 22 23 24 25 26 27
+
+ punpcklbw mm2, mm7 ; xx 20 xx 21 xx 22 xx 23
+ psubw mm1, mm0 ; difference between 0, 1 low four
+
+ pmaddwd mm1, mm1 ; SD between 0, 1 low four
+ psubw mm4, mm3 ; difference bwtween 0, 1 high four
+
+ pmaddwd mm4, mm4 ; SD between 0, 1 high foru
+ punpckhbw mm5, mm7 ; xx 24 xx 25 xx 26 xx 27
+
+ psubw mm2, mm0 ; difference between 0, 2 low four
+ pmaddwd mm2, mm2 ; sd between 0, 2 low four
+
+ psubw mm5, mm3 ; difference between 0, 2 high four
+ pmaddwd mm5, mm5 ; sd between 0, 2 high four
+
+ paddd mm1, mm4 ;
+ paddd mm2, mm5 ;
+
+ paddd mm6, mm1 ;
+ paddd mm6, mm2 ; accumlated in mm6
+
+ lea eax, [eax + ecx *2] ; skip one line
+ sub edx, 1
+
+ jnz MmxGetMBFrameVertVarLoop
+ // Add the two 32-bit lanes of mm6 to form the final total.
+ movq mm0, mm6
+ psrlq mm0, 32
+
+ paddd mm0, mm6
+ movd [FrameError], mm0
+ }
+
+ return FrameError;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxGetMBFieldVertVar
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance. The routine reads
+ * cpi->pb.Configuration.VideoFrameWidth (used as
+ * the row stride), cpi->yuv1ptr and
+ * cpi->pb.mbi.blockDxInfo[0].Source (offset of
+ * the first byte read within yuv1ptr).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Vertical variance for fields within a frame, i.e. the
+ * sum of squared differences between bytes two rows apart.
+ *
+ * FUNCTION : Computes the vertical variance for a macroblock based
+ * upon the sum of the local 2 pixel variances within
+ * the two fields of a frame.
+ *
+ * The loop runs 7 times. Each pass compares row r with row
+ * r+2, steps down one row, compares row r+1 with row r+3, and
+ * steps down one more row. Every comparison covers 16 bytes,
+ * handled as two 8-byte halves. Rows 0..15 are read.
+ *
+ * SPECIAL NOTES : The difference between the last two rows in a MB
+ * are not accounted for!
+ * NOTE(review): as written the loop covers every (r, r+2) row
+ * pair for r = 0..13; the note above may have been carried
+ * over from MmxGetMBFrameVertVar - confirm.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+UINT32 MmxGetMBFieldVertVar ( CP_INSTANCE *cpi )
+{
+ UINT32 FieldError;
+ INT32 Stride = cpi->pb.Configuration.VideoFrameWidth;
+// UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+ UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+ __asm
+ {
+ mov ecx, [Stride]
+ mov eax, DWORD PTR [SrcPtr]
+
+ pxor mm7, mm7 ; clear mm7
+ pxor mm6, mm6 ; clear mm6
+
+ mov edx, 7 ;
+
+MmxGetMBFieldVertVarLoop:
+
+ movq mm1, [eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, [eax+ecx*2] ; 10 11 12 13 14 15 16 17
+
+ movq mm2, mm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ movq mm3, mm1 ; 10 11 12 13 14 15 16 17
+ punpckhbw mm2, mm7 ; xx 04 xx 05 xx 06 xx 07
+
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+ punpckhbw mm3, mm7 ; xx 14 xx 15 xx 16 xx 17
+
+ psubw mm0, mm1 ; diff between 0 1 low four
+ pmaddwd mm0, mm0 ; SD between 0 1 low four
+
+ psubw mm2, mm3 ; diff between 0 1 high four
+ pmaddwd mm2, mm2 ; SD between 0 1 high four
+
+ paddd mm0, mm2
+ paddd mm6, mm0
+
+ movq mm1, 8[eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, 8[eax+ecx*2] ; 10 11 12 13 14 15 16 17
+
+ movq mm2, mm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ movq mm3, mm1 ; 10 11 12 13 14 15 16 17
+ punpckhbw mm2, mm7 ; xx 04 xx 05 xx 06 xx 07
+
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+ punpckhbw mm3, mm7 ; xx 14 xx 15 xx 16 xx 17
+
+ psubw mm0, mm1 ; diff between 0 1 low four
+ pmaddwd mm0, mm0 ; SD between 0 1 low four
+
+ psubw mm2, mm3 ; diff between 0 1 high four
+ pmaddwd mm2, mm2 ; SD between 0 1 high four
+
+ paddd mm0, mm2
+ paddd mm6, mm0
+
+ lea eax, [eax+ecx]
+
+ movq mm1, [eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, [eax+ecx*2] ; 10 11 12 13 14 15 16 17
+
+ movq mm2, mm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ movq mm3, mm1 ; 10 11 12 13 14 15 16 17
+ punpckhbw mm2, mm7 ; xx 04 xx 05 xx 06 xx 07
+
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+ punpckhbw mm3, mm7 ; xx 14 xx 15 xx 16 xx 17
+
+ psubw mm0, mm1 ; diff between 0 1 low four
+ pmaddwd mm0, mm0 ; SD between 0 1 low four
+
+ psubw mm2, mm3 ; diff between 0 1 high four
+ pmaddwd mm2, mm2 ; SD between 0 1 high four
+
+ paddd mm0, mm2
+ paddd mm6, mm0
+
+ movq mm1, 8[eax] ; 00 01 02 03 04 05 06 07
+ movq mm0, 8[eax+ecx*2] ; 10 11 12 13 14 15 16 17
+
+ movq mm2, mm0 ; 00 01 02 03 04 05 06 07
+ punpcklbw mm0, mm7 ; xx 00 xx 01 xx 02 xx 03
+
+ movq mm3, mm1 ; 10 11 12 13 14 15 16 17
+ punpckhbw mm2, mm7 ; xx 04 xx 05 xx 06 xx 07
+
+ punpcklbw mm1, mm7 ; xx 10 xx 11 xx 12 xx 13
+ punpckhbw mm3, mm7 ; xx 14 xx 15 xx 16 xx 17
+
+ psubw mm0, mm1 ; diff between 0 1 low four
+ pmaddwd mm0, mm0 ; SD between 0 1 low four
+
+ psubw mm2, mm3 ; diff between 0 1 high four
+ pmaddwd mm2, mm2 ; SD between 0 1 high four
+
+ paddd mm0, mm2
+ paddd mm6, mm0
+
+ lea eax, [eax + ecx ] ; skip one line
+ sub edx, 1
+
+ jnz MmxGetMBFieldVertVarLoop
+ // Add the two 32-bit lanes of mm6 to form the final total.
+ movq mm0, mm6
+ psrlq mm0, 32
+
+ paddd mm0, mm6
+ movd [FieldError], mm0
+ }
+
+ return FieldError;
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_SAD_mmx
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input block (8 bytes read per row).
+ * INT32 SrcStride : Stride for input block.
+ * UINT8 *RefPtr : Pointer to reference block. Nine rows are
+ * read, bytes 0..8 of each row.
+ * UINT32 SrcPixelsPerLine : Stride for reference block.
+ * INT16 *HFilter : Pointer to horizontal filter taps. Four words
+ * are read at byte offset 0 (first tap) and four
+ * at byte offset 16 (second tap).
+ * INT16 *VFilter : Pointer to vertical filter taps, same layout
+ * as HFilter.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD error, summed over the 8 rows of the block.
+ *
+ * FUNCTION : Produces a filtered fractional block in 2-D
+ * using bilinear filters and calculate the SAD.
+ *
+ * The first reference row is filtered horizontally before the
+ * loop and kept packed in mm7. Each of the 8 loop passes
+ * filters the next reference row horizontally, blends it
+ * vertically with the row held in mm7, takes the SAD against
+ * one input row and adds it to Error. The newly filtered row
+ * then replaces mm7 for the next pass.
+ *
+ * SPECIAL NOTES : Every filter stage adds the file-level constant rd and
+ * shifts right by FILTER_SHIFT (both defined outside this
+ * routine). The "xmm3" remarks in the asm refer to mm3.
+ * psadbw is used on MMX registers, which needs the SSE
+ * integer extensions in addition to MMX.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock2dBil_SAD_mmx
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 SrcPixelsPerLine,
+ INT16 *HFilter,
+ INT16 *VFilter
+)
+{
+
+ UINT32 Error=0;
+ __asm
+ {
+ mov eax, HFilter ;
+ mov edi, SrcPtr ;
+
+ mov esi, RefPtr ;
+ mov ecx, 8 ;
+
+ mov edx, SrcPixelsPerLine ;
+
+ movq mm1, [eax] ;
+ movq mm2, [eax+16] ;
+
+ mov eax, VFilter ;
+ pxor mm0, mm0 ;
+
+ // get the first horizontal line done ;
+ movq mm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ;
+ pmullw mm4, mm1 ;
+
+ movq mm5, [esi+1] ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ;
+
+
+ add esi, edx ; next line
+NextRow:
+ movq mm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ;
+ pmullw mm4, mm1 ;
+
+ movq mm5, [esi+1] ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ movq mm5, mm7 ;
+ movq mm6, mm7 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0
+
+ pmullw mm5, [eax] ;
+ pmullw mm6, [eax] ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ;
+
+
+ pmullw mm3, [eax+16] ;
+ pmullw mm4, [eax+16] ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ packuswb mm3, mm4
+ movq mm4, [edi] ;
+
+ psadbw mm3, mm4 ;
+ movd mm4, Error ;
+
+ paddd mm3, mm4 ;
+ movd Error, mm3 ;
+
+ add esi, edx ; next line
+ add edi, SrcStride ; ;
+
+ dec ecx ;
+ jne NextRow
+ }
+ return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_vb8_SAD_mmx
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input block (8 bytes read per row).
+ * INT32 SrcStride : Stride for input block.
+ * UINT8 *RefPtr : Pointer to reference block. Each pass reads
+ * 8 bytes from the current row and 8 from the
+ * row PixelsPerLine bytes below it.
+ * UINT32 PixelsPerLine : Stride for reference block.
+ * UINT32 PixelStep : Offset to move to next pixel in input.
+ * Not referenced by this routine.
+ * INT16 *Filter : Pointer to filter taps. Four words are read
+ * at byte offset 0 (tap for the current row)
+ * and four at byte offset 16 (tap for the row
+ * below).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD error, summed over the 8 rows of the block.
+ *
+ * FUNCTION : Applies 1-D vertical bi-linear filter to input block.
+ * Each filtered reference row is compared with the matching
+ * input row using psadbw and the result accumulated in mm7.
+ *
+ * SPECIAL NOTES : Rounding uses the file-level constant rd followed by a
+ * shift right of FILTER_SHIFT (both defined outside this
+ * routine). Error is written by the final movd, so it needs
+ * no initialiser. psadbw on MMX registers needs the SSE
+ * integer extensions. No EMMS instruction is issued.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_vb8_SAD_mmx
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 PixelsPerLine,
+ UINT32 PixelStep,
+ INT16 *Filter
+)
+{
+ UINT32 Error;
+ __asm
+ {
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; mm3 *= kernel 0 modifiers.
+
+ mov edi, SrcPtr
+ mov esi, RefPtr
+
+ mov ecx, 8 ;
+
+ mov edx, SrcStride
+ mov eax, PixelsPerLine;
+
+ pxor mm7, mm7
+ pxor mm0, mm0 ; mm0 = 00000000
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p0..p7
+ movq mm4, mm3 ; mm4 = p0..p7
+
+ punpcklbw mm3, mm0 ; mm3 = p0..p3
+ punpckhbw mm4, mm0 ; mm4 = p4..p7
+
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+ pmullw mm4, mm1 ; mm4 *= kernel 0 modifiers.
+
+ movq mm5, [esi + eax] ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+
+ packuswb mm3, mm4 ; pack and unpack to saturate
+ movq mm5, [edi] ;
+
+ psadbw mm3, mm5 ;
+ paddd mm7, mm3
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add esi, eax
+ add edi, edx
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ movd Error, mm7
+ }
+ return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_hb8_SAD_mmx
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input block (8 bytes read per row).
+ * INT32 SrcStride : Stride for input block.
+ * UINT8 *RefPtr : Pointer to reference block. Bytes 0..8 of
+ * each of 8 rows are read.
+ * UINT32 SrcPixelsPerLine : Stride for reference block.
+ * UINT32 PixelStep : Offset to move to next pixel in input.
+ * Not referenced by this routine.
+ * INT16 *Filter : Pointer to filter taps. Four words are read
+ * at byte offset 0 (tap for the current byte)
+ * and four at byte offset 16 (tap for the
+ * byte to its right).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD error, summed over the 8 rows of the block.
+ *
+ * FUNCTION : Applies 1-D horizontal bi-linear filter to input block.
+ * Each filtered reference row is compared with the matching
+ * input row using psadbw and the result accumulated in mm7.
+ *
+ * SPECIAL NOTES : Rounding uses the file-level constant rd followed by a
+ * shift right of FILTER_SHIFT (both defined outside this
+ * routine). The "xmm3" remarks in the asm refer to mm3.
+ * psadbw on MMX registers needs the SSE integer extensions.
+ * No EMMS instruction is issued.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_hb8_SAD_mmx
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ INT16 *Filter
+)
+{
+ UINT32 Error = 0;
+
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; xmm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
+
+ mov edi, SrcPtr
+ mov esi, RefPtr
+
+ pxor mm0, mm0 ; mm0 = 00000000
+ pxor mm7, mm7 ; mm7 = 0
+
+ mov ecx, 8 ;
+
+ mov edx, SrcStride
+ mov eax, SrcPixelsPerLine;
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p-1..p6
+ movq mm4, mm3 ; make a copy
+
+ punpcklbw mm3, mm0 ;
+ pmullw mm3, mm1 ;
+
+ movq mm5, [esi+1] ;mm5 = p0 ..... p7
+ punpckhbw mm4, mm0
+
+ pmullw mm4, mm1 ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ; mm5 = p0..p7
+ pmullw mm5, mm2 ;
+
+ punpckhbw mm6, mm0 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ packuswb mm3, mm4 ; pack and unpack to saturate
+
+ movq mm5, [edi] ; read src
+ psadbw mm3, mm5 ;
+ paddd mm7, mm3
+
+ add esi, eax ; next line
+ add edi, edx ;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ movd Error, mm7;
+
+ }
+ return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FiltBlockBilGetSad_mmx
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input block.
+ * INT32 SrcStride : Stride for input block.
+ * UINT8 *ReconPtr1 : Pointer to first reference block.
+ * UINT8 *ReconPtr2 : Pointer to second reference block.
+ * INT32 PixelsPerLine : Stride for reference block.
+ * INT32 ModX : Fractional part of x-component of MV.
+ * Used as an index into BilinearFilters_mmx.
+ * INT32 ModY : Fractional part of y-component of MV.
+ * Used as an index into BilinearFilters_mmx.
+ * UINT32 BestSoFar : Best error found so far. Not used by this
+ * routine.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD error. When the two reference pointers are not
+ * 1, PixelsPerLine, PixelsPerLine-1 or PixelsPerLine+1 bytes
+ * apart, no filter is applied and the worst-case 8x8 SAD
+ * (8 * 8 * 255) is returned.
+ *
+ * FUNCTION : Applies 2-D bi-linear filter to get prediction block
+ * and computes SAD for prediction error.
+ *
+ * SPECIAL NOTES : The distance between the two reference pointers selects
+ * the horizontal-only, vertical-only or 2-D filter routine.
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_mmx
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ INT32 PixelsPerLine,
+ INT32 ModX,
+ INT32 ModY,
+ UINT32 BestSoFar
+)
+{
+ INT32 diff;
+
+ // Largest SAD an 8x8 block of 8-bit pixels can produce. It is returned as-is
+ // when the pointer layout matches none of the cases below, so the result is
+ // always defined (previously Error was read uninitialised in that case).
+ UINT32 Error = 8 * 8 * 255;
+
+ // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+ // This works out to be what we want... despite the pointer swapping that goes on below.
+ // For example... if the X component of the vector is a +ve ModX = X%8.
+ // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+ diff = (INT32)(ReconPtr2 - ReconPtr1);
+ if ( diff < 0 )
+ {
+     // swap pointers so ReconPtr1 is the smaller one (above, left, above-right or above-left)
+     UINT8 *temp = ReconPtr1;
+     ReconPtr1 = ReconPtr2;
+     ReconPtr2 = temp;
+     diff = (INT32)(ReconPtr2 - ReconPtr1);
+ }
+
+ if ( diff == 1 ) // Fractional pixel in horizontal only
+ {
+     Error = FilterBlock1d_hb8_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, 1, BilinearFilters_mmx[ModX] );
+ }
+ else if ( diff == PixelsPerLine ) // Fractional pixel in vertical only
+ {
+     Error = FilterBlock1d_vb8_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, PixelsPerLine, BilinearFilters_mmx[ModY] );
+ }
+ else if ( diff == PixelsPerLine - 1 ) // ReconPtr1 is Top right
+ {
+     Error = FilterBlock2dBil_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1-1, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+ }
+ else if ( diff == PixelsPerLine + 1 ) // ReconPtr1 is Top left
+ {
+     Error = FilterBlock2dBil_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+ }
+
+ return Error;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c
new file mode 100644
index 00000000..8064c503
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c
@@ -0,0 +1,1728 @@
+/****************************************************************************
+*
+* Module Title : CWmtFunctions.c
+*
+* Description : Encoder system dependant functions
+*
+* AUTHOR : Paul Wilkins
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h>
+#include "compdll.h"
+#include <assert.h>
+
+/****************************************************************************
+* Macros
+*
+* The pragma silences MSVC warning C4799 ("function has no EMMS
+* instruction") for the whole file.
+*
+* FILTER_WEIGHT : Fixed-point scale of the filter taps (128).
+* FILTER_SHIFT : Right shift that removes that scale; 1 << 7 == 128.
+****************************************************************************/
+#pragma warning(disable:4799)
+
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT 7
+
+/****************************************************************************
+* Imports
+*
+* XMMGetSAD : SAD routine defined outside this file. It was
+* declared here without a return type; the implicit
+* "int" is now written out, since default-int is not
+* valid in C99 and later. The type is unchanged.
+* GetSumAbsDiffs16 : 16x16 SAD routine defined outside this file.
+* BilinearFilters_wmt : Table of 8 filters of 16 INT16 entries each, defined
+* outside this file.
+****************************************************************************/
+extern int XMMGetSAD ( UINT8 *NewDataPtr,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr,
+ INT32 RefPixelsPerLine,
+ UINT32 ErrorSoFar,
+ UINT32 BestSoFar );
+
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern INT16 BilinearFilters_wmt[8][16];
+
+/****************************************************************************
+* Module Statics
+*
+* rd : Eight 16-bit words, each 64 (half of FILTER_WEIGHT), declared with
+* 16-byte alignment. Presumably the rounding term added before the
+* shift by FILTER_SHIFT in this file's filter routines - its uses
+* are outside this section, confirm there.
+****************************************************************************/
+static __declspec(align(16)) short rd[] = { 64, 64, 64, 64, 64, 64, 64, 64 };
+
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetSumAbsDiffs16
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input 16x16 block.
+ * INT32 SourceStride : Stride of input block.
+ * UINT8 *RefPtr : Pointer to reference 16x16 block.
+ * INT32 ReconStride : Stride of reference block.
+ * UINT32 ErrorSoFar : Accumulated error to date. Not used by this
+ * routine.
+ * UINT32 BestSoFar : Best error found so far. Not used by this
+ * routine.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD.
+ *
+ * FUNCTION : Calculate the Sum of Absolute difference on 16x16 pixels.
+ *
+ * The 16 rows are processed as two unrolled groups of eight.
+ * Each reference row is loaded with movdqu and psadbw is
+ * applied with the source row as a memory operand. The
+ * first group's total is held in xmm7 and the second group's
+ * total is added to it. The two 64-bit halves of xmm7 are
+ * then moved to mm0/mm1 and added to give the result.
+ *
+ * SPECIAL NOTES : The function assumes the SrcPtr is aligned on 16 bytes,
+ * RefPtr can be aligned any byte boundary.
+ * Every source row address (SrcPtr + k * SourceStride) is
+ * used directly as a psadbw memory operand, so the alignment
+ * assumption applies to each row, not only the first.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+UINT32 WmtGetSumAbsDiffs16
+(
+ UINT8 *SrcPtr,
+ INT32 SourceStride,
+ UINT8 *RefPtr,
+ INT32 ReconStride,
+ UINT32 ErrorSoFar,
+ UINT32 BestSoFar
+)
+{
+
+ UINT32 Error = 0;
+ __asm
+ {
+ mov esi, SrcPtr ;
+ mov edi, RefPtr ;
+
+ mov ecx, SourceStride ;
+ mov edx, ReconStride ;
+
+ movdqu xmm0, [edi] ; Row0 Ref
+ lea eax, [esi + 2 * ecx ] ; Calculate Row3 Source address
+
+ psadbw xmm0, [esi] ; Row0 SAD
+ lea ebx, [edi + 2 * edx ] ; Calculate Row3 Ref address
+
+ movdqu xmm1, [edi + edx] ; Row1 Ref
+ add eax, ecx ; Calculate Row3 Source address
+
+ psadbw xmm1, [esi + ecx] ; Row1 SAD
+ add ebx, edx ; Calculate Row3 Ref address
+
+ movdqu xmm2, [edi + 2 * edx ] ; Row2 Ref
+ paddw xmm0, xmm1 ; Row0 sad + Row1 sad
+
+ psadbw xmm2, [esi + 2 * ecx ] ; Row2 Sad
+ lea esi, [eax + 2 * ecx ] ; Calculate Row6 Source address
+
+ lea edi, [ebx + 2 * edx ] ; Calculate Row6 Ref address
+ movdqu xmm3, [ebx] ; Row3 Ref
+
+ add esi, ecx ; Calculate Row6 Source address
+ psadbw xmm3, [eax] ; Row3 SAD
+
+ add edi, edx ; Calculate Row6 Ref address
+ movdqu xmm4, [ebx + edx] ; Row4 Ref
+
+ paddw xmm2, xmm3 ; Row2 Sad + Row3 Sad
+ psadbw xmm4, [eax + ecx] ; Row4 Sad
+
+ movdqu xmm5, [ebx + 2 * edx] ; Row5 Ref
+ paddd xmm0, xmm2 ; Row0 + Row1 + Row2 + Row3 SAD
+
+ psadbw xmm5, [eax + 2 * ecx] ; Row5 SAD
+ movdqu xmm6, [edi] ; Row6 Ref
+
+ paddw xmm4, xmm5 ; Row4 + Row5 SAD
+ psadbw xmm6, [esi] ; Row6 SAD
+
+ movdqu xmm7, [edi + edx ] ; Row7 Ref
+ paddd xmm0, xmm4 ; Row0 1 2 3 4 5
+
+ psadbw xmm7, [esi + ecx] ; Row7 Sad
+
+ lea esi, [esi + 2* ecx] ; calculate Row8 source address
+ paddw xmm7, xmm6 ; Row7 + Row6 Sad
+
+ lea edi, [edi + 2* edx] ; calculate Row8 source address
+ paddd xmm7, xmm0 ;
+
+ // next eight row
+ movdqu xmm0, [edi] ; Row0 Ref
+ lea eax, [esi + 2 * ecx ] ; Calculate Row3 Source address
+
+ psadbw xmm0, [esi] ; Row0 SAD
+ lea ebx, [edi + 2 * edx ] ; Calculate Row3 Ref address
+
+ movdqu xmm1, [edi + edx] ; Row1 Ref
+ add eax, ecx ; Calculate Row3 Source address
+
+ psadbw xmm1, [esi + ecx] ; Row1 SAD
+ add ebx, edx ; Calculate Row3 Ref address
+
+ movdqu xmm2, [edi + 2 * edx ] ; Row2 Ref
+ paddw xmm0, xmm1 ; Row0 sad + Row1 sad
+
+ psadbw xmm2, [esi + 2 * ecx ] ; Row2 Sad
+ lea esi, [eax + 2 * ecx ] ; Calculate Row6 Source address
+
+ lea edi, [ebx + 2 * edx ] ; Calculate Row6 Ref address
+ movdqu xmm3, [ebx] ; Row3 Ref
+
+ add esi, ecx ; Calculate Row6 Source address
+ psadbw xmm3, [eax] ; Row3 SAD
+
+ add edi, edx ; Calculate Row6 Ref address
+ movdqu xmm4, [ebx + edx] ; Row4 Ref
+
+ paddw xmm2, xmm3 ; Row2 Sad + Row3 Sad
+ psadbw xmm4, [eax + ecx] ; Row4 Sad
+
+ movdqu xmm5, [ebx + 2 * edx] ; Row5 Ref
+ paddd xmm0, xmm2 ; Row0 + Row1 + Row2 + Row3 SAD
+
+ psadbw xmm5, [eax + 2 * ecx] ; Row5 SAD
+ movdqu xmm6, [edi] ; Row6 Ref
+
+ paddw xmm4, xmm5 ; Row4 + Row5 SAD
+ psadbw xmm6, [esi] ; Row6 SAD
+
+ paddd xmm0, xmm4 ; Row0 1 2 3 4 5
+
+ movdqu xmm3, [edi + edx ] ; Row7 Ref
+ psadbw xmm3, [esi + ecx ] ; Row7 Sad
+
+ paddw xmm3, xmm6 ;
+ paddd xmm0, xmm3 ; Sum of 16 row sad
+
+ paddd xmm7, xmm0; ;
+
+ movdq2q mm0, xmm7 ; lower q
+ psrldq xmm7, 8 ;
+
+ movdq2q mm1, xmm7 ; High Q
+ paddd mm0, mm1 ;
+
+ movd Error, mm0
+
+ }
+ return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetHalfPixelSumAbsDiffs16
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to input 16x16 block.
+ * INT32 SourceStride : Stride of input block.
+ * UINT8 *RefPtr : Pointer to first reference 16x16 block.
+ * UINT8 *RefPtr2 : Pointer to second reference 16x16 block.
+ * INT32 ReconStride : Stride of reference blocks.
+ * UINT32 ErrorSoFar : Accumulated error to date. Not used by this
+ * routine.
+ * UINT32 BestSoFar : Best error found so far. Not used by this
+ * routine.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD.
+ *
+ * FUNCTION : Calculates the Sum of Absolute differences between a 16x16
+ * pixel MB and the average of two 16x16 pixel references.
+ *
+ * When RefPtr == RefPtr2 no averaging is needed and the work
+ * is passed to GetSumAbsDiffs16 with ErrorSoFar and BestSoFar
+ * both given as 0. Otherwise each of the 16 rows of the two
+ * references is widened to words, summed and shifted right
+ * by one (the average truncates; no rounding term is added).
+ * The result is packed back to bytes and compared with the
+ * source row using psadbw.
+ *
+ * SPECIAL NOTES : The function assumes the SrcPtr is aligned on 16 bytes,
+ * RefPtr & RefPtr2 can be aligned any byte boundary.
+ * Every source row address is used directly as a psadbw
+ * memory operand, so the alignment assumption applies to
+ * each row. In the loop, eax holds RefPtr2 and edi holds
+ * RefPtr; the "Next line of Ref1/Ref2" remarks are swapped.
+ * No EMMS instruction is issued before returning.
+ *
+ ****************************************************************************/
+UINT32 WmtGetHalfPixelSumAbsDiffs16
+(
+ UINT8 *SrcPtr,
+ INT32 SourceStride,
+ UINT8 *RefPtr,
+ UINT8 *RefPtr2,
+ INT32 ReconStride,
+ UINT32 ErrorSoFar,
+ UINT32 BestSoFar
+)
+{
+ UINT32 Error = 0;
+
+ if ( RefPtr == RefPtr2 )
+ {
+ Error = GetSumAbsDiffs16 ( SrcPtr, SourceStride, RefPtr, ReconStride, 0, 0 );
+ }
+ else
+ {
+ __asm
+ {
+ mov esi, SrcPtr;
+ mov edi, RefPtr;
+
+ mov eax, RefPtr2;
+ mov ecx, SourceStride;
+
+ mov edx, ReconStride;
+ pxor xmm7, xmm7;
+
+ mov ebx, 16;
+ pxor xmm6, xmm6;
+
+LoopWmtHalfSad:
+
+ movdqu xmm0, [edi] ; Read 16 bytes from Ref
+ movdqu xmm1, [eax] ; Read 16 bytes from Ref2
+
+ movdqa xmm2, xmm0 ; copy
+ punpcklbw xmm0, xmm7 ; Low 8 bytes from Ref
+
+ movdqa xmm3, xmm1 ; copy
+ punpcklbw xmm1, xmm7 ; Low 8 bytes from Ref2
+
+ paddw xmm0, xmm1 ; Add low 8 bytes
+ punpckhbw xmm2, xmm7 ; High 8 bytes from Ref
+
+ psraw xmm0, 1 ; average of Low 8 bytes Ref and Ref2
+ punpckhbw xmm3, xmm7 ; High 8 bytes from Ref2
+
+ add eax, edx ; Next line of Ref1
+ paddw xmm2, xmm3 ; Add high 8 bytes
+
+ add edi, edx ; Next line of Ref2
+ psraw xmm2, 1 ; Average of high 8 bytes
+
+ packuswb xmm0, xmm2 ; pack the average back into bytes
+ psadbw xmm0, [esi] ; sad
+
+ add esi, ecx ; next line of source
+ dec ebx ;
+
+ paddd xmm6, xmm0 ; accumulate the sad
+ jnz LoopWmtHalfSad
+
+ movdq2q mm0, xmm6 ;
+ psrldq xmm6, 8 ;
+
+ movdq2q mm1, xmm6 ;
+ paddd mm0, mm1 ;
+
+ movd Error, mm0 ;
+
+ }
+ }
+ return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetHalfPixelSAD
+ *
+ * INPUTS : UINT8 *SrcData : Pointer to input 16x16 block.
+ * INT32 PixelsPerLine : Stride of input block.
+ * UINT8 *RefDataPtr1 : Pointer to first reference 16x16 block.
+ * UINT8 *RefDataPtr2 : Pointer to second reference 16x16 block.
+ * INT32 RefPixelsPerLine : Stride of reference blocks.
+ * INT32 ErrorSoFar : Accumulated error to date.
+ * INT32 BestSoFar : Best error found so far.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: SAD at 1/2 pixel accuracy.
+ *
+ * FUNCTION : Calculates the sum of the absolute differences against
+ * half pixel interpolated references.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+INT32 WmtGetHalfPixelSAD
+(
+ UINT8 *SrcData,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr1,
+ UINT8 *RefDataPtr2,
+ INT32 RefPixelsPerLine,
+ INT32 ErrorSoFar,
+ INT32 BestSoFar
+)
+{ /* Returns ErrorSoFar plus the sum of absolute differences over an 8x8 block
+ * between the source (stride PixelsPerLine) and the truncating average
+ * (a+b)>>1 of the two reference blocks (stride RefPixelsPerLine).
+ * When both reference pointers are equal the work is delegated to XMMGetSAD.
+ * BestSoFar is only forwarded to XMMGetSAD; the half-pixel path ignores it.
+ */
+ INT32 DiffVal = ErrorSoFar;
+ INT16 DiffAcc[4] = { 0, 0, 0, 0 }; // Receives the four 16-bit partial sums stored by the asm block.
+ INT32 RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
+
+ if ( RefOffset == 0 )
+ {
+ // Identical reference pointers: no interpolation needed. NOTE(review): ErrorSoFar is already in DiffVal and is also passed to XMMGetSAD - confirm it is not counted twice.
+ DiffVal += XMMGetSAD ( SrcData, PixelsPerLine, RefDataPtr1, RefPixelsPerLine, ErrorSoFar, BestSoFar );
+ }
+ else
+ {
+ // SSE2 code for the half-pixel SAD; rows are processed two at a time.
+ __asm
+ {
+ mov eax, dword ptr [SrcData] // Get Src Pointer
+ pxor xmm6, xmm6 // xmm6 = 0, used as the zero operand when unpacking bytes to words
+
+ mov ebx, dword ptr [RefDataPtr1] // Get Reference pointers
+ pxor xmm7, xmm7 // xmm7 = 0, accumulates the absolute differences (eight word lanes)
+
+ mov edx, dword ptr [PixelsPerLine] // Source stride
+ mov ecx, dword ptr [RefDataPtr2]
+
+ mov esi, edx // esi = source stride
+ mov edx, dword ptr [RefPixelsPerLine] // edx = reference stride
+
+ // Row 1 and 2
+ movq xmm1, QWORD ptr [ebx] // Eight bytes from ref 1
+ movq xmm2, QWORD ptr [ecx] // Eight Bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack ref1 to shorts
+ movq xmm3, QWORD ptr [ebx+edx] // Eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack ref2 to shorts
+ movq xmm4, QWORD ptr [ecx+edx] // Eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add short values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add short values together
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes of src data to xmm0
+
+ psrlw xmm3, 1 // divided by 2
+ punpcklbw xmm0, xmm6 // unpack to shorts
+
+ movq xmm5, QWORD PTR [eax+esi] // get the next source row
+ movdqa xmm2, xmm0 // make a copy of xmm0
+
+ punpcklbw xmm5, xmm6 // unpack to shorts
+ psubusw xmm0, xmm1 // A-B (saturating at 0) to xmm0
+
+ movdqa xmm4, xmm5 // make a copy
+ psubusw xmm1, xmm2 // B-A (saturating at 0) to xmm1
+
+ psubusw xmm5, xmm3 // A-B to xmm5
+ psubusw xmm3, xmm4 // B-A to xmm3
+
+ por xmm0, xmm1 // abs differences (one of the two saturated results is zero)
+ por xmm5, xmm3 // abs differences
+
+ paddw xmm7, xmm0 // accumulate difference...
+ paddw xmm7, xmm5 // accumulate difference...
+
+ lea ebx, [ebx+edx*2] // two lines below
+ lea ecx, [ecx+edx*2] // two lines below
+
+ lea eax, [eax+esi*2] // two lines below
+
+ // Row 3 and 4
+ movq xmm1, QWORD PTR [ebx] // Eight bytes from ref 1
+ movq xmm2, QWORD PTR [ecx] // Eight Bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack ref1 to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack ref2 to shorts
+ movq xmm4, QWORD PTR [ecx+edx] // Eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add short values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add short values together
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes of src data to xmm0
+
+ psrlw xmm3, 1 // divided by 2
+ punpcklbw xmm0, xmm6 // unpack to shorts
+
+ movq xmm5, QWORD PTR [eax+esi] // get the next source row
+ movdqa xmm2, xmm0 // make a copy of xmm0
+
+ punpcklbw xmm5, xmm6 // unpack to shorts
+ psubusw xmm0, xmm1 // A-B to xmm0
+
+ movdqa xmm4, xmm5 // make a copy
+ psubusw xmm1, xmm2 // B-A to xmm1
+
+ psubusw xmm5, xmm3 // A-B to xmm5
+ psubusw xmm3, xmm4 // B-A to xmm3
+
+ por xmm0, xmm1 // abs differences
+ por xmm5, xmm3 // abs differences
+
+ paddw xmm7, xmm0 // accumulate difference...
+ paddw xmm7, xmm5 // accumulate difference...
+
+ lea ebx, [ebx+edx*2] // two lines below
+ lea ecx, [ecx+edx*2] // two lines below
+
+ lea eax, [eax+esi*2] // two lines below
+
+ // Row 5 and 6
+ movq xmm1, QWORD PTR [ebx] // Eight bytes from ref 1
+ movq xmm2, QWORD PTR [ecx] // Eight Bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack ref1 to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack ref2 to shorts
+ movq xmm4, QWORD PTR [ecx+edx] // Eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add short values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add short values together
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes of src data to xmm0
+
+ psrlw xmm3, 1 // divided by 2
+ punpcklbw xmm0, xmm6 // unpack to shorts
+
+ movq xmm5, QWORD PTR [eax+esi] // get the next source row
+ movdqa xmm2, xmm0 // make a copy of xmm0
+
+ punpcklbw xmm5, xmm6 // unpack to shorts
+ psubusw xmm0, xmm1 // A-B to xmm0
+
+ movdqa xmm4, xmm5 // make a copy
+ psubusw xmm1, xmm2 // B-A to xmm1
+
+ psubusw xmm5, xmm3 // A-B to xmm5
+ psubusw xmm3, xmm4 // B-A to xmm3
+
+ por xmm0, xmm1 // abs differences
+ por xmm5, xmm3 // abs differences
+
+ paddw xmm7, xmm0 // accumulate difference...
+ paddw xmm7, xmm5 // accumulate difference...
+
+ lea ebx, [ebx+edx*2] // two lines below
+ lea ecx, [ecx+edx*2] // two lines below
+
+
+ lea eax, [eax+esi*2] // two lines below
+
+ // Row 7 and 8
+ movq xmm1, QWORD PTR [ebx] // Eight bytes from ref 1
+ movq xmm2, QWORD PTR [ecx] // Eight Bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack ref1 to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack ref2 to shorts
+ movq xmm4, QWORD PTR [ecx+edx] // Eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add short values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add short values together
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes of src data to xmm0
+
+ psrlw xmm3, 1 // divided by 2
+ punpcklbw xmm0, xmm6 // unpack to shorts
+
+ movq xmm5, QWORD PTR [eax+esi] // get the next source row
+ movdqa xmm2, xmm0 // make a copy of xmm0
+
+ punpcklbw xmm5, xmm6 // unpack to shorts
+ psubusw xmm0, xmm1 // A-B to xmm0
+
+ movdqa xmm4, xmm5 // make a copy
+ psubusw xmm1, xmm2 // B-A to xmm1
+
+ psubusw xmm5, xmm3 // A-B to xmm5
+ psubusw xmm3, xmm4 // B-A to xmm3
+
+ por xmm0, xmm1 // abs differences
+ por xmm5, xmm3 // abs differences
+
+ paddw xmm7, xmm0 // accumulate difference...
+ paddw xmm7, xmm5 // accumulate difference...
+
+ // Fold the eight word lanes of xmm7 down to four
+ movdqa xmm0, xmm7 // low four words
+ psrldq xmm7, 8 // shift right by 8 bytes (64 bits) to bring down the high four words
+
+ paddw xmm0, xmm7 // add
+ movq QWORD PTR [DiffAcc], xmm0 ; copy back accumulated results into normal memory
+
+ }
+
+ // Accumulate the 4 word values in DiffAcc
+ DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+ }
+
+ return DiffVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetIntraError
+ *
+ * INPUTS : UINT8 *DataPtr : Pointer to input block.
+ * INT32 PixelsPerLine : Stride of input block.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Intra frame variance.
+ *
+ * FUNCTION : Calculates the variance of the block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 WmtGetIntraError ( UINT8 *DataPtr, INT32 PixelsPerLine )
+{ /* Reads an 8x8 block of bytes starting at DataPtr (row stride PixelsPerLine),
+ * accumulates the sum of the pixels (XSum) and the sum of their squares
+ * (XXSum), and returns 64*XXSum - XSum*XSum.
+ */
+ UINT32 XSum;
+ UINT32 XXSum;
+ UINT8 *DiffPtr;
+
+ // Loop expanded out for speed.
+ DiffPtr = DataPtr;
+
+ __asm
+ {
+ pxor xmm5, xmm5 ; Blank mmx6 // actually clears xmm5: word-lane accumulator for the pixel sum
+ pxor xmm6, xmm6 ; Blank mmx7 // actually clears xmm6: zero operand for the unpack instructions
+ pxor xmm7, xmm7 ; Blank mmx7 // clears xmm7: dword-lane accumulator for the sum of squares
+
+ mov eax, dword ptr [DiffPtr] ; Load base addresses
+ mov ecx, dword ptr [PixelsPerLine]
+
+ // Row 1
+ movq xmm0, QWORD ptr [eax] ; Copy eight bytes to xmm0;
+ punpcklbw xmm0, xmm6 // widen the eight bytes to words
+ paddw xmm5, xmm0 // add into the per-lane pixel sums
+ pmaddwd xmm0, xmm0 // square each word and add adjacent pairs into dwords
+ paddd xmm7, xmm0 // add into the sum-of-squares accumulator
+
+ // Row 2
+ movq xmm1, QWORD ptr [eax+ecx] ; Copy eight bytes to xmm0;
+ punpcklbw xmm1, xmm6
+ paddw xmm5, xmm1
+ pmaddwd xmm1, xmm1
+ paddd xmm7, xmm1
+
+ // Row 3
+ movq xmm2, QWORD ptr [eax+ecx * 2] ; Copy eight bytes to xmm0;
+ add eax, ecx // together with the lea below this advances eax by three rows
+ punpcklbw xmm2, xmm6
+ paddw xmm5, xmm2
+ pmaddwd xmm2, xmm2
+ paddd xmm7, xmm2
+ lea eax, [eax+ecx*2]
+
+ // Row 4
+ movq xmm0, QWORD ptr [eax] ; Copy eight bytes to xmm0;
+ punpcklbw xmm0, xmm6
+ paddw xmm5, xmm0
+ pmaddwd xmm0, xmm0
+ paddd xmm7, xmm0
+
+ // Row 5
+ movq xmm1, QWORD ptr [eax+ecx] ; Copy eight bytes to xmm0;
+ punpcklbw xmm1, xmm6
+ paddw xmm5, xmm1
+ pmaddwd xmm1, xmm1
+ paddd xmm7, xmm1
+
+ // Row 6
+ movq xmm2, QWORD ptr [eax+ecx * 2] ; Copy eight bytes to xmm0;
+ add eax, ecx // together with the lea below this advances eax by three rows
+ punpcklbw xmm2, xmm6
+ paddw xmm5, xmm2
+ pmaddwd xmm2, xmm2
+ paddd xmm7, xmm2
+ lea eax, [eax+ecx*2]
+
+ // Row 7
+ movq xmm0, QWORD ptr [eax] ; Copy eight bytes to xmm0;
+ punpcklbw xmm0, xmm6
+ paddw xmm5, xmm0
+ pmaddwd xmm0, xmm0
+ paddd xmm7, xmm0
+
+ // Row 8
+ movq xmm1, QWORD ptr [eax+ecx] ; Copy eight bytes to xmm0;
+ punpcklbw xmm1, xmm6
+ paddw xmm5, xmm1
+ pmaddwd xmm1, xmm1
+ paddd xmm7, xmm1
+ // Horizontal reduction: fold the eight word sums (xmm5) and four dword sums (xmm7) to one value each
+ movdqa xmm4, xmm5
+ punpcklwd xmm5, xmm6 // low four word sums widened to dwords
+
+ punpckhwd xmm4, xmm6 // high four word sums widened to dwords
+ movdqa xmm0, xmm7
+
+ paddw xmm5, xmm4 // four partial pixel sums
+ punpckldq xmm7, xmm6 // low two square sums, each in the low dword of a qword
+
+ punpckhdq xmm0, xmm6 // high two square sums, each in the low dword of a qword
+ movdqa xmm4, xmm5
+
+ paddd xmm0, xmm7 // two partial square sums
+ punpckldq xmm4, xmm6
+ punpckhdq xmm5, xmm6
+ paddw xmm4, xmm5 // two partial pixel sums
+
+ movdqa xmm5, xmm4
+ movdqa xmm7, xmm0
+
+ psrldq xmm5, 8;
+ psrldq xmm7, 8;
+
+ paddw xmm4, xmm5 // low dword of xmm4 = total pixel sum
+ paddd xmm0, xmm7 // low dword of xmm0 = total sum of squares
+
+ movd DWORD PTR [XXSum], xmm0
+ movd DWORD ptr [XSum], xmm4
+ }
+ // Compute population variance as mis-match metric (scaled: 64*XXSum - XSum^2, with 64 = number of pixels).
+ return ( ((XXSum<<6) - XSum*XSum) );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetInterErr
+ *
+ * INPUTS : UINT8 *NewDataPtr : Pointer to input block.
+ * INT32 PixelsPerLine : Stride of input block.
+ * UINT8 *RefDataPtr1 : Pointer to first reference block.
+ * UINT8 *RefDataPtr2 : Pointer to second reference block.
+ * INT32 RefPixelsPerLine : Stride of reference blocks.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Variance metric (64*SumSq - Sum*Sum) of the prediction error.
+ *
+ * FUNCTION : Calculates the variance of the difference between a block
+ * and the half-pixel interpolated average of two reference blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 WmtGetInterErr
+(
+ UINT8 *NewDataPtr,
+ INT32 PixelsPerLine,
+ UINT8 *RefDataPtr1,
+ UINT8 *RefDataPtr2,
+ INT32 RefPixelsPerLine
+)
+{ /* For an 8x8 block, forms the per-pixel difference between the input
+ * (stride PixelsPerLine) and the prediction (stride RefPixelsPerLine),
+ * accumulates the sum of differences (XSum) and the sum of squared
+ * differences (XXSum), and returns 64*XXSum - XSum*XSum.
+ * The prediction is RefDataPtr1 itself when both reference pointers are
+ * equal, otherwise the truncating average (a+b)>>1 of the two references.
+ */
+ UINT32 XSum;
+ UINT32 XXSum;
+ INT16 MmxXSum[4]; // four signed 16-bit partial sums of differences written by the asm
+ INT32 MmxXXSum[2]; // two 32-bit partial sums of squared differences written by the asm
+
+ // Mode of interpolation chosen based upon on the offset of the second reference pointer
+ if ( RefDataPtr1 == RefDataPtr2 )
+ {
+ __asm
+ {
+ mov eax, NewDataPtr // Load base addresses
+ pxor xmm5, xmm5 // Clear Xmm5 (sum of differences, word lanes)
+
+ mov ebx, RefDataPtr1 // Ref1
+ pxor xmm6, xmm6 // Clear Xmm6 (zero operand for unpacking)
+
+
+ mov ecx, PixelsPerLine // Input stride
+ pxor xmm7, xmm7 // Clear Xmm7 (sum of squared differences, dword lanes)
+
+ mov edx, RefPixelsPerLine // Reference stride
+
+ // Row 1 and Row 2
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes to xmm0
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes to xmm1
+
+ punpcklbw xmm0, xmm6 // unpack to higher precision
+ movq xmm3, QWORD Ptr [eax+ecx] // Copy eight Bytes to xmm3
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm4, QWORD ptr [ebx+edx] // Copy eight Bytes to xmm4
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ psubsw xmm0, xmm1 // A-B to xmm0
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ paddw xmm5, xmm0 // accumulate differences in xmm5
+
+ psubsw xmm3, xmm4 // A-B to xmm3
+ paddw xmm5, xmm3 // accumulate the differences
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm3, xmm3 // square and add adjacent pairs
+
+ lea ebx, [ebx+edx*2] // mov forward two lines
+ lea eax, [eax+ecx*2] // mov forward two lines
+
+ paddd xmm7, xmm0 // accumulate in xmm7
+ paddd xmm7, xmm3 // accumulate in xmm7
+
+ // Row 3 and Row 4
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes to xmm0
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes to xmm1
+
+ punpcklbw xmm0, xmm6 // unpack to higher precision
+ movq xmm3, QWORD Ptr [eax+ecx] // Copy eight Bytes to xmm3
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm4, QWORD ptr [ebx+edx] // Copy eight Bytes to xmm4
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ psubsw xmm0, xmm1 // A-B to xmm0
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ paddw xmm5, xmm0 // accumulate differences in xmm5
+
+ psubsw xmm3, xmm4 // A-B to xmm3
+ paddw xmm5, xmm3 // accumulate the differences
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm3, xmm3 // square and add adjacent pairs
+
+ lea ebx, [ebx+edx*2] // mov forward two lines
+ lea eax, [eax+ecx*2] // mov forward two lines
+
+ paddd xmm7, xmm0 // accumulate in xmm7
+ paddd xmm7, xmm3 // accumulate in xmm7
+
+ // Row 5 and Row6
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes to xmm0
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes to xmm1
+
+ punpcklbw xmm0, xmm6 // unpack to higher precision
+ movq xmm3, QWORD Ptr [eax+ecx] // Copy eight Bytes to xmm3
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm4, QWORD ptr [ebx+edx] // Copy eight Bytes to xmm4
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ psubsw xmm0, xmm1 // A-B to xmm0
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ paddw xmm5, xmm0 // accumulate differences in xmm5
+
+ psubsw xmm3, xmm4 // A-B to xmm3
+ paddw xmm5, xmm3 // accumulate the differences
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm3, xmm3 // square and add adjacent pairs
+
+ lea ebx, [ebx+edx*2] // mov forward two lines
+ lea eax, [eax+ecx*2] // mov forward two lines
+
+ paddd xmm7, xmm0 // accumulate in xmm7
+ paddd xmm7, xmm3 // accumulate in xmm7
+
+ // Row 7 and Row 8
+ movq xmm0, QWORD PTR [eax] // Copy eight bytes to xmm0
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes to xmm1
+
+ punpcklbw xmm0, xmm6 // unpack to higher precision
+ movq xmm3, QWORD Ptr [eax+ecx] // Copy eight Bytes to xmm3
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm4, QWORD ptr [ebx+edx] // Copy eight Bytes to xmm4
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ psubsw xmm0, xmm1 // A-B to xmm0
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ paddw xmm5, xmm0 // accumulate differences in xmm5
+
+ psubsw xmm3, xmm4 // A-B to xmm3
+ paddw xmm5, xmm3 // accumulate the differences
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm3, xmm3 // square and add adjacent pairs
+
+ paddd xmm7, xmm0 // accumulate in xmm7
+ paddd xmm7, xmm3 // accumulate in xmm7
+
+ // Fold the upper 64 bits of each accumulator onto the lower 64 bits
+ movdqa xmm0, xmm5
+ movdqa xmm1, xmm7
+
+ psrldq xmm5, 8
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm5
+ paddd xmm1, xmm7
+
+
+ movq QWORD PTR [MmxXSum], xmm0 ; copy back accumulated results into normal memory
+ movq QWORD PTR [MmxXXSum], xmm1 ; copy back accumulated results into normal memory
+
+ }
+
+ // Now accumulate the final results.
+ XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+ XXSum = MmxXXSum[0] + MmxXXSum[1];
+ }
+ // Simple half pixel reference data
+ else
+ {
+ __asm
+ {
+
+ mov eax, NewDataPtr // Load base addresses
+ pxor xmm5, xmm5 // Clear Xmm5 (sum of differences, word lanes)
+
+ mov ebx, RefDataPtr1 // Ref1
+ pxor xmm6, xmm6 // Clear Xmm6 (zero operand for unpacking)
+
+
+ mov ecx, PixelsPerLine // Input stride
+ pxor xmm7, xmm7 // Clear Xmm7 (sum of squared differences, dword lanes)
+
+ mov esi, RefDataPtr2 // Ref 2
+ mov edx, RefPixelsPerLine // Reference stride
+
+
+ // Row 1 and Row 2
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes from ref 1
+ movq xmm2, QWORD PTR [esi] // Copy eight bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Copy eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack to shorts
+ movq xmm4, QWORD PTR [esi+edx] // Copy eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add word values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add word values together
+ movq xmm0, QWORD PTR [eax] // copy eight source bytes to xmm0
+
+ psrlw xmm3, 1 // divided by two
+ movq xmm2, QWORD PTR [eax+ecx] // copy eight source bytes (next row) to xmm2
+
+ punpcklbw xmm0, xmm6 // unpack to words
+ punpcklbw xmm2, xmm6 // unpack to words
+
+ psubsw xmm0, xmm1 // the difference
+ psubsw xmm2, xmm3 // the difference
+
+ paddw xmm5, xmm0 // accumulate the difference
+ paddw xmm5, xmm2 // accumulate the difference
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm2, xmm2 // square and add adjacent pairs
+
+ lea eax, [eax+ecx*2]
+ lea ebx, [ebx+edx*2]
+
+ lea esi, [esi+edx*2]
+ paddd xmm7, xmm0 // accumulate in xmm7
+
+ paddd xmm7, xmm2 // accumulate in xmm7
+
+
+ // Row 3 and Row 4
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes from ref 1
+ movq xmm2, QWORD PTR [esi] // Copy eight bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Copy eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack to shorts
+ movq xmm4, QWORD PTR [esi+edx] // Copy eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add word values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add word values together
+ movq xmm0, QWORD PTR [eax] // copy eight source bytes to xmm0
+
+ psrlw xmm3, 1 // divided by two
+ movq xmm2, QWORD PTR [eax+ecx] // copy eight source bytes (next row) to xmm2
+
+ punpcklbw xmm0, xmm6 // unpack to words
+ punpcklbw xmm2, xmm6 // unpack to words
+
+ psubsw xmm0, xmm1 // the difference
+ psubsw xmm2, xmm3 // the difference
+
+ paddw xmm5, xmm0 // accumulate the difference
+ paddw xmm5, xmm2 // accumulate the difference
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm2, xmm2 // square and add adjacent pairs
+
+ lea eax, [eax+ecx*2]
+ lea ebx, [ebx+edx*2]
+
+ lea esi, [esi+edx*2]
+ paddd xmm7, xmm0 // accumulate in xmm7
+
+ paddd xmm7, xmm2 // accumulate in xmm7
+
+
+ // Row 5 and Row 6
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes from ref 1
+ movq xmm2, QWORD PTR [esi] // Copy eight bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Copy eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack to shorts
+ movq xmm4, QWORD PTR [esi+edx] // Copy eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add word values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add word values together
+ movq xmm0, QWORD PTR [eax] // copy eight source bytes to xmm0
+
+ psrlw xmm3, 1 // divided by two
+ movq xmm2, QWORD PTR [eax+ecx] // copy eight source bytes (next row) to xmm2
+
+ punpcklbw xmm0, xmm6 // unpack to words
+ punpcklbw xmm2, xmm6 // unpack to words
+
+ psubsw xmm0, xmm1 // the difference
+ psubsw xmm2, xmm3 // the difference
+
+ paddw xmm5, xmm0 // accumulate the difference
+ paddw xmm5, xmm2 // accumulate the difference
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm2, xmm2 // square and add adjacent pairs
+
+ lea eax, [eax+ecx*2]
+ lea ebx, [ebx+edx*2]
+
+ lea esi, [esi+edx*2]
+ paddd xmm7, xmm0 // accumulate in xmm7
+
+ paddd xmm7, xmm2 // accumulate in xmm7
+
+
+ // Row 7 and Row 8
+ movq xmm1, QWORD PTR [ebx] // Copy eight bytes from ref 1
+ movq xmm2, QWORD PTR [esi] // Copy eight bytes from ref 2
+
+ punpcklbw xmm1, xmm6 // unpack to shorts
+ movq xmm3, QWORD PTR [ebx+edx] // Copy eight bytes from ref 1, next row
+
+ punpcklbw xmm2, xmm6 // unpack to shorts
+ movq xmm4, QWORD PTR [esi+edx] // Copy eight bytes from ref 2, next row
+
+ punpcklbw xmm3, xmm6 // unpack to shorts
+ paddw xmm1, xmm2 // Add word values together.
+
+ punpcklbw xmm4, xmm6 // unpack to shorts
+ psrlw xmm1, 1 // Divide by two (shift right 1, no rounding)
+
+ paddw xmm3, xmm4 // add word values together
+ movq xmm0, QWORD PTR [eax] // copy eight source bytes to xmm0
+
+ psrlw xmm3, 1 // divided by two
+ movq xmm2, QWORD PTR [eax+ecx] // copy eight source bytes (next row) to xmm2
+
+ punpcklbw xmm0, xmm6 // unpack to words
+ punpcklbw xmm2, xmm6 // unpack to words
+
+ psubsw xmm0, xmm1 // the difference
+ psubsw xmm2, xmm3 // the difference
+
+ paddw xmm5, xmm0 // accumulate the difference
+ paddw xmm5, xmm2 // accumulate the difference
+
+ pmaddwd xmm0, xmm0 // square and add adjacent pairs
+ pmaddwd xmm2, xmm2 // square and add adjacent pairs
+
+ paddd xmm7, xmm0 // accumulate in xmm7
+ paddd xmm7, xmm2 // accumulate in xmm7
+ // Fold the upper 64 bits of each accumulator onto the lower 64 bits
+ movdqa xmm0, xmm5
+ movdqa xmm1, xmm7
+
+ psrldq xmm5, 8
+ psrldq xmm7, 8
+
+ paddw xmm0, xmm5
+ paddd xmm1, xmm7
+
+
+ movq QWORD Ptr [MmxXSum], xmm0 // copy back accumulated results into normal memory
+ movq QWORD Ptr [MmxXXSum], xmm1 // copy back accumulated results into normal memory
+
+ }
+
+ // Now accumulate the final results.
+ XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+ XXSum = MmxXXSum[0] + MmxXXSum[1];
+ }
+
+ // Compute and return population variance as mis-match metric (scaled: 64*XXSum - XSum^2).
+ return ( ((XXSum << 6) - XSum*XSum ) );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetMBFrameVertVar
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Vertical variance for the entire frame.
+ *
+ * FUNCTION : Calculates the vertical variance for a frame based
+ * upon the sum of the local 2 pixel variances within
+ * the entire frame.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+UINT32 WmtGetMBFrameVertVar ( CP_INSTANCE *cpi )
+{ /* Sums, over 16 columns, the squared differences between vertically
+ * adjacent rows of the block at SrcPtr. Each of the 7 loop passes handles
+ * rows r, r+1, r+2 (adding (r - (r+1))^2 and ((r+2) - (r+1))^2) and then
+ * advances two rows, so rows 0..14 are read and 14 row pairs are summed.
+ * NOTE(review): movdqa requires 16-byte aligned addresses, so SrcPtr and
+ * Stride are assumed to be multiples of 16 - TODO confirm.
+ */
+ UINT32 FrameError;
+ INT32 Stride = cpi->pb.Configuration.VideoFrameWidth;
+// UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+ UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+ __asm
+ {
+ mov ecx, DWORD PTR [Stride]
+ mov eax, DWORD PTR [SrcPtr]
+
+ pxor xmm7, xmm7 // xmm7 = 0, zero operand for unpacking
+ pxor xmm6, xmm6 // xmm6 = 0, accumulates squared differences (four dword lanes)
+
+ mov edx, 7 // loop count
+
+WmtGetMBFrameVertVarLoop:
+ // NOTE: the row labels in the lane comments below are swapped in places; the // notes give the actual rows.
+ movdqa xmm1, [eax] ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
+ movdqa xmm0, [eax+ecx] ; 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
+
+ movdqa xmm3, xmm0 ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f // actually a copy of row 1
+ punpcklbw xmm0, xmm7 ; xx 00 xx 01 xx 02 xx 03 xx 04 xx 05 xx 06 xx 07 // actually row 1, low eight
+
+ movdqa xmm4, xmm1 ; 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f // actually a copy of row 0
+ punpckhbw xmm3, xmm7 ; xx 08 xx 09 xx 0a xx 0b xx 0c xx 0d xx 0e xx 0f // actually row 1, high eight
+
+ movdqa xmm2, [eax+ecx*2] ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
+ punpcklbw xmm1, xmm7 ; xx 10 xx 11 xx 12 xx 13 xx 14 xx 15 xx 16 xx 17 // actually row 0, low eight
+
+ movdqa xmm5, xmm2 ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
+ punpckhbw xmm4, xmm7 ; xx 18 xx 19 xx 1a xx 1b xx 1c xx 1d xx 1e xx 1f // actually row 0, high eight
+
+ psubw xmm1, xmm0 ; difference between 0 1 low eight
+ pmaddwd xmm1, xmm1 ; SD between 0 1 low eight
+
+ punpcklbw xmm2, xmm7 ; xx 20 xx 21 xx 22 xx 23 xx 24 xx 25 xx 26 xx 27
+ psubw xmm4, xmm3 ; difference between 0 1 high four // high eight words
+
+ pmaddwd xmm4, xmm4 ; SD between 0 1 high four // high eight words
+ punpckhbw xmm5, xmm7 ; xx 28 xx 29 xx 2a xx 2b xx 2c xx 2d xx 2e xx 2f
+
+ psubw xmm2, xmm0 ; difference between 0 2 low eight // actually row 2 - row 1
+ pmaddwd xmm2, xmm2 ; SD between 0 2 low eight // actually rows 1 and 2
+
+ psubw xmm5, xmm3 ; difference between 0 2 High eight // actually row 2 - row 1
+ pmaddwd xmm5, xmm5 ; SD between 0 2 High eight // actually rows 1 and 2
+
+ paddd xmm1, xmm4
+ paddd xmm2, xmm5
+
+ paddd xmm6, xmm1 ; accumlated in xmm6
+ paddd xmm6, xmm2 ; xx xx xx s0 xx xx xx s1 xx xx xx s2 xx xx xx s3
+
+ lea eax, [eax+ecx*2] // advance two rows
+ sub edx, 1
+
+ jnz WmtGetMBFrameVertVarLoop
+ // Horizontal reduction of the four dword partial sums in xmm6
+ movdqa xmm0, xmm6 ; xx xx xx s0 xx xx xx s1 xx xx xx s2 xx xx xx s3
+ punpckldq xmm6, xmm7 ; xx xx xx xx xx xx xx s0 xx xx xx xx xx xx xx s2
+
+ punpckhdq xmm0, xmm7 ; xx xx xx xx xx xx xx s1 xx xx xx xx xx xx xx s3
+ paddd xmm0, xmm6 ; xx xx xx xx xx xx xxs01 xx xx xx xx xx xx xxs23
+
+ movdqa xmm6, xmm0 ; xx xx xx xx xx xx xxs01 xx xx xx xx xx xx xxs23
+ psrldq xmm0, 8; ; xx xx xx xx xx xx xx 23 xx xx xx xx xx xx xx xx
+
+ paddd xmm0, xmm6 // low dword = total of all four partial sums
+ movd [FrameError], xmm0
+ }
+
+ return FrameError;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtGetMBFieldVertVar
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Vertical variance for the entire frame.
+ *
+ * FUNCTION : Calculates the vertical variance for a frame based
+ * upon the sum of the local 2 pixel variances within
+ * the individual fields of the frame.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+UINT32 WmtGetMBFieldVertVar( CP_INSTANCE *cpi )
+{ /* Sums, over 16 columns, the squared differences between rows that are
+ * two lines apart (i.e. adjacent lines of the same field) in the block at
+ * SrcPtr. Each of the 7 loop passes handles row pairs (r, r+2) and
+ * (r+1, r+3) and then advances two rows, so rows 0..15 are read.
+ * NOTE(review): movdqa requires 16-byte aligned addresses, so SrcPtr and
+ * Stride are assumed to be multiples of 16 - TODO confirm.
+ */
+ UINT32 FieldError;
+ INT32 Stride = cpi->pb.Configuration.VideoFrameWidth;
+// UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+ UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+ __asm
+ {
+ mov ecx, DWORD PTR [Stride]
+ mov eax, DWORD PTR [SrcPtr]
+
+ pxor xmm7, xmm7 // xmm7 = 0, zero operand for unpacking
+ pxor xmm6, xmm6 // xmm6 = 0, accumulates squared differences (four dword lanes)
+
+ mov edx, 7 // loop count
+
+WmtGetMBFieldVertVarLoop:
+ // NOTE: the row labels in the lane comments below are swapped in places; the // notes give the actual rows.
+ movdqa xmm1, [eax] ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
+ movdqa xmm0, [eax+ecx*2] ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
+
+ movdqa xmm2, xmm0 ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f // actually a copy of row 2
+ punpcklbw xmm0, xmm7 ; xx 00 xx 01 xx 02 xx 03 xx 04 xx 05 xx 06 xx 07 // actually row 2, low eight
+
+ movdqa xmm3, xmm1 ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f // actually a copy of row 0
+ punpckhbw xmm2, xmm7 ; xx 08 xx 09 xx 0a xx 0b xx 0c xx 0d xx 0e xx 0f // actually row 2, high eight
+
+ punpcklbw xmm1, xmm7 ; xx 20 xx 21 xx 22 xx 23 xx 24 xx 25 xx 26 xx 27 // actually row 0, low eight
+ punpckhbw xmm3, xmm7 ; xx 28 xx 29 xx 2a xx 2b xx 2c xx 2d xx 2e xx 2f // actually row 0, high eight
+
+ // squared differences between row 2 and row 0
+ psubw xmm0, xmm1;
+ pmaddwd xmm0, xmm0;
+
+ psubw xmm2, xmm3;
+ pmaddwd xmm2, xmm2
+
+ paddd xmm0, xmm2;
+ lea eax, [eax + ecx] // step down one row
+
+ movdqa xmm2, [eax] // row 1 (relative to the start of this pass)
+ movdqa xmm3, [eax + ecx*2] // row 3
+
+ movdqa xmm4, xmm2 ;
+ punpcklbw xmm2, xmm7 // row 1, low eight
+
+ movdqa xmm5, xmm3
+ punpckhbw xmm4, xmm7 // row 1, high eight
+
+ punpcklbw xmm3, xmm7 // row 3, low eight
+ punpckhbw xmm5, xmm7 // row 3, high eight
+
+ psubw xmm2, xmm3 // row 1 - row 3, low eight
+ pmaddwd xmm2, xmm2
+
+ psubw xmm4, xmm5 // row 1 - row 3, high eight
+ pmaddwd xmm4, xmm4
+
+ paddd xmm2, xmm4
+ paddd xmm0, xmm2
+
+ paddd xmm6, xmm0 // accumulate both row pairs
+
+ lea eax, [eax+ecx] // step down one more row (two rows per pass in total)
+ sub edx, 1
+
+ jnz WmtGetMBFieldVertVarLoop
+ // Horizontal reduction of the four dword partial sums in xmm6
+ movdqa xmm0, xmm6 ; xx xx xx s0 xx xx xx s1 xx xx xx s2 xx xx xx s3
+ punpckldq xmm6, xmm7 ; xx xx xx xx xx xx xx s0 xx xx xx xx xx xx xx s2
+
+ punpckhdq xmm0, xmm7 ; xx xx xx xx xx xx xx s1 xx xx xx xx xx xx xx s3
+ paddd xmm0, xmm6 ; xx xx xx xx xx xx xxs01 xx xx xx xx xx xx xxs23
+
+ movdqa xmm6, xmm0 ; xx xx xx xx xx xx xxs01 xx xx xx xx xx xx xxs23
+ psrldq xmm0, 8; ; xx xx xx xx xx xx xx 23 xx xx xx xx xx xx xx xx
+
+ paddd xmm0, xmm6 // low dword = total of all four partial sums
+ movd [FieldError], xmm0
+ }
+
+ return FieldError;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_SAD_wmt
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * INT32 SrcStride : Stride of source block.
+ * UINT8 *RefPtr : Pointer to reference block.
+ * UINT32 SrcPixelsPerLine : Number of pels per line in source.
+ * INT16 *HFilter : Pointer to array of horizontal filter taps.
+ * INT16 *VFilter : Pointer to array of vertical filter taps.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD.
+ *
+ * FUNCTION : Produces a filtered fractional block prediction in 2-D
+ * using bi-linear filters and calculates the SAD.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock2dBil_SAD_wmt
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 SrcPixelsPerLine,
+ INT16 *HFilter,
+ INT16 *VFilter
+)
+{
+    /* Applies a two-tap horizontal filter (HFilter) followed by a two-tap
+     * vertical filter (VFilter) to the reference block at RefPtr (row stride
+     * SrcPixelsPerLine) and returns the 8x8 sum of absolute differences
+     * against the block at SrcPtr (row stride SrcStride).
+     *
+     * HFilter and VFilter each point at two groups of eight INT16 taps
+     * (offsets 0 and 16 bytes). They are read with aligned loads / memory
+     * operands, so they are assumed to be 16-byte aligned - TODO confirm.
+     *
+     * Nine reference rows are read: the first is filtered horizontally
+     * before the loop, and each loop pass filters the next row and blends
+     * it vertically with the previous one.
+     *
+     * The SAD is accumulated in XMM registers. The previous version used
+     * mm0/mm1/mm7 and never executed EMMS, which left the x87/MMX state
+     * dirty for the caller; results are unchanged.
+     */
+    UINT32 Error;
+
+    __asm
+    {
+        mov         eax, HFilter
+        mov         edi, SrcPtr
+
+        mov         esi, RefPtr
+        mov         ecx, 8                      // eight output rows
+
+        mov         edx, SrcPixelsPerLine
+
+        movdqa      xmm1, [eax]                 // horizontal taps applied to pixel x
+        movdqa      xmm2, [eax+16]              // horizontal taps applied to pixel x+1
+
+        mov         eax, VFilter
+        pxor        xmm0, xmm0                  // zero operand for unpack / pack
+
+        pxor        xmm7, xmm7                  // SAD accumulator
+
+        // Horizontally filter the first reference row
+        movdqu      xmm3, [esi]
+        movdqa      xmm4, xmm3
+
+        punpcklbw   xmm3, xmm0                  // pixels x..x+7 as words
+        psrldq      xmm4, 1                     // shift down one byte
+
+        pmullw      xmm3, xmm1
+        punpcklbw   xmm4, xmm0                  // pixels x+1..x+8 as words
+
+        pmullw      xmm4, xmm2
+        paddw       xmm3, xmm4
+
+        paddw       xmm3, rd                    // add rounding value
+        psraw       xmm3, FILTER_SHIFT
+
+        movdqa      xmm5, xmm3                  // xmm5 = previous horizontally filtered row
+
+        add         esi, edx                    // next reference row
+NextRow:
+        pmullw      xmm5, [eax]                 // previous row * first vertical taps
+        movdqu      xmm3, [esi]
+
+        movdqa      xmm4, xmm3
+        punpcklbw   xmm3, xmm0
+
+        psrldq      xmm4, 1
+        pmullw      xmm3, xmm1
+        punpcklbw   xmm4, xmm0
+
+        movdqa      xmm6, xmm5
+        pmullw      xmm4, xmm2
+
+        paddw       xmm3, xmm4
+        paddw       xmm3, rd
+
+        psraw       xmm3, FILTER_SHIFT          // current row, horizontally filtered
+        movdqa      xmm5, xmm3                  // keep a copy for the next pass
+
+        pmullw      xmm3, [eax+16]              // current row * second vertical taps
+        paddw       xmm6, xmm3
+
+        paddw       xmm6, rd
+        psraw       xmm6, FILTER_SHIFT
+
+        packuswb    xmm6, xmm0                  // saturate to bytes; upper 8 bytes become zero
+
+        movq        xmm3, QWORD PTR [edi]       // eight source bytes; upper 8 bytes zeroed
+        psadbw      xmm6, xmm3                  // row SAD in the low qword, zero in the high qword
+
+        paddd       xmm7, xmm6
+
+        add         esi, edx                    // next reference row
+        add         edi, SrcStride              // next source row
+
+        dec         ecx
+        jne         NextRow
+
+        movd        DWORD PTR [Error], xmm7
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_vb8_SAD_wmt
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * INT32 SrcStride : Stride of source block.
+ * UINT8 *RefPtr : Pointer to reference block.
+ * UINT32 PixelsPerLine : Number of pels per line in source.
+ * UINT32 PixelStep : Not used by this routine.
+ * INT16 *Filter : Pointer to array of filter taps.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD.
+ *
+ * FUNCTION : Produces a filtered fractional block vertically
+ * using bi-linear filters and calculates the SAD.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_vb8_SAD_wmt
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 PixelsPerLine,
+ UINT32 PixelStep,
+ INT16 *Filter
+)
+{
+    /* Applies a two-tap vertical filter to the reference block at RefPtr
+     * (row stride PixelsPerLine) and returns the 8x8 sum of absolute
+     * differences against the block at SrcPtr (row stride SrcStride).
+     *
+     * Filter points at two groups of eight INT16 taps (offsets 0 and 16
+     * bytes), read with movdqa and therefore assumed 16-byte aligned -
+     * TODO confirm. PixelStep is not used.
+     *
+     * The SAD is accumulated in XMM registers. The previous version used
+     * mm0/mm1/mm7 and never executed EMMS, which left the x87/MMX state
+     * dirty for the caller; results are unchanged.
+     */
+    UINT32 Error;
+
+    __asm
+    {
+        mov         edi, Filter
+        movdqa      xmm1, [edi]                 // taps applied to the current row
+        movdqa      xmm2, [edi + 16]            // taps applied to the row below
+
+        mov         edi, SrcPtr
+        mov         esi, RefPtr
+
+        mov         ecx, 8                      // eight output rows
+
+        mov         edx, SrcStride
+        mov         eax, PixelsPerLine
+
+        pxor        xmm7, xmm7                  // SAD accumulator
+        pxor        xmm0, xmm0                  // zero operand for unpack / pack
+
+nextrow:
+        movdqu      xmm3, [esi]                 // current reference row
+        punpcklbw   xmm3, xmm0                  // low eight pixels as words
+        pmullw      xmm3, xmm1
+
+        movdqu      xmm4, [esi + eax]           // reference row below
+        punpcklbw   xmm4, xmm0                  // low eight pixels as words
+        pmullw      xmm4, xmm2
+        paddw       xmm3, xmm4
+
+        paddw       xmm3, rd                    // add rounding value
+        psraw       xmm3, FILTER_SHIFT
+        packuswb    xmm3, xmm0                  // saturate to bytes; upper 8 bytes become zero
+
+        movq        xmm5, QWORD PTR [edi]       // eight source bytes; upper 8 bytes zeroed
+        psadbw      xmm3, xmm5                  // row SAD in the low qword, zero in the high qword
+        paddd       xmm7, xmm3
+
+        // Each pass re-reads the row the previous pass used as "row below".
+        add         esi, eax
+        add         edi, edx
+
+        dec         ecx
+        jnz         nextrow
+
+        movd        DWORD PTR [Error], xmm7
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_hb8_SAD_wmt
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * INT32 SrcStride : Stride of source block.
+ * UINT8 *RefPtr : Pointer to reference block.
+ * UINT32 SrcPixelsPerLine : Number of pels per line in source.
+ * UINT32 PixelStep : Offset to next pixel in input image (not used by this routine).
+ * INT16 *Filter : Pointer to array of filter taps.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD.
+ *
+ * FUNCTION : Produces a filtered fractional block horizontally
+ * using bi-linear filters and calculates the SAD.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_hb8_SAD_wmt
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *RefPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ INT16 *Filter
+)
+{
+    /* Applies a two-tap horizontal filter to the reference block at RefPtr
+     * (row stride SrcPixelsPerLine) and returns the 8x8 sum of absolute
+     * differences against the block at SrcPtr (row stride SrcStride).
+     *
+     * Filter points at two groups of eight INT16 taps (offsets 0 and 16
+     * bytes), read with movdqa and therefore assumed 16-byte aligned -
+     * TODO confirm. PixelStep is not used; the neighbouring pixel is always
+     * taken one byte to the right.
+     *
+     * The SAD is accumulated in XMM registers. The previous version used
+     * mm0/mm1/mm7 and never executed EMMS, which left the x87/MMX state
+     * dirty for the caller; results are unchanged.
+     */
+    UINT32 Error = 0;
+
+    __asm
+    {
+        mov         edi, Filter
+        movdqa      xmm1, [edi]                 // taps applied to pixel x
+        movdqa      xmm2, [edi + 16]            // taps applied to pixel x+1
+
+        mov         edi, SrcPtr
+        mov         esi, RefPtr
+
+        pxor        xmm0, xmm0                  // zero operand for unpack / pack
+        pxor        xmm7, xmm7                  // SAD accumulator
+
+        mov         ecx, 8                      // eight output rows
+
+        mov         edx, SrcStride
+        mov         eax, SrcPixelsPerLine
+
+nextrow:
+        movdqu      xmm3, [esi]                 // sixteen reference bytes
+        movdqa      xmm5, xmm3                  // copy for the shifted tap
+
+        punpcklbw   xmm3, xmm0                  // pixels x..x+7 as words
+        pmullw      xmm3, xmm1
+
+        psrldq      xmm5, 1                     // shift down one byte
+        punpcklbw   xmm5, xmm0                  // pixels x+1..x+8 as words
+
+        pmullw      xmm5, xmm2
+        paddw       xmm3, xmm5
+
+        paddw       xmm3, rd                    // add rounding value
+        psraw       xmm3, FILTER_SHIFT
+
+        packuswb    xmm3, xmm0                  // saturate to bytes; upper 8 bytes become zero
+
+        movq        xmm4, QWORD PTR [edi]       // eight source bytes; upper 8 bytes zeroed
+        psadbw      xmm3, xmm4                  // row SAD in the low qword, zero in the high qword
+        paddd       xmm7, xmm3
+
+        add         esi, eax                    // next reference row
+        add         edi, edx                    // next source row
+
+        dec         ecx
+        jnz         nextrow
+
+        movd        DWORD PTR [Error], xmm7
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FiltBlockBilGetSad_wmt
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * INT32 SrcStride : Stride of source block.
+ * UINT8 *ReconPtr1 : Pointer to first reference block.
+ * UINT8 *ReconPtr2 : Pointer to second reference block.
+ * INT32 PixelsPerLine : Stride of the reference blocks.
+ * INT32 ModX : Fraction part of MV x-component.
+ * INT32 ModY : Fraction part of MV y-component.
+ * UINT32 BestSoFar : Best error found so far.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: SAD, or BestSoFar when the two reference pointers are
+ * not 1, PixelsPerLine or PixelsPerLine +/- 1 bytes apart.
+ *
+ * FUNCTION : Produces a filtered fractional pel block using
+ * bi-linear filters and calculates the SAD.
+ *
+ * SPECIAL NOTE : The difference between the last two rows in a macro-
+ * block are not accounted for!
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_wmt
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ INT32 PixelsPerLine,
+ INT32 ModX,
+ INT32 ModY,
+ UINT32 BestSoFar
+)
+{
+ INT32 diff = (INT32)(ReconPtr2 - ReconPtr1);
+
+ // Result for a pointer pair that matches none of the cases below (Error used to be returned uninitialised then).
+ // NOTE(review): BestSoFar is chosen so such a candidate never looks better than the current best -- confirm against the caller.
+ UINT32 Error = BestSoFar;
+
+ // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+ // This works out to be what we want... despite the pointer swapping that goes on below.
+ // For example... if the X component of the vector is a +ve ModX = X%8.
+ // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+ if ( diff < 0 )
+ { // swap pointers so ReconPtr1 is the smaller (above, left, above-right or above-left)
+ UINT8 *temp = ReconPtr1;
+ ReconPtr1 = ReconPtr2;
+ ReconPtr2 = temp;
+ diff = -diff;
+ }
+
+ if ( diff == 1 ) // Fractional pixel in horizontal only
+ Error = FilterBlock1d_hb8_SAD_wmt(SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, 1, BilinearFilters_wmt[ModX] );
+ else if ( diff == PixelsPerLine ) // Fractional pixel in vertical only
+ Error = FilterBlock1d_vb8_SAD_wmt(SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, PixelsPerLine, BilinearFilters_wmt[ModY]);
+ else if ( diff == PixelsPerLine - 1 ) // ReconPtr1 is Top right
+ Error = FilterBlock2dBil_SAD_wmt( SrcPtr, SrcStride, ReconPtr1-1, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+ else if ( diff == PixelsPerLine + 1 ) // ReconPtr1 is Top left
+ Error = FilterBlock2dBil_SAD_wmt( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+
+ return Error;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtComputeBlockReconError
+ *
+ * INPUTS : CP_INSTANCE *cpi : Pointer to encoder instance.
+ * UINT32 bp : Index into cpi->pb.mbi.blockDxInfo[] of the block.
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Error measure for the block (Scaled by 2^6)
+ *
+ * FUNCTION : Sums the squared source/reconstruction pixel differences over
+ * 8 rows of 8 pixels and adds twice the square of the largest absolute difference.
+ * SPECIAL NOTES : The variance value returned is scaled by a factor
+ * 2^6 (i.e.64).
+ *
+ ****************************************************************************/
+
+UINT32 WmtComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp )
+{
+ INT32 XXSum; // sum of squared differences, written by the asm block
+ INT32 MaxXXDiff; // largest absolute pixel difference, written by the asm block
+
+ UINT8 *NewDataPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[bp].Source];
+ UINT8 *RefDataPtr1 = &cpi->pb.ThisFrameRecon[cpi->pb.mbi.blockDxInfo[bp].thisRecon];
+
+ INT32 SourceStride = cpi->pb.mbi.blockDxInfo[bp].CurrentSourceStride;
+ INT32 ReconStride = cpi->pb.mbi.blockDxInfo[bp].CurrentReconStride;
+ __asm
+ {
+
+ mov esi, NewDataPtr
+ mov edi, RefDataPtr1
+
+ mov eax, SourceStride
+ mov edx, ReconStride
+
+ lea ecx, [esi+eax*8] // loop end marker: the source pointer after 8 rows
+
+ pxor xmm7, xmm7 // zero, used when unpacking
+ pxor xmm6, xmm6 // running sums of squared differences (four dwords)
+
+ pxor xmm5, xmm5 // running maximum absolute difference
+
+WmtReconErrorLoop:
+ movq xmm0, QWORD ptr [esi] // s0 s1 s2 s3 s4 s5 s6 s7 xx xx xx xx xx xx xx xx
+ movq xmm1, QWORD ptr [edi] // r0 r1 r2 r3 r4 r5 r6 r7 xx xx xx xx xx xx xx xx
+
+ movdqa xmm2, xmm0 // make a copy
+ movdqa xmm3, xmm1 // make a copy
+
+ psubusb xmm0, xmm1 // saturating s - r (0 where r > s)
+ psubusb xmm3, xmm2 // saturating r - s (0 where s > r)
+
+ por xmm0, xmm3 // abs( d0 d1 d2 d3 d4 d5 d6 d7 xx xx xx xx xx xx xx xx )
+ movdqa xmm2, xmm0 // make a copy
+
+ punpcklbw xmm0, xmm7 // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)
+ punpcklbw xmm2, xmm7 // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)
+
+ movdqa xmm1, xmm2 // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)
+ pmaddwd xmm0, xmm0 // d0^2+d1^2  d2^2+d3^2  d4^2+d5^2  d6^2+d7^2 as four dwords
+
+ punpcklwd xmm1, xmm7 // xxxx xxd0 xxxx xxd1 xxxx xxd2 xxxx xxd3
+ punpckhwd xmm2, xmm7 // xxxx xxd4 xxxx xxd5 xxxx xxd6 xxxx xxd7
+
+ pmaxsw xmm1, xmm2 // xxxx xxM0 xxxx xxM1 xxxx xxM2 xxxx xxM3
+ movdqa xmm2, xmm1 // xxxx xxM0 xxxx xxM1 xxxx xxM2 xxxx xxM3
+
+ punpckldq xmm1, xmm7 // xxxx xxxx xxxx xxM0 xxxx xxxx xxxx xxM1
+ punpckhdq xmm2, xmm7 // xxxx xxxx xxxx xxM2 xxxx xxxx xxxx xxM3
+
+ pmaxsw xmm1, xmm2 // xxxx xxxx xxxx max0 xxxx xxxx xxxx max1
+ movdqa xmm2, xmm1 // xxxx xxxx xxxx max0 xxxx xxxx xxxx max1
+
+ psrldq xmm1, 8 // max1 moved down to the first (low) position, upper half cleared
+ pmaxsw xmm1, xmm2 // first (low) element = max(max0, max1) = row maximum
+
+ pmaxsw xmm5, xmm1 // fold the row maximum into the running maximum
+ paddd xmm6, xmm0 // add this row's squared differences to the running sums
+
+ lea esi, [esi+eax]
+ lea edi, [edi+edx] //
+
+ cmp ecx, esi
+ jne WmtReconErrorLoop
+
+ movd MaxXXDiff, xmm5 // get the max
+
+ movdqa xmm4, xmm6 // xxxx xxs0 xxxx xxs1 xxxx xxs2 xxxx xxs3
+ psrldq xmm4, 8 // xxxx xxs2 xxxx xxs3 xxxx xxxx xxxx xxxx
+
+ paddd xmm6, xmm4 // xxxx s0s2 xxxx s1s3 xxxxxxxxxxxxxxxxxxx
+ movdqa xmm4, xmm6 // xxxx s0s2 xxxx s1s3 xxxxxxxxxxxxxxxxxxx
+
+ psrldq xmm4, 4 // xxxx s1s3 xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ paddd xmm6, xmm4 // 0123
+
+ movd XXSum, xmm6
+
+
+ }
+ return (UINT32)(XXSum + (2 * MaxXXDiff*MaxXXDiff)) << 6;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm
new file mode 100644
index 00000000..39fa7c3b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm
@@ -0,0 +1,371 @@
+;
+; **-MmxEncodeMath.asm
+;
+; MMX versions of SUB8, SUB8_AV2, SUB8 with fixed subtract of 128
+;
+;******************************************************************
+; Revision History
+;
+; 1.01 JBB 23-Mar-01 Fixed frame updating for preprocessor
+; 1.00 YWX dd-mmm-yy Configuration baseline from Jong Chen's code
+;
+;******************************************************************
+
+
+
+ .586
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .MMX
+
+; macros
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+ ALIGN 32
+
+; local constants go here
+OneTwentyEight QWORD 00080008000800080h
+
+@CurSeg ENDS
+
+
+;
+; external variables
+;
+
+; external variables go here
+
+
+
+; structures: stack layouts addressed as (xxxParams PTR [esp]) once a routine has pushed its six registers
+SUB8Params STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ FiltPtr dd ? ;first stack argument
+ ReconPtr dd ?
+ DctInputPtr dd ?
+ old_ptr1 dd ? ;never loaded (the mov in MmxSUB8 is commented out)
+ new_ptr1 dd ? ;never loaded (the mov in MmxSUB8 is commented out)
+ PixelsPerLine dd ?
+ ReconPixelsPerLine dd ?
+SUB8Params ENDS
+
+SUB8_128Params STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ FiltPtr2 dd ? ;first stack argument
+ DctInputPtr2 dd ?
+ old_ptr12 dd ? ;never loaded (the mov in MmxSUB8_128 is commented out)
+ new_ptr12 dd ? ;never loaded (the mov in MmxSUB8_128 is commented out)
+ PixelsPerLine2 dd ?
+SUB8_128Params ENDS
+
+SUB8AV2Params STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ FiltPtr dd ? ;first stack argument
+ ReconPtr1 dd ?
+ ReconPtr2 dd ?
+ DctInputPtr dd ?
+ old_ptr1 dd ? ;never loaded (the mov in MmxSUB8AV2 is commented out)
+ new_ptr1 dd ? ;never loaded (the mov in MmxSUB8AV2 is commented out)
+ PixelsPerLine dd ?
+ ReconPixelsPerLine dd ? ;also re-read from the stack on every row by SUB8AV2Calc8Bytes
+SUB8AV2Params ENDS
+
+; macro functions
+; **-SUB8Calc8Bytes: one row; stores 8 INT16 at [ecx+Index] = bytes at [eax] - bytes at [ebx]
+;    expects mm7 = 0; uses mm0-mm3; afterwards eax += edi and ebx += ebp
+SUB8Calc8Bytes MACRO Index:REQ
+ movq mm0,[eax] ; mm0 = 8 bytes at FiltPtr
+ movq mm1,[ebx] ; mm1 = 8 bytes at ReconPtr
+ movq mm2,mm0 ; dup to prepare for up conversion
+ movq mm3,mm1 ; dup to prepare for up conversion
+
+ ; convert from UINT8 to INT16
+; movq mm6,[esi]
+ punpcklbw mm0,mm7 ; mm0 = INT16(FiltPtr), pixels 0-3
+ punpcklbw mm1,mm7 ; mm1 = INT16(ReconPtr), pixels 0-3
+ punpckhbw mm2,mm7 ; mm2 = INT16(FiltPtr), pixels 4-7
+ punpckhbw mm3,mm7 ; mm3 = INT16(ReconPtr), pixels 4-7
+
+ ; start calculation
+ psubw mm0,mm1 ; mm0 = FiltPtr - ReconPtr
+ psubw mm2,mm3 ; mm2 = FiltPtr - ReconPtr
+
+ ; Update the screen canvas in one step
+ ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH );
+; movq [edx],mm6
+; add edx,edi
+; add esi,edi
+
+ movq [ecx+Index],mm0 ; write answer out (pixels 0-3)
+ movq [ecx+Index+8],mm2 ; write answer out (pixels 4-7)
+
+ ; Increment pointers
+ add eax,edi ; next FiltPtr row
+ add ebx,ebp ; next ReconPtr row
+ENDM
+
+; **-SUB8_128Calc8Bytes
+;    one row; stores 8 INT16 at [ecx+Index] = bytes at [eax] - mm1 (the caller loads OneTwentyEight into mm1)
+;    expects mm7 = 0; uses mm0 and mm2; afterwards eax += edi
+SUB8_128Calc8Bytes MACRO Index:REQ
+ movq mm0,[eax] ; mm0 = 8 bytes at FiltPtr
+ movq mm2,mm0 ; dup to prepare for up conversion
+
+ ; convert from UINT8 to INT16
+; movq mm6,[esi]
+ punpcklbw mm0,mm7 ; mm0 = INT16(FiltPtr), pixels 0-3
+ punpckhbw mm2,mm7 ; mm2 = INT16(FiltPtr), pixels 4-7
+
+ ; start calculation
+ psubw mm0,mm1 ; mm0 = FiltPtr - 128
+ psubw mm2,mm1 ; mm2 = FiltPtr - 128
+
+ ; Update the screen canvas in one step
+ ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH );
+; movq [edx],mm6
+; add edx,edi
+; add esi,edi
+
+ movq [ecx+Index],mm0 ; write answer out (pixels 0-3)
+ movq [ecx+Index+8],mm2 ; write answer out (pixels 4-7)
+
+ ; Increment pointers
+ add eax,edi ; next FiltPtr row
+ENDM
+
+; **-SUB8AV2Calc8Bytes
+;    one row; stores 8 INT16 at [ecx+Index] = bytes at [eax] - ((bytes at [ebx] + bytes at [ebp]) >> 1)
+;    expects mm7 = 0 and esp unchanged since the six pushes; uses mm0-mm5
+SUB8AV2Calc8Bytes MACRO Index:REQ
+ movq mm0,[eax] ; mm0 = 8 bytes at FiltPtr
+ movq mm1,[ebx] ; mm1 = 8 bytes at ReconPtr1
+ movq mm4,[ebp] ; mm4 = 8 bytes at ReconPtr2
+ movq mm2,mm0 ; dup to prepare for up conversion
+ movq mm3,mm1 ; dup to prepare for up conversion
+ movq mm5,mm4 ; dup to prepare for up conversion
+
+ ; convert from UINT8 to INT16
+; movq mm6,[esi]
+ punpcklbw mm0,mm7 ; mm0 = INT16(FiltPtr), pixels 0-3
+ punpcklbw mm1,mm7 ; mm1 = INT16(ReconPtr1), pixels 0-3
+ punpcklbw mm4,mm7 ; mm4 = INT16(ReconPtr2), pixels 0-3
+
+ punpckhbw mm2,mm7 ; mm2 = INT16(FiltPtr), pixels 4-7
+ punpckhbw mm3,mm7 ; mm3 = INT16(ReconPtr1), pixels 4-7
+ punpckhbw mm5,mm7 ; mm5 = INT16(ReconPtr2), pixels 4-7
+
+ ; average ReconPtr1 and ReconPtr2 (the shift truncates; no rounding term is added)
+ paddw mm1,mm4 ; mm1 = ReconPtr1 + ReconPtr2
+ paddw mm3,mm5 ; mm3 = ReconPtr1 + ReconPtr2
+ psrlw mm1,1 ; mm1 = (ReconPtr1 + ReconPtr2) / 2
+ psrlw mm3,1 ; mm3 = (ReconPtr1 + ReconPtr2) / 2
+
+ psubw mm0,mm1 ; mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2)
+ psubw mm2,mm3 ; mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2)
+
+ ; Update the screen canvas in one step
+ ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH );
+; movq [edx],mm6
+; add edx,edi
+; add esi,edi
+
+ movq [ecx+Index],mm0 ; write answer out (pixels 0-3)
+ movq [ecx+Index+8],mm2 ; write answer out (pixels 4-7)
+
+ ; Increment pointers (no register is left for the recon stride, so it is read from the stack)
+ add eax,edi
+ add ebx,(SUB8AV2Params PTR [esp]).ReconPixelsPerLine
+ add ebp,(SUB8AV2Params PTR [esp]).ReconPixelsPerLine
+ENDM
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+
+;
+; **-MmxSUB8
+; Writes DctInputPtr[0..63] = FiltPtr pixel - ReconPtr pixel for 8 rows of 8 pixels.
+; Input:
+; FiltPtr - first block; rows are PixelsPerLine bytes apart
+; ReconPtr - second block; rows are ReconPixelsPerLine bytes apart
+; DctInputPtr - receives the 64 INT16 differences
+; old_ptr1 - not read
+; new_ptr1 - not read
+; PixelsPerLine, ReconPixelsPerLine - row steps for FiltPtr and ReconPtr
+; Output:
+; none in registers; eax is not preserved and no emms is issued
+;------------------------------------------------
+; void MmxSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+; INT32 PixelsPerLine, INT32 ReconPixelsPerLine )
+;
+ .CODE
+
+NAME MmxSUB8
+
+PUBLIC MmxSUB8_
+PUBLIC _MmxSUB8
+MmxSUB8_:
+_MmxSUB8:
+
+ push ecx ; six pushes, matching the "6 pushed regs" slot of SUB8Params
+ push ebx
+ push edx
+ push esi
+ push edi
+ push ebp
+
+
+ mov eax,(SUB8Params PTR [esp]).FiltPtr
+ mov ebx,(SUB8Params PTR [esp]).ReconPtr
+ mov ecx,(SUB8Params PTR [esp]).DctInputPtr
+; mov edx,(SUB8Params PTR [esp]).old_ptr1
+; mov esi,(SUB8Params PTR [esp]).new_ptr1
+ mov edi,(SUB8Params PTR [esp]).PixelsPerLine
+ mov ebp,(SUB8Params PTR [esp]).ReconPixelsPerLine
+
+ pxor mm7,mm7 ; clear mm7 for up precision conversion
+
+ LoopCtr = 0
+WHILE LoopCtr LT 128 ; assembly-time unroll: 8 rows, 16 output bytes each
+ SUB8Calc8Bytes <LoopCtr>
+ LoopCtr = LoopCtr + 16
+ENDM
+
+theExit1:
+ pop ebp
+ pop edi
+ pop esi
+ pop edx
+ pop ebx
+ pop ecx
+
+
+ ret
+
+;
+; **-MmxSUB8_128
+; Writes DctInputPtr[0..63] = FiltPtr pixel - 128 for 8 rows of 8 pixels.
+; Input:
+; FiltPtr - source block; rows are PixelsPerLine bytes apart
+; DctInputPtr - receives the 64 INT16 results
+; old_ptr1 - not read
+; new_ptr1 - not read
+; PixelsPerLine - row step for FiltPtr
+;
+; Output:
+; none in registers; eax is not preserved and no emms is issued
+;------------------------------------------------
+; void MmxSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+; INT32 PixelsPerLine )
+;
+ .CODE
+
+NAME MmxSUB8_128
+
+PUBLIC MmxSUB8_128_
+PUBLIC _MmxSUB8_128
+MmxSUB8_128_:
+_MmxSUB8_128:
+
+ push ecx ; six pushes, matching the "6 pushed regs" slot of SUB8_128Params
+ push ebx
+ push edx
+ push esi
+ push edi
+ push ebp
+
+
+ mov eax,(SUB8_128Params PTR [esp]).FiltPtr2
+ mov ecx,(SUB8_128Params PTR [esp]).DctInputPtr2
+; mov edx,(SUB8_128Params PTR [esp]).old_ptr12
+; mov esi,(SUB8_128Params PTR [esp]).new_ptr12
+ mov edi,(SUB8_128Params PTR [esp]).PixelsPerLine2
+
+ movq mm1,OneTwentyEight ; load value to subtract with
+ pxor mm7,mm7 ; clear mm7 for up precision conversion
+
+ LoopCtr = 0
+WHILE LoopCtr LT 128 ; assembly-time unroll: 8 rows, 16 output bytes each
+ SUB8_128Calc8Bytes <LoopCtr>
+ LoopCtr = LoopCtr + 16
+ENDM
+
+theExit3:
+ pop ebp
+ pop edi
+ pop esi
+ pop edx
+ pop ebx
+ pop ecx
+
+
+ ret
+
+;
+; **-MmxSUB8AV2
+; Writes DctInputPtr[0..63] = FiltPtr pixel - ((ReconPtr1 pixel + ReconPtr2 pixel) >> 1) for 8 rows of 8 pixels.
+; Input:
+; FiltPtr - source block; rows are PixelsPerLine bytes apart
+; ReconPtr1, ReconPtr2 - the two blocks that are averaged; rows are ReconPixelsPerLine bytes apart
+; DctInputPtr - receives the 64 INT16 results
+; old_ptr1 - not read
+; new_ptr1 - not read
+;
+; Output:
+; none in registers; eax is not preserved and no emms is issued
+;------------------------------------------------
+; void MmxSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+; INT32 PixelsPerLine, INT32 ReconPixelsPerLine )
+;
+ .CODE
+
+NAME MmxSUB8AV2
+
+PUBLIC MmxSUB8AV2_
+PUBLIC _MmxSUB8AV2
+MmxSUB8AV2_:
+_MmxSUB8AV2:
+
+ push ecx ; six pushes, matching the "6 pushed regs" slot of SUB8AV2Params
+ push ebx
+ push edx
+ push esi
+ push edi
+ push ebp
+
+
+ mov eax,(SUB8AV2Params PTR [esp]).FiltPtr
+ mov ebx,(SUB8AV2Params PTR [esp]).ReconPtr1
+ mov ecx,(SUB8AV2Params PTR [esp]).DctInputPtr
+; mov edx,(SUB8AV2Params PTR [esp]).old_ptr1
+; mov esi,(SUB8AV2Params PTR [esp]).new_ptr1
+ mov edi,(SUB8AV2Params PTR [esp]).PixelsPerLine
+ mov ebp,(SUB8AV2Params PTR [esp]).ReconPtr2 ; ebp carries the second recon pointer; the recon stride is read from the stack inside the macro
+
+ pxor mm7,mm7 ; clear mm7 for up precision conversion
+
+ LoopCtr = 0
+WHILE LoopCtr LT 128 ; assembly-time unroll: 8 rows, 16 output bytes each
+ SUB8AV2Calc8Bytes <LoopCtr>
+ LoopCtr = LoopCtr + 16
+ENDM
+
+theExit2:
+ pop ebp
+ pop edi
+ pop esi
+ pop edx
+ pop ebx
+ pop ecx
+
+
+ ret
+
+;************************************************
+ END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c
new file mode 100644
index 00000000..ef0917d0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c
@@ -0,0 +1,255 @@
+/****************************************************************************
+ *
+ * Module Title : WmtTransform.c
+ *
+ * Description : Subtraction functions.
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#ifdef _MSC_VER
+#pragma warning(disable:4799)
+#pragma warning(disable:4731)
+#endif
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+_declspec(align(16)) static UINT16 Eight128s[8] = { 128, 128, 128, 128, 128, 128, 128, 128 }; // subtracted from every pixel by WmtSUB8_128; 16-byte aligned because it is loaded with movdqa
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtSUB8
+ *
+ * INPUTS : UINT8 *FiltPtr : First block (minuend); 8 bytes are read per row.
+ * UINT8 *ReconPtr : Second block (subtrahend); 8 bytes are read per row.
+ * INT16 *DctInputPtr : Receives the 64 differences; stored with movdqa, so must be 16-byte aligned.
+ * UINT8 *old_ptr1 : Unused.
+ * UINT8 *new_ptr1 : Unused.
+ * INT32 SourceStride : Byte step between rows of FiltPtr.
+ * INT32 ReconStride : Byte step between rows of ReconPtr.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Subtracts 2 8x8 blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void WmtSUB8
+(
+ UINT8 *FiltPtr,
+ UINT8 *ReconPtr,
+ INT16 *DctInputPtr,
+ UINT8 *old_ptr1,
+ UINT8 *new_ptr1,
+ INT32 SourceStride,
+ INT32 ReconStride
+)
+{
+ (void) old_ptr1;
+ (void) new_ptr1;
+
+ _asm
+ {
+ mov eax, [FiltPtr]
+ mov ebx, [ReconPtr]
+
+ mov ecx, [DctInputPtr]
+ mov edi, [SourceStride]
+
+ mov esi, [ReconStride]
+ pxor xmm7, xmm7 // zero, used to widen bytes to words
+
+ lea edx, [ecx+128] // end of output: 8 rows * 16 bytes
+
+WmtSub8Loop:
+
+ movq xmm0, QWORD ptr [eax] // 8 pixels from FiltPtr
+ movq xmm1, QWORD ptr [ebx] // 8 pixels from ReconPtr
+
+ punpcklbw xmm0, xmm7 // widen both rows to 8 words
+ punpcklbw xmm1, xmm7
+
+ psubw xmm0, xmm1 // FiltPtr - ReconPtr
+ lea ecx, [ecx+16] // advance the output pointer early; the store below uses ecx-16
+
+ cmp ecx, edx // carry stays set while more rows remain; the lea/movdqa below do not change the flags
+
+ lea eax, [eax+edi]
+ movdqa [ecx-16], xmm0
+
+ lea ebx, [ebx+esi]
+ jc WmtSub8Loop
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtSUB8_128
+ *
+ * INPUTS : UINT8 *FiltPtr : Source block; 8 bytes are read per row.
+ * INT16 *DctInputPtr : Receives the 64 results; stored with movdqa, so must be 16-byte aligned.
+ * UINT8 *old_ptr1 : Unused.
+ * UINT8 *new_ptr1 : Unused.
+ * INT32 SourceStride : Byte step between rows of FiltPtr.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Subtracts 128 from each pixel in an 8x8 block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void WmtSUB8_128
+(
+ UINT8 *FiltPtr,
+ INT16 *DctInputPtr,
+ UINT8 *old_ptr1,
+ UINT8 *new_ptr1,
+ INT32 SourceStride
+)
+{
+ (void) old_ptr1;
+ (void) new_ptr1;
+
+ _asm
+ {
+ mov eax, [FiltPtr]
+ mov edx, [DctInputPtr]
+
+ mov ecx, [SourceStride]
+ lea edi, [edx + 128] // end of output: 8 rows * 16 bytes
+
+ pxor xmm7, xmm7 // zero, used to widen bytes to words
+ movdqa xmm1, [Eight128s] // eight words of 128
+
+wmtsub8_128loop:
+
+ movq xmm0, QWORD PTR [eax] // 8 pixels from FiltPtr
+ punpcklbw xmm0, xmm7 // widen to 8 words
+
+ psubw xmm0, xmm1; // pixel - 128
+ lea edx, [edx+16] // advance the output pointer early; the store below uses edx-16
+
+ cmp edx, edi // carry stays set while more rows remain; the movdqa/lea below do not change the flags
+ movdqa [edx-16], xmm0
+
+ lea eax, [eax+ecx]
+ jc wmtsub8_128loop
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Sub8AV2
+ *
+ * INPUTS :
+ *
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Subtracts 2 8x8 blocks
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+/****************************************************************************
+ *
+ * ROUTINE : WmtSUB8AV2
+ *
+ * INPUTS : UINT8 *FiltPtr : Source block; 8 bytes are read per row.
+ * UINT8 *ReconPtr1 : First block of the pair that is averaged.
+ * UINT8 *ReconPtr2 : Second block of the pair that is averaged.
+ * INT16 *DctInputPtr : Receives the 64 results; stored with movdqa, so must be 16-byte aligned.
+ * UINT8 *old_ptr1 : Unused.
+ * UINT8 *new_ptr1 : Unused.
+ * INT32 SourceStride : Byte step between rows of FiltPtr.
+ * INT32 ReconStride : Byte step between rows of ReconPtr1 and ReconPtr2.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : For 8 rows of 8 pixels writes
+ * FiltPtr pixel - ((ReconPtr1 pixel + ReconPtr2 pixel) >> 1).
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void WmtSUB8AV2
+(
+ UINT8 *FiltPtr,
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ INT16 *DctInputPtr,
+ UINT8 *old_ptr1,
+ UINT8 *new_ptr1,
+ INT32 SourceStride,
+ INT32 ReconStride
+)
+{
+ (void) old_ptr1;
+ (void) new_ptr1;
+
+ _asm
+ {
+ push ebp // ebp is borrowed as the loop limit below and restored by the pop at the end
+
+ mov esi, [FiltPtr]
+ mov edi, [DctInputPtr]
+
+ mov eax, [ReconPtr1]
+ mov ebx, [ReconPtr2]
+
+ mov ecx, [SourceStride]
+ mov edx, [ReconStride]
+
+ lea ebp, [edi+128] // end of output: 8 rows * 16 bytes (every argument is already in a register)
+ pxor xmm7, xmm7 // zero, used to widen bytes to words
+
+WmtSUB8AV2loop:
+
+ movq xmm0, QWORD PTR [eax] // 8 pixels from ReconPtr1
+ movq xmm1, QWORD PTR [ebx] // 8 pixels from ReconPtr2
+
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm1, xmm7
+
+ paddw xmm0, xmm1 // ReconPtr1 + ReconPtr2
+ movq xmm2, QWORD PTR [esi] // 8 pixels from FiltPtr
+ punpcklbw xmm2, xmm7 // widen the source pixels to words too; psubw below would otherwise subtract from packed byte pairs
+ psraw xmm0, 1 // (ReconPtr1 + ReconPtr2) >> 1
+ psubw xmm2, xmm0 // FiltPtr - average
+
+ lea edi, [edi+16] // advance the output pointer early; the store below uses edi-16
+ cmp edi, ebp // carry stays set while more rows remain; the movdqa/lea below do not change the flags
+
+ movdqa [edi-16], xmm2
+ lea eax, [eax+edx]
+
+ lea ebx, [ebx+edx]
+ lea esi, [ecx+esi]
+
+ jc WmtSUB8AV2loop
+
+ pop ebp
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm
new file mode 100644
index 00000000..4d56ad37
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm
@@ -0,0 +1,308 @@
+; structures: stack layout seen from esp once all six registers have been pushed
+XmmGetErrorParams STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ NewDataPtr dd ? ;first stack argument
+ PixelsPerLine dd ? ;row step for NewDataPtr
+ ReconPtr1 dd ? ;called RefDataPtr1 in the prototype comment
+ ReconPixelsPerLine dd ? ;row step for ReconPtr1
+ XSum dd ? ;pointer that receives the sum of differences
+ XXSum dd ? ;pointer that receives the sum of squared differences
+XmmGetErrorParams ENDS
+
+
+
+ .686P
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .XMM
+
+; macros
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+
+ .CODE
+
+NAME XmmGetError
+
+PUBLIC XmmGetError_
+PUBLIC _XmmGetError
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+;------------------------------------------------
+; XmmGetError(UINT8* NewDataPtr,
+; UINT32 PixelsPerLine,
+; UINT8* RefDataPtr1,
+; UINT32 RefPixelsPerLine,
+; INT32* XSum,
+; INT32* XXSum)
+; Over 8 rows of 8 pixels: *XSum = sum(New - Ref), *XXSum = sum((New - Ref)^2). eax is left holding *XXSum.
+XmmGetError_:
+_XmmGetError:
+ ; The struct assumes six pushed registers, so the base is esp-8 after four pushes and esp-4 after five.
+ push ecx
+ push ebx
+ push edx
+ push esi
+
+ mov ecx,(XmmGetErrorParams PTR [esp-8]).PixelsPerLine
+ mov eax,(XmmGetErrorParams PTR [esp-8]).NewDataPtr
+
+ push edi
+
+ mov ebx,(XmmGetErrorParams PTR [esp-4]).ReconPtr1
+ mov edx,(XmmGetErrorParams PTR [esp-4]).ReconPixelsPerLine
+
+ push ebp
+
+ mov esi,(XmmGetErrorParams PTR [esp]).XSum
+ mov edi,(XmmGetErrorParams PTR [esp]).XXSum
+
+ prefetcht0 [eax+ecx]
+ prefetcht0 [ebx+edx]
+
+ pxor mm5, mm5 ; mm5 = 0: accumulates the differences (four words)
+ pxor mm6, mm6 ; mm6 = 0: zero used for unpacking
+
+ ;Row 1
+
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm0, [eax] ; Copy eight bytes to mm0
+
+ pxor mm7, mm7 ; mm7 = 0: accumulates the squared differences (two dwords)
+
+ prefetcht0 [eax+ecx*2]
+ prefetcht0 [ebx+edx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1 (reference row for the next step)
+ prefetcht0 [ebx+edx*2]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+
+ ; Row 2
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ prefetcht0 [ebx+edx*2]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 3
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ prefetcht0 [ebx+edx*2]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 4
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ prefetcht0 [ebx+edx*2]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 5
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx, edx ; Inc pointer into ref data
+ add eax, ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ prefetcht0 [ebx+edx*2]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 6
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx*2]
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ prefetcht0 [ebx+edx]
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 7
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ prefetcht0 [eax+ecx]
+
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ add ebx,edx ; Inc pointer into ref data
+ add eax,ecx ; Inc pointer into the new data
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+ ; Row 8
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm2, mm0 ; Take copies
+ movq mm3, mm1 ; Take copies
+
+ punpcklbw mm0, mm6 ; unpack to higher precision
+ punpcklbw mm1, mm6
+ punpckhbw mm2, mm6 ; unpack to higher precision
+ punpckhbw mm3, mm6
+ psubsw mm0, mm1 ; A-B (low order) to MM0
+ psubsw mm2, mm3 ; A-B (high order) to MM2
+
+ paddw mm5, mm0 ; accumulate differences in mm5
+ paddw mm5, mm2 ; accumulate differences in mm5
+
+ pmaddwd mm0, mm0 ; square and accumulate
+ pmaddwd mm2, mm2 ; square and accumulate
+ paddd mm7, mm0 ; accumulate in mm7
+ paddd mm7, mm2 ; accumulate in mm7
+
+
+ ; Now accumulate the final results.
+
+ movq mm4, mm5 ; copy of the four difference words
+ punpcklwd mm5, mm6 ; words 0 and 1, each in the low half of a dword
+ punpckhwd mm4, mm6 ; words 2 and 3, each in the low half of a dword
+ movq mm0, mm7
+ paddw mm5, mm4 ; word0+word2 and word1+word3 (16-bit adds)
+
+ punpckhdq mm0, mm6 ; high dword of the squared sums
+ punpckldq mm7, mm6 ; low dword of the squared sums
+ movq mm4, mm5
+ paddd mm0, mm7 ; total of the squared differences
+ punpckhdq mm4, mm6
+ punpckldq mm5, mm6
+ movd eax, mm0
+ paddw mm4, mm5 ; low word = sum of all four difference words
+ movd ebp, mm4
+ movsx ebx, bp ; the difference total is a signed 16-bit value: sign-extend it
+
+ pop ebp
+ mov DWORD PTR [edi], eax ;XXSum
+ mov DWORD PTR [esi], ebx ;XSum
+ pop edi
+ emms ; Clear the MMX state.
+ pop esi
+ pop edx
+ pop ebx
+ pop ecx
+ ret
+;------------------------------------------------------------------------
+ END \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm
new file mode 100644
index 00000000..6cbbbaab
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm
@@ -0,0 +1,153 @@
+;------------------------------------------------
+XmmGetSAD8Params STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ NewDataPtr dd ? ;first stack argument
+ RefDataPtr dd ?
+ OffsetN dd ? ;row step for NewDataPtr
+ OffsetR dd ? ;row step for RefDataPtr
+XmmGetSAD8Params ENDS
+;------------------------------------------------
+
+
+ .686P
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .XMM
+
+; macros
+
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+
+ .CODE
+
+NAME XmmGetSAD8
+
+PUBLIC XmmGetSAD8_
+PUBLIC _XmmGetSAD8
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+
+;------------------------------------------------
+;INT32 XmmGetSAD8( UINT8 * NewDataPtr, UINT8 * RefDataPtr,
+; INT32 OffsetN, INT32 OffsetR)
+; Returns (in eax) the SAD over 8 rows of 8 bytes; OffsetN / OffsetR are the row steps. No emms is issued.
+XmmGetSAD8_:
+_XmmGetSAD8:
+
+ push ecx
+ push ebx
+ push edx
+
+ push esi
+ mov ecx,(XmmGetSAD8Params PTR [esp-8]).OffsetN ; esp-8: only four of the six registers are pushed so far
+ mov eax,(XmmGetSAD8Params PTR [esp-8]).NewDataPtr ; Load base addresses
+
+ push edi
+ mov ebx,(XmmGetSAD8Params PTR [esp-4]).RefDataPtr ; esp-4: five of the six registers are pushed
+ mov edx,(XmmGetSAD8Params PTR [esp-4]).OffsetR
+
+ push ebp
+
+
+;
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = RefDataPtr MM4 = Free
+; ECX = OffsetN MM5 = Free
+; EDX = OffsetR MM6 = Free
+; EAX = NewDataPtr MM7 = Free
+;
+
+
+ ; Row 1
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm0, [ebx] ; mm0 = SAD of this row
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 2
+ movq mm1, [eax] ; Copy eight bytes to mm1
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm1, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 3
+ movq mm2, [eax] ; Copy eight bytes to mm2
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm2, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 4
+ movq mm3, [eax] ; Copy eight bytes to mm3
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm3, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 5
+ movq mm4, [eax] ; Copy eight bytes to mm4
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm4, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 6
+ movq mm5, [eax] ; Copy eight bytes to mm5
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm5, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 7
+ movq mm6, [eax] ; Copy eight bytes to mm6
+ add eax,ecx ; Inc pointer into the new data
+ psadbw mm6, [ebx]
+
+ add ebx,edx ; Inc pointer into ref data
+
+ ; Row 8
+ movq mm7, [eax] ; Copy eight bytes to mm7
+ psadbw mm7, [ebx]
+
+ ; start accumulating differences (pairwise adds, interleaved with the register pops)
+ paddd mm0,mm1
+ paddd mm2,mm3
+
+ pop ebp
+ paddd mm4,mm5
+ paddd mm6,mm7
+
+ pop edi
+ paddd mm0,mm2
+ paddd mm4,mm6
+
+ pop esi
+ paddd mm0,mm4
+ movd ecx,mm0
+
+theExit:
+ pop edx
+ mov eax, ecx ; return the calculated error in eax
+
+ pop ebx
+ pop ecx
+
+ ret
+
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash
new file mode 100644
index 00000000..8022acba
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash
@@ -0,0 +1,12 @@
+;------------------------------------------------
+XMMGetSADParams STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ NewDataPtr dd ? ;first stack argument
+ PixelsPerLine dd ? ;row step for NewDataPtr
+ RefDataPtr dd ?
+ RefPixelsPerLine dd ? ;row step for RefDataPtr
+ ErrorSoFar dd ?
+ BestSoFar dd ?
+XMMGetSADParams ENDS
+;------------------------------------------------
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm
new file mode 100644
index 00000000..867b7200
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm
@@ -0,0 +1,141 @@
+
+.686P
+.387
+.MODEL flat, SYSCALL, os_dos
+.XMM
+
+; macros
+
+.DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ALIGN 32
+
+
+.CODE
+
+NAME XmmGetSAD
+
+PUBLIC XMMGetSAD_
+PUBLIC _XMMGetSAD
+
+INCLUDE XmmSAD.ash
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+
+;------------------------------------------------
+;INT32 XMMGetSAD( UINT8 * NewDataPtr, INT32 PixelsPerLine,
+; UINT8 * RefDataPtr, INT32 RefPixelsPerLine,
+; INT32 ErrorSoFar, INT32 BestSoFar )
+; Returns (in eax) ErrorSoFar + the SAD over 8 rows of 8 bytes; BestSoFar is never read. No emms is issued.
+XMMGetSAD_:
+_XMMGetSAD:
+
+push ecx
+push ebx
+push edx
+
+push esi
+push edi
+push ebp
+
+mov ecx, (XMMGetSADParams PTR [esp]).PixelsPerLine
+mov eax, (XMMGetSADParams PTR [esp]).NewDataPtr
+mov ebx, (XMMGetSADParams PTR [esp]).RefDataPtr
+
+movq mm0, [eax] ; Copy eight bytes to mm0
+;
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = RefDataPtr MM4 = Free
+; ECX = PixelsPerLine MM5 = Free
+; EDX = RefPixelsPerLine MM6 = Free
+; EAX = NewDataPtr MM7 = Free
+
+
+; Row 1
+mov edx, (XMMGetSADParams PTR [esp]).RefPixelsPerLine
+lea esi, [eax+2*ecx] ; esi = NewDataPtr + 2*PixelsPerLine (row 3; becomes the row 4 pointer below)
+psadbw mm0, [ebx]
+
+; Row 2
+movq mm1, [eax+ecx] ; Copy eight bytes to mm1
+lea edi, [ebx+2*edx] ; edi = RefDataPtr + 2*RefPixelsPerLine (row 3; becomes the row 4 pointer below)
+psadbw mm1, [ebx+edx]
+
+
+
+; Row 3
+movq mm2, [eax+2*ecx] ; Copy eight bytes to mm2
+add esi, ecx ; esi = source ptr for row 4
+psadbw mm2, [ebx+2*edx]
+
+
+add edi, edx ; edi = reference ptr for row 4
+
+; Row 4
+movq mm3, [esi] ; Copy eight bytes to mm3
+psadbw mm3, [edi]
+
+
+
+; Row 5
+movq mm4, [eax+4*ecx] ; Copy eight bytes to mm4
+paddd mm0,mm1
+psadbw mm4, [ebx+4*edx]
+
+
+
+; Row 6
+movq mm5, [esi+2*ecx] ; Copy eight bytes to mm5
+lea eax, [esi+2*ecx] ; eax = source ptr for row 6
+psadbw mm5, [edi+2*edx]
+
+
+lea ebx, [edi+2*edx] ; ebx = reference ptr for row 6
+
+; Row 7
+movq mm6, [eax+ecx] ; Copy eight bytes to mm6
+psadbw mm6, [ebx+edx]
+paddd mm2,mm3
+
+
+
+; Row 8
+movq mm7, [esi+4*ecx] ; Copy eight bytes to mm7 (row 4 pointer + 4 rows)
+psadbw mm7, [edi+4*edx]
+
+; start accumulating differences
+
+mov eax, (XMMGetSADParams PTR [esp]).ErrorSoFar ; read before the pops move esp
+
+pop ebp
+paddd mm4,mm5
+paddd mm6,mm7
+
+pop edi
+paddd mm0,mm2
+paddd mm4,mm6
+
+pop esi
+paddd mm0,mm4
+movd ecx,mm0
+
+theExit:
+pop edx
+add eax,ecx ; add in calculated error
+
+pop ebx
+pop ecx
+
+
+ret
+
+;************************************************
+END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c
new file mode 100644
index 00000000..49d2d0ff
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c
@@ -0,0 +1,181 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "compdll.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define MMX_ENABLED 1
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+// Functions that should only be used in assembly versions of the code
+extern unsigned long VP6_GetProcessorFrequency();
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+extern UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp );
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 WmtGetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 WmtGetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+
+extern UINT32 GetIntraErrorC( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 GetInterErr( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 GetSumAbsDiffs( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 GetHalfPixelSumAbsDiffs( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+
+extern UINT32 MmxGetSAD( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 MmxGetHalfPixelSAD( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 MmxGetInterErr( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 MmxGetIntraError( UINT8* DataPtr, INT32 SourceStride);
+extern void MmxSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+extern void MmxSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void MmxSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+
+extern UINT32 WmtComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp );
+extern void WmtSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+extern void WmtSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void WmtSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+
+extern UINT32 XmmGetInterErr( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 XMMGetSAD( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 WmtGetIntraError( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 WmtGetHalfPixelSAD( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar );
+extern UINT32 WmtGetInterErr( UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride );
+
+extern void VP6_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void VP6_quantize_wmt( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void VP6_quantize_mmx( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+
+
+extern UINT32 GetMBFrameVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 MmxGetMBFrameVertVar( CP_INSTANCE *cpi);
+extern UINT32 WmtGetMBFrameVertVar( CP_INSTANCE *cpi);
+
+extern UINT32 GetMBFieldVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 MmxGetMBFieldVertVar( CP_INSTANCE *cpi);
+extern UINT32 WmtGetMBFieldVertVar( CP_INSTANCE *cpi);
+
+extern UINT32 FiltBlockBilGetSad_C(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 FiltBlockBilGetSad_mmx(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 FiltBlockBilGetSad_wmt(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+
+/****************************************************************************
+ *
+ * ROUTINE : CMachineSpecificConfig
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Reads the MMX/XMM/WMT processor flags and points the global
+ * function pointers at the matching implementations.
+ *
+ * SPECIAL NOTES : Selection order is WMT, then XMM, then MMX, then plain C.
+ *
+ ****************************************************************************/
+void CMachineSpecificConfig( void )
+{
+ INT32 MmxEnabled;
+ INT32 XmmEnabled;
+ INT32 WmtEnabled;
+ /* NOTE(review): the flags are not initialised here; this relies on GetProcessorFlags writing all three. */
+ GetProcessorFlags( &MmxEnabled, &XmmEnabled, &WmtEnabled);
+ /* Defaults; every branch below assigns these three pointers again. */
+ GetSAD = GetSumAbsDiffs;
+ GetSadHalfPixel = GetHalfPixelSumAbsDiffs;
+ GetInterError = GetInterErr;
+
+ if( WmtEnabled )
+ {
+ GetSAD16 = WmtGetSumAbsDiffs16;
+ GetSadHalfPixel16 = WmtGetHalfPixelSumAbsDiffs16;
+ /* No Wmt variant of GetSAD is declared in this file, so the XMM routine is used. */
+ GetSAD = XMMGetSAD;
+ GetSadHalfPixel = WmtGetHalfPixelSAD;
+ GetInterError = WmtGetInterErr;
+ GetIntraError = WmtGetIntraError;
+ Sub8 = WmtSUB8;
+ Sub8_128 = WmtSUB8_128;
+ Sub8Av2 = WmtSUB8AV2;
+ VP6_quantize = VP6_quantize_wmt;
+ GetMBFrameVertVar = WmtGetMBFrameVertVar;
+ GetMBFieldVertVar = WmtGetMBFieldVertVar;
+ FiltBlockBilGetSad = FiltBlockBilGetSad_wmt;
+ GetBlockReconErr = WmtComputeBlockReconError;
+
+ }
+ else if ( XmmEnabled )
+ {
+ GetSAD16 = GetSumAbsDiffs16;
+ GetSadHalfPixel16 = GetHalfPixelSumAbsDiffs16;
+ /* Same assignments as the MMX branch below, except for GetSAD. */
+ GetSAD = XMMGetSAD;
+ GetSadHalfPixel = MmxGetHalfPixelSAD;
+ GetInterError = MmxGetInterErr;
+ GetIntraError = MmxGetIntraError;
+ Sub8 = MmxSUB8;
+ Sub8_128 = MmxSUB8_128;
+ Sub8Av2 = MmxSUB8AV2;
+ VP6_quantize = VP6_quantize_mmx;
+ GetMBFrameVertVar = MmxGetMBFrameVertVar;
+ GetMBFieldVertVar = MmxGetMBFieldVertVar;
+ FiltBlockBilGetSad = FiltBlockBilGetSad_mmx;
+ GetBlockReconErr = ComputeBlockReconError;
+ /* NOTE(review): XmmGetInterErr is declared above but no branch of this function selects it. */
+ }
+ else if ( MmxEnabled )
+ {
+ GetSAD16 = GetSumAbsDiffs16;
+ GetSadHalfPixel16 = GetHalfPixelSumAbsDiffs16;
+
+ GetSAD = MmxGetSAD;
+ GetSadHalfPixel = MmxGetHalfPixelSAD;
+ GetInterError = MmxGetInterErr;
+ GetIntraError = MmxGetIntraError;
+ Sub8 = MmxSUB8;
+ Sub8_128 = MmxSUB8_128;
+ Sub8Av2 = MmxSUB8AV2;
+ VP6_quantize = VP6_quantize_mmx;
+ GetMBFrameVertVar = MmxGetMBFrameVertVar;
+ GetMBFieldVertVar = MmxGetMBFieldVertVar;
+ FiltBlockBilGetSad = FiltBlockBilGetSad_mmx;
+ GetBlockReconErr = ComputeBlockReconError;
+
+ }
+ else
+ {
+ GetSAD16 = GetSumAbsDiffs16;
+ GetSadHalfPixel16 = GetHalfPixelSumAbsDiffs16;
+ /* Plain C implementations for everything. */
+ GetSAD = GetSumAbsDiffs;
+ GetSadHalfPixel = GetHalfPixelSumAbsDiffs;
+ GetInterError = GetInterErr;
+ GetIntraError = GetIntraErrorC;
+ fdct_short = fdct_short_C; /* NOTE(review): only this branch assigns fdct_short */
+ VP6_quantize = VP6_quantize_c;
+ Sub8 = SUB8;
+ Sub8_128 = SUB8_128;
+ Sub8Av2 = SUB8AV2;
+ GetMBFrameVertVar = GetMBFrameVerticalVariance;
+ GetMBFieldVertVar = GetMBFieldVerticalVariance;
+ FiltBlockBilGetSad = FiltBlockBilGetSad_C;
+ GetBlockReconErr = ComputeBlockReconError;
+
+ }
+
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm
new file mode 100644
index 00000000..01d694f1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm
@@ -0,0 +1,1000 @@
+;***********************************************************************
+; File: fdct_m.asm
+;
+; Description:
+; This function performs a 2-D forward DCT on an 8x8 block
+;
+;
+; Input: Pointers to input source data buffer and destination
+; buffer.
+;
+; Note: none
+;
+; Special Notes: We try to do the truncation right to match the result
+; of the c version.
+;
+;************************************************************************
+;
+;
+;
+
+ .586
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .MMX
+;
+; macro functions
+;
+Fdct MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+ ; 1-D forward DCT over operands ip0..ip7 (four 16-bit lanes each); results overwrite ip0..ip7.
+ ; Uses mm0-mm7 and the TID07/TID34/TIRX/TIRY/TIC1/TIC2 scratch qwords.
+ ; NOTE(review): no invocation of Fdct appears in this file; fdct_MMX invokes Fdct_new instead.
+ movq mm0,ip0 ; mm0 = ip0
+ movq mm1,ip1 ; mm1 = ip1
+ movq mm2,ip3 ; mm2 = ip3
+ movq mm3,ip5 ; mm3 = ip5
+ movq mm4,mm1 ; mm4 = ip1
+ movq mm5,mm3 ; mm5 = ip5
+ movq mm6,mm0 ; mm6 = ip0
+ movq mm7,mm2 ; mm7 = ip3
+
+ paddsw mm0,ip7 ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,ip2 ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,ip4 ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,ip6 ; mm3 = ip5 + ip6 = is56
+ psubsw mm6,ip7 ; mm6 = ip0 - ip7 = id07
+ psubsw mm7,ip4 ; mm7 = ip3 - ip4 = id34
+ psubsw mm4,ip2 ; mm4 = ip1 - ip2 = id12
+ psubsw mm5,ip6 ; mm5 = ip5 - ip6 = id56
+
+ movq TID07,mm6 ; save id07
+ movq TID34,mm7 ; save id34
+
+ ; free = mm6, mm7
+
+ movq mm6,mm4 ; mm6 = id12
+ psubsw mm4,mm5 ; mm4 = id12 - id56 = irot_input_x
+
+ movq TIRX,mm4 ; save irot_input_x
+ paddsw mm6,mm5 ; mm6 = id12 + id56
+ movq mm5,mm6 ; mm5 = copy of (id12 + id56)
+
+ pmulhw mm6,xC4S4 ; (xC4S4 * (id12 + id56)) - (id12 + id56)
+ paddw mm6,mm5 ; (xC4S4 * (id12 + id56))
+ psrlw mm5,15 ; mm5 = sign bit of each word of (id12 + id56)
+
+ paddw mm6,mm5; ; add 1 where the input was negative (truncation fix-up) = icommon_product2
+
+
+ ; free = mm4 ,mm5, mm7
+
+ movq mm4,mm0 ; mm4 = is07
+ psubsw mm0,mm2 ; mm0 = is07 - is34 = irot_input_y
+
+ movq TIRY,mm0 ; save irot_input_y
+
+ ; free = mm0, mm5, mm7
+
+ movq mm0,mm1 ; mm0 = is12
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+
+ movq TIC2,mm6 ; save icommon_product2
+ movq mm7, mm1 ; mm7 = copy of (is12 - is56)
+
+ pmulhw mm1,xC4S4 ; mm1 = (xC4S4 * (is12 - is56)) - (is12 - is56)
+ paddw mm1, mm7 ; mm1 = (xC4S4 * (is12 - is56))
+ psrlw mm7, 15 ; mm7 = sign bit of each word of (is12 - is56)
+
+ paddw mm1, mm7 ; truncation fix-up
+ movq TIC1,mm1 ; save icommon_product1
+
+ ; free = mm1, mm5, mm6, mm7
+
+ paddsw mm4,mm2 ; mm4 = is07 + is34 = is0734
+ paddsw mm0,mm3 ; mm0 = is12 + is56 = is1256
+ movq mm1,mm4 ; mm1 = is07 + is34 = is0734
+
+ paddsw mm4,mm0 ; mm4 = is0734 + is1256
+ psubsw mm1,mm0 ; mm1 = is0734 - is1256
+
+ movq mm7,mm4 ; mm7 = copy of (is0734 + is1256)
+ movq mm6,mm1 ; mm6 = copy of (is0734 - is1256)
+
+ pmulhw mm4,xC4S4 ; mm4 = (xC4S4 * (is0734 + is1256)) - (is0734 + is1256)
+ pmulhw mm1,xC4S4 ; mm1 = (xC4S4 * (is0734 - is1256)) - (is0734 - is1256)
+ paddw mm4,mm7 ; mm4 = (xC4S4 * (is0734 + is1256))
+ paddw mm1,mm6 ; mm1 = (xC4S4 * (is0734 - is1256))
+
+ psrlw mm7, 15 ; sign bits of (is0734 + is1256)
+ psrlw mm6, 15 ; sign bits of (is0734 - is1256)
+
+ paddw mm4, mm7 ; truncation fix-up
+ movq ip0,mm4 ; write out ip0
+
+ paddw mm1, mm6 ; truncation fix-up
+ movq ip4,mm1 ; write out ip4
+
+ ; free = mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
+
+ movq mm0,TIRY ; mm0 = irot_input_y
+ movq mm1,TIRX ; mm1 = irot_input_x
+
+ movq mm2,mm0 ; mm2 = irot_input_y
+ movq mm3,mm1 ; mm3 = irot_input_x
+
+ movq mm4,mm0 ; mm4 = irot_input_y (becomes its sign bits)
+ movq mm5,mm1 ; mm5 = irot_input_x (becomes its sign bits)
+
+ movq mm6,xC2S6 ; mm6 = xC2S6
+ movq mm7,xC6s2 ; mm7 = xC6S2 (NOTE(review): spelled xC6s2 here; relies on case-insensitive symbol lookup)
+
+ pmulhw mm0,mm6 ; mm0 = xC2S6*irot_input_y - irot_input_y
+ pmulhw mm3,mm6 ; mm3 = xC2S6*irot_input_x - irot_input_x
+ psrlw mm4, 15 ; mm4 = sign bits of irot_input_y
+ psrlw mm5, 15 ; mm5 = sign bits of irot_input_x
+ paddw mm0,mm2 ; mm0 = xC2S6*irot_input_y
+ paddw mm3,mm1 ; mm3 = xC2S6*irot_input_x
+ paddw mm0,mm4 ; truncation fix-up
+ paddw mm3,mm5; truncation fix-up
+
+ pmulhw mm1,mm7 ; mm1 = xC6S2*irot_input_x
+ pmulhw mm2,mm7 ; mm2 = xC6S2*irot_input_y
+
+ paddw mm1,mm5 ; truncation fix-up
+ paddw mm2,mm4 ; truncation fix-up
+
+ paddsw mm0,mm1 ; mm0 = xC2S6*irot_input_y + xC6S2*irot_input_x = ip2
+ psubsw mm2,mm3 ; mm2 = xC6S2*irot_input_y - xC2S6*irot_input_x = ip6
+
+ movq ip2,mm0 ; write out ip2
+ movq ip6,mm2 ; write out ip6
+
+ ;
+
+ movq mm6,TIC1 ; mm6 = icommon_product1
+ movq mm4,TID07 ; mm4 = id07
+
+ movq mm5,TID34 ; mm5 = id34
+ movq mm7,TIC2 ; mm7 = icommon_product2
+
+ movq mm1,mm6 ; mm1 = icommon_product1
+ movq mm3,mm7 ; mm3 = icommon_product2
+
+ pxor mm0,mm0 ; clear mm0
+ paddsw mm7,mm5 ; mm7 = icommon_product2 + id34
+
+ paddsw mm6,mm4 ; mm6 = icommon_product1 + id07 = irot_input_x
+ psubsw mm0,mm7 ; mm0 = -(icommon_product2 + id34) = irot_input_y
+
+
+ ; free = mm2, mm7, mm4, mm5;
+
+ movq mm2,mm6 ; mm2 = irot_input_x
+ movq mm7,mm0 ; mm7 = irot_input_y
+
+ movq mm4,mm6; mm4 = irot_input_x (becomes its sign bits)
+ movq mm5,mm0; mm5 = irot_input_y (becomes its sign bits)
+
+ pmulhw mm6,xC1S7 ; mm6 = xC1S7*irot_input_x -irot_input_x
+ psrlw mm4,15; mm4 = sign bits of irot_input_x
+
+ psrlw mm5,15; mm5 = sign bits of irot_input_y
+ pmulhw mm7,xC1S7 ; mm7 = xC1S7*irot_input_y -irot_input_y
+
+ paddw mm6,mm2 ; mm6 = xC1S7*irot_input_x
+ paddw mm7,mm0 ; mm7 = xC1S7*irot_input_y
+
+ pmulhw mm0,xC7S1 ; mm0 = xC7S1*irot_input_y
+ paddw mm6,mm4 ; truncation fix-up
+
+ paddw mm7,mm5 ; truncation fix-up
+ pmulhw mm2,xC7S1 ; mm2 = xC7S1*irot_input_x
+
+ paddw mm0,mm5 ; truncation fix-up
+ paddw mm2,mm4 ; truncation fix-up
+
+ psubsw mm6,mm0 ; mm6 = xC1S7*irot_input_x - xC7S1*irot_input_y = ip1
+ paddsw mm2,mm7 ; mm2 = xC7S1*irot_input_x + xC1S7*irot_input_y = ip7
+
+ movq ip1,mm6 ; write out ip1
+
+ movq mm4,TID07 ; mm4 = id07
+ movq mm5,TID34 ; mm5 = id34
+
+ movq ip7,mm2 ; write out ip7
+
+
+ psubsw mm4,mm1 ; mm4 = id07 - icommon_product1 = irot_input_x
+ psubsw mm5,mm3 ; mm5 = id34 - icommon_product2 = irot_input_y
+
+ movq mm6,mm4 ; mm6 = irot_input_x
+ movq mm0,mm4 ; mm0 = irot_input_x
+
+ movq mm7,mm5 ; mm7 = irot_input_y
+ movq mm2,mm5 ; mm2 = irot_input_y
+
+ movq mm1,xC3S5
+ movq mm3,xC5S3
+
+ pmulhw mm4,mm1 ; mm4 = xC3S5*irot_input_x - irot_input_x
+ pmulhw mm6,mm3 ; mm6 = xC5S3*irot_input_x - irot_input_x
+ pmulhw mm5,mm3 ; mm5 = xC5S3*irot_input_y - irot_input_y
+ pmulhw mm7,mm1 ; mm7 = xC3S5*irot_input_y - irot_input_y
+
+ paddw mm4, mm0 ; mm4 = xC3S5*irot_input_x
+ paddw mm6, mm0 ; mm6 = xC5S3*irot_input_x
+ paddw mm5, mm2 ; mm5 = xC5S3*irot_input_y
+ paddw mm7, mm2 ; mm7 = xC3S5*irot_input_y
+
+
+ psrlw mm0, 15 ; mm0 = sign bits of irot_input_x
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_y
+
+ paddw mm4, mm0 ; truncation fix-up
+ paddw mm6, mm0 ; truncation fix-up
+ paddw mm5, mm2 ; truncation fix-up
+ paddw mm7, mm2 ; truncation fix-up
+
+ psubsw mm4,mm5 ; mm4 = xC3S5*irot_input_x - xC5S3*irot_input_y = ip3
+ paddsw mm6,mm7 ; mm6 = xC5S3*irot_input_x + xC3S5*irot_input_y = ip5
+
+ movq ip3,mm4 ; write out ip3
+ movq ip5,mm6 ; write out ip5
+
+
+ENDM
+
+Fdct_new MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+ ; 1-D forward DCT over operands ip0..ip7 (four 16-bit lanes each); results overwrite ip0..ip7.
+ ; Uses mm0-mm7 and only the TIRY scratch qword.
+ ; Constants with the top bit set (xC1S7..xC5S3) are negative to pmulhw, hence the paddw of the input afterwards.
+ movq mm0,ip0 ; mm0 = ip0
+ movq mm1,ip1 ; mm1 = ip1
+ movq mm2,ip3 ; mm2 = ip3
+ movq mm3,ip5 ; mm3 = ip5
+ movq mm4,ip0 ; mm4 = ip0
+ movq mm5,ip1 ; mm5 = ip1
+ movq mm6,ip3 ; mm6 = ip3
+ movq mm7,ip5 ; mm7 = ip5
+
+
+ paddsw mm0,ip7 ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,ip2 ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,ip4 ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,ip6 ; mm3 = ip5 + ip6 = is56
+ psubsw mm4,ip7 ; mm4 = ip0 - ip7 = id07
+ psubsw mm5,ip2 ; mm5 = ip1 - ip2 = id12
+
+ psubsw mm0,mm2 ; mm0 = is07 - is34
+
+ paddsw mm2,mm2 ; mm2 = 2 * is34
+
+ psubsw mm6,ip4 ; mm6 = ip3 - ip4 = id34
+
+ paddsw mm2,mm0 ; mm2 = is07 + is34 = is0734
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+ movq TIRY,mm0 ; Save is07 - is34 to free mm0;
+ paddsw mm3,mm3 ; mm3 = 2 * is56
+ paddsw mm3,mm1 ; mm3 = is12 + is56 = is1256
+
+ psubsw mm7,ip6 ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+ psubsw mm5,mm7 ; mm5 = id12 - id56
+ paddsw mm7,mm7 ; mm7 = 2 * id56
+ paddsw mm7,mm5 ; mm7 = id12 + id56
+
+ ; mm4 = id07
+
+ ; mm6 = id34
+;---------------------------------------------------------------------
+; ip[0], ip[4]
+; mm0 Free
+; mm2 is0734
+; mm3 is1256
+
+
+ psubsw mm2,mm3 ; mm2 = is0734 - is1256
+ paddsw mm3,mm3 ; mm3 = 2 * is1256
+
+ movq mm0,mm2 ; make a copy
+ paddsw mm3,mm2 ; mm3 = is0734 + is1256
+
+ pmulhw mm0,xC4S4 ; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+ paddw mm0,mm2 ; mm0 = xC4S4 * ( is0734 - is1256 )
+ psrlw mm2,15 ; mm2 = sign bit of each word of ( is0734 - is1256 )
+ paddw mm0,mm2 ; Truncate mm0, now it is op[4]
+
+ movq mm2,mm3 ; mm2 = is0734 + is1256 (becomes its sign bits)
+ movq ip4,mm0 ; save ip4, now mm0,mm2 are free
+
+ movq mm0,mm3 ; mm0 = is0734 + is1256
+ pmulhw mm3,xC4S4 ; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+ psrlw mm2,15 ; mm2 = sign bits
+ paddw mm3,mm0 ; mm3 = xC4S4 * ( is0734 +is1256 )
+ paddw mm3,mm2 ; Truncate mm3, now it is op[0]
+
+ movq ip0,mm3 ; save ip0
+
+;----------------------------------------------------------------------
+; ip[2], ip[6]
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 id12 - id56 irot_input_x
+; TIRY is07 - is34 irot_input_y
+
+ movq mm3,TIRY ; mm3 = irot_input_y
+ pmulhw mm3,xC2S6 ; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+ movq mm2,TIRY ; mm2 = irot_input_y (becomes its sign bits)
+ movq mm0,mm2 ; mm0 = irot_input_y
+
+ psrlw mm2,15 ; mm2 = sign bits of irot_input_y
+ paddw mm3,mm0 ; mm3 = xC2S6 * irot_input_y
+
+ paddw mm3,mm2 ; Truncated
+ movq mm0, mm5; ; mm0 = irot_input_x
+
+
+ movq mm2, mm5; mm2 = irot_input_x (becomes its sign bits)
+ pmulhw mm0, xC6S2 ; mm0 = xC6S2 * irot_input_x
+
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+ paddw mm0, mm2 ; Truncated
+
+ paddsw mm3, mm0 ; ip[2]
+ movq ip2, mm3 ; Save ip2
+
+
+ movq mm0, mm5 ; mm0 = irot_input_x
+ movq mm2, mm5 ; mm2 = irot_input_x (becomes its sign bits)
+
+ pmulhw mm5, xC2S6 ; mm5 = xC2S6 * irot_input_x - irot_input_x
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+
+ movq mm3, TIRY ; mm3 = irot_input_y
+ paddw mm5, mm0 ; mm5 = xC2S6 * irot_input_x
+
+ paddw mm5, mm2 ; Truncated
+ movq mm2, mm3 ; mm2 = irot_input_y (becomes its sign bits)
+
+ pmulhw mm3, xC6S2 ; mm3 = xC6S2 * irot_input_y
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_y
+
+ paddw mm3, mm2 ; Truncated
+ psubsw mm3, mm5 ; ip[6] = xC6S2 * irot_input_y - xC2S6 * irot_input_x
+
+ movq ip6, mm3 ; Save ip6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 is12 - is56
+; mm7 id12 + id56
+
+ movq mm0, xC4S4
+ movq mm2, mm1
+ movq mm3, mm1
+
+ pmulhw mm1, mm0 ; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+ psrlw mm2, 15 ; mm2 = sign bits of ( is12 - is56 )
+
+ paddw mm1, mm3 ; mm1 = xC4S4 * ( is12 - is56 )
+ paddw mm1, mm2 ; Truncate mm1, now it is icommon_product1
+
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pmulhw mm7, mm0 ; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+ psrlw mm2, 15 ; mm2 = sign bits of ( id12 + id56 )
+
+ paddw mm7, mm3 ; mm7 = xC4S4 * ( id12 + id56 )
+ paddw mm7, mm2 ; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 icommon_product1
+; mm7 icommon_product2
+; mm4 id07
+; mm6 id34
+
+
+ pxor mm0, mm0 ; Clear mm0
+ psubsw mm0, mm6 ; mm0 = - id34
+
+ psubsw mm0, mm7 ; mm0 = - ( id34 + icommon_product2 )
+ paddsw mm6, mm6 ; mm6 = 2 * id34
+ paddsw mm6, mm0 ; mm6 = id34 - icommon_product2
+
+ psubsw mm4, mm1 ; mm4 = id07 - icommon_product1
+ paddsw mm1, mm1 ; mm1 = 2 * icommon_product1
+ paddsw mm1, mm4 ; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; ip1, ip7
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 irot_input_x
+; mm0 irot_input_y
+
+ movq mm7, xC1S7
+ movq mm2, mm1
+
+ movq mm3, mm1;
+ pmulhw mm1, mm7 ; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+ movq mm7, xC7S1 ;
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+
+ paddw mm1, mm3 ; mm1 = xC1S7 * irot_input_x
+ paddw mm1, mm2 ; Truncated
+
+ pmulhw mm3, mm7 ; mm3 = xC7S1 * irot_input_x
+ paddw mm3, mm2 ; Truncated
+
+ movq mm5, mm0
+ movq mm2, mm0
+
+ movq mm7, xC1S7
+ pmulhw mm0, mm7 ; mm0 = xC1S7 * irot_input_y - irot_input_y
+
+ movq mm7, xC7S1
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_y
+
+ paddw mm0, mm5 ; mm0 = xC1S7 * irot_input_y
+ paddw mm0, mm2 ; Truncated
+
+ pmulhw mm5, mm7 ; mm5 = xC7S1 * irot_input_y
+ paddw mm5, mm2 ; Truncated
+
+ psubsw mm1, mm5 ; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1
+ paddsw mm3, mm0 ; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7
+
+ movq ip1, mm1
+ movq ip7, mm3
+;-----------------------------------------------------------------------------
+; ip3, ip5
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 Free
+; mm0 Free
+; mm4 id07 - icommon_product1 = irot_input_x
+; mm6 id34 - icommon_product2 = irot_input_y
+
+ movq mm0, xC3S5
+ movq mm1, xC5S3
+
+ movq mm5, mm6
+ movq mm7, mm6
+
+ movq mm2, mm4
+ movq mm3, mm4
+
+ pmulhw mm4, mm0 ; mm4 = xC3S5 * irot_input_x - irot_input_x
+ pmulhw mm6, mm1 ; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+ psrlw mm5, 15 ; mm5 = sign bits of irot_input_y
+
+ paddw mm4, mm3 ; mm4 = xC3S5 * irot_input_x
+ paddw mm6, mm7 ; mm6 = xC5S3 * irot_input_y
+
+ paddw mm4, mm2 ; Truncated
+ paddw mm6, mm5 ; Truncated
+
+ psubsw mm4, mm6 ; ip3
+ movq ip3, mm4 ;
+
+ movq mm4, mm3 ; mm4 = irot_input_x
+ movq mm6, mm7 ; mm6 = irot_input_y
+
+ pmulhw mm3, mm1 ; mm3 = xC5S3 * irot_input_x - irot_input_x
+ pmulhw mm7, mm0 ; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+ paddw mm4, mm2 ; mm4 = irot_input_x + its sign bit
+ paddw mm6, mm5 ; mm6 = irot_input_y + its sign bit
+
+ paddw mm3, mm4 ; mm3 = xC5S3 * irot_input_x
+ paddw mm7, mm6 ; mm7 = xC3S5 * irot_input_y
+
+ paddw mm3, mm7 ; ip5 (NOTE(review): wrapping paddw here; ip1/ip2/ip3/ip6/ip7 use saturating paddsw/psubsw)
+ movq ip5, mm3 ;
+
+ENDM
+
+Transpose MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7,
+ op0, op1, op2, op3, op4, op5, op6, op7
+ movq mm0,ip0 ; mm0 = a3 a2 a1 a0
+ movq mm4,ip4 ; mm4 = e3 e2 e1 e0
+ movq mm1,ip1 ; mm1 = b3 b2 b1 b0
+ movq mm5,ip5 ; mm5 = f3 f2 f1 f0
+ movq mm2,ip2 ; mm2 = c3 c2 c1 c0
+ movq mm6,ip6 ; mm6 = g3 g2 g1 g0
+ movq mm3,ip3 ; mm3 = d3 d2 d1 d0
+ movq op1,mm1 ; park b3 b2 b1 b0 in op1 (mm1 is needed as a temporary)
+ movq mm7,ip7 ; mm7 = h3 h2 h1 h0
+
+ ; Transpose the e/f/g/h 4x4 word block into op4..op7, then the a/b/c/d block into op0..op3
+ movq mm1, mm4 ; mm1 = e3 e2 e1 e0
+ punpcklwd mm4, mm5 ; mm4 = f1 e1 f0 e0
+ movq op0, mm0 ; save a3 a2 a1 a0
+ punpckhwd mm1, mm5 ; mm1 = f3 e3 f2 e2
+ movq mm0, mm6 ; mm0 = g3 g2 g1 g0
+ punpcklwd mm6, mm7 ; mm6 = h1 g1 h0 g0
+ movq mm5, mm4 ; mm5 = f1 e1 f0 e0
+ punpckldq mm4, mm6 ; mm4 = h0 g0 f0 e0 = MM4
+ punpckhdq mm5, mm6 ; mm5 = h1 g1 f1 e1 = MM5
+ movq mm6, mm1 ; mm6 = f3 e3 f2 e2
+ movq op4, mm4 ; op4 = h0 g0 f0 e0
+ punpckhwd mm0, mm7 ; mm0 = h3 g3 h2 g2
+ movq op5, mm5 ; op5 = h1 g1 f1 e1
+ punpckhdq mm6, mm0 ; mm6 = h3 g3 f3 e3 = MM7
+ movq mm4, op0 ; mm4 = a3 a2 a1 a0
+ punpckldq mm1, mm0 ; mm1 = h2 g2 f2 e2 = MM6
+ movq mm5, op1 ; mm5 = b3 b2 b1 b0
+ movq mm0, mm4 ; mm0 = a3 a2 a1 a0
+ movq op7, mm6 ; op7 = h3 g3 f3 e3
+ punpcklwd mm0, mm5 ; mm0 = b1 a1 b0 a0
+ movq op6, mm1 ; op6 = h2 g2 f2 e2
+ punpckhwd mm4, mm5 ; mm4 = b3 a3 b2 a2
+ movq mm5, mm2 ; mm5 = c3 c2 c1 c0
+ punpcklwd mm2, mm3 ; mm2 = d1 c1 d0 c0
+ movq mm1, mm0 ; mm1 = b1 a1 b0 a0
+ punpckldq mm0, mm2 ; mm0 = d0 c0 b0 a0 = MM0
+ punpckhdq mm1, mm2 ; mm1 = d1 c1 b1 a1 = MM1
+ movq mm2, mm4 ; mm2 = b3 a3 b2 a2
+ movq op0, mm0 ; op0 = d0 c0 b0 a0
+ punpckhwd mm5, mm3 ; mm5 = d3 c3 d2 c2
+ movq op1, mm1 ; op1 = d1 c1 b1 a1
+ punpckhdq mm4, mm5 ; mm4 = d3 c3 b3 a3 = MM3
+ punpckldq mm2, mm5 ; mm2 = d2 c2 b2 a2 = MM2
+ movq op3, mm4 ; op3 = d3 c3 b3 a3
+ movq op2, mm2 ; op2 = d2 c2 b2 a2
+ENDM
+
+;------------------------------------------------
+fdctParams STRUC
+ dd 6 dup (?) ;6 pushed regs (esi, edi, ebp, ebx, ecx, edx in the procedures below)
+ dd ? ;return address
+ InputPtr dd ? ;first argument: pointer to the input data
+ OutputPtr dd ? ;second argument: pointer to the output data
+fdctParams ENDS
+;------------------------------------------------
+
+
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+; NOTE(review): the scratch qwords below are static storage shared by every call; concurrent callers would overwrite each other.
+ ALIGN 32
+; xCkS(8-k) = cos(k*pi/16) * 65536, rounded, replicated in all four words. Values >= 8000h are negative to pmulhw.
+xC1S7 QWORD 0fb15fb15fb15fb15h ; 64277
+xC2S6 QWORD 0ec83ec83ec83ec83h ; 60547
+xC3S5 QWORD 0d4dbd4dbd4dbd4dbh ; 54491
+xC4S4 QWORD 0b505b505b505b505h ; 46341
+xC5S3 QWORD 08e3a8e3a8e3a8e3ah ; 36410
+xC6S2 QWORD 061f861f861f861f8h ; 25080
+xC7S1 QWORD 031f131f131f131f1h ; 12785
+TIRX QWORD 00000000000000000h ; scratch: irot_input_x (written by the Fdct macro only)
+TIRY QWORD 00000000000000000h ; scratch: irot_input_y (Fdct, Fdct_new and FDct1D4Mmx)
+TIC1 QWORD 00000000000000000h ; scratch: icommon_product1 (Fdct macro only)
+TIC2 QWORD 00000000000000000h ; scratch: icommon_product2 (Fdct macro only)
+TID07 QWORD 00000000000000000h ; scratch: id07 (Fdct macro only)
+TID34 QWORD 00000000000000000h ; scratch: id34 (Fdct macro only)
+
+; data goes here
+
+ .CODE
+
+NAME fdct
+
+PUBLIC fdct_MMX_
+PUBLIC _fdct_MMX
+
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+
+;------------------------------------------------
+; void fdct_MMX ( INT16 * InputData, INT16 * OutputData )
+; 2-D forward DCT: four Transpose + Fdct_new passes; reads 128 bytes at InputData, writes 128 bytes at OutputData.
+fdct_MMX_:
+_fdct_MMX:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+; fdctParams assumes exactly these six pushes. Only eax and edx are used as pointers below.
+;
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = Free MM4 = Free
+; ECX = Free MM5 = Free
+; EDX = Free MM6 = Free
+; EAX = Free MM7 = Free
+;
+
+ mov eax,(fdctParams PTR [esp]).InputPtr ; load pointer to input data
+ mov edx,(fdctParams PTR [esp]).OutputPtr ; load pointer to output data
+
+ ;
+ ; Input data is an 8x8 block of 16-bit values (16 bytes per row). Each pass
+ ; transposes two 4x4 sub-blocks and then runs Fdct_new on the eight resulting qwords.
+ ;
+ ; Pass 1a: first 64 bytes of the input, written to the first 64 bytes of the output.
+ Transpose [eax], [eax+16], [eax+32], [eax+48], [eax+8], [eax+24], [eax+40], [eax+56], [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+ Fdct_new [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+ ; Pass 1b: second 64 bytes of the input, written to the second 64 bytes of the output.
+ Transpose [eax+64], [eax+80], [eax+96], [eax+112], [eax+72], [eax+88], [eax+104], [eax+120], [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+ Fdct_new [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+ ; Pass 2a: in place on the output, qwords at offsets 0,16,...,112.
+ Transpose [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112], [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+ Fdct_new [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+ ; Pass 2b: in place on the output, qwords at offsets 8,24,...,120.
+ Transpose [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120], [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+ Fdct_new [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+
+
+theExit:
+
+ emms ; clear the MMX state before returning
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+
+ ret
+
+
+NAME FDct1D4Mmx
+
+PUBLIC FDct1D4Mmx_
+PUBLIC _FDct1D4Mmx
+
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE EQU 0
+
+;------------------------------------------------
+; void FDct1D4Mmx ( INT16 * InputData, INT16 * OutputData )
+; One 1-D forward DCT pass: reads eight qwords at InputData+8*k and writes eight qwords at OutputData+8*k (k = 0..7).
+FDct1D4Mmx_:
+_FDct1D4Mmx:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+;
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = Free MM4 = Free
+; ECX = Free MM5 = Free
+; EDX = Free MM6 = Free
+; EAX = Free MM7 = Free
+;
+
+ mov eax,(fdctParams PTR [esp]).InputPtr ; load pointer to input data
+ mov edx,(fdctParams PTR [esp]).OutputPtr ; load pointer to output data
+
+; The code below follows the Fdct_new macro step for step, with ipK = [eax + 8*K] and outputs to [edx + 8*K].
+ movq mm0,[eax] ; mm0 = ip0
+ movq mm1,[eax + 8] ; mm1 = ip1
+ movq mm2,[eax + 24] ; mm2 = ip3
+ movq mm3,[eax + 40] ; mm3 = ip5
+ movq mm4,[eax] ; mm4 = ip0
+ movq mm5,[eax + 8] ; mm5 = ip1
+ movq mm6,[eax + 24] ; mm6 = ip3
+ movq mm7,[eax + 40] ; mm7 = ip5
+
+
+ paddsw mm0,[eax + 56] ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,[eax + 16] ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,[eax + 32] ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,[eax + 48] ; mm3 = ip5 + ip6 = is56
+ psubsw mm4,[eax + 56] ; mm4 = ip0 - ip7 = id07
+ psubsw mm5,[eax + 16] ; mm5 = ip1 - ip2 = id12
+
+ psubsw mm0,mm2 ; mm0 = is07 - is34
+
+ paddsw mm2,mm2 ; mm2 = 2 * is34
+
+ psubsw mm6,[eax + 32] ; mm6 = ip3 - ip4 = id34
+
+ paddsw mm2,mm0 ; mm2 = is07 + is34 = is0734
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+ movq TIRY,mm0 ; Save is07 - is34 to free mm0;
+ paddsw mm3,mm3 ; mm3 = 2 * is56
+ paddsw mm3,mm1 ; mm3 = is12 + is56 = is1256
+
+ psubsw mm7,[eax + 48] ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+ psubsw mm5,mm7 ; mm5 = id12 - id56
+ paddsw mm7,mm7 ; mm7 = 2 * id56
+ paddsw mm7,mm5 ; mm7 = id12 + id56
+
+ ; mm4 = id07
+
+ ; mm6 = id34
+;---------------------------------------------------------------------
+; ip[0], ip[4]
+; mm0 Free
+; mm2 is0734
+; mm3 is1256
+
+
+ psubsw mm2,mm3 ; mm2 = is0734 - is1256
+ paddsw mm3,mm3 ; mm3 = 2 * is1256
+
+ movq mm0,mm2 ; make a copy
+ paddsw mm3,mm2 ; mm3 = is0734 + is1256
+
+ pmulhw mm0,xC4S4 ; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+ paddw mm0,mm2 ; mm0 = xC4S4 * ( is0734 - is1256 )
+ psrlw mm2,15 ; mm2 = sign bit of each word of ( is0734 - is1256 )
+ paddw mm0,mm2 ; Truncate mm0, now it is op[4]
+
+ movq mm2,mm3 ; mm2 = is0734 + is1256 (becomes its sign bits)
+ movq [edx + 32],mm0 ; save op4, now mm0,mm2 are free
+
+ movq mm0,mm3 ; mm0 = is0734 + is1256
+ pmulhw mm3,xC4S4 ; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+ psrlw mm2,15 ; mm2 = sign bits
+ paddw mm3,mm0 ; mm3 = xC4S4 * ( is0734 +is1256 )
+ paddw mm3,mm2 ; Truncate mm3, now it is op[0]
+
+ movq [edx],mm3 ; save op0
+
+;----------------------------------------------------------------------
+; ip[2], ip[6]
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 id12 - id56 irot_input_x
+; TIRY is07 - is34 irot_input_y
+
+ movq mm3,TIRY ; mm3 = irot_input_y
+ pmulhw mm3,xC2S6 ; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+ movq mm2,TIRY ; mm2 = irot_input_y (becomes its sign bits)
+ movq mm0,mm2 ; mm0 = irot_input_y
+
+ psrlw mm2,15 ; mm2 = sign bits of irot_input_y
+ paddw mm3,mm0 ; mm3 = xC2S6 * irot_input_y
+
+ paddw mm3,mm2 ; Truncated
+ movq mm0, mm5; ; mm0 = irot_input_x
+
+
+ movq mm2, mm5; mm2 = irot_input_x (becomes its sign bits)
+ pmulhw mm0, xC6S2 ; mm0 = xC6S2 * irot_input_x
+
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+ paddw mm0, mm2 ; Truncated
+
+ paddsw mm3, mm0 ; ip[2]
+ movq [edx + 16], mm3 ; Save ip2
+
+
+ movq mm0, mm5 ; mm0 = irot_input_x
+ movq mm2, mm5 ; mm2 = irot_input_x (becomes its sign bits)
+
+ pmulhw mm5, xC2S6 ; mm5 = xC2S6 * irot_input_x - irot_input_x
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+
+ movq mm3, TIRY ; mm3 = irot_input_y
+ paddw mm5, mm0 ; mm5 = xC2S6 * irot_input_x
+
+ paddw mm5, mm2 ; Truncated
+ movq mm2, mm3 ; mm2 = irot_input_y (becomes its sign bits)
+
+ pmulhw mm3, xC6S2 ; mm3 = xC6S2 * irot_input_y
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_y
+
+ paddw mm3, mm2 ; Truncated
+ psubsw mm3, mm5 ; ip[6] = xC6S2 * irot_input_y - xC2S6 * irot_input_x
+
+ movq [edx + 48], mm3 ; Save ip6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 is12 - is56
+; mm7 id12 + id56
+
+ movq mm0, xC4S4
+ movq mm2, mm1
+ movq mm3, mm1
+
+ pmulhw mm1, mm0 ; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+ psrlw mm2, 15 ; mm2 = sign bits of ( is12 - is56 )
+
+ paddw mm1, mm3 ; mm1 = xC4S4 * ( is12 - is56 )
+ paddw mm1, mm2 ; Truncate mm1, now it is icommon_product1
+
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pmulhw mm7, mm0 ; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+ psrlw mm2, 15 ; mm2 = sign bits of ( id12 + id56 )
+
+ paddw mm7, mm3 ; mm7 = xC4S4 * ( id12 + id56 )
+ paddw mm7, mm2 ; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 icommon_product1
+; mm7 icommon_product2
+; mm4 id07
+; mm6 id34
+
+
+ pxor mm0, mm0 ; Clear mm0
+ psubsw mm0, mm6 ; mm0 = - id34
+
+ psubsw mm0, mm7 ; mm0 = - ( id34 + icommon_product2 )
+ paddsw mm6, mm6 ; mm6 = 2 * id34
+ paddsw mm6, mm0 ; mm6 = id34 - icommon_product2
+
+ psubsw mm4, mm1 ; mm4 = id07 - icommon_product1
+ paddsw mm1, mm1 ; mm1 = 2 * icommon_product1
+ paddsw mm1, mm4 ; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; ip1, ip7
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 irot_input_x
+; mm0 irot_input_y
+
+ movq mm7, xC1S7
+ movq mm2, mm1
+
+ movq mm3, mm1;
+ pmulhw mm1, mm7 ; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+ movq mm7, xC7S1 ;
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+
+ paddw mm1, mm3 ; mm1 = xC1S7 * irot_input_x
+ paddw mm1, mm2 ; Truncated
+
+ pmulhw mm3, mm7 ; mm3 = xC7S1 * irot_input_x
+ paddw mm3, mm2 ; Truncated
+
+ movq mm5, mm0
+ movq mm2, mm0
+
+ movq mm7, xC1S7
+ pmulhw mm0, mm7 ; mm0 = xC1S7 * irot_input_y - irot_input_y
+
+ movq mm7, xC7S1
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_y
+
+ paddw mm0, mm5 ; mm0 = xC1S7 * irot_input_y
+ paddw mm0, mm2 ; Truncated
+
+ pmulhw mm5, mm7 ; mm5 = xC7S1 * irot_input_y
+ paddw mm5, mm2 ; Truncated
+
+ psubsw mm1, mm5 ; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1
+ paddsw mm3, mm0 ; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7
+
+ movq [edx + 8], mm1
+ movq [edx + 56], mm3
+;-----------------------------------------------------------------------------
+; ip3, ip5
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 Free
+; mm0 Free
+; mm4 id07 - icommon_product1 = irot_input_x
+; mm6 id34 - icommon_product2 = irot_input_y
+
+ movq mm0, xC3S5
+ movq mm1, xC5S3
+
+ movq mm5, mm6
+ movq mm7, mm6
+
+ movq mm2, mm4
+ movq mm3, mm4
+
+ pmulhw mm4, mm0 ; mm4 = xC3S5 * irot_input_x - irot_input_x
+ pmulhw mm6, mm1 ; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+ psrlw mm2, 15 ; mm2 = sign bits of irot_input_x
+ psrlw mm5, 15 ; mm5 = sign bits of irot_input_y
+
+ paddw mm4, mm3 ; mm4 = xC3S5 * irot_input_x
+ paddw mm6, mm7 ; mm6 = xC5S3 * irot_input_y
+
+ paddw mm4, mm2 ; Truncated
+ paddw mm6, mm5 ; Truncated
+
+ psubsw mm4, mm6 ; ip3
+ movq [edx + 24], mm4 ;
+
+ movq mm4, mm3 ; mm4 = irot_input_x
+ movq mm6, mm7 ; mm6 = irot_input_y
+
+ pmulhw mm3, mm1 ; mm3 = xC5S3 * irot_input_x - irot_input_x
+ pmulhw mm7, mm0 ; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+ paddw mm4, mm2 ; mm4 = irot_input_x + its sign bit
+ paddw mm6, mm5 ; mm6 = irot_input_y + its sign bit
+
+ paddw mm3, mm4 ; mm3 = xC5S3 * irot_input_x
+ paddw mm7, mm6 ; mm7 = xC3S5 * irot_input_y
+
+ paddw mm3, mm7 ; ip5 (NOTE(review): wrapping paddw here; ip1/ip2/ip3/ip6/ip7 use saturating paddsw/psubsw)
+ movq [edx + 40], mm3 ;
+
+
+ emms ; clear the MMX state before returning
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+
+ ret
+
+
+;************************************************
+ END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c
new file mode 100644
index 00000000..a2e7f774
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c
@@ -0,0 +1,470 @@
+/****************************************************************************
+*
+* Module Title : DFrameR.C
+*
+* Description : Functions to read from the input bitstream.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "postproc_if.h"
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_bitread
+ *
+ * INPUTS : BOOL_CODER *br : Pointer to a Bool Decoder instance.
+ * int bits : Number of bits to be read from input stream.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: The requested bits.
+ *
+ * FUNCTION : Decodes the requested number of bits from the encoded data buffer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 VP6_bitread ( BOOL_CODER *br, int bits )
+{
+    // The value is assembled most-significant bit first: the first decoded
+    // bool is placed at bit position (bits-1), the last one at position 0.
+    // A non-positive bit count decodes nothing and yields 0.
+    UINT32 result = 0;
+    int shift = bits;
+
+    while ( shift > 0 )
+    {
+        shift--;
+        result |= (VP6_DecodeBool128(br)<<shift);
+    }
+
+    return result;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_bitread1
+ *
+ * INPUTS : BOOL_CODER *br : Pointer to a Bool Decoder instance.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : UINT32: The next decoded bit (0 or 1).
+ *
+ * FUNCTION : Decodes the next bit from the encoded data buffer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+INLINE
+UINT32 VP6_bitread1 ( BOOL_CODER *br )
+{
+    // Single-bit read: forwards to VP6_DecodeBool128 and returns its result.
+    UINT32 bit = VP6_DecodeBool128(br);
+
+    return bit;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitHeaderBuffer
+ *
+ * INPUTS : FRAME_HEADER *Header : Pointer to FRAME_HEADER data structure.
+ * unsigned char *Buffer : Pointer to buffer containing bitstream header.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ *
+ * FUNCTION : Initialises extraction of bits from header buffer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void InitHeaderBuffer ( FRAME_HEADER *Header, unsigned char *Buffer )
+{
+    // Remember where the header bytes live.
+    Header->buffer = Buffer;
+
+    // Prime the 32-bit shift register with the first four bytes, first byte
+    // in the most significant position. Each byte is widened to UINT32
+    // before shifting: an unsigned char is otherwise promoted to signed int,
+    // and shifting a byte >= 0x80 left by 24 overflows that int.
+    Header->value = ((UINT32)Buffer[0]<<24) |
+                    ((UINT32)Buffer[1]<<16) |
+                    ((UINT32)Buffer[2]<<8)  |
+                     (UINT32)Buffer[3];
+
+    Header->bits_available = 32;
+    Header->pos = 4;    // index of the next byte to load
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ReadHeaderBits
+ *
+ * INPUTS : FRAME_HEADER *Header : Pointer to FRAME_HEADER data structure.
+ * UINT32 BitsRequired : Number of bits to extract.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : UINT32: Bits requested
+ *
+ * FUNCTION : Extracts requested number of bits from header buffer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 ReadHeaderBits ( FRAME_HEADER *Header, UINT32 BitsRequired )
+{
+    // Extracts BitsRequired (0..32) bits from the header shift register,
+    // refilling it with the next four buffer bytes when it runs short.
+    // Requests above 32 are clamped to 32; a request for 0 bits returns 0
+    // and leaves the header state untouched.
+    //
+    // NOTE(review): the refill reads four bytes at Header->buffer[pos]
+    // without any length check - FRAME_HEADER carries no buffer size here,
+    // so the caller must guarantee those bytes are readable.
+    UINT32 pos = Header->pos;
+    UINT32 available = Header->bits_available;
+    UINT32 value = Header->value;
+    UINT8 *Buffer = &Header->buffer[pos];
+    UINT32 RetVal = 0;
+
+    if ( BitsRequired == 0 )
+        return 0;
+
+    if ( BitsRequired > 32 )
+        BitsRequired = 32;
+
+    if ( available < BitsRequired )
+    {
+        // Need more bits from input buffer...
+        // Take whatever is left first. When nothing is left the shift count
+        // would be 32, which is undefined for a 32-bit operand, so skip it.
+        if ( available > 0 )
+            RetVal = value >> (32-available);
+
+        BitsRequired -= available;
+
+        // Make room for the bits still to come. BitsRequired can only be 32
+        // here when no bits were left, in which case RetVal is already 0.
+        if ( BitsRequired < 32 )
+            RetVal <<= BitsRequired;
+
+        // Bytes are widened before shifting so that a byte >= 0x80 is not
+        // shifted into the sign bit of a promoted int.
+        value = ((UINT32)Buffer[0]<<24) |
+                ((UINT32)Buffer[1]<<16) |
+                ((UINT32)Buffer[2]<<8)  |
+                 (UINT32)Buffer[3];
+        pos += 4;
+        available = 32;
+    }
+
+    // BitsRequired is in 1..32 here, so the shift count is in 0..31.
+    RetVal |= value >> (32-BitsRequired);
+
+    // Update data structure. Consuming all 32 bits leaves an empty register.
+    Header->value = (BitsRequired < 32) ? (value<<BitsRequired) : 0;
+    Header->bits_available = available-BitsRequired;
+    Header->pos = pos;
+
+    return RetVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LoadFrameHeader
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : BOOL: FALSE in case of error, TRUE otherwise.
+ *
+ * FUNCTION : Loads a frame header & carries out some initialization.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+// Rounds X up to the next multiple of 32. The argument is parenthesized so
+// that an expression argument is cast as a whole.
+#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 )&( 0xFFFFFFE0 ) )
+
+// Parses the frame header held in pbi->Header, starts the first bool decoder
+// and updates the decoder configuration. On key frames it also reads the
+// frame dimensions and, when they changed, re-initialises the frame buffers.
+// Returns FALSE when the bitstream version is unsupported or when the frame
+// buffers could not be (re)initialised, TRUE otherwise.
+static BOOL LoadFrameHeader ( PB_INSTANCE *pbi )
+{
+    UINT8 DctQMask;
+    FRAME_HEADER *Header = &pbi->Header;
+    BOOL RetVal = TRUE;
+
+    // Is the frame an inter frame or a key frame
+    pbi->FrameType = (UINT8)ReadHeaderBits(Header, 1);
+
+    // Quality (Q) index
+    DctQMask = (UINT8)ReadHeaderBits(Header, 6);
+
+    // Are we using two BOOL coder data streams/partitions
+    pbi->MultiStream = (UINT8)ReadHeaderBits(Header, 1);
+
+    // If the frame was a base frame then read the frame dimensions and build a bitmap structure.
+    if ( (pbi->FrameType == BASE_FRAME) )
+    {
+        // Read the bitstream version number and the profile
+        pbi->Vp3VersionNo = (UINT8)ReadHeaderBits(Header, 5 );
+        pbi->VpProfile = (UINT8)ReadHeaderBits(Header, 2 );
+
+        if(pbi->Vp3VersionNo > CURRENT_DECODE_VERSION)
+        {
+            return FALSE;
+        }
+
+        // Initialise version specific quantiser values
+        VP6_InitQTables( pbi->quantizer, pbi->Vp3VersionNo );
+
+        // is this keyframe section of the file interlaced
+        pbi->Configuration.Interlaced = (UINT8)ReadHeaderBits(Header, 1);
+
+        // Start the first bool decoder (modes, mv, probs and some flags)
+        // The offset depends on whether or not we are using multiple bool code streams
+        if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+        {
+            VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 4)));
+
+            // Read the buffer offset for the second bool decoder buffer if it is being used
+            pbi->Buff2Offset = (UINT32)ReadHeaderBits(Header, 16);
+        }
+        else
+            VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 2)));
+
+        // SCALING related stuff
+        SetPPInterlacedMode(pbi->postproc, pbi->Configuration.Interlaced);
+        if(pbi->Configuration.Interlaced)
+        {
+            SetDeInterlaceMode(pbi->postproc, pbi->DeInterlaceMode);
+        }
+
+        {
+            UINT32 HFragments;
+            UINT32 VFragments;
+            UINT32 HOldScaled;
+            UINT32 VOldScaled;
+            UINT32 HNewScaled;
+            UINT32 VNewScaled;
+            UINT32 OutputHFragments;
+            UINT32 OutputVFragments;
+
+            // Dimensions are coded in 8 bits each and doubled to give a
+            // count of 8x8 fragments.
+            VFragments = 2 * ((UINT8)VP6_bitread( &pbi->br, 8 ));
+            HFragments = 2 * ((UINT8)VP6_bitread( &pbi->br, 8 ));
+
+            OutputVFragments = 2 * ((UINT8)VP6_bitread( &pbi->br, 8 ));
+            OutputHFragments = 2 * ((UINT8)VP6_bitread( &pbi->br, 8 ));
+
+            // Guard the divisions below against an unset ratio
+            if(pbi->Configuration.HRatio == 0)
+                pbi->Configuration.HRatio = 1;
+
+            if(pbi->Configuration.VRatio == 0)
+                pbi->Configuration.VRatio = 1;
+
+            // Scaled size produced by the previous configuration
+            HOldScaled = pbi->Configuration.HScale * pbi->HFragments * 8 / pbi->Configuration.HRatio;
+            VOldScaled = pbi->Configuration.VScale * pbi->VFragments * 8 / pbi->Configuration.VRatio;
+
+            pbi->Configuration.ExpandedFrameWidth = OutputHFragments * 8;
+            pbi->Configuration.ExpandedFrameHeight = OutputVFragments * 8;
+
+            // Pick the vertical scale factor (1/1, 5/4, 5/3 or 2/1) from the
+            // relation between coded and output size
+            if(VFragments >= OutputVFragments)
+            {
+                pbi->Configuration.VScale = 1;
+                pbi->Configuration.VRatio = 1;
+            }
+            else if (5*VFragments >= 4*OutputVFragments)
+            {
+                pbi->Configuration.VScale = 5;
+                pbi->Configuration.VRatio = 4;
+            }
+            else if (5*VFragments >= 3*OutputVFragments)
+            {
+                pbi->Configuration.VScale = 5;
+                pbi->Configuration.VRatio = 3;
+            }
+            else
+            {
+                pbi->Configuration.VScale = 2;
+                pbi->Configuration.VRatio = 1;
+            }
+
+            // Same selection for the horizontal direction
+            if(HFragments >= OutputHFragments)
+            {
+                pbi->Configuration.HScale = 1;
+                pbi->Configuration.HRatio = 1;
+            }
+            else if (5*HFragments >= 4*OutputHFragments)
+            {
+                pbi->Configuration.HScale = 5;
+                pbi->Configuration.HRatio = 4;
+            }
+            else if (5*HFragments >= 3*OutputHFragments)
+            {
+                pbi->Configuration.HScale = 5;
+                pbi->Configuration.HRatio = 3;
+            }
+            else
+            {
+                pbi->Configuration.HScale = 2;
+                pbi->Configuration.HRatio = 1;
+            }
+
+            HNewScaled = pbi->Configuration.HScale * HFragments * 8 / pbi->Configuration.HRatio;
+            VNewScaled = pbi->Configuration.VScale * VFragments * 8 / pbi->Configuration.VRatio;
+
+            pbi->ScaleWidth = HNewScaled;
+            pbi->ScaleHeight = VNewScaled;
+
+            pbi->Configuration.ScalingMode = ((UINT32)VP6_bitread( &pbi->br, 2 ));
+
+            // we have a new input size
+            if( VFragments != pbi->VFragments || HFragments != pbi->HFragments )
+            {
+                // Validate the combination of height and width.
+                pbi->Configuration.VideoFrameWidth = HFragments*8;
+                pbi->Configuration.VideoFrameHeight = VFragments*8;
+
+                // Initialisation fails on size overflow or when a buffer
+                // cannot be allocated. Decoding must not continue in that
+                // case because the frame buffers are missing. The fragment
+                // counts are cleared so that the next key frame is seen as a
+                // size change and initialisation is attempted again.
+                if ( !VP6_InitFrameDetails(pbi) )
+                {
+                    pbi->HFragments = 0;
+                    pbi->VFragments = 0;
+                    return FALSE;
+                }
+            }
+
+            // we have a new intermediate buffer clean the screen
+            if( pbi->ScaleBuffer != 0 &&
+                (HOldScaled != HNewScaled || VOldScaled != VNewScaled) )
+            {
+                // turn the screen black!!
+                memset(pbi->ScaleBuffer, 0x0, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) );
+                memset(pbi->ScaleBuffer + (pbi->OutputWidth+32) * (pbi->OutputHeight+32),
+                    0x80, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) / 2 );
+            }
+        }
+
+        // Unless in SIMPLE_PROFILE read the filter strategy for fractional pels
+        if ( pbi->VpProfile != SIMPLE_PROFILE )
+        {
+            // Find out if selective bicubic filtering should be used for motion prediction.
+            if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+            {
+                pbi->PredictionFilterMode = AUTO_SELECT_PM;
+
+                // Read in the variance threshold to be used
+                pbi->PredictionFilterVarThresh = ((UINT32)VP6_bitread( &pbi->br, 5) << ((pbi->Vp3VersionNo > 7) ? 0 : 5) );
+
+                // Read the bicubic vector length limit (0 actually means ignore vector length)
+                pbi->PredictionFilterMvSizeThresh = (UINT8)VP6_bitread( &pbi->br, 3);
+            }
+            else
+            {
+                if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+                    pbi->PredictionFilterMode = BICUBIC_ONLY_PM;
+                else
+                    pbi->PredictionFilterMode = BILINEAR_ONLY_PM;
+            }
+
+            if ( pbi->Vp3VersionNo > 7 )
+                pbi->PredictionFilterAlpha = VP6_bitread( &pbi->br, 4);
+            else
+                pbi->PredictionFilterAlpha = 16;    // VP61 backwards compatibility
+        }
+    }
+    // Non key frame specific stuff
+    else
+    {
+        // Start the first bool decoder (modes, mv, probs and some flags)
+        // The offset depends on whether or not we are using multiple bool code streams
+        if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+        {
+            VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 3)));
+
+            // Read the buffer offset for the second bool decoder buffer if it is being used
+            pbi->Buff2Offset = (UINT32)ReadHeaderBits(Header, 16);
+        }
+        else
+            VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 1)));
+
+        // Find out if the golden frame should be refreshed this frame - use bool decoder
+        pbi->RefreshGoldenFrame = (BOOL)VP6_DecodeBool(&pbi->br, 128);
+
+        if ( pbi->VpProfile != SIMPLE_PROFILE )
+        {
+            // Determine if loop filtering is on and if so what type should be used
+            pbi->UseLoopFilter = VP6_DecodeBool(&pbi->br, 128);
+            if ( pbi->UseLoopFilter )
+            {
+                pbi->UseLoopFilter = (pbi->UseLoopFilter << 1) | VP6_DecodeBool(&pbi->br, 128);
+            }
+
+            if ( pbi->Vp3VersionNo > 7 )
+            {
+                // Are the prediction characteristics being updated this frame
+                if ( VP6_DecodeBool(&pbi->br, 128) )
+                {
+                    // Find out if selective bicubic filtering should be used for motion prediction.
+                    if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+                    {
+                        pbi->PredictionFilterMode = AUTO_SELECT_PM;
+
+                        // Read in the variance threshold to be used
+                        pbi->PredictionFilterVarThresh = (UINT32)VP6_bitread( &pbi->br, 5);
+
+                        // Read the bicubic vector length limit (0 actually means ignore vector length)
+                        pbi->PredictionFilterMvSizeThresh = (UINT8)VP6_bitread( &pbi->br, 3);
+                    }
+                    else
+                    {
+                        if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+                            pbi->PredictionFilterMode = BICUBIC_ONLY_PM;
+                        else
+                            pbi->PredictionFilterMode = BILINEAR_ONLY_PM;
+                    }
+
+                    pbi->PredictionFilterAlpha = VP6_bitread( &pbi->br, 4 );
+                }
+            }
+            else
+                pbi->PredictionFilterAlpha = 16;    // VP61 backwards compatibility
+        }
+    }
+
+    // All frames (Key & Inter frames)
+    if(pbi->Vp3VersionNo < 3 )
+        RetVal = FALSE;
+
+    // Should this frame use huffman for the dct data
+    pbi->UseHuffman = (BOOL)VP6_DecodeBool(&pbi->br, 128);
+
+    // Set this frame quality value from Q Index
+    pbi->quantizer->FrameQIndex = DctQMask;
+    VP6_UpdateQ( pbi->quantizer, pbi->Vp3VersionNo );
+
+    return RetVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_LoadFrame
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : BOOL: FALSE on error or frame empty, TRUE otherwise.
+ *
+ * FUNCTION : Loads the next frame from the encoded data buffer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+BOOL VP6_LoadFrame ( PB_INSTANCE *pbi )
+{
+    // Loading a frame here amounts to parsing its header (which carries the
+    // frame size); a header failure is reported to the caller as FALSE.
+    return LoadFrameHeader(pbi) ? TRUE : FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_SetFrameType
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * UINT8 FrType : Type of the frame.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Sets the current frame type.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_SetFrameType ( PB_INSTANCE *pbi, UINT8 FrType )
+{
+    // Record the caller-supplied frame type on the instance as given.
+    pbi->FrameType = FrType;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_GetFrameType
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT8: The current frame type.
+ *
+ * FUNCTION : Gets the current frame type.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT8 VP6_GetFrameType ( PB_INSTANCE *pbi )
+{
+    // Report the frame type currently recorded on the instance.
+    UINT8 CurrentType = pbi->FrameType;
+
+    return CurrentType;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c
new file mode 100644
index 00000000..311cb78a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c
@@ -0,0 +1,160 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+
+/****************************************************************************
+*
+* ROUTINE : VP6_SetPbParam
+*
+* INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+* PB_COMMAND_TYPE Command : Command action specifier.
+* uintptr_t Parameter : Command dependent value (an integer
+* setting, or a YUV_BUFFER_CONFIG pointer
+* for PBC_SET_REFERENCEFRAME).
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Generalised command interface to decoder.
+*
+* SPECIAL NOTES : The body is compiled only when POSTPROCESS is defined;
+* without it the call does nothing. Unknown commands are
+* ignored.
+*
+****************************************************************************/
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+ switch ( Command )
+ {
+ // Derive the post-processing level from the frame size and the share of
+ // the processor the caller is prepared to spend (Parameter, in percent).
+ case PBC_SET_CPUFREE:
+ {
+#if defined(_MSC_VER)
+ double Pixels = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+ double FreeMhz = pbi->ProcessorFrequency * Parameter / 100;
+ double PixelsPerMhz = 100 * sqrt(1.0*Pixels) / FreeMhz;
+#else
+ // Other compilers use a fixed figure of 1000, so the first test below
+ // always selects level 0.
+ double PixelsPerMhz = 100 *10;
+#endif
+ pbi->CPUFree = Parameter;
+
+ // Higher load per free MHz selects a lower-numbered level
+ if( PixelsPerMhz > 150 )
+ pbi->PostProcessingLevel = 0;
+ else if( PixelsPerMhz > 100 )
+ pbi->PostProcessingLevel = 8;
+ else if( PixelsPerMhz > 90 )
+ pbi->PostProcessingLevel = 4;
+ else if( PixelsPerMhz > 80 )
+ pbi->PostProcessingLevel = 5;
+ else
+ pbi->PostProcessingLevel = 6;
+ break;
+ }
+
+ case PBC_SET_ADDNOISE:
+ pbi->AddNoiseMode = Parameter;
+ //SetAddNoiseMode(pbi->postproc, Parameter);
+ break;
+
+ // Parameter is reinterpreted as a YUV_BUFFER_CONFIG pointer and handed to
+ // CopyFrame together with each of the two reference frames.
+ // NOTE(review): CopyFrame is not visible here - confirm argument order / copy direction.
+ case PBC_SET_REFERENCEFRAME:
+ CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+ CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+ break;
+
+ // Level 9 means "choose automatically": it is mapped onto PBC_SET_CPUFREE
+ // with 70 percent. Any other value is stored as the explicit level and
+ // CPUFree is cleared.
+ case PBC_SET_POSTPROC:
+ if( Parameter == 9 )
+ VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+ else
+ {
+ pbi->CPUFree = 0;
+ pbi->PostProcessingLevel = Parameter;
+ }
+ break;
+
+ case PBC_SET_DEINTERLACEMODE:
+ pbi->DeInterlaceMode = Parameter;
+ break;
+
+ case PBC_SET_BLACKCLAMP:
+ pbi->BlackClamp = Parameter;
+ break;
+
+ case PBC_SET_WHITECLAMP:
+ pbi->WhiteClamp = Parameter;
+ break;
+
+ default:
+ break;
+ }
+#endif
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_GetProcessorFrequency()
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : unsigned long: clock speed of the host processor.
+ *
+ * FUNCTION : Get the Processor's working frequency.
+ *
+ * SPECIAL NOTES : Stub function--always returns value 0.
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency ( void )
+{
+ // Generic build: no frequency detection is done, 0 is reported.
+ return 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DMachineSpecificConfig
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Sets up pointers to platform dependant functions.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+ // Generic build: route VP6_BuildQuantIndex to the generic implementation.
+ VP6_BuildQuantIndex = VP6_BuildQuantIndex_Generic;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_IssueWarning
+ *
+ * INPUTS : char *WarningMessage : Message to be issued.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Issues a warning message.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+ // The message is discarded in this build; the cast only marks the
+ // parameter as intentionally unused.
+ (void) WarningMessage;
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c
new file mode 100644
index 00000000..704f3fe0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c
@@ -0,0 +1,478 @@
+/****************************************************************************
+*
+* Module Title : FrameIni.c
+*
+* Description : Initialization functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "duck_mem.h"
+#include <intsafe.h> // TODO: make a mac version of this
+/****************************************************************************
+* Module Static Variables
+****************************************************************************/
+// Row/column displacements of the 12 neighbouring macroblocks.
+// VP6_InitFrameDetails converts each entry with MBOffset() into
+// pbi->mvNearOffset[]. Every entry lies above the current macroblock
+// (negative row) or to its left on the same row (row 0, negative col).
+static const struct
+{
+ INT32 row;
+ INT32 col;
+} NearMacroBlocks[12] =
+{
+ { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -1, 1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -1, -2 },
+ { -2, -1 },
+ { -2, 1 },
+ { -1, 2 },
+ { -2, -2 },
+ { -2, 2 }
+};
+
+/****************************************************************************
+*
+* ROUTINE : VP6_InitMBI
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Initialize MBI structure.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+void
+VP6_InitMBI(PB_INSTANCE *pbi)
+{
+    int blk;
+
+    // Settings that follow the same rule for all six blocks
+    for ( blk = 0; blk < 6; blk++ )
+    {
+        pbi->mbi.blockDxInfo[blk].ZeroRunProbsBasePtr = (UINT8 *)pbi->ZeroRunProbs;
+        pbi->mbi.blockDxInfo[blk].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[blk]];
+    }
+
+    // Blocks 0-3 use the Y (plane 0) tables, contexts and strides
+    for ( blk = 0; blk < 4; blk++ )
+    {
+        pbi->mbi.blockDxInfo[blk].AcProbsBasePtr = pbi->AcProbs + ACProbOffset(0,0,0,0);
+        pbi->mbi.blockDxInfo[blk].DcProbsBasePtr = pbi->DcProbs + DCProbOffset(0,0);
+        pbi->mbi.blockDxInfo[blk].DcNodeContextsBasePtr = pbi->DcNodeContexts + DcNodeOffset(0,0,0);
+
+        pbi->mbi.blockDxInfo[blk].LastDc = pbi->fc.LastDcY;
+
+        // Blocks 0 and 1 share LeftY[0], blocks 2 and 3 share LeftY[1]
+        pbi->mbi.blockDxInfo[blk].Left = &pbi->fc.LeftY[blk >> 1];
+
+        pbi->mbi.blockDxInfo[blk].MvShift = Y_MVSHIFT;
+        pbi->mbi.blockDxInfo[blk].MvModMask = Y_MVMODMASK;
+
+        pbi->mbi.blockDxInfo[blk].CurrentReconStride = pbi->Configuration.YStride;
+        pbi->mbi.blockDxInfo[blk].FrameReconStride = pbi->Configuration.YStride;
+
+        //______ compressor only ______
+        pbi->mbi.blockDxInfo[blk].FrameSourceStride = pbi->Configuration.VideoFrameWidth;
+        pbi->mbi.blockDxInfo[blk].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+        pbi->mbi.blockDxInfo[blk].Plane = 0;
+        //______ compressor only ______
+    }
+
+    // Blocks 4 and 5 use the U/V (plane 1) tables and strides
+    for ( blk = 4; blk < 6; blk++ )
+    {
+        pbi->mbi.blockDxInfo[blk].AcProbsBasePtr = pbi->AcProbs + ACProbOffset(1,0,0,0);
+        pbi->mbi.blockDxInfo[blk].DcProbsBasePtr = pbi->DcProbs + DCProbOffset(1,0);
+        pbi->mbi.blockDxInfo[blk].DcNodeContextsBasePtr = pbi->DcNodeContexts + DcNodeOffset(1,0,0);
+
+        pbi->mbi.blockDxInfo[blk].MvShift = UV_MVSHIFT;
+        pbi->mbi.blockDxInfo[blk].MvModMask = UV_MVMODMASK;
+
+        pbi->mbi.blockDxInfo[blk].CurrentReconStride = pbi->Configuration.UVStride;
+        pbi->mbi.blockDxInfo[blk].FrameReconStride = pbi->Configuration.UVStride;
+
+        //______ compressor only ______
+        pbi->mbi.blockDxInfo[blk].FrameSourceStride = pbi->Configuration.VideoFrameWidth/2;
+        pbi->mbi.blockDxInfo[blk].CurrentSourceStride = pbi->Configuration.VideoFrameWidth/2;
+        pbi->mbi.blockDxInfo[blk].Plane = 1;
+        //______ compressor only ______
+    }
+
+    // U and V each keep their own DC predictor and left context
+    pbi->mbi.blockDxInfo[4].LastDc = pbi->fc.LastDcU;
+    pbi->mbi.blockDxInfo[5].LastDc = pbi->fc.LastDcV;
+    pbi->mbi.blockDxInfo[4].Left = &pbi->fc.LeftU;
+    pbi->mbi.blockDxInfo[5].Left = &pbi->fc.LeftV;
+
+    // Default clear data area down to 0s (one buffer holds all six 64-entry blocks)
+    memset(pbi->mbi.blockDxInfo[0].coeffsPtr, 0, 6*64*sizeof(Q_LIST_ENTRY));
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_DeleteFragmentInfo
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : De-allocates memory associated with decoder data structures.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+void VP6_DeleteFragmentInfo ( PB_INSTANCE *pbi )
+{
+    // Releases every fragment-level buffer and clears the owning pointer, so
+    // the function is safe to call repeatedly and on a partly allocated
+    // instance.
+    int blk;
+
+    // Free prior allocs if present.
+    // blockDxInfo[1..5].coeffsPtr are set by VP6_AllocateFragmentInfo to
+    // point into the single allocation owned by blockDxInfo[0], so all six
+    // pointers are cleared here; otherwise five of them would be left
+    // pointing into freed memory.
+    if(pbi->mbi.blockDxInfo[0].coeffsPtr)
+        duck_free(pbi->mbi.blockDxInfo[0].coeffsPtr);
+    for ( blk = 0; blk < 6; blk++ )
+        pbi->mbi.blockDxInfo[blk].coeffsPtr = 0;
+
+    if( pbi->FragInfo)
+        duck_free(pbi->FragInfo);
+    pbi->FragInfo = 0;
+
+    if( pbi->fc.AboveY)
+        duck_free(pbi->fc.AboveY);
+    pbi->fc.AboveY = 0;
+
+    if( pbi->fc.AboveU)
+        duck_free(pbi->fc.AboveU);
+    pbi->fc.AboveU = 0;
+
+    if( pbi->fc.AboveV)
+        duck_free(pbi->fc.AboveV);
+    pbi->fc.AboveV = 0;
+
+    if( pbi->MBInterlaced)
+        duck_free(pbi->MBInterlaced);
+    pbi->MBInterlaced = 0;
+
+    if( pbi->MBMotionVector)
+        duck_free(pbi->MBMotionVector);
+    pbi->MBMotionVector = 0;
+
+    if( pbi->predictionMode)
+        duck_free(pbi->predictionMode);
+    pbi->predictionMode = 0;
+
+#ifdef DMAREADREFERENCE
+    if(pbi->ReferenceBlocks)
+        duck_free(pbi->ReferenceBlocks);
+    pbi->ReferenceBlocks = 0;
+#endif
+#ifdef DMAWRITERECON
+    if(pbi->ReconstructedMBs)
+        duck_free(pbi->ReconstructedMBs);
+    pbi->ReconstructedMBs = 0;
+#endif
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_AllocateFragmentInfo
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : BOOL: TRUE if successful, FALSE on error.
+*
+* FUNCTION : Allocates the fragment-level buffers of the playback instance passed in.
+*
+* SPECIAL NOTES : Uses duck_memalign to ensure data structures are aligned
+* on 32-byte boundaries to improve cache performance.
+*
+****************************************************************************/
+BOOL VP6_AllocateFragmentInfo ( PB_INSTANCE *pbi )
+{
+    int blk;
+
+    // Start from a clean slate
+    VP6_DeleteFragmentInfo(pbi);
+
+    // One buffer holds the coefficients of all six blocks, 64 entries each
+    pbi->mbi.blockDxInfo[0].coeffsPtr = (Q_LIST_ENTRY *) duck_memalign(32, sizeof(Q_LIST_ENTRY)*64*6, DMEM_GENERAL);
+    if ( !pbi->mbi.blockDxInfo[0].coeffsPtr )
+        goto alloc_failed;
+
+    for ( blk = 1; blk < 6; blk++ )
+        pbi->mbi.blockDxInfo[blk].coeffsPtr = pbi->mbi.blockDxInfo[blk-1].coeffsPtr + 64;
+
+    // context allocations
+    pbi->fc.AboveY = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if ( !pbi->fc.AboveY )
+        goto alloc_failed;
+
+    pbi->fc.AboveU = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if ( !pbi->fc.AboveU )
+        goto alloc_failed;
+
+    pbi->fc.AboveV = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if ( !pbi->fc.AboveV )
+        goto alloc_failed;
+
+    // the encoder is the only thing using this move it to compdll
+    pbi->MBInterlaced = (char *) duck_memalign(32, pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if ( !pbi->MBInterlaced )
+        goto alloc_failed;
+
+    pbi->predictionMode = (char *) duck_memalign(32, pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if ( !pbi->predictionMode )
+        goto alloc_failed;
+
+    pbi->MBMotionVector = (MOTION_VECTOR *) duck_memalign(32, pbi->MacroBlocks * sizeof(MOTION_VECTOR ), DMEM_GENERAL);
+    if ( !pbi->MBMotionVector )
+        goto alloc_failed;
+
+    // the encoder is the only thing using this move it to compdll
+    pbi->FragInfo = (FRAG_INFO *) duck_memalign(32, pbi->UnitFragments * sizeof(FRAG_INFO), DMEM_GENERAL);
+    if ( !pbi->FragInfo )
+        goto alloc_failed;
+
+#ifdef DMAREADREFERENCE
+    pbi->ReferenceBlocks=(UINT8(*)[192])duck_memalign(32, 6*192, DMEM_GENERAL);
+    if ( !pbi->ReferenceBlocks )
+        goto alloc_failed;
+#endif
+
+#ifdef DMAWRITERECON
+    pbi->ReconstructedMBs = (UINT8*) duck_memalign(32, 768, DMEM_GENERAL);
+    if ( !pbi->ReconstructedMBs )
+        goto alloc_failed;
+#endif
+
+    return TRUE;
+
+alloc_failed:
+    // Give back whatever was obtained before the failing request
+    VP6_DeleteFragmentInfo(pbi);
+    return FALSE;
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_DeleteFrameInfo
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : De-allocate memory associated with frame level data
+* structures.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+void VP6_DeleteFrameInfo ( PB_INSTANCE *pbi )
+{
+    // Each frame buffer is released if present and its pointer cleared, so
+    // the call is harmless on an instance that holds no buffers.
+    if ( pbi->ThisFrameRecon != 0 )
+    {
+        duck_free(pbi->ThisFrameRecon );
+    }
+    if ( pbi->GoldenFrame != 0 )
+    {
+        duck_free(pbi->GoldenFrame);
+    }
+    if ( pbi->LastFrameRecon != 0 )
+    {
+        duck_free(pbi->LastFrameRecon);
+    }
+    if ( pbi->PostProcessBuffer != 0 )
+    {
+        duck_free(pbi->PostProcessBuffer);
+    }
+
+    pbi->PostProcessBuffer = 0;
+    pbi->LastFrameRecon = 0;
+    pbi->GoldenFrame = 0;
+    pbi->ThisFrameRecon = 0;
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_AllocateFrameInfo
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+* unsigned int FrameSize : Size of the YUV frame in bytes.
+*
+* OUTPUTS : None
+*
+* RETURNS : BOOL: TRUE if successful, FALSE on error.
+*
+* FUNCTION : Allocates the frame buffers of the playback instance passed in.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+BOOL VP6_AllocateFrameInfo ( PB_INSTANCE *pbi, unsigned int FrameSize )
+{
+    // Drop any buffers left over from a previous frame size
+    VP6_DeleteFrameInfo(pbi);
+
+    // Allocate frame buffers:
+    // Each buffer is one Y stride larger than the frame itself so that
+    // copy12x12 does not run off the end when a large motion vector is used
+    // on the last V block. The extra pixels are never displayed.
+    pbi->ThisFrameRecon = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if ( !pbi->ThisFrameRecon )
+        goto alloc_failed;
+
+    pbi->GoldenFrame = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY ), DMEM_GENERAL);
+    if ( !pbi->GoldenFrame )
+        goto alloc_failed;
+
+    pbi->LastFrameRecon = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if ( !pbi->LastFrameRecon )
+        goto alloc_failed;
+
+    pbi->PostProcessBuffer = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if ( !pbi->PostProcessBuffer )
+        goto alloc_failed;
+
+    return TRUE;
+
+alloc_failed:
+    // Release the buffers obtained before the failing request
+    VP6_DeleteFrameInfo(pbi);
+    return FALSE;
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_InitFrameDetails
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : BOOL: TRUE on success, FALSE on failure.
+*
+* FUNCTION : Initialises various details about the frame.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+BOOL VP6_InitFrameDetails ( PB_INSTANCE *pbi )
+{
+    UINT32 n;
+    int FrameSize;
+
+    // Re-issue the CPU-free setting, if one is active, now that the frame
+    // size has changed
+    if ( pbi->CPUFree > 0 )
+        VP6_SetPbParam( pbi, PBC_SET_CPUFREE, pbi->CPUFree );
+
+    /* Set the frame size etc. - every size product is overflow checked */
+    if ( UIntMult(pbi->Configuration.VideoFrameWidth, pbi->Configuration.VideoFrameHeight, &pbi->YPlaneSize) != S_OK )
+        return FALSE;
+
+    pbi->UVPlaneSize = pbi->YPlaneSize / 4;
+    pbi->HFragments = pbi->Configuration.VideoFrameWidth / pbi->Configuration.HFragPixels;
+    pbi->VFragments = pbi->Configuration.VideoFrameHeight / pbi->Configuration.VFragPixels;
+
+    if ( UIntMult(pbi->VFragments, pbi->HFragments, &pbi->YPlaneFragments) != S_OK ||
+         UIntMult(pbi->YPlaneFragments, 3, &pbi->UnitFragments) != S_OK )
+        return FALSE;
+
+    // Y fragments plus half as many again for U and V together
+    pbi->UnitFragments /= 2;
+    pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
+
+    pbi->Configuration.YStride = (pbi->Configuration.VideoFrameWidth + STRIDE_EXTRA);
+    pbi->Configuration.UVStride = pbi->Configuration.YStride / 2;
+
+    if ( UIntMult(pbi->Configuration.YStride, pbi->Configuration.VideoFrameHeight + STRIDE_EXTRA, &pbi->ReconYPlaneSize) != S_OK )
+        return FALSE;
+
+    pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
+
+    FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
+
+    // Plane offsets within the source and reconstruction buffers
+    pbi->YDataOffset = 0;
+    pbi->UDataOffset = pbi->YPlaneSize;
+    pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
+    pbi->ReconYDataOffset = 0;
+    pbi->ReconUDataOffset = pbi->ReconYPlaneSize;
+    pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize;
+
+    // Image dimensions in Macro-Blocks (rounded up, plus a border on each side)
+    pbi->MBRows = (2*BORDER_MBS)+(pbi->Configuration.VideoFrameHeight/16) + ( pbi->Configuration.VideoFrameHeight%16 ? 1 : 0 );
+    pbi->MBCols = (2*BORDER_MBS)+(pbi->Configuration.VideoFrameWidth/16) + ( pbi->Configuration.VideoFrameWidth%16 ? 1 : 0 );
+    pbi->MacroBlocks = pbi->MBRows * pbi->MBCols;
+
+    // Turn the neighbour row/col table into macroblock offsets for this frame width
+    for ( n = 0; n < 12; n++ )
+        pbi->mvNearOffset[n] = MBOffset(NearMacroBlocks[n].row, NearMacroBlocks[n].col);
+
+    ChangePostProcConfiguration(pbi->postproc, &pbi->Configuration);
+
+    if ( !VP6_AllocateFragmentInfo(pbi) )
+        return FALSE;
+
+    if ( !VP6_AllocateFrameInfo(pbi, FrameSize) )
+    {
+        VP6_DeleteFragmentInfo(pbi);
+        return FALSE;
+    }
+
+    // We have a different output size than our scaling provides
+    if ( pbi->ScaleBuffer == 0 && pbi->OutputWidth &&
+         (pbi->Configuration.VideoFrameWidth != pbi->OutputWidth ||
+          pbi->Configuration.VideoFrameHeight != pbi->OutputHeight ) )
+    {
+        // Add 32 to outputwidth to ensure that we have enough to overscale
+        // (ie scale to a size that's bigger than our output size). Do this
+        // now even though we don't use it so we don't have to check border conditions.
+        pbi->ScaleBuffer = (UINT8 *)
+            duck_malloc(32 + 3 *
+            (pbi->OutputWidth + 32) *
+            (pbi->OutputHeight + 32)*
+            sizeof(YUV_BUFFER_ENTRY) / 2, DMEM_GENERAL);
+    }
+
+    VP6_InitMBI(pbi);
+
+    return TRUE;
+}
+
+/****************************************************************************
+*
+* ROUTINE : VP6_InitialiseConfiguration
+*
+* INPUTS : PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Sets the base size of a coding block (8x8).
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+void VP6_InitialiseConfiguration ( PB_INSTANCE *pbi )
+{
+ // A fragment (coding block) is 8 pixels wide and 8 pixels high.
+ // VP6_InitFrameDetails divides the frame dimensions by these values.
+ pbi->Configuration.HFragPixels = 8;
+ pbi->Configuration.VFragPixels = 8;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c
new file mode 100644
index 00000000..c6dcffdd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c
@@ -0,0 +1,350 @@
+/****************************************************************************
+*
+* Module Title : Huffman.c
+*
+* Description : Huffman coding routines.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "huffman.h"
+#include "pbdll.h"
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+// Sort-list entry carrying a plain byte value.
+// NOTE(review): no routine visible in this part of the file references SORT_NODE;
+// confirm whether it is still needed.
+typedef struct _SORT_NODE
+{
+ int next;
+ int freq;
+ unsigned char value;
+} SORT_NODE;
+
+// Entry of the index-linked list that InsertSorted keeps ordered by ascending freq
+// and that VP6_BuildHuffTree consumes two entries at a time.
+typedef struct _sortnode
+{
+ int next; // array index of the following entry; -1 terminates the list
+ int freq; // sort key
+ tokenorptr value; // selector 1: value is a token; selector 0: value is an index into the HUFF_NODE array
+} sortnode;
+
+/****************************************************************************
+ *
+ * ROUTINE : InsertSorted
+ *
+ * INPUTS : sortnode *sn : Array of sort nodes.
+ * int node : Index of node to be inserted.
+ * int *startnode : Pointer to _head of linked-list.
+ *
+ * OUTPUTS : int *startnode : Pointer to _head of linked-list.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Inserts a node into a sorted linklist.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+static void InsertSorted ( sortnode *sn, int node, int *startnode )
+{
+    int cur;
+    int prev = *startnode;
+
+    // Step past every entry whose frequency is strictly lower than the new
+    // node's, remembering the last entry that was passed.
+    for ( cur = *startnode; cur != -1 && sn[node].freq > sn[cur].freq; cur = sn[cur].next )
+        prev = cur;
+
+    // If the scan never left the head position the node becomes the new head
+    // (this also covers an empty list, where *startnode is -1); otherwise it
+    // is linked in behind the last entry that was passed.
+    if ( cur != *startnode )
+        sn[prev].next = node;
+    else
+        *startnode = node;
+
+    sn[node].next = cur;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_BuildHuffTree
+ *
+ * INPUTS : int values : Number of values (leaves) in the tree;
+ * must be in the range 1..128.
+ * unsigned int *counts : Histogram of token frequencies.
+ *
+ * OUTPUTS : HUFF_NODE *hn : Receives the values-1 internal tree nodes; the
+ * tree is built in reverse so the root is hn[0].
+ * unsigned int *counts : Histogram of token frequencies (0 freq clipped to 1).
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Creates a Huffman tree data structure from list
+ * of token frequencies.
+ *
+ * SPECIAL NOTES : The local sort list holds 256 entries and a tree needs
+ * 2*values-1 of them, so at most 128 values can be handled.
+ * A call with values outside 1..128 returns without
+ * touching hn or counts.
+ *
+ ****************************************************************************/
+void VP6_BuildHuffTree ( HUFF_NODE *hn, unsigned int *counts, int values )
+{
+    enum { MAX_SORT_NODES = 256 };
+
+    int i;
+    sortnode sn[MAX_SORT_NODES];
+    int sncount;
+    int startnode=0;
+
+    // NOTE:
+    // Create huffman tree in reverse order so that the root will always be 0
+    int huffptr=values-1;
+
+    // values leaves plus values-1 merged nodes must all fit in sn[]; with no
+    // leaves at all the merge loop below would read an uninitialised sn[0].
+    if ( values < 1 || values > (MAX_SORT_NODES + 1) / 2 )
+        return;
+
+    // Set up one sort node per token: selector 1 marks a leaf, value is the token
+    for ( i=0; i<values; i++ )
+    {
+        sn[i].value.selector = 1;
+        sn[i].value.value = i;
+        if ( counts[i] == 0 )
+            counts[i] = 1;
+        sn[i].freq = counts[i];
+        sn[i].next = -1;
+    }
+
+    sncount = values;
+
+    // Connect above list into a linked list (node 0 is the initial head)
+    for ( i=1; i<values; i++ )
+        InsertSorted ( sn, i, &startnode );
+
+    // while there is more than one node in our linked list
+    while ( sn[startnode].next != -1 )
+    {
+        // the two lowest-frequency nodes sit at the front of the list
+        int first = startnode;
+        int second = sn[startnode].next;
+        int sumfreq = sn[first].freq + sn[second].freq;
+
+        // set-up new merged huffman node; freq is the share (out of 256) of
+        // the combined frequency that belongs to the left child
+        --huffptr;
+        hn[huffptr].leftunion.left = sn[first].value;
+        hn[huffptr].rightunion.right = sn[second].value;
+        hn[huffptr].freq = 256 * sn[first].freq / sumfreq;
+
+        // set up new merged sort node pointing to our huffnode
+        sn[sncount].value.selector = 0;
+        sn[sncount].value.value = huffptr;
+        sn[sncount].freq = sumfreq;
+        sn[sncount].next = -1;
+
+        // remove the two nodes we just merged from the linked list
+        startnode = sn[second].next;
+
+        // insert the new sort node into the proper location
+        InsertSorted(sn, sncount, &startnode);
+
+        // account for new nodes
+        sncount++;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_BuildHuffLookupTable
+ *
+ * INPUTS : HUFF_NODE *HuffTreeRoot : Pointer to root of Huffman tree.
+ *
+ * OUTPUTS : UINT16 *HuffTable : Array (LUT) of Huffman codes.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Traverse Huffman tree to create LUT of Huffman codes.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildHuffLookupTable ( HUFF_NODE *HuffTreeRoot, UINT16 *HuffTable )
+{
+    // The table is filled through the HUFF_TABLE_NODE view of each entry
+    // (value / flag / length fields) rather than by packing a UINT16 by hand.
+    HUFF_TABLE_NODE *lut = (HUFF_TABLE_NODE *)HuffTable;
+    int code;
+
+    // One entry for every possible HUFF_LUT_LEVELS-bit prefix
+    for ( code=0; code<(1<<HUFF_LUT_LEVELS); code++ )
+    {
+        tokenorptr torp;
+        int depth = 0;
+
+        // start at the root (node 0, not a leaf)
+        torp.value = 0;
+        torp.selector = 0;
+
+        // Walk down the tree using the bits of code, most significant first,
+        // until a leaf is reached or all HUFF_LUT_LEVELS bits are used up.
+        for ( ;; )
+        {
+            depth++;
+
+            if ( (code>>(HUFF_LUT_LEVELS - depth)) & 1 )
+                torp = HuffTreeRoot[torp.value].rightunion.right;
+            else
+                torp = HuffTreeRoot[torp.value].leftunion.left;
+
+            if ( torp.selector || depth >= HUFF_LUT_LEVELS )
+                break;
+        }
+
+        // value: token (flag set) or tree node reached (flag clear);
+        // length: number of bits consumed to get there
+        lut[code].value  = torp.value;
+        lut[code].flag   = torp.selector;
+        lut[code].length = depth;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_CreateCodeArray
+ *
+ * INPUTS : HUFF_NODE *hn : List of Huffman tree nodes.
+ * int node : Current position within list of Huffman tree nodes.
+ * int codevalue : Huffman code as found so far.
+ * int codelength : Length of Huffman code so far (in bits).
+ *
+ * OUTPUTS : unsigned int *codearray : Array to hold Huffman codes.
+ * unsigned char *lengtharray : Array to hold lengths of Huffman codes.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Recursively traverse Huffman tree to create LUT of Huffman codes.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_CreateCodeArray
+(
+    HUFF_NODE *hn,
+    int node,
+    unsigned int *codearray,
+    unsigned char *lengtharray,
+    int codevalue,
+    int codelength
+)
+{
+    int bit;
+
+    // Handle the left child (code extended with a 0 bit) and then the right
+    // child (code extended with a 1 bit) in the same way.
+    for ( bit=0; bit<2; bit++ )
+    {
+        tokenorptr child;
+
+        if ( bit )
+            child = hn[node].rightunion.right;
+        else
+            child = hn[node].leftunion.left;
+
+        if ( child.selector )
+        {
+            // Leaf: record the finished code and its length for this token
+            codearray[child.value]   = (codevalue<<1)+bit;
+            lengtharray[child.value] = codelength+1;
+        }
+        else
+        {
+            // Internal node: recurse with the extended code
+            VP6_CreateCodeArray (
+                hn,
+                child.value,
+                codearray,
+                lengtharray,
+                ((codevalue << 1) + bit),
+                (codelength + 1) );
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeValue
+ *
+ * INPUTS : BOOL_CODER *bc : Pointer to a Bool Coder instance.
+ * HUFF_NODE *hn : List of Huffman tree nodes.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : int: Decoded token value.
+ *
+ * FUNCTION : Traverse the Huffman tree by reading node decisions
+ * from the bitstream until a leaf node is reached. Returns
+ * the value associated with this leaf node.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int VP6_DecodeValue ( BOOL_CODER *bc, HUFF_NODE *hn )
+{
+    tokenorptr cursor;
+
+    // Begin at the root: node 0, which is not a leaf
+    cursor.value = 0;
+    cursor.selector = 0;
+
+    // Each decoded bool picks the right (1) or left (0) branch of the current
+    // node; the walk ends as soon as the branch taken is a leaf.
+    while ( !cursor.selector )
+    {
+        if ( VP6_DecodeBool(bc, hn[cursor.value].freq) )
+            cursor = hn[cursor.value].rightunion.right;
+        else
+            cursor = hn[cursor.value].leftunion.left;
+    }
+
+    return cursor.value;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeValue
+ *
+ * INPUTS : BOOL_CODER *bc : Pointer to a Bool Coder instance.
+ * HUFF_NODE *hn : List of Huffman tree nodes.
+ * int value : Value to be encoded.
+ * int length : Length of value to be encoded (in bits).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Given a Huffman code either output its bits to the encoded
+ * stream or measure the cost of doing so, depending on the
+ * flag bc->MeasureCost. Use VP6_EncodeBool2 if only measuring
+ * approximate number of bits required to encode the Huffman code
+ * or VP6_EncodeBool if actually producing the coded bits using
+ * the specified Bool Coder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_EncodeValue
+(
+    BOOL_CODER *bc,
+    HUFF_NODE *hn,
+    int value,
+    int length
+)
+{
+    int node = 0;       // current tree node, starting at the root
+    int shift;
+
+    // Emit the code bits from most significant to least significant
+    for ( shift=length; shift-- > 0; )
+    {
+        const int bit = (value>>shift) & 1;
+
+        // Either accumulate the approximate cost or really encode the bit
+        if ( bc->MeasureCost )
+            VP6_EncodeBool2 ( bc, (BOOL)bit, hn[node].freq );
+        else
+            VP6_EncodeBool ( bc, (BOOL)bit, hn[node].freq );
+
+        // Follow the branch that corresponds to the bit just handled
+        node = bit ? hn[node].rightunion.right.value
+                   : hn[node].leftunion.left.value;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c
new file mode 100644
index 00000000..4c952d04
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c
@@ -0,0 +1,195 @@
+/****************************************************************************
+*
+* Module Title : TokenEntropy.c
+*
+* Description : Entropy configuration routines.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking. */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "tokenentropy.h"
+#include "pbdll.h"
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+// Cost, in units of 1/256 bit, of coding a symbol whose probability is the
+// table index (0-255). VP6_EncodeBool2 in boolhuff.c adds
+// VP6_ProbCost[probability] for a 0 and VP6_ProbCost[255-probability] for a 1.
+// NOTE(review): the values look like 256 * -log2(index/256) clamped to 2047..1 - confirm.
+const UINT32 VP6_ProbCost[256] =
+{
+2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
+1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
+ 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
+ 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
+ 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
+ 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
+ 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
+ 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
+ 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
+ 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
+ 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
+ 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
+ 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
+ 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
+ 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
+ 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
+};
+
+// Index categories for previous tokens in this block: one entry per entropy
+// token, mapping it to category 0, 1 or 2.
+const UINT8 VP6_PrevTokenIndex[MAX_ENTROPY_TOKENS] = { 0,1,2,2,2,2,2,2,2,2,2,0 };
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+// Both tables below are indexed by token: the number of extra bits that follow
+// the token, and the smallest value of the range the token stands for.
+// NOTE(review): the extra-bit counts presumably include a sign bit - confirm against huffman.h.
+const UINT8 ExtraBitLengths_VP6[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 4, 5, 6, 12, 0 };
+const UINT32 VP6_DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0 };
+
+// Per-node probabilities for the DC token tree: 2 rows of MAX_ENTROPY_TOKENS-1 entries.
+// NOTE(review): presumably the probability that the matching DC node probability is
+// NOT updated in a frame header, with one row per plane - confirm against the reader.
+const UINT8 VP6_DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1] =
+{
+ { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 }
+};
+
+// One probability per coefficient position (BLOCK_SIZE entries, laid out 8 per row).
+// NOTE(review): presumably used when scan-order band assignments are updated - confirm.
+const UINT8 ScanBandUpdateProbs[BLOCK_SIZE] =
+{
+ 255, 132, 132, 159, 153, 151, 161, 170,
+ 164, 162, 136, 110, 103, 114, 129, 118,
+ 124, 125, 132, 136, 114, 110, 142, 135,
+ 134, 123, 143, 126, 153, 183, 166, 161,
+ 171, 180, 179, 164, 203, 218, 225, 217,
+ 215, 206, 203, 217, 229, 241, 248, 243,
+ 253, 255, 253, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255
+};
+
+// Update probabilities for the zero-run-length probabilities below; same
+// [ZRL_BANDS][ZERO_RUN_PROB_CASES] shape as ZeroRunProbDefaults.
+const UINT8 ZrlUpdateProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES] =
+{
+ { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 },
+ { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 },
+};
+
+// Zero run probs: default values, one row per zero-run band
+const UINT8 ZeroRunProbDefaults[ZRL_BANDS][ZERO_RUN_PROB_CASES] =
+{
+ { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 },
+ { 135, 201, 181, 154, 98, 117, 132, 126, 146, 169, 184, 240, 246, 254 },
+};
+
+// Per-node probabilities for the AC token trees, indexed as
+// [preceding-token case][2][AC band][tree node]. The initialiser provides
+// 3 preceding-token cases, 2 sub-tables, 6 bands and 11 nodes.
+// NOTE(review): the dimension of size 2 is presumably the plane (as in the
+// Plane loop of VP6_ConfigureContexts) - confirm.
+const UINT8 VP6_AcUpdateProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1] =
+{
+ { // preceded by 0
+ {
+ { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 },
+ { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 },
+ { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+ { // preceded by 1
+ {
+ { 206, 203, 227, 239, 247, 255, 253, 255, 255, 255, 255 },
+ { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 },
+ { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 },
+ { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 },
+ { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+ { // preceded by > 1
+ {
+ { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 },
+ { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 },
+ { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+};
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+// Dc context equations: Dc Token contexts are 00 0!0 and !0!0
+// One line equation per (tree node, DC token context). VP6_ConfigureContexts
+// evaluates ((prob * M + 128) >> 8) + C and clamps the result to 1..255.
+// NOTE(review): each initialiser pair is presumably { M, C } - confirm against LINE_EQ.
+static const LINE_EQ VP6_DcNodeEqs[CONTEXT_NODES][DC_TOKEN_CONTEXTS] =
+{
+ { { 122, 133 },{ 133, 51 },{ 142, -16 } }, // Zero Node
+ { { 0, 1 },{ 0, 1 },{ 0, 1 } }, // EOB Node Dummy as no EOBs in DC
+ { { 78, 171 },{ 169, 71 },{ 221, -30 } }, // One Node
+ { { 139, 117 },{ 214, 44 },{ 246, -3 } }, // Low Val Node
+ { { 168, 79 },{ 210, 38 },{ 203, 17 } }, // Two Node (2 vs 3 or 4)
+};
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ConfigureContexts
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Configures the context dependant entropy probabilities.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_ConfigureContexts ( PB_INSTANCE *pbi )
+{
+    UINT32 Plane;
+    UINT32 Context;
+    UINT32 Node;
+    INT32  Prob;
+
+#if defined(_MSC_VER)
+    // Clear MMX state so floating point can work again
+    ClearSysState();
+#endif
+
+    // For both planes, every DC token context and every tree node: derive the
+    // context-dependent probability from the baseline DC probability by
+    // applying that node/context's line equation.
+    for ( Plane=0; Plane<2; Plane++ )
+    {
+        for ( Context=0; Context<DC_TOKEN_CONTEXTS; Context++ )
+        {
+            for ( Node=0; Node<CONTEXT_NODES; Node++ )
+            {
+                Prob = ( ( pbi->DcProbs[DCProbOffset(Plane,Node)] * VP6_DcNodeEqs[Node][Context].M + 128 ) >> 8)
+                       + VP6_DcNodeEqs[Node][Context].C;
+
+                // keep the probability inside 1..255
+                if ( Prob > 255 )
+                    Prob = 255;
+                else if ( Prob < 1 )
+                    Prob = 1;
+
+                *(pbi->DcNodeContexts + DcNodeOffset(Plane,Context,Node)) = (UINT8)Prob;
+            }
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c
new file mode 100644
index 00000000..901241f1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c
@@ -0,0 +1,687 @@
+/****************************************************************************
+*
+* Module Title : boolhuff.c
+*
+* Description : Boolean Encoder/Decoder
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "boolhuff.h"
+#include "TokenEntropy.h"
+#include <stdio.h>
+
+// STATS Variables for measuring section costs
+// Only built when MEASURE_SECTION_COSTS is defined. The encode routines add the
+// VP6_ProbCost of every coded bool to Sectionbits[ActiveSection].
+#if defined MEASURE_SECTION_COSTS
+UINT32 Sectionbits[10] = {0,0,0,0,0,0,0,0,0,0};
+UINT32 ActiveSection = 0;
+#endif
+
+#ifdef NOTNORMALIZED
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StartDecode
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean decoder.
+ * unsigned char *buffer : pointer to buffer of data to be decoded.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initializes the boolean decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StartDecode ( BOOL_CODER *bc, unsigned char *buffer )
+{
+    // Read from the start of the caller's buffer
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    // A range of 0 makes the first VP6_DecodeBool call (which refills when
+    // range < 2) load its first bytes.
+    bc->range  = 0;
+    bc->value  = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeBool
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean decoder.
+ * int probability : probability next symbol is a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Next decoded bit: 0 or 1
+ *
+ * FUNCTION : Determines the next value stored in the boolean decoder
+ * based upon the probability passed in. It uses a simple
+ * probability model to approximate an arithmetic coder.
+ *
+ * SPECIAL NOTES : The accuracy of this decoder gets worse as the range
+ * approaches 0. This can be avoided with more complex
+ * normalization functions (as in a standard arithmetic
+ * coder). Chosen to avoid this for speed reasons.
+ *
+ ****************************************************************************/
+// Non-normalizing decoder (built only when NOTNORMALIZED is defined): returns the
+// next bit (0 or 1) given the probability (0-255) that it is a 0.
+int VP6_DecodeBool ( BOOL_CODER *bc, int probability )
+{
+ unsigned int split;
+
+ // Don't have enough in our range to tell between a 0 and 1 so get
+ // 3 new bytes.
+ if( bc->range < 2)
+ {
+ unsigned char *spot = bc->buffer+bc->pos;
+ // NOTE(review): bc->v presumably overlays the bytes of bc->value (declared in boolhuff.h) - confirm.
+ bc->v[0] = spot[0];
+ bc->v[1] = spot[1];
+ bc->v[2] = spot[2];
+
+ // range is reset to 0x01000000 so that the (range-1) * probability
+ // product below stays within 32 bits (this can be handled differently
+ // at the cost of an extra if).
+ bc->range = 0x01000000;
+ bc->pos += 3;
+ }
+
+ // calculate the decision point
+ // black magic: This code works better than if I calculate probability *
+ // range and then truncating to 1 (can't explain why)
+ split = bc->range;
+ split --; // scale range-1 and add 1 back afterwards, so split is never 0
+ split *= probability;
+ split >>= 8;
+ split ++;
+
+ // value below the split: a 0 was coded and the lower part becomes the new range
+ if( bc->value < split )
+ {
+ bc->range = split;
+ return 0;
+ }
+ // otherwise a 1 was coded: continue in the upper part, rebased to start at 0
+ else
+ {
+ bc->range-=split;
+ bc->value-=split;
+ return 1;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StopDecode
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean decoder.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs clean-up for boolean decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StopDecode ( BOOL_CODER *bc )
+{
+    // Nothing to clean up for this decoder variant.
+    (void)bc;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StartEncode
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean encoder.
+ * unsigned char *buffer : pointer to buffer to hold encoded data.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initializes the boolean encoder
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StartEncode ( BOOL_CODER *bc, unsigned char *buffer )
+{
+    // Write to the start of the caller's buffer
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    // Start with the full 24-bit range and nothing accumulated
+    bc->range  = 0x01000000;
+    bc->value  = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeBool
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean encoder.
+ * int x : value to be encoded (0 or 1).
+ * int probability : probability of getting a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Encodes a boolean value (0 or 1) using the specified
+ * boolean encoder.
+ *
+ * SPECIAL NOTES : The accuracy of this encoder gets worse as the range
+ * approaches 0. This can be avoided with more complex
+ * normalization functions (as in a standard arithmetic
+ * coder). Chose to avoid this for speed reasons.
+ *
+ ****************************************************************************/
+// Non-normalizing encoder (built only when NOTNORMALIZED is defined): encodes x
+// (0 or 1) given the probability (0-255) of a 0.
+void VP6_EncodeBool ( BOOL_CODER *bc, int x, int probability )
+{
+ unsigned int split;
+
+ // we don't have enough in our range to tell between a 0 and 1,
+ // so flush 3 bytes to the output buffer and start a fresh range.
+ if( bc->range < 2 )
+ {
+ // NOTE(review): bc->v presumably overlays the bytes of bc->value (declared in boolhuff.h) - confirm.
+ bc->buffer[bc->pos] = bc->v[0];
+ bc->buffer[bc->pos+1] = bc->v[1];
+ bc->buffer[bc->pos+2] = bc->v[2];
+ bc->pos+=3;
+
+ // range is reset to 0x01000000 so that the (range-1) * probability
+ // product below stays within 32 bits ( this can be handled differently
+ // at the cost of an extra if).
+ bc->range = 0x01000000;
+ bc->value = 0;
+ }
+
+ // calculate the decision point
+ // black magic: This code works better than if I calculate probability *
+ // range and then truncating to 1 (can't explain why)
+ split = bc->range;
+ split --;
+ split *= probability;
+ split >>= 8;
+ split ++;
+
+ // a 1 moves into the upper part of the range (value advances by split);
+ // a 0 keeps the lower part
+ if( x )
+ {
+ bc->range-=split;
+ bc->value+=split;
+ }
+ else
+ {
+ bc->range = split;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StopEncode
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean encoder.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs clean-up for boolean encoder
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StopEncode( BOOL_CODER *bc )
+{
+    // Flush the first three bytes of bc->value, in the order they are stored
+    // in memory, to the output buffer.
+    const unsigned char *raw = (const unsigned char *) &bc->value;
+    int n = 0;
+
+    while ( n < 3 )
+    {
+        bc->buffer[bc->pos + n] = raw[n];
+        n++;
+    }
+
+    bc->pos += 3;
+}
+
+#else
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StartEncode
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ * unsigned char *source : pointer to buffer to hold encoded data.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Perform initialization of the boolean encoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StartEncode ( BOOL_CODER *br, unsigned char *source )
+{
+    // Output goes to the start of the caller's buffer
+    br->buffer   = source;
+    br->pos      = 0;
+
+    // Initial coder state
+    br->range    = 255;
+    br->lowvalue = 0;
+    br->value    = 0;
+
+    // VP6_EncodeBool emits a byte each time count is incremented to 0, so the
+    // first byte is written after 24 shifts.
+    br->count    = -24;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StopEncode
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs clean-up for a boolean encoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StopEncode ( BOOL_CODER *br )
+{
+    int pad;
+    int shift;
+
+    // Choose how far the pending bits must be moved up, depending on how many
+    // bits have been shifted in since the coder was started / last emitted.
+    if ( br->count < -16 )
+        pad = 24;
+    else if ( br->count < -8 )
+        pad = 16;
+    else
+        pad = 8;
+
+    br->lowvalue <<= (pad - (br->count & 7));
+
+    // Write out lowvalue most significant byte first, then a trailing zero byte
+    for ( shift = 24; shift >= 0; shift -= 8 )
+        br->buffer[br->pos++] = (br->lowvalue >> shift) & 0xff;
+
+    br->buffer[br->pos++] = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeBool
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ * int bit : value to be encoded (0 or 1).
+ * int probability : probability of getting a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes a boolean value (0 or 1) using the
+ * specified boolean encoder.
+ *
+ * SPECIAL NOTES : This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+// Encodes one bool with the normalizing coder: narrows the range around the
+// coded bit, then renormalizes and emits finished bytes to br->buffer.
+void VP6_EncodeBool ( BOOL_CODER *br, int bit, int probability )
+{
+ unsigned int split;
+ // br->count starts at -24 (VP6_StartEncode); it is held as unsigned here, so
+ // the "count = -8" and "!++count" below rely on unsigned wrap-around.
+ unsigned int count = br->count;
+ unsigned int range = br->range;
+ unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+ if (bit)
+ Sectionbits[ActiveSection] += VP6_ProbCost[255-probability];
+ else
+ Sectionbits[ActiveSection] += VP6_ProbCost[probability];
+#endif
+
+ // split point of the current range; the +1 keeps it from being 0
+ split = 1 + (((range-1) * probability) >> 8);
+
+ // a 0 keeps the lower part of the range, a 1 moves to the upper part
+ range = split;
+ if(bit)
+ {
+ lowvalue += split;
+ range = br->range-split;
+ }
+
+ // renormalize: double the range until it is at least 0x80 again,
+ // shifting lowvalue up by one bit per step
+ while(range < 0x80)
+ {
+ range <<= 1;
+
+ // top bit of lowvalue set before the shift: propagate a carry into the
+ // bytes already written (trailing 0xff bytes wrap to 0, the byte before
+ // them is incremented)
+ if((lowvalue & 0x80000000 ))
+ {
+ int x = br->pos-1;
+ while(x>=0 && br->buffer[x] == 0xff)
+ {
+ br->buffer[x] =(unsigned char)0;
+ x--;
+ }
+ // NOTE(review): if the scan runs off the front (x == -1) this increments
+ // buffer[-1]; presumably unreachable for a valid coder state - confirm.
+ br->buffer[x]+=1;
+
+ }
+ lowvalue <<= 1;
+ // count reaching 0 means a whole byte is ready: emit bits 24-31,
+ // keep the low 24 bits and allow 8 more shifts before the next byte
+ if (!++count)
+ {
+ count = -8;
+ br->buffer[br->pos++]=(lowvalue >> 24);
+ lowvalue &= 0xffffff;
+ }
+ }
+ br->count = count;
+ br->lowvalue = lowvalue;
+ br->range = range;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeBoolOne
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ * int bit : value to be encoded (UNUSED).
+ * int probability : probability of getting a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes the boolean value 1 using the specified boolean encoder.
+ *
+ * SPECIAL NOTES : This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+// Same steps as VP6_EncodeBool with the coded bit fixed at 1. The bit argument
+// is only read by the MEASURE_SECTION_COSTS statistics.
+void VP6_EncodeBoolOne ( BOOL_CODER *br, int bit, int probability )
+{
+ unsigned int split;
+ // held as unsigned: "count = -8" and "!++count" below rely on wrap-around
+ unsigned int count = br->count;
+ unsigned int range = br->range;
+ unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+ if (bit)
+ Sectionbits[ActiveSection] += VP6_ProbCost[255-probability];
+ else
+ Sectionbits[ActiveSection] += VP6_ProbCost[probability];
+#endif
+
+ split = 1 + (((range-1) * probability) >> 8);
+
+ // a 1 always moves to the upper part of the range
+ lowvalue += split;
+ range = range-split;
+
+ // renormalize until the range is at least 0x80 again
+ while(range < 0x80)
+ {
+ range <<= 1;
+
+ // carry out of lowvalue: propagate into the bytes already written
+ if((lowvalue & 0x80000000 ))
+ {
+ int x = br->pos-1;
+ while(x>=0 && br->buffer[x] == 0xff)
+ {
+ br->buffer[x] =(unsigned char)0;
+ x--;
+ }
+ // NOTE(review): x may be -1 here if the scan ran off the front - confirm unreachable.
+ br->buffer[x]+=1;
+
+ }
+ lowvalue <<= 1;
+ // a whole byte is ready: emit bits 24-31 and keep the low 24 bits
+ if (!++count)
+ {
+ count = -8;
+ br->buffer[br->pos++]=(lowvalue >> 24);
+ lowvalue &= 0xffffff;
+ }
+ }
+ br->count = count;
+ br->lowvalue = lowvalue;
+ br->range = range;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeBoolZero
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ * int bit : value to be encoded (UNUSED).
+ * int probability : probability of getting a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Encodes the boolean value 0 using the specified boolean encoder.
+ *
+ * SPECIAL NOTES : This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+// Same steps as VP6_EncodeBool with the coded bit fixed at 0. The bit argument
+// is only read by the MEASURE_SECTION_COSTS statistics.
+void VP6_EncodeBoolZero ( BOOL_CODER *br, int bit, int probability )
+{
+ // held as unsigned: "count = -8" and "!++count" below rely on wrap-around
+ unsigned int count = br->count;
+ unsigned int range = br->range;
+ unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+ if (bit)
+ Sectionbits[ActiveSection] += VP6_ProbCost[255-probability];
+ else
+ Sectionbits[ActiveSection] += VP6_ProbCost[probability];
+#endif
+
+ // a 0 keeps the lower part of the range: the split becomes the new range
+ // and lowvalue is left unchanged
+ range = 1 + (((range-1) * probability) >> 8);
+
+ // renormalize until the range is at least 0x80 again
+ while(range < 0x80)
+ {
+ range <<= 1;
+
+ // carry out of lowvalue: propagate into the bytes already written
+ if((lowvalue & 0x80000000 ))
+ {
+ int x = br->pos-1;
+ while(x>=0 && br->buffer[x] == 0xff)
+ {
+ br->buffer[x] =(unsigned char)0;
+ x--;
+ }
+ // NOTE(review): x may be -1 here if the scan ran off the front - confirm unreachable.
+ br->buffer[x]+=1;
+
+ }
+ lowvalue <<= 1;
+ // a whole byte is ready: emit bits 24-31 and keep the low 24 bits
+ if (!++count)
+ {
+ count = -8;
+ br->buffer[br->pos++]=(lowvalue >> 24);
+ lowvalue &= 0xffffff;
+ }
+ }
+ br->count = count;
+ br->lowvalue = lowvalue;
+ br->range = range;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_EncodeBool2
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean encoder.
+ * int bit : value to be encoded (0 or 1).
+ * int probability : probability of getting a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates br->BitCounter with approximate cost of encoding
+ * bit.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_EncodeBool2 ( BOOL_CODER *br, int bit, int probability )
+{
+    // probability is the chance of a 0, so the cost of a 1 is looked up at
+    // the complementary probability.
+    const int index = bit ? (255-probability) : probability;
+
+    br->BitCounter += VP6_ProbCost[index];
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeBool
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean decoder.
+ * int probability : probability that next symbol is a 0 (0-255)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Next decoded symbol (0 or 1)
+ *
+ * FUNCTION : Decodes the next symbol (0 or 1) using the specified
+ * boolean decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+// Decodes one bool with the normalizing coder: compares value against the split
+// point, narrows the range, then renormalizes and pulls in new bytes as needed.
+int VP6_DecodeBool ( BOOL_CODER *br, int probability )
+{
+ unsigned int bit=0;
+ unsigned int split;
+ unsigned int bigsplit;
+ unsigned int count = br->count;
+ unsigned int range = br->range;
+ unsigned int value = br->value;
+
+ // split point of the current range; bigsplit is the same point moved up
+ // 24 bits so it can be compared with value directly
+ split = 1 + (((range-1) * probability) >> 8);
+ bigsplit = (split<<24);
+
+ // assume a 0 (lower part); switch to the upper part if value reaches the split
+ range = split;
+ if(value >= bigsplit)
+ {
+ range = br->range-split;
+ value = value-bigsplit;
+ bit = 1;
+ }
+
+ // fast path: no renormalization needed, so count is unchanged and is
+ // not written back
+ if(range>=0x80)
+ {
+ br->value = value;
+ br->range = range;
+ return bit;
+ }
+ else
+ {
+ // double range and value until range is at least 0x80 again
+ do
+ {
+ range +=range;
+ value +=value;
+
+ // after 8 shifts the low byte of value is empty: fill it with the
+ // next byte of the stream
+ if (!--count)
+ {
+ count = 8;
+ value |= br->buffer[br->pos];
+ br->pos++;
+ }
+ }
+ while(range < 0x80 );
+ }
+ br->count = count;
+ br->value = value;
+ br->range = range;
+ return bit;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeBool128
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean decoder.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : int: Next decoded symbol (0 or 1)
+ *
+ * FUNCTION : This function determines the next value stored in the
+ * boolean coder based upon a fixed probability of 0.5
+ * (128 in normalized units).
+ *
+ * SPECIAL NOTES : VP6_DecodeBool128() is a special case of VP6_DecodeBool()
+ * where the input probability is fixed at 128.
+ *
+ ****************************************************************************/
+// Decodes one bool at a fixed probability of one half. Unlike VP6_DecodeBool,
+// renormalization here is a single unconditional doubling of range and value
+// rather than a loop that runs until range >= 0x80.
+int VP6_DecodeBool128 ( BOOL_CODER *br )
+{
+ unsigned int bit;
+ unsigned int split;
+ unsigned int bigsplit;
+ unsigned int count = br->count;
+ unsigned int range = br->range;
+ unsigned int value = br->value;
+
+ // halve the range (rounding up); bigsplit is the split moved up 24 bits
+ // so it can be compared with value directly
+ split = ( range + 1) >> 1;
+ bigsplit = (split<<24);
+
+ // upper half: a 1 was coded; lower half: a 0. Either way both range and
+ // value are doubled once.
+ if(value >= bigsplit)
+ {
+ range = (range-split)<<1;
+ value = (value-bigsplit)<<1;
+ bit = 1;
+ }
+ else
+ {
+ range = split<<1;
+ value = value<<1;
+ bit = 0;
+ }
+
+ // after 8 shifts the low byte of value is empty: fill it with the next
+ // byte of the stream
+ if(!--count)
+ {
+ count=8;
+ value |= br->buffer[br->pos];
+ br->pos++;
+ }
+ br->count = count;
+ br->value = value;
+ br->range = range;
+ return bit;
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StartDecode
+ *
+ * INPUTS : BOOL_CODER *br : pointer to instance of a boolean decoder.
+ * unsigned char *source : pointer to buffer of data to be decoded;
+ * its first 4 bytes are read here.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs initialization of the boolean decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StartDecode ( BOOL_CODER *br, unsigned char *source )
+{
+    br->lowvalue = 0;
+    br->range = 255;
+    br->count = 8;
+    br->buffer = source;
+    br->pos = 0;
+
+    // Prime value with the first four bytes, most significant first. Each byte
+    // is widened to unsigned int before shifting: shifting the promoted
+    // (signed) int left by 24 is undefined for bytes >= 0x80.
+    br->value = ((unsigned int)br->buffer[0]<<24)
+              | ((unsigned int)br->buffer[1]<<16)
+              | ((unsigned int)br->buffer[2]<<8)
+              |  (unsigned int)br->buffer[3];
+    br->pos += 4;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StopDecode
+ *
+ * INPUTS : BOOL_CODER *bc : pointer to instance of a boolean decoder (UNUSED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs clean-up of the specified boolean decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_StopDecode ( BOOL_CODER *bc )
+{
+    // Nothing to clean up for this decoder.
+    (void)bc;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c
new file mode 100644
index 00000000..c88878a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c
@@ -0,0 +1,225 @@
+#include "pbdll.h"
+#include "misc_common.h"
+#include "xprintf.h"
+
+/****************************************************************************
+ * Debugging Aid Only
+ ****************************************************************************
+*/
+
+#ifdef _MSC_VER
+
+#include <stdio.h>
+
+/* Debug aid: writes ReconYPlaneSize + 2*ReconUVPlaneSize bytes starting at
+ * address to the file "y<x, 4 digits>.raw".
+ * Does nothing if the file cannot be opened. */
+void vp6_writeframe(PB_INSTANCE *pbi, char * address,int x)
+{
+    FILE *yframe;
+    char filename[255];
+
+    sprintf(filename,"y%04d.raw",x);
+    yframe=fopen(filename,"wb");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    fwrite(address,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+    fclose(yframe);
+}
+
+/* Debug aid: writes YPlaneSize bytes starting at address to the file
+ * "y<x>.raw" (no zero padding in the name).
+ * Does nothing if the file cannot be opened. */
+void vp6_writeframe2(PB_INSTANCE *pbi, char * address,int x)
+{
+    FILE *yframe;
+    char filename[255];
+
+    sprintf(filename,"y%d.raw",x);
+    yframe=fopen(filename,"wb");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    fwrite(address,pbi->YPlaneSize,1,yframe);
+    fclose(yframe);
+}
+
+/* Debug aid: writes size bytes starting at address to the file
+ * "<prefix><frame, 4 digits>.raw".
+ * Only the first 200 characters of prefix are used so that the name always
+ * fits in the local buffer. Does nothing if the file cannot be opened. */
+void vp6_draw(unsigned char *prefix, int frame, char * address,int size)
+{
+    FILE *yframe;
+    char filename[255];
+
+    // 200 (prefix) + at most 11 (frame) + 4 (".raw") + 1 (NUL) <= sizeof filename
+    sprintf(filename,"%.200s%04d.raw",(const char *)prefix,frame);
+    yframe=fopen(filename,"wb");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    fwrite(address,size,1,yframe);
+    fclose(yframe);
+}
+
+/* Debug aid: writes a width x height region to the file
+ * "<prefix><frame, 4 digits>.raw", one row of width bytes at a time, stepping
+ * address by pitch between rows.
+ * Only the first 200 characters of prefix are used so that the name always
+ * fits in the local buffer. Does nothing if the file cannot be opened. */
+void vp6_drawb(unsigned char *prefix, int frame, char * address,int pitch,int width,int height)
+{
+    FILE *yframe;
+    int i;
+    char filename[255];
+
+    // 200 (prefix) + at most 11 (frame) + 4 (".raw") + 1 (NUL) <= sizeof filename
+    sprintf(filename,"%.200s%04d.raw",(const char *)prefix,frame);
+    yframe=fopen(filename,"wb");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    for(i=0;i<height;i++)
+    {
+        fwrite(address,width,1,yframe);
+        address+=pitch;
+    }
+    fclose(yframe);
+}
+
+/* Debug aid: appends a width x height region to the file named filename, one
+ * row of width bytes at a time, stepping address by pitch between rows.
+ * Does nothing if the file cannot be opened. */
+void vp6_drawc(char *filename, char * address,int pitch,int width,int height)
+{
+    FILE *yframe;
+    int i;
+
+    yframe=fopen(filename,"ab");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    for(i=0;i<height;i++)
+    {
+        fwrite(address,width,1,yframe);
+        address+=pitch;
+    }
+    fclose(yframe);
+}
+
+void vp6_showinfo2(PB_INSTANCE *pbi)
+{ /* Debug aid: hands a one-line summary of the current decoder state to
+ * vp6_xprintf. The second argument is YStride * UMV_BORDER + UMV_BORDER.
+ * NOTE(review): presumably that is a buffer offset UMV_BORDER rows down and
+ * UMV_BORDER columns in - confirm against xprintf.h. */
+ vp6_xprintf(pbi,
+ pbi->Configuration.YStride * UMV_BORDER + UMV_BORDER,
+ "F:%d G:%d Q:%d S:%d B: %d W:%d H:%d V:%d Decode:%8d, Blit:%8d, PP:%8d, P:%d",
+ pbi->FrameType, /* F */
+ pbi->RefreshGoldenFrame, /* G */
+ pbi->quantizer->FrameQIndex, /* Q */
+ pbi->CurrentFrameSize, /* S */
+ pbi->br.pos, /* B */
+ pbi->HFragments, /* W */
+ pbi->VFragments, /* H */
+ pbi->Vp3VersionNo, /* V */
+ pbi->avgDecodeTime, /* Decode */
+ pbi->avgBlitTime, /* Blit */
+ pbi->avgPPTime[8], /* PP - NOTE(review): fixed index 8, meaning not visible here */
+ pbi->PostProcessingLevel); /* P */
+}
+
+/* Debug aid: appends ReconYPlaneSize + 2*ReconUVPlaneSize bytes starting at
+ * pbi->LastFrameRecon to "test.raw".
+ * Does nothing if the file cannot be opened. */
+void vp6_appendframe(PB_INSTANCE *pbi)
+{
+    FILE *yframe;
+
+    yframe=fopen("test.raw","ab");
+    if(!yframe)
+        return;     // fwrite/fclose must not be handed a NULL stream
+
+    fwrite(pbi->LastFrameRecon,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+    fclose(yframe);
+}
+
+/* Debug aid: for every macroblock, passes its prediction mode ("%d") to
+ * vp6_xprintf together with an offset that lies 5 rows down and 5 columns in
+ * from a 16x16 grid position for that macroblock. */
+void vp6_showinfo(PB_INSTANCE *pbi)
+{
+    UINT32 rowCount = pbi->MBRows;
+    UINT32 colCount = pbi->MBCols;
+    UINT32 row;
+
+    for ( row = 0; row < rowCount; row++ )
+    {
+        UINT32 col;
+
+        for ( col = 0; col < colCount; col++ )
+        {
+            vp6_xprintf(pbi,
+                (row * 16 + 5) * pbi->Configuration.YStride + col * 16 + 5,
+                "%d",
+                pbi->predictionMode[MBOffset(row,col)]);
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PredictBlockToPostProcessBuffer
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance (UNUSED).
+ * BLOCK_POSITION bp : Block position (UNUSED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Currently does nothing: the whole body is commented out.
+ *
+ * The disabled code assumes motion vectors and modes to be defined at the MB level.
+ *
+ * SPECIAL NOTES : The disabled body is kept for reference only.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP6_PredictBlockToPostProcessBuffer ( PB_INSTANCE *pbi, BLOCK_POSITION bp )
+{
+/*
+Disabled code - nothing below is compiled.
+we need a VP6_PredictMacroBlockToPostProcessBuffer
+
+ memset(pbi->ReconDataBuffer,0,64*sizeof(short));
+
+ // Action depends on decode mode.
+ if ( pbi->mbi.Mode == CODE_INTER_NO_MV ) // Inter with no motion vector
+ {
+ ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ (UINT8 *)&pbi->LastFrameRecon[pbi->mbi.Recon],
+ pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride);
+ }
+ else if ( VP6_ModeUsesMC[pbi->mbi.Mode] ) // The mode uses a motion vector.
+ {
+ // For the compressor we did this already ( possible optimization).
+ VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+ ReconBlock(
+ pbi->TmpDataBuffer,
+ pbi->ReconDataBuffer,
+ (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ pbi->mbi.CurrentReconStride );
+ }
+ else if ( pbi->mbi.Mode == CODE_USING_GOLDEN ) // Golden frame with motion vector
+ {
+ // Reconstruct the pixel data using the golden frame reconstruction and change data
+ ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ (UINT8 *)&pbi->GoldenFrame[ pbi->mbi.Recon ],
+ pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+ }
+ else // Simple Intra coding
+ {
+ // Get the pixel index for the first pixel in the fragment.
+ ReconIntra( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], (UINT16 *)pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+ }
+*/
+}
+
+/* Debug aid: appends one block of text per call to "modes.txt". Each line
+ * covers one macroblock row inside the BORDER_MBS margin: optionally the
+ * per-MB interlaced flags (when Configuration.Interlaced == 1), then the
+ * prediction modes, then the motion vectors as "x:y".
+ * A function-local counter supplies the frame number and advances on every
+ * call, including calls where the file cannot be opened. */
+void VP6_printmodes(PB_INSTANCE *pbi)
+{
+    static int nFrame = 0;  // PB_INSTANCE doesn't provide a frame number, does it?
+    FILE *f=fopen("modes.txt","a");
+    unsigned int i,j;
+
+    if(!f)
+    {
+        // Nothing can be written; keep the counter in step with the calls.
+        ++nFrame;
+        return;
+    }
+
+    fprintf(f, "Frame %d\n\n", nFrame);
+
+    for(i=BORDER_MBS;i<pbi->MBRows-BORDER_MBS;i++)
+    {
+        if(pbi->Configuration.Interlaced == 1)
+        {
+            for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)
+            {
+                fprintf(f,"%d",pbi->MBInterlaced[MBOffset(i,j)]);
+            }
+            fprintf(f," ");
+        }
+        for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)
+        {
+            fprintf(f,"%d",pbi->predictionMode[MBOffset(i,j)]);
+        }
+        fprintf(f," ");
+        for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)
+        {
+            fprintf(f,"%3d:%-3d",pbi->MBMotionVector[MBOffset(i,j)].x,pbi->MBMotionVector[MBOffset(i,j)].y);
+        }
+        fprintf(f,"\n");
+    }
+
+    fprintf(f,"\n");
+    fprintf(f,"\n");
+    fclose(f);
+
+    ++nFrame;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c
new file mode 100644
index 00000000..168e5a80
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c
@@ -0,0 +1,2125 @@
+/****************************************************************************
+*
+* Module Title : Decodembs.c
+*
+* Description : Compressor functions for block order transmittal
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+
+/* For details of tokens and extra bit breakdown see token definitions in huffman.h
+ * One TOKENEXTRABITS entry exists per entropy token (see VP6_TokenExtraBits2).
+ * NOTE(review): the field meanings below are read off that table - confirm
+ * against the code that consumes it:
+ * MinVal - presumably the smallest value the token represents
+ * Length - -1 for ZERO_TOKEN and the EOB token; for the DCT_VAL_CATEGORY rows
+ * Length + 1 equals the number of non-zero Probs entries
+ * Probs - presumably the probabilities used when decoding the extra bits */
+typedef struct
+{
+ UINT16 MinVal;
+ INT16 Length;
+ UINT8 Probs[11];
+} TOKENEXTRABITS;
+
+/****************************************************************************
+* Module constants
+****************************************************************************/
+static const UINT32 VP6_HuffTokenMinVal[MAX_ENTROPY_TOKENS] = { 0,1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0}; // same values as the MinVal column of VP6_TokenExtraBits2
+
+static const TOKENEXTRABITS VP6_TokenExtraBits2[MAX_ENTROPY_TOKENS] =
+{
+ { 0,-1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
+ { 5, 0, { 159,0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
+ { 7, 1, { 145,165,0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
+ { 11, 2, { 140,148,173,0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
+ { 19, 3, { 135,140,155,176,0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
+ { 35, 4, { 130,134,141,157,180,0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
+ { 67,10, { 129,130,133,140,153,177,196,230,243,254,254 } }, //DCT_VAL_CATEGORY6
+ { 0,-1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+};
+
+// Definition of AC coefficient banding: 65 entries; entry 0 is -1, entries 1..63 hold bands 0..5 and entry 64 holds 7
+const INT32 VP6_CoeffToBand[65] =
+{
+ -1,0,1,1,1,2,2,2,
+ 2,2,2,3,3,3,3,3,
+ 3,3,3,3,3,3,4,4,
+ 4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,5,5,5,
+ 5,5,5,5,5,5,5,5,
+ 5,5,5,5,5,5,5,5,
+ 5,5,5,5,5,5,5,5,7
+};
+
+static const INT32 VP6_CoeffToHuffBand[65] = // same as VP6_CoeffToBand for entries 0..10; every entry from 11 on is band 3
+{
+ -1,0,1,1,1,2,2,2,
+ 2,2,2,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3
+};
+
+// Default scan bands for non-interlaced frames (one band number, 0..15, per coefficient position)
+const UINT8 DefaultNonInterlacedScanBands[BLOCK_SIZE] =
+{
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 2, 2, 2, 3, 3, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 7, 7,
+ 7, 7, 7, 8, 8, 9, 9, 9,
+ 9, 9, 9,10,10,11,11,11,
+ 11,11,11,12,12,12,12,12,
+ 12,13,13,13,13,13,14,14,
+ 14,14,15,15,15,15,15,15
+};
+
+// Default scan bands for interlaced frames (one band number, 0..15, per coefficient position)
+const UINT8 DefaultInterlacedScanBands[BLOCK_SIZE] =
+{
+ 0, 1, 0, 1, 1, 2, 5, 3,
+ 2, 2, 2, 2, 4, 7, 8,10,
+ 9, 7, 5, 4, 2, 3, 5, 6,
+ 8, 9,11,12,13,12,11,10,
+ 9, 7, 5, 4, 6, 7, 9,11,
+ 12,12,13,13,14,12,11, 9,
+ 7, 9,11,12,14,14,14,15,
+ 13,11,13,15,15,15,15,15,
+};
+
+/* AWG Should export this in decodembs.h rather than pbdll.h
+ * Maps a coding mode (the index) to a frame id: 0 for CODE_INTRA, 1 for the
+ * CODE_INTER_* modes and 2 for the golden-frame modes listed below.
+ * NOTE(review): the trailing descriptions of entries 4, 8 and 9 repeat text
+ * from other entries and look copy-pasted - confirm before relying on them. */
+const int VP6_Mode2Frame[] =
+{
+ 1, // CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame
+ 0, // CODE_INTRA 1 => DCT Encoded Block
+ 1, // CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame
+ 1, // CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame
+ 1, // CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks
+ 2, // CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame
+ 2, // CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame
+ 1, // CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks
+ 2, // CODE_GOLD_NEAREST_MV 8 => Encoded diff from MRU MV MB last frame
+ 2, // CODE_GOLD_NEAR_MV 9 => Encoded diff from included 4 separate MV blocks
+};
+
+// For Bitread functions: entry n (n = 0..32) has exactly the low n bits set
+static const UINT32 loMaskTbl_VP60[] =
+{
+ 0x00000000,
+ 0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+ 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+ 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+ 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+ 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+ 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+ 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+ 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     NextWord (MACRO)
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Loads the four bytes at br->position into br->remainder
+ *                      and advances br->position by four. With BitsAreBigEndian
+ *                      set, the first byte becomes the most significant byte of
+ *                      the word; otherwise it becomes the least significant.
+ *
+ *  SPECIAL NOTES :     Expects a variable named br in the expanding scope and
+ *                      expands to a brace block, so it is written without a
+ *                      trailing semicolon. Each byte is cast to UINT32 before
+ *                      shifting so that a byte >= 0x80 is never shifted into
+ *                      the sign bit of a promoted int.
+ *
+ ****************************************************************************/
+#   define BitsAreBigEndian 1
+#   if BitsAreBigEndian
+#       define NextWord \
+{ br->remainder = ((UINT32)br->position[0] << 24) + ((UINT32)br->position[1] << 16) + ((UINT32)br->position[2] << 8) + (UINT32)br->position[3]; br->position += 4;}
+#   else
+#       define NextWord \
+{ br->remainder = ((UINT32)br->position[3] << 24) + ((UINT32)br->position[2] << 16) + ((UINT32)br->position[1] << 8) + (UINT32)br->position[0]; br->position += 4;}
+#   endif
+
+/****************************************************************************
+ *
+ * ROUTINE : bitread
+ *
+ * INPUTS : BITREADER *br : Wrapper for the encoded data buffer.
+ * int bits : Number of bits to read.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Value of the number of bits requested (as UINT32)
+ *
+ * FUNCTION : Extracts requested number of bits from the encoded data buffer.
+ * Bits still unread in br->remainder are used first; if they do
+ * not cover the request, one further word is loaded and supplies
+ * the low part of the result. br->bitsinremainder is updated to
+ * the number of bits left unread.
+ *
+ * SPECIAL NOTES : Uses the NextWord macro.
+ * NOTE(review): a 32-bit request made while bitsinremainder is 0
+ * shifts br->remainder left by 32 - confirm callers never do this.
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitread ( BITREADER *br, int bits )
+{
+ UINT32 z = 0;
+
+ br->remainder &= loMaskTbl_VP60[br->bitsinremainder]; // keep only the bits that are still unread
+
+ if( (bits -= br->bitsinremainder) > 0) // request is larger than what is left in remainder
+ {
+ z |= br->remainder << bits; // leftover bits form the high part of the result
+ NextWord // load the next 32 bits into br->remainder
+ bits -= 32; // now minus the number of bits that will stay unread
+ }
+ return z | br->remainder >> (br->bitsinremainder = -bits); // store the new unread count and drop the unread bits from the result
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     bitreadonly
+ *
+ *  INPUTS        :     BITREADER *br : Wrapper for the encoded data buffer.
+ *                      UINT32 bits   : Number of bits to read.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Value of the number of bits requested (as UINT32)
+ *
+ *  FUNCTION      :     Returns the requested number of bits from the encoded
+ *                      data buffer without modifying br.
+ *
+ *  SPECIAL NOTES :     This reader variant will only read a further byte from the
+ *                      encoded data buffer, so at most bitsinremainder + 8 bits
+ *                      can be requested.
+ *                      The unread bits are isolated with loMaskTbl_VP60 (as in
+ *                      bitread) rather than (1<<x)-1, which is a signed shift
+ *                      and is undefined once x reaches 31.
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitreadonly ( BITREADER *br, UINT32 bits )
+{
+    UINT32 x = br->bitsinremainder;
+    UINT32 z = br->remainder & loMaskTbl_VP60[x];
+
+    // Enough unread bits in the current word: take the top 'bits' of them.
+    if ( x >= bits )
+    {
+        return z>>(x-bits);
+    }
+
+    // Otherwise append the next byte of the buffer and take the top 'bits'
+    // of the x + 8 bits now available.
+    z <<= 8;
+    z |= br->position[0];
+    return (z>>(8+x-bits));
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : bitShift
+ *
+ * INPUTS : BITREADER *br : Wrapper for the encoded data buffer.
+ * int bits : Number of bits to discard (shift off).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Discards requested number of bits from the encoded data buffer.
+ *
+ * SPECIAL NOTES : Uses the NextWord macro.
+ *
+ ****************************************************************************/
+FORCEINLINE
+void bitShift ( BITREADER *br, int bits )
+{
+    // Drop the bits; finished if the current word still covers them.
+    if ( (br->bitsinremainder -= bits) >= 0 )
+        return;
+
+    // Went past the end of the current word: load the next one and carry
+    // the deficit over into it.
+    NextWord
+    br->bitsinremainder += 32;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : bitread1
+ *
+ * INPUTS : BITREADER *br : Wrapper for the encoded data buffer.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : A single bit (as UINT32)
+ *
+ * FUNCTION : Extracts a single bit from the encoded data buffer.
+ *
+ * SPECIAL NOTES : Uses the NextWord macro.
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitread1 ( BITREADER *br )
+{
+    if ( br->bitsinremainder == 0 )
+    {
+        // Current word used up: load the next one and return its top bit,
+        // leaving 31 bits unread.
+        NextWord
+        br->bitsinremainder = 31;
+        return br->remainder >> 31;
+    }
+
+    // Take the highest unread bit of the current word.
+    br->bitsinremainder--;
+    return (br->remainder >> br->bitsinremainder) & 1;
+}
+
+#undef NextWord
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     nDecodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *br  : Boolean decoder state; its count, range,
+ *                                        value and pos members are updated.
+ *                      int probability : Probability that next symbol in Boolean
+ *                                        Coded buffer is a 0.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Value of the next encoded token 0 or 1 (as int)
+ *
+ *  FUNCTION      :     Extracts next token (0 or 1) from the Boolean encoded data
+ *                      buffer: splits the current range according to probability,
+ *                      picks the half that contains value, then doubles range and
+ *                      value until range is at least 0x80, pulling in a new byte
+ *                      from the buffer each time count reaches zero.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+FORCEINLINE
+int nDecodeBool ( BOOL_CODER *br, int probability )
+{
+    int          count    = br->count;
+    unsigned int range    = br->range;
+    unsigned int value    = br->value;
+    unsigned int split    = 1 + (((range-1) * probability) >> 8);
+    unsigned int bigsplit = split<<24;
+    int          bit;
+
+    // Choose the half of the range that value falls in.
+    if ( value >= bigsplit )
+    {
+        value -= bigsplit;
+        range -= split;
+        bit    = 1;
+    }
+    else
+    {
+        range = split;
+        bit   = 0;
+    }
+
+    // Renormalise; the same steps apply to either outcome.
+    while ( range < 0x80 )
+    {
+        range <<= 1;
+        value <<= 1;
+
+        if ( --count == 0 )
+        {
+            count  = 8;
+            value |= br->buffer[br->pos++];
+        }
+    }
+
+    br->count = count;
+    br->value = value;
+    br->range = range;
+
+    return bit;
+}
+
+/****************************************************************************
+ *
+ * MACRO : APPLYSIGN
+ *
+ * Decodes one bit using an even split of the range ((range + 1) >> 1) and
+ * stores valueToSign in dest: negated when value lies in the upper half,
+ * unchanged otherwise. After either outcome range and value are doubled once
+ * and, when count reaches zero, the next byte is taken from *brBuffer.
+ *
+ * Works on variables named range, value, count and brBuffer that must exist
+ * in the expanding scope; all four may be modified.
+ *
+ * NOTE(review): valueToSign is not parenthesised in the expansion, so pass a
+ * simple operand.
+ *
+ ****************************************************************************/
+#define APPLYSIGN(dest, valueToSign) \
+{ \
+ unsigned int split; \
+ split = (range + 1) >> 1; \
+ if ( value >= split<<24 ) \
+ { \
+ value = value - (split<<24); \
+ value += value; \
+ range = range - split; \
+ range += range; \
+ if( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ dest = -valueToSign; \
+ } \
+ else \
+ { \
+ range = split; \
+ range += range; \
+ value += value; \
+ if( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ dest = valueToSign; \
+ } \
+}
+
+/****************************************************************************
+ *
+ * MACRO : NDECODEBOOL_AND_BRANCH_IF_ONE
+ *
+ * Performs the same split and renormalisation steps as nDecodeBool, but on
+ * variables named range, value, count and brBuffer in the expanding scope.
+ * When value lies in the upper part of the split (the case for which
+ * nDecodeBool returns 1) control jumps to the label given as branch after
+ * renormalising; otherwise execution falls through after renormalising.
+ *
+ * NOTE(review): probability is not parenthesised in the expansion, so pass a
+ * simple operand.
+ *
+ ****************************************************************************/
+// register int count = _mm_cvtsi64_si32(m64_brCount);
+#define NDECODEBOOL_AND_BRANCH_IF_ONE(probability, branch) \
+{ \
+ unsigned int split; \
+ split = 1 + (((range-1) * probability) >> 8); \
+ if ( value >= split<<24 ) \
+ { \
+ value -= (split<<24); \
+ range = range - split; \
+ while(range < 0x80 ) \
+ { \
+ range += range; \
+ value += value; \
+ if ( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ } \
+ goto branch; \
+ } \
+ range = split; \
+ while(range < 0x80 ) \
+ { \
+ range += range; \
+ value += value; \
+ if ( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ } \
+}
+/****************************************************************************
+ *
+ * MACRO : NDECODEBOOL_AND_BRANCH_IF_ZERO
+ *
+ * Performs the same split and renormalisation steps as nDecodeBool, but on
+ * variables named range, value, count and brBuffer in the expanding scope.
+ * When value lies below the split (the case for which nDecodeBool returns 0)
+ * control jumps to the label given as branch after renormalising; otherwise
+ * execution falls through after renormalising.
+ *
+ * NOTE(review): probability is not parenthesised in the expansion, so pass a
+ * simple operand.
+ *
+ ****************************************************************************/
+#define NDECODEBOOL_AND_BRANCH_IF_ZERO(probability, branch) \
+{ \
+ unsigned int split; \
+ split = 1 + (((range-1) * probability) >> 8); \
+ if ( value < split<<24 ) \
+ { \
+ range = split; \
+ while(range < 0x80 ) \
+ { \
+ range += range; \
+ value += value; \
+ if ( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ } \
+ goto branch; \
+ } \
+ value -= (split<<24); \
+ range = range - split; \
+ while(range < 0x80 ) \
+ { \
+ range += range; \
+ value += value; \
+ if ( !--count ) \
+ { \
+ count = 8; \
+ value |= *brBuffer; \
+ brBuffer++; \
+ } \
+ } \
+}
+
+
+/****************************************************************************
+*
+*  ROUTINE       :     BuildScanOrder
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to instance of a decoder.
+*                      UINT8 *ScanBands : Pointer to array containing band for
+*                                         each DCT coeff position.
+*
+*  OUTPUTS       :     None
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Builds a custom dct scan order from a set of band data
+*                      (pbi->ModifiedScanOrder) and, for every position in that
+*                      order, the largest original position seen so far
+*                      (pbi->EobOffsetTable).
+*
+*  SPECIAL NOTES :     Positions whose band is not below SCAN_ORDER_BANDS are
+*                      not placed in the scan order.
+*
+****************************************************************************/
+void BuildScanOrder( PB_INSTANCE *pbi, UINT8 *ScanBands )
+{
+    UINT32 i, j;
+    UINT32 ScanOrderIndex = 1;
+    UINT32 MaxOffset = 0;
+
+    // DC is fixed
+    pbi->ModifiedScanOrder[0] = 0;
+
+    // Create a scan order where within each band the coefs are in ascending order
+    // (in terms of their original zig-zag positions).
+    for ( i = 0; i < SCAN_ORDER_BANDS; i++ )
+    {
+        for ( j = 1; j < BLOCK_SIZE; j++ )
+        {
+            if ( ScanBands[j] == i )
+            {
+                pbi->ModifiedScanOrder[ScanOrderIndex] = j;
+                ScanOrderIndex++;
+            }
+        }
+    }
+
+    // For each of the positions in the modified scan order work out the
+    // worst case EOB offset in zig zag order. This is used in selecting
+    // the appropriate idct variant.
+    // MaxOffset is carried from one position to the next, so it always holds
+    // the maximum of ModifiedScanOrder[0..i].
+    for ( i = 0; i < BLOCK_SIZE; i++ )
+    {
+        if ( pbi->ModifiedScanOrder[i] > MaxOffset )
+            MaxOffset = pbi->ModifiedScanOrder[i];
+
+        if ( pbi->Vp3VersionNo > 6 )
+            pbi->EobOffsetTable[i] = MaxOffset+1;
+        else
+            pbi->EobOffsetTable[i] = MaxOffset;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       :     BoolTreeToHuffCodes
+*
+*  INPUTS        :     UINT8 *BoolTreeProbs : Dct coeff tree node probabilities
+*                                             (entries 0..10 are read).
+*
+*  OUTPUTS       :     UINT32 *HuffProbs    : Dct coeff probability distribution,
+*                                             indexed by token.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Convert set of internal tree node probabilities to set of
+*                      token probabilities by multiplying the node probabilities
+*                      (or their complements to 255) along the path to each
+*                      token, scaling by >> 8 after every product.
+*
+*  SPECIAL NOTES :     None.
+*
+****************************************************************************/
+void BoolTreeToHuffCodes ( UINT8 *BoolTreeProbs, UINT32 *HuffProbs )
+{
+    UINT32 node[11];    // the eleven node probabilities, widened once
+    UINT32 rest;        // probability mass not yet assigned to a token
+    UINT32 pair;        // probability mass of the sub-tree being split
+    UINT32 k;
+
+    for ( k = 0; k < 11; k++ )
+        node[k] = BoolTreeProbs[k];
+
+    // Node 0: EOB/ZERO versus the rest. Node 1: EOB versus ZERO.
+    HuffProbs[DCT_EOB_TOKEN] = (node[0] * node[1]) >> 8;
+    HuffProbs[ZERO_TOKEN]    = (node[0] * (255 - node[1])) >> 8;
+
+    // Node 2: ONE versus anything larger.
+    rest = 255 - node[0];
+    HuffProbs[ONE_TOKEN] = (rest * node[2]) >> 8;
+    rest = (rest * (255 - node[2])) >> 8;
+
+    // Node 3: TWO/THREE/FOUR versus the categories. Nodes 4 and 5 split them.
+    pair = (rest * node[3]) >> 8;
+    HuffProbs[TWO_TOKEN] = (pair * node[4]) >> 8;
+    pair = (pair * (255 - node[4])) >> 8;
+    HuffProbs[THREE_TOKEN] = (pair * node[5]) >> 8;
+    HuffProbs[FOUR_TOKEN]  = (pair * (255 - node[5])) >> 8;
+    rest = (rest * (255 - node[3])) >> 8;
+
+    // Node 6: categories 1/2 versus 3..6. Node 7 splits 1 from 2.
+    pair = (rest * node[6]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY1] = (pair * node[7]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY2] = (pair * (255 - node[7])) >> 8;
+    rest = (rest * (255 - node[6])) >> 8;
+
+    // Node 8: categories 3/4 versus 5/6. Node 9 splits 3 from 4.
+    pair = (rest * node[8]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY3] = (pair * node[9]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY4] = (pair * (255 - node[9])) >> 8;
+    rest = (rest * (255 - node[8])) >> 8;
+
+    // Node 10 splits category 5 from 6.
+    HuffProbs[DCT_VAL_CATEGORY5] = (rest * node[10]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY6] = (rest * (255 - node[10])) >> 8;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       :     ZerosBoolTreeToHuffCodes
+*
+*  INPUTS        :     UINT8 *BoolTreeProbs : Zrl tree node probabilities
+*                                             (entries 0..7 are read).
+*
+*  OUTPUTS       :     UINT32 *HuffProbs    : Zrl run-length distribution
+*                                             (entries 0..8 are written).
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Convert zero run-length tree node probs to set
+*                      of run-length probs (run lengths 1--8, and >8
+*                      are the tokens).
+*
+*  SPECIAL NOTES :     Every product is scaled by >> 8 before the next one.
+*
+****************************************************************************/
+void ZerosBoolTreeToHuffCodes ( UINT8 *BoolTreeProbs, UINT32 *HuffProbs )
+{
+    UINT32 node[8];     // the eight node probabilities, widened once
+    UINT32 upper;       // 255 - node[0]: mass of the half holding entries 4..8
+    UINT32 branch;      // probability mass of the sub-tree being split
+    UINT32 k;
+
+    for ( k = 0; k < 8; k++ )
+        node[k] = BoolTreeProbs[k];
+
+    upper = 255 - node[0];
+
+    // Entries 0 and 1: node 0, node 1, then node 2.
+    branch = (node[0] * node[1]) >> 8;
+    HuffProbs[0] = (branch * node[2]) >> 8;
+    HuffProbs[1] = (branch * (255 - node[2])) >> 8;
+
+    // Entries 2 and 3: node 0, complement of node 1, then node 3.
+    branch = (node[0] * (255 - node[1])) >> 8;
+    HuffProbs[2] = (branch * node[3]) >> 8;
+    HuffProbs[3] = (branch * (255 - node[3])) >> 8;
+
+    // Entries 4 and 5: complement of node 0, node 4, node 5, then node 6.
+    branch = (upper * node[4]) >> 8;
+    branch = (branch * node[5]) >> 8;
+    HuffProbs[4] = (branch * node[6]) >> 8;
+    HuffProbs[5] = (branch * (255 - node[6])) >> 8;
+
+    // Entries 6 and 7: as above but with the complement of node 5, then node 7.
+    branch = (upper * node[4]) >> 8;
+    branch = (branch * (255 - node[5])) >> 8;
+    HuffProbs[6] = (branch * node[7]) >> 8;
+    HuffProbs[7] = (branch * (255 - node[7])) >> 8;
+
+    // Entry 8: complement of node 0 and complement of node 4.
+    HuffProbs[8] = (upper * (255 - node[4])) >> 8;
+}
+
+
+/****************************************************************************
+*
+* ROUTINE : ConvertBoolTrees
+*
+* INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Create set of Huffman codes for tokens from a set of
+* internal binary tree node probabilities.
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+void ConvertBoolTrees ( PB_INSTANCE *pbi )
+{
+    UINT32 zrl;
+    UINT32 plane;
+    UINT32 band;
+    INT32  prec;
+
+    // Each group below follows the same four steps: turn the tree node
+    // probabilities into token probabilities, build the Huffman tree from
+    // them, build its lookup table, then create the code/length arrays.
+
+    // DC tokens, one set per plane
+    for ( plane = 0; plane < 2; plane++ )
+    {
+        BoolTreeToHuffCodes ( pbi->DcProbs+DCProbOffset(plane,0), pbi->DcHuffProbs[plane] );
+        VP6_BuildHuffTree ( pbi->DcHuffTree[plane], pbi->DcHuffProbs[plane], MAX_ENTROPY_TOKENS );
+        VP6_BuildHuffLookupTable(pbi->DcHuffTree[plane], pbi->DcHuffLUT[plane]);
+        VP6_CreateCodeArray ( pbi->DcHuffTree[plane], 0, pbi->DcHuffCode[plane], pbi->DcHuffLength[plane], 0, 0 );
+    }
+
+    // Zero run lengths, one set per ZRL band (9 symbols each)
+    for ( zrl = 0; zrl < ZRL_BANDS; zrl++ )
+    {
+        ZerosBoolTreeToHuffCodes ( pbi->ZeroRunProbs[zrl], pbi->ZeroHuffProbs[zrl] );
+        VP6_BuildHuffTree ( pbi->ZeroHuffTree[zrl], pbi->ZeroHuffProbs[zrl], 9 );
+        VP6_BuildHuffLookupTable(pbi->ZeroHuffTree[zrl], pbi->ZeroHuffLUT[zrl]);
+        VP6_CreateCodeArray ( pbi->ZeroHuffTree[zrl], 0, pbi->ZeroHuffCode[zrl], pbi->ZeroHuffLength[zrl], 0, 0 );
+    }
+
+    // AC tokens, one set per (prec, plane, band)
+    for ( prec = 0; prec < PREC_CASES; prec++ )
+    {
+        for ( plane = 0; plane < 2; plane++ )
+        {
+            for ( band = 0; band < VP6_AC_BANDS; band++ )
+            {
+                BoolTreeToHuffCodes ( pbi->AcProbs+ACProbOffset(plane,prec,band,0), pbi->AcHuffProbs[prec][plane][band] );
+                VP6_BuildHuffTree ( pbi->AcHuffTree[prec][plane][band], pbi->AcHuffProbs[prec][plane][band], MAX_ENTROPY_TOKENS );
+                VP6_BuildHuffLookupTable(pbi->AcHuffTree[prec][plane][band],pbi->AcHuffLUT[prec][plane][band]);
+                VP6_CreateCodeArray ( pbi->AcHuffTree[prec][plane][band], 0, pbi->AcHuffCode[prec][plane][band], pbi->AcHuffLength[prec][plane][band], 0, 0 );
+            }
+        }
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       :     VP6_ConfigureEntropyDecoder
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+*                      UINT8 FrameType  : Type of frame.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Configure entropy subsystem ready for decode. Reads, in
+*                      this order, from pbi->br: DC probability updates, an
+*                      optional scan band update, zero run probability updates
+*                      and AC probability updates; then calls
+*                      VP6_ConfigureContexts.
+*
+*  SPECIAL NOTES :     LastProb starts at 128 for every node and is carried
+*                      across planes, bands and prec cases; on a BASE_FRAME a
+*                      node with no update takes the current LastProb value.
+*
+****************************************************************************/
+void VP6_ConfigureEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+    UINT32 i,j;
+    UINT32 Plane;
+    UINT32 Band;
+    INT32  Prec;
+    UINT8  LastProb[MAX_ENTROPY_TOKENS-1];
+
+    // Set every entry of the Last Probs data structure to 128
+    memset( LastProb, 128, MAX_ENTROPY_TOKENS-1 );
+
+    // Read in the Baseline DC probabilities and initialise the DC context for Y and then UV plane
+    for ( Plane = 0; Plane < 2; Plane++ )
+    {
+        // One update flag per tree node; the new value follows when the flag is set.
+        for ( i = 0; i < MAX_ENTROPY_TOKENS-1; i++ )
+        {
+            if ( nDecodeBool(&pbi->br, VP6_DcUpdateProbs[Plane][i] ) )
+            {
+                // Value is sent shifted right by one bit.
+                // 0 is not a legal value, clip to 1.
+                LastProb[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+                LastProb[i] += ( LastProb[i] == 0 );
+                pbi->DcProbs[DCProbOffset(Plane,i)] = LastProb[i];
+            }
+            else if ( FrameType == BASE_FRAME )
+            {
+                pbi->DcProbs[DCProbOffset(Plane,i)] = LastProb[i];
+            }
+        }
+    }
+
+    // Set Zero run probabilities to defaults if this is a key frame
+    if ( FrameType == BASE_FRAME )
+    {
+        memcpy( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+    }
+
+    // If this frame contains updates to the scan order then read them
+    if ( nDecodeBool( &pbi->br, 128 ) )
+    {
+        // Read in the AC scan bands and build the custom scan order
+        for ( i = 1; i < BLOCK_SIZE; i++ )
+        {
+            // Has the band for this coef been updated ?
+            if ( nDecodeBool( &pbi->br, ScanBandUpdateProbs[i] ) )
+                pbi->ScanBands[i] = VP6_bitread( &pbi->br, SCAN_BAND_UPDATE_BITS );
+        }
+        // Build the scan order
+        BuildScanOrder( pbi, pbi->ScanBands );
+    }
+
+    // Update the Zero Run probabilities
+    for ( i = 0; i < ZRL_BANDS; i++ )
+    {
+        for ( j = 0; j < ZERO_RUN_PROB_CASES; j++ )
+        {
+            if ( nDecodeBool( &pbi->br, ZrlUpdateProbs[i][j] ) )
+            {
+                // Probabilities sent; 0 is not a legal value, clip to 1.
+                pbi->ZeroRunProbs[i][j] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+                pbi->ZeroRunProbs[i][j] += ( pbi->ZeroRunProbs[i][j] == 0 );
+            }
+        }
+    }
+
+    // Read in the Baseline AC band probabilities and initialise the appropriate contexts
+    // Prec=0 means last token in current block was 0: Prec=1 means it was 1. Prec=2 means it was > 1
+    for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+    {
+        for ( Plane = 0; Plane < 2; Plane++ )
+        {
+            for ( Band = 0; Band < VP6_AC_BANDS; Band++ )
+            {
+                // One update flag per tree node; the new value follows when the flag is set.
+                for ( i = 0; i < MAX_ENTROPY_TOKENS-1; i++ )
+                {
+                    if ( nDecodeBool(&pbi->br, VP6_AcUpdateProbs[Prec][Plane][Band][i] ) )
+                    {
+                        // Probabilities transmitted at reduced resolution.
+                        // 0 is not a legal value, clip to 1.
+                        LastProb[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+                        LastProb[i] += ( LastProb[i] == 0 );
+                        pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = LastProb[i];
+                    }
+                    else if ( FrameType == BASE_FRAME )
+                    {
+                        pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = LastProb[i];
+                    }
+                }
+            }
+        }
+    }
+
+    // Create all the context specific probabilities based upon the new baseline data
+    VP6_ConfigureContexts(pbi);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ResetLeftContext
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates the left contexts.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_ResetLeftContext ( PB_INSTANCE *pbi)
+{
+    BLOCK_CONTEXT *Left[4];
+    UINT32 i;
+
+    // The four left contexts: two for Y, one each for U and V.
+    Left[0] = &pbi->fc.LeftY[0];
+    Left[1] = &pbi->fc.LeftY[1];
+    Left[2] = &pbi->fc.LeftU;
+    Left[3] = &pbi->fc.LeftV;
+
+    for ( i = 0; i < 4; i++ )
+    {
+        // Zero the whole context, then give Mode and Frame their reset values.
+        memset((void *) Left[i], 0, sizeof(BLOCK_CONTEXT));
+        Left[i]->Mode  = (CODING_MODE)-1;
+        Left[i]->Frame = 4;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ResetAboveContext
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates the above contexts.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_ResetAboveContext ( PB_INSTANCE *pbi )
+{
+    UINT32 col;
+
+    // Y contexts: HFragments + 8 entries
+    for ( col = 0; col < pbi->HFragments+8; col++ )
+    {
+        pbi->fc.AboveY[col].Dc    = 0;
+        pbi->fc.AboveY[col].Token = 0;
+        pbi->fc.AboveY[col].Frame = 4;
+        pbi->fc.AboveY[col].Mode  = -1;
+    }
+
+    // U and V contexts: HFragments/2 + 8 entries each
+    for ( col = 0; col < pbi->HFragments/2 + 8; col++ )
+    {
+        pbi->fc.AboveU[col].Dc    = 0;
+        pbi->fc.AboveU[col].Token = 0;
+        pbi->fc.AboveU[col].Frame = 4;
+        pbi->fc.AboveU[col].Mode  = -1;
+
+        pbi->fc.AboveV[col].Dc    = 0;
+        pbi->fc.AboveV[col].Token = 0;
+        pbi->fc.AboveV[col].Frame = 4;
+        pbi->fc.AboveV[col].Mode  = -1;
+    }
+
+    // Versions below 6 start with entry 1 of the U and V rows at Mode 0 / Frame 0.
+    if ( pbi->Vp3VersionNo < 6 )
+    {
+        pbi->fc.AboveU[1].Frame = 0;
+        pbi->fc.AboveU[1].Mode  = 0;
+        pbi->fc.AboveV[1].Frame = 0;
+        pbi->fc.AboveV[1].Mode  = 0;
+    }
+
+    // Last DC values: entry 0 is 0 for Y and 128 for U and V; entries 1 and 2 are 0.
+    pbi->fc.LastDcY[0] = 0;
+    pbi->fc.LastDcY[1] = 0;
+    pbi->fc.LastDcY[2] = 0;
+
+    pbi->fc.LastDcU[0] = 128;
+    pbi->fc.LastDcU[1] = 0;
+    pbi->fc.LastDcU[2] = 0;
+
+    pbi->fc.LastDcV[0] = 128;
+    pbi->fc.LastDcV[1] = 0;
+    pbi->fc.LastDcV[2] = 0;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP6_UpdateContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      BLOCK_CONTEXT *c  : Pointer to the context to update.
+ *                      BLOCK_POSITION bp : Position of the block in the containing MB.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Updates the context for a particular block within a MB:
+ *                      stores the block's mode, its first coefficient and the
+ *                      frame id that VP6_Mode2Frame gives for the MB mode.
+ *
+ *  SPECIAL NOTES :     The Token member of the context is not touched.
+ *
+ ****************************************************************************/
+void VP6_UpdateContext ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp )
+{
+    c->Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+    c->Dc    = pbi->mbi.blockDxInfo[bp].coeffsPtr[0];
+    c->Mode  = pbi->mbi.BlockMode[bp];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP6_UpdateContextA
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      BLOCK_CONTEXT *c  : Pointer to the context to update.
+ *                      BLOCK_POSITION bp : Position of the block in the containing MB.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Updates the context for a particular block within a MB:
+ *                      stores the block's mode, its first coefficient and the
+ *                      frame id that VP6_Mode2Frame gives for the MB mode.
+ *
+ *  SPECIAL NOTES :     Performs the same three assignments as VP6_UpdateContext.
+ *
+ ****************************************************************************/
+void VP6_UpdateContextA ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp )
+{
+    BLOCK_DX_INFO *bdi = &pbi->mbi.blockDxInfo[bp];
+
+    c->Mode  = pbi->mbi.BlockMode[bp];
+    c->Dc    = bdi->coeffsPtr[0];
+    c->Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+}
+
+// Yields -1 (all bits set) when bit 15 of X is set and 0 otherwise, by
+// sign-extending X as a 16-bit value. The argument is parenthesized so
+// that the cast applies to the whole expression passed in.
+#define HIGHBITDUPPED(X) (((signed short) (X)) >> 15)
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PredictDC
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * BLOCK_POSITION bp : Position of the block in the containing MB.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Adds a DC prediction to coefficient 0 of block bp and stores
+ * the result as the last DC value for the frame selected by
+ * the macroblock mode.
+ *
+ * SPECIAL NOTES : The prediction is the last DC for that frame unless a
+ * neighbour context refers to the same frame: Left alone or
+ * Above alone gives that neighbour's DC, both give their
+ * average. The Above/Left contexts are not updated here.
+ *
+ ****************************************************************************/
+void VP6_PredictDC
+(
+ PB_INSTANCE *pbi,
+ BLOCK_POSITION bp
+)
+{
+    UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+    Q_LIST_ENTRY *LastDC = pbi->mbi.blockDxInfo[bp].LastDc;
+    BLOCK_CONTEXT *Above = pbi->mbi.blockDxInfo[bp].Above;
+    BLOCK_CONTEXT *Left = pbi->mbi.blockDxInfo[bp].Left;
+    INT32 Avg;
+
+    // Fallback when neither neighbour refers to the same frame.
+    Avg = LastDC[Frame];
+
+    if ( Frame == Above->Frame )
+    {
+        Avg = Above->Dc;
+        if ( Frame == Left->Frame )
+        {
+            // Average of both neighbours. One is added first when bit 15
+            // of the sum is set, so the shift rounds a negative sum
+            // toward zero.
+            Avg += Left->Dc;
+            Avg += (HIGHBITDUPPED(Avg)&1);
+            Avg >>= 1;
+        }
+    }
+    else if ( Frame == Left->Frame )
+    {
+        Avg = Left->Dc;
+    }
+
+    pbi->mbi.blockDxInfo[bp].coeffsPtr[0] += Avg;
+    LastDC[Frame] = pbi->mbi.blockDxInfo[bp].coeffsPtr[0];
+
+    return;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PredictDC_MB
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : For each of the six blocks of the current macroblock, adds
+ * a DC prediction to coefficient 0, then stores the result as
+ * the last DC for the frame and in the block's Above and Left
+ * contexts.
+ *
+ * SPECIAL NOTES : The prediction is the last DC for the frame unless a
+ * neighbour context refers to the same frame: one matching
+ * neighbour gives its DC, two give their average.
+ *
+ ****************************************************************************/
+void
+VP6_PredictDC_MB(PB_INSTANCE *pbi)
+{
+    UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+    BLOCK_DX_INFO *blk = pbi->mbi.blockDxInfo;
+    UINT32 n;
+
+    for ( n = 0; n < 6; n++, blk++ )
+    {
+        Q_LIST_ENTRY *last = blk->LastDc;
+        BLOCK_CONTEXT *up = blk->Above;
+        BLOCK_CONTEXT *lf = blk->Left;
+        INT32 pred = last[Frame];
+
+        if ( Frame == up->Frame )
+        {
+            pred = up->Dc;
+            if ( Frame == lf->Frame )
+            {
+                // Both neighbours match: average them, adding one first
+                // when bit 15 of the sum is set.
+                pred += lf->Dc;
+                pred += (HIGHBITDUPPED(pred)&1);
+                pred >>= 1;
+            }
+        }
+        else if ( Frame == lf->Frame )
+        {
+            pred = lf->Dc;
+        }
+
+        blk->coeffsPtr[0] += pred;
+        last[Frame] = blk->coeffsPtr[0];
+
+        // This block becomes the neighbour for the blocks that follow.
+        up->Dc = blk->coeffsPtr[0];
+        up->Frame = Frame;
+
+        lf->Dc = blk->coeffsPtr[0];
+        lf->Frame = Frame;
+    }
+}
+
+
+/****************************************************************************
+*
+* ROUTINE : VP6_ExtractTokenN
+*
+* INPUTS : BITREADER *br : Pointer to bitreader to grab the bits from.
+* HUFF_NODE *hn : Pointer to root of huffman tree to use for decoding.
+* UINT16* hlt : Pointer to Huffman lookup table, accessed as an
+* array of HUFF_TABLE_NODE.
+*
+* OUTPUTS : None.
+*
+* RETURNS : The decoded token value.
+*
+* FUNCTION : Decodes one Huffman coded token. HUFF_LUT_LEVELS bits are
+* peeked and used to index the lookup table; if the entry is
+* flagged its value is the token, otherwise the tree is
+* walked one bit at a time starting from the entry's value.
+*
+* SPECIAL NOTES : PROBLEM !!!!!!!!!!! right now handles only left
+* justified bits in bitreader. The C version keeps every
+* thing in place so I can't use it!!
+*
+****************************************************************************/
+FORCEINLINE
+int VP6_ExtractTokenN ( BITREADER *br, HUFF_NODE *hn, UINT16* hlt )
+{
+    HUFF_TABLE_NODE *lut = (HUFF_TABLE_NODE *)hlt;
+    UINT32 idx = bitreadonly(br, HUFF_LUT_LEVELS);
+    tokenorptr node;
+
+    // Consume the number of bits recorded in the table entry.
+    bitShift(br, (lut[idx].length));
+
+    if ( !lut[idx].flag )
+    {
+        // The table entry only names a tree node: continue bit by bit
+        // until an entry with its selector set is reached.
+        node.value = lut[idx].value;
+        do
+        {
+            if ( bitread1(br) )
+                node = hn[node.value].rightunion.right;
+            else
+                node = hn[node.value].leftunion.left;
+        }
+        while ( !(node.selector) );
+
+        return node.value;
+    }
+
+    return lut[idx].value;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ReadHuffTokensPredictA_MB
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Reads the Huffman coded DCT tokens of the six blocks of the
+ * current macroblock from pbi->br3, stores the decoded
+ * coefficients through each block's coeffsPtr and records
+ * each block's EobPos.
+ *
+ * SPECIAL NOTES : Runs of zero DC tokens and runs of EOBs at the first AC
+ * position are carried from block to block in
+ * pbi->CurrentDcRunLen[] and pbi->CurrentAc1RunLen[], indexed
+ * by Plane (0 for blocks 0-3, 1 for blocks 4-5).
+ *
+****************************************************************************/
+void
+ReadHuffTokensPredictA_MB(PB_INSTANCE *pbi)
+{
+ BITREADER *br = &pbi->br3;
+ INT32 SignBit;
+ UINT32 Prec; // 0, 1 or 2: class of the previous token, selects the AC Huffman tables
+
+ UINT32 token;
+ UINT32 blockIndex;
+
+ UINT32 Plane = 0; // switches to 1 from block 4 onwards
+
+ INT16 *CoeffData;
+ MACROBLOCK_INFO *mbi = &pbi->mbi; // not referenced below
+
+ UINT8 *MergedScanOrderPtr; // current position in pbi->MergedScanOrder
+
+ //BLOCK_DX_INFO *bdi = pbi->mbi.blockDxInfo;
+ //BLOCK_DX_INFO *bdiEnd = bdi + 6;
+
+ for(blockIndex = 0; blockIndex < 6; blockIndex++)
+ {
+ MergedScanOrderPtr = pbi->MergedScanOrder;
+
+ CoeffData = pbi->mbi.blockDxInfo[blockIndex].coeffsPtr; //mbi->Coeffs[blockIndex];
+
+
+ if(blockIndex > 3)
+ {
+ Plane = 1;
+ }
+
+ if ( pbi->CurrentDcRunLen[Plane] > 0 )
+ {
+ // DC -- run of zeros in progress
+ --pbi->CurrentDcRunLen[Plane];
+ Prec = 0;
+ }
+ else
+ {
+ // DC -- no current run of zeros
+ token = VP6_ExtractTokenN(br, pbi->DcHuffTree[Plane], pbi->DcHuffLUT[Plane]);
+
+ if(token == DCT_EOB_TOKEN)
+ goto Finished;
+
+ if(token == ZERO_TOKEN)
+ {
+ // Read zero run-length
+ {
+ // Run of zeros at DC is coded as a tree
+ UINT32 val = 1 + bitread(br, 2); // 1..4; 3 and 4 are escapes extended below
+
+ if ( val == 3 )
+ val += bitread(br, 2);
+ else if ( val == 4 )
+ {
+ if ( bitread1(br) )
+ val = 11 + bitread(br, 6);
+ else
+ val = 7 + bitread(br, 2);
+ }
+ pbi->CurrentDcRunLen[Plane] = val - 1; // this block uses up one of the run
+ }
+ Prec = 0;
+ }
+ else
+ {
+ register INT32 value;
+
+ value = VP6_HuffTokenMinVal[token];
+
+ if(token <=FOUR_TOKEN)
+ {
+ SignBit = bitread1(br);
+ }
+ else if(token <=DCT_VAL_CATEGORY5)
+ {
+ value += bitread(br, (token-4));
+ SignBit = bitread1(br);
+ }
+ else
+ {
+ value += bitread(br, 11);
+ SignBit = bitread1(br);
+
+ }
+ CoeffData[0] = (Q_LIST_ENTRY)((value ^ -SignBit) + SignBit); // stores -value when SignBit is 1, value when it is 0
+ Prec = (value>1)?2:1;
+ }
+
+ }
+ //first AC
+
+ MergedScanOrderPtr++;
+
+ if ( pbi->CurrentAc1RunLen[Plane] > 0 )
+ {
+ // First AC in scan order -- run of EOBs in progress
+ --pbi->CurrentAc1RunLen[Plane];
+ goto Finished;
+ }
+
+ do
+ {
+
+ UINT32 Band = *(MergedScanOrderPtr + 64); //VP6_CoeffToHuffBand[EncodedCoeffs];
+
+ token = VP6_ExtractTokenN(br, pbi->AcHuffTree[Prec][Plane][Band], pbi->AcHuffLUT[Prec][Plane][Band]);
+
+ if(token == ZERO_TOKEN)
+ {
+ {
+ //UINT32 ZrlBand;
+ //UINT32 ZrlToken;
+ #define ZrlBand Band
+ #define ZrlToken token
+
+ // Read zero run-length
+ ZrlBand = (MergedScanOrderPtr >= (pbi->MergedScanOrder + ZRL_BAND2));
+
+ ZrlToken = VP6_ExtractTokenN(br, pbi->ZeroHuffTree[ZrlBand], pbi->ZeroHuffLUT[ZrlBand]);
+
+ if ( ZrlToken<8 )
+ MergedScanOrderPtr += ZrlToken; // Zero run <= 8
+ else
+ MergedScanOrderPtr += 8 + bitread(br, 6); // Zero run > 8
+ }
+ Prec =0;
+ MergedScanOrderPtr ++;
+ continue; // jumps to the loop test at the bottom
+ }
+
+ if(token == DCT_EOB_TOKEN)
+ {
+ if ( MergedScanOrderPtr == (pbi->MergedScanOrder + 1) )
+ {
+ // Read run of EOB at first AC position
+ UINT32 val = 1 + bitread(br, 2); // same tree coding as the DC zero run above
+
+ if ( val == 3 )
+ val += bitread(br, 2);
+ else if ( val == 4 )
+ {
+ if ( bitread1(br) )
+ val = 11 + bitread(br, 6);
+ else
+ val = 7 + bitread(br, 2);
+ }
+ pbi->CurrentAc1RunLen[Plane] = val - 1;
+ }
+ goto Finished;
+
+ }
+
+ {
+ register INT32 value;
+
+ value = VP6_HuffTokenMinVal[token];
+
+ if(token <=FOUR_TOKEN)
+ {
+ SignBit = bitread1(br);
+ }
+ else if(token <=DCT_VAL_CATEGORY5)
+ {
+ value += bitread(br, (token-4));
+ SignBit = bitread1(br);
+ }
+ else
+ {
+ value += bitread(br, 11);
+ SignBit = bitread1(br);
+
+ }
+
+ CoeffData[*(MergedScanOrderPtr)] = (Q_LIST_ENTRY)((value ^ -SignBit) + SignBit);
+ Prec = (value>1)?2:1;
+ MergedScanOrderPtr ++;
+ }
+
+ } while (MergedScanOrderPtr < (pbi->MergedScanOrder + BLOCK_SIZE));
+
+ MergedScanOrderPtr--; // the loop ran to the end of the block: step back one before recording EobPos
+
+ Finished:
+ //EobArray[blockIndex] = pbi->EobOffsetTable[(UINT32)(MergedScanOrderPtr - (pbi->MergedScanOrder))];
+ pbi->mbi.blockDxInfo[blockIndex].EobPos = (unsigned int)(MergedScanOrderPtr - pbi->MergedScanOrder);
+
+ } //for(blockIndex = 0; blockIndex < 6; blockIndex++)
+ //}while(++bdi < bdiEnd);
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ReadTokensPredictA_MB
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Decodes the bool coded DCT tokens of the six blocks of the
+ * current macroblock, stores the coefficients through each
+ * block's coeffsPtr, updates the Left/Above token contexts
+ * for DC and records each block's EobPos.
+ *
+ * SPECIAL NOTES : The bool coder state of pbi->mbi.br (count, range, value
+ * and read position) is copied into locals on entry and
+ * written back on exit. The NDECODEBOOL_AND_BRANCH_IF_* and
+ * APPLYSIGN macros are defined elsewhere; presumably they
+ * operate on those locals by name -- confirm before renaming
+ * any of them.
+ *
+****************************************************************************/
+
+void
+VP6_ReadTokensPredictA_MB(PB_INSTANCE *pbi)
+{
+ BLOCK_DX_INFO *bdi = pbi->mbi.blockDxInfo;
+ BLOCK_DX_INFO *bdiEnd = bdi + 6;
+
+ INT32 token;
+
+ int count = pbi->mbi.br->count; // local copies of the bool coder state, written back at the end
+ unsigned int range = pbi->mbi.br->range;
+ unsigned int value = pbi->mbi.br->value;
+
+ UINT8 *brBuffer = pbi->mbi.br->buffer;
+
+ UINT8 *MergedScanOrder = pbi->MergedScanOrder;
+ UINT8 *MergedScanOrderEnd = pbi->MergedScanOrder + BLOCK_SIZE;
+ UINT8 *MergedScanOrderPtr;
+
+ //bdi->br->buffer += bdi->br->pos;
+ brBuffer += pbi->mbi.br->pos; // now points at the next byte to be read
+
+ //register __m64 m64_brCount;
+ //__m64 m64_brBuffer;
+
+ //{
+ // BOOL_CODER *br = pbi->mbi.blockDxInfo[0].br;
+ //m64_brCount = _mm_cvtsi32_si64((int)br->count);
+ //}
+
+#define BaselineProbsPtr bdi->BaselineProbsPtr
+#define ContextProbsPtr bdi->ContextProbsPtr
+#define AcProbsPtr bdi->AcProbsBasePtr
+//#define token bdi->token
+
+ do
+ {
+ MergedScanOrderPtr = MergedScanOrder;
+
+ ContextProbsPtr = bdi->DcNodeContextsBasePtr + DcNodeOffset(0, (bdi->Left->Token + bdi->Above->Token), 0);
+ BaselineProbsPtr = bdi->DcProbsBasePtr;
+
+ // Decode the dc token -- first test to see if it is zero
+ NDECODEBOOL_AND_BRANCH_IF_ONE(ContextProbsPtr[ZERO_CONTEXT_NODE], DC_NON_ZERO_);
+
+ // Zero is implicit for DC token
+ //*(bdi->PrecTokenIndexPtr) = 0;
+ bdi->Left->Token = 0; // Update the above and left token contexts to indicate a zero
+ bdi->Above->Token = 0;
+
+ MergedScanOrderPtr++;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+
+ goto AC_DO_WHILE;
+
+DC_NON_ZERO_:
+ // A non zero DC value
+ bdi->Left->Token = 1; // Update the above and left token contexts to indicate non zero
+ bdi->Above->Token = 1;
+
+ // Was the value a 1
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
+
+ //PrecTokenIndex = 2;
+ //*(bdi->PrecTokenIndexPtr) = 2;
+
+ // Value token > 1
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
+
+ // High value (value category) token
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
+
+ // Cat3,Cat4 or Cat5
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
+
+ token = DCT_VAL_CATEGORY5;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+ token += 1;
+
+ goto DC_EXTRA_BITS_;
+
+CAT_THREEFOUR_CONTEXT_NODE_0_:
+ token = DCT_VAL_CATEGORY3;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+ token += 1;
+
+ goto DC_EXTRA_BITS_;
+
+HIGH_LOW_CONTEXT_NODE_0_:
+ // Either Cat1 or Cat2
+ token = DCT_VAL_CATEGORY1;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_ONE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+ token += 1;
+
+DC_EXTRA_BITS_:
+ {
+ INT32 tValue;
+ INT32 BitsCount;
+
+ unsigned int split;
+
+ tValue = VP6_TokenExtraBits2[token].MinVal;
+
+ // Read the extra bits
+ BitsCount = VP6_TokenExtraBits2[token].Length;
+ do
+ {
+ //value += (NDECODEBOOL(VP6_TokenExtraBits2[token].Probs[BitsCount]) << BitsCount );
+ // perform the actual decoding
+ split = 1 + (((range-1) * VP6_TokenExtraBits2[token].Probs[BitsCount] ) >> 8);
+
+ if ( value >= split<<24 )
+ {
+ value -= (split<<24);
+ split = range - split;
+
+ tValue += (1 << BitsCount); // a decoded 1 sets bit BitsCount of the extra-bits value
+
+ }
+
+ while(split < 0x80 ) // renormalise: double until split is at least 0x80, pulling in a new byte every 8 shifts
+ {
+ split += split;
+ value += value;
+
+ if ( !--count )
+ {
+ count = 8;
+ value |= *brBuffer;
+ brBuffer++;
+ }
+ }
+ range = split;
+
+ }
+ while(--BitsCount >= 0);
+
+
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[0], tValue);
+
+ MergedScanOrderPtr++;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ goto AC_DO_WHILE;
+ }
+
+LOW_VAL_CONTEXT_NODE_0_:
+ // Low value token
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
+
+ // Either a 3 or a 4
+ token = THREE_TOKEN;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
+
+ token += 1;
+
+THREE_CONTEXT_NODE_0_:
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[0], token);
+
+ MergedScanOrderPtr++;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ goto AC_DO_WHILE;
+
+TWO_CONTEXT_NODE_0_:
+ // Is it a 2
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[0], TWO_TOKEN);
+
+ MergedScanOrderPtr++;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ goto AC_DO_WHILE;
+
+ONE_CONTEXT_NODE_0_:
+ MergedScanOrderPtr++;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 1, *(MergedScanOrderPtr + 64), 0 );
+
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[0], 1);
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+AC_DO_WHILE:
+ // calculate the context for the next token.
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[ZERO_CONTEXT_NODE], NON_ZERO_RUN_);
+
+//ZERO_RUN_:
+ // Is the token a Zero or EOB
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[EOB_CONTEXT_NODE], BLOCK_FINISHED_1);
+
+ // Select the appropriate Zero run context
+ BaselineProbsPtr = bdi->ZeroRunProbsBasePtr;
+
+ if(MergedScanOrderPtr >= (pbi->MergedScanOrder + ZRL_BAND2))
+ BaselineProbsPtr += ZERO_RUN_PROB_CASES;
+
+ // Now decode the zero run length
+ // Run length 1-4
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[0], ZERO_RUN_5_8);
+
+//ZERO_RUN_1_4:
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[1], ZERO_RUN_1_4_a);
+
+ MergedScanOrderPtr += 1;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[2], ZERO_RUN_1_4_done);
+
+ MergedScanOrderPtr += 1;
+
+ZERO_RUN_1_4_done:
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 ); // NOTE(review): the table is read before the end-of-block test below -- confirm MergedScanOrder is large enough for an overrunning zero run
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto NON_ZERO_RUN_;
+
+ goto BLOCK_FINISHED;
+
+ZERO_RUN_1_4_a:
+ MergedScanOrderPtr += 3;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[3], ZERO_RUN_1_4_a_done);
+
+ MergedScanOrderPtr += 1;
+
+ZERO_RUN_1_4_a_done:
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto NON_ZERO_RUN_;
+
+ goto BLOCK_FINISHED;
+
+ZERO_RUN_5_8:
+ // Run length 5-8
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[4], ZERO_RUN_gt_8);
+
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[5], ZERO_RUN_5_8_a);
+
+ MergedScanOrderPtr += 5;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[6], ZERO_RUN_5_8_done);
+
+ MergedScanOrderPtr += 1;
+
+ZERO_RUN_5_8_done:
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto NON_ZERO_RUN_;
+
+ goto BLOCK_FINISHED;
+
+ZERO_RUN_5_8_a:
+ MergedScanOrderPtr += 7;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[7], ZERO_RUN_5_8_a_done);
+
+ MergedScanOrderPtr += 1;
+
+ZERO_RUN_5_8_a_done:
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto NON_ZERO_RUN_;
+
+ goto BLOCK_FINISHED;
+
+ZERO_RUN_gt_8:
+ // Run length > 8
+ {
+ unsigned int decodeCount;
+ unsigned int split;
+
+ decodeCount = 0;
+ do
+ {
+ // perform the actual decoding
+ split = 1 + (((range-1) * BaselineProbsPtr[8 + decodeCount]) >> 8);
+
+ if ( value >= split<<24 )
+ {
+ value -= (split<<24);
+ split = range - split;
+
+ MergedScanOrderPtr += (1 << decodeCount); // six bits are decoded, least significant first
+
+ }
+
+ while(split < 0x80 )
+ {
+ split += split;
+ value += value;
+
+ if ( !--count )
+ {
+ count = 8;
+ value |= *brBuffer;
+ brBuffer++;
+ }
+ }
+
+ range = split;
+
+ } while (++decodeCount < 6);
+
+ MergedScanOrderPtr += 9; // total advance is 9 plus the 6-bit value accumulated above
+
+ }
+
+ if( MergedScanOrderPtr >= MergedScanOrderEnd)
+ goto BLOCK_FINISHED;
+
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+
+
+NON_ZERO_RUN_:
+ // The token codes a non zero value
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[ONE_CONTEXT_NODE], AC_ONE_CONTEXT_0_);
+
+ // Value token > 1
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[LOW_VAL_CONTEXT_NODE], AC_LOW_VAL_CONTEXT_0_);
+
+ // High value (value category) token
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE], AC_HIGH_LOW_CONTEXT_0_);
+
+ // Cat3,Cat4
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE], AC_CAT_THREEFOUR_CONTEXT_0_);
+
+ token = DCT_VAL_CATEGORY5;
+
+ // Cat5,Cat6
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE], AC_EXTRA_BITS_);
+
+ //It is Cat6
+ token += 1;
+
+ goto AC_EXTRA_BITS_;
+
+AC_CAT_THREEFOUR_CONTEXT_0_:
+ token = DCT_VAL_CATEGORY3;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREE_CONTEXT_NODE], AC_EXTRA_BITS_);
+
+ //It is Cat4
+ token += 1;
+
+ goto AC_EXTRA_BITS_;
+
+AC_HIGH_LOW_CONTEXT_0_:
+ // Either Cat1 or Cat2
+ token = DCT_VAL_CATEGORY1;
+
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_ONE_CONTEXT_NODE], AC_EXTRA_BITS_);
+
+ //It is Cat2
+ token += 1;
+
+AC_EXTRA_BITS_:
+ {
+ INT32 BitsCount;
+ INT32 tValue;
+
+ unsigned int split;
+
+ tValue = VP6_TokenExtraBits2[token].MinVal;
+
+ // Read the extra bits
+ BitsCount = VP6_TokenExtraBits2[token].Length;
+
+ do
+ {
+ //tValue += (NDECODEBOOL(VP6_TokenExtraBits2[token].Probs[BitsCount]) << BitsCount);
+ split = 1 + (((range-1) * VP6_TokenExtraBits2[token].Probs[BitsCount] ) >> 8);
+
+ if ( value >= split<<24 )
+ {
+ value -= (split<<24);
+ split = range - split;
+
+ tValue += (1 << BitsCount);
+
+ }
+
+ while(split < 0x80 )
+ {
+ split += split;
+ value += value;
+
+ if ( !--count )
+ {
+ count = 8;
+ value |= *brBuffer;
+ brBuffer++;
+ }
+ }
+
+ range = split;
+ }
+ while(--BitsCount >= 0);
+
+
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], tValue);
+ MergedScanOrderPtr++;
+ }
+
+
+ //*(bdi->PrecTokenIndexPtr) = 2;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto AC_DO_WHILE;
+
+ goto BLOCK_FINISHED;
+
+
+AC_LOW_VAL_CONTEXT_0_:
+ // Low value token
+ NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[TWO_CONTEXT_NODE], AC_TWO_CONTEXT_0_);
+
+ // Either a 3 or a 4
+ token = THREE_TOKEN + 1;
+
+ NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[THREE_CONTEXT_NODE], AC_THREE_CONTEXT_1_);
+
+ //It is a 3
+ token = token - 1;
+
+AC_THREE_CONTEXT_1_:
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], token);
+ MergedScanOrderPtr++;
+
+ //*(bdi->PrecTokenIndexPtr) = 2;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto AC_DO_WHILE;
+
+ goto BLOCK_FINISHED;
+
+
+AC_TWO_CONTEXT_0_:
+ // Is it a 2
+ // apply the sign to the TWO_TOKEN
+ APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], TWO_TOKEN);
+ MergedScanOrderPtr++;
+
+ //*(bdi->PrecTokenIndexPtr) = 2;
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto AC_DO_WHILE;
+
+ goto BLOCK_FINISHED;
+
+AC_ONE_CONTEXT_0_:
+ // apply the sign to the value
+ APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], 1);
+
+ MergedScanOrderPtr++;
+
+ //*(bdi->PrecTokenIndexPtr) = 1;
+
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 1, *(MergedScanOrderPtr + 64), 0 );
+
+ if( MergedScanOrderPtr < MergedScanOrderEnd)
+ goto AC_DO_WHILE;
+
+BLOCK_FINISHED:
+ MergedScanOrderPtr--; // reached only when the scan position hit or passed the end of the block
+
+BLOCK_FINISHED_1:
+ bdi->EobPos = (unsigned int)(MergedScanOrderPtr - MergedScanOrder);
+ }while(++bdi < bdiEnd);
+
+ //bdi = pbi->mbi.blockDxInfo;
+ brBuffer -= pbi->mbi.br->pos; // brBuffer - buffer is now the number of bytes consumed by this call
+ pbi->mbi.br->pos += (unsigned int)(brBuffer - pbi->mbi.br->buffer);
+ //bdi->br->buffer = brBuffer;
+
+ pbi->mbi.br->count = count;
+ pbi->mbi.br->value = value;
+ pbi->mbi.br->range = range;
+
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeMacroBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * UINT32 MBrow : Row of MBs that block is in.
+ * UINT32 MBcol : Col of MBs that block is in.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Decodes a single MacroBlock: optional interlaced flag, mode
+ * and motion vector (non-base frames only), tokens, DC
+ * prediction, inverse DCT of the six blocks and finally
+ * reconstruction according to the coding mode.
+ *
+ * SPECIAL NOTES : The coefficient buffers are cleared again after each
+ * inverse DCT. For an interlaced macroblock the luma strides
+ * and the recon offsets of blocks 2 and 3 are changed for the
+ * duration of the call and restored before returning.
+ *
+ ****************************************************************************/
+INLINE
+void
+VP6_DecodeMacroBlock ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol )
+{
+    MACROBLOCK_INFO *mbi = &pbi->mbi;
+    BLOCK_DX_INFO *blk = mbi->blockDxInfo;
+    UINT32 thisRecon;
+    UINT32 bp;
+
+    // Intra unless the mode decode below says otherwise.
+    mbi->Mode = CODE_INTRA;
+
+    // Per-macroblock interlaced flag, only present for interlaced streams.
+    if ( pbi->Configuration.Interlaced )
+    {
+        UINT8 prob = pbi->probInterlaced;
+
+        // Past the first decoded column, bias the probability using the
+        // flag of the macroblock decoded just before this one.
+        if ( MBcol > BORDER_MBS )
+        {
+            if ( mbi->Interlaced )
+                prob = prob-(prob>>1);
+            else
+                prob = prob+((256-prob)>>1);
+        }
+
+        mbi->Interlaced = nDecodeBool( &pbi->br, prob);
+
+        if ( mbi->Interlaced == 1 )
+        {
+            // Luma blocks step two picture lines at a time ...
+            for ( bp = 0; bp < 4; bp++ )
+                blk[bp].CurrentReconStride = pbi->Configuration.YStride * 2;
+
+            // ... and the lower pair starts seven lines higher up.
+            blk[2].thisRecon -= (pbi->Configuration.YStride * 7);
+            blk[3].thisRecon -= (pbi->Configuration.YStride * 7);
+        }
+    }
+
+    if ( pbi->FrameType != BASE_FRAME )
+        VP6_decodeModeAndMotionVector ( pbi, MBrow, MBcol );
+
+    // Read tokens from the bitstream and convert to coefficients.
+    if ( pbi->UseHuffman )
+        ReadHuffTokensPredictA_MB(pbi);
+    else
+        VP6_ReadTokensPredictA_MB(pbi);
+
+    VP6_PredictDC_MB(pbi);
+
+    // Inverse transform each block with the variant matching its end of
+    // block position, then zero the coefficients again.
+    for ( bp = 0; bp < 6; bp++ )
+    {
+        UINT32 EOBPos = pbi->EobOffsetTable[blk[bp].EobPos];
+
+        if ( EOBPos <= 1 )
+        {
+            idct[1]( blk[bp].coeffsPtr, blk[bp].dequantPtr, pbi->ReconDataBuffer[bp] );
+            blk[bp].coeffsPtr[0] = 0;
+        }
+        else if ( EOBPos <= 10 )
+        {
+            UINT32 row;
+
+            idct[9]( blk[bp].coeffsPtr, blk[bp].dequantPtr, pbi->ReconDataBuffer[bp] );
+
+            // Clear all of row 0, the first four entries of rows 1-3 and
+            // the first entry of row 4.
+            memset(blk[bp].coeffsPtr, 0,8*sizeof(Q_LIST_ENTRY));
+            for ( row = 1; row < 4; row++ )
+                memset(blk[bp].coeffsPtr + 8*row, 0,4*sizeof(Q_LIST_ENTRY));
+            blk[bp].coeffsPtr[32] =0;
+        }
+        else
+        {
+            idct[63]( blk[bp].coeffsPtr, blk[bp].dequantPtr, pbi->ReconDataBuffer[bp] );
+            memset(blk[bp].coeffsPtr, 0, 64*sizeof(Q_LIST_ENTRY));
+        }
+    }
+
+    // Reconstruction depends on the decode mode.
+    if ( mbi->Mode == CODE_INTER_NO_MV )
+    {
+        // Inter with no motion vector: predict from the last frame.
+        for ( bp = 0; bp < 6; bp++ )
+        {
+            thisRecon = blk[bp].thisRecon;
+            ReconInter( pbi->TmpDataBuffer,
+                        (UINT8 *)&pbi->ThisFrameRecon[thisRecon],
+                        (UINT8 *)&pbi->LastFrameRecon[thisRecon],
+                        pbi->ReconDataBuffer[bp],
+                        blk[bp].CurrentReconStride);
+        }
+    }
+    else if ( VP6_ModeUsesMC[mbi->Mode] )
+    {
+        // The mode uses a motion vector: build the prediction first.
+        for ( bp = 0; bp < 6; bp++ )
+        {
+            thisRecon = blk[bp].thisRecon;
+            VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+            ReconBlock( pbi->TmpDataBuffer,
+                        pbi->ReconDataBuffer[bp],
+                        (UINT8 *)&pbi->ThisFrameRecon[thisRecon],
+                        blk[bp].CurrentReconStride);
+        }
+    }
+    else if ( mbi->Mode == CODE_USING_GOLDEN )
+    {
+        // Predict from the golden frame at the same position.
+        for ( bp = 0; bp < 6; bp++ )
+        {
+            thisRecon = blk[bp].thisRecon;
+            ReconInter( pbi->TmpDataBuffer,
+                        (UINT8 *)&pbi->ThisFrameRecon[thisRecon],
+                        (UINT8 *)&pbi->GoldenFrame[thisRecon],
+                        pbi->ReconDataBuffer[bp],
+                        blk[bp].CurrentReconStride );
+        }
+    }
+    else
+    {
+        // Simple intra coding.
+        for ( bp = 0; bp < 6; bp++ )
+        {
+            thisRecon = blk[bp].thisRecon;
+            ReconIntra( pbi->TmpDataBuffer,
+                        (UINT8 *)&pbi->ThisFrameRecon[thisRecon],
+                        (UINT16 *)pbi->ReconDataBuffer[bp],
+                        blk[bp].CurrentReconStride);
+        }
+    }
+
+    // Undo the interlaced stride/offset adjustment made above.
+    if ( mbi->Interlaced == 1 )
+    {
+        for ( bp = 0; bp < 4; bp++ )
+            blk[bp].CurrentReconStride = pbi->Configuration.YStride;
+
+        blk[2].thisRecon += (pbi->Configuration.YStride * 7);
+        blk[3].thisRecon += (pbi->Configuration.YStride * 7);
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeFrameMbs
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Decodes all the MacroBlocks of a frame.
+ *
+ * SPECIAL NOTES : On a base frame the probability tables, prediction modes
+ * and scan order are reset to their defaults first. Rows and
+ * columns from BORDER_MBS up to (but not including)
+ * MBRows/MBCols - BORDER_MBS are decoded; both loops are
+ * do/while, so at least one macroblock is always decoded.
+ *
+ ****************************************************************************/
+void VP6_DecodeFrameMbs ( PB_INSTANCE *pbi )
+{
+    if ( pbi->FrameType != BASE_FRAME )
+    {
+        VP6_DecodeModeProbs(pbi);
+        VP6_ConfigureMvEntropyDecoder( pbi, pbi->FrameType );
+        pbi->LastMode = CODE_INTER_NO_MV;
+    }
+    else
+    {
+        memcpy ( pbi->probXmitted,VP6_BaselineXmittedProbs,sizeof(pbi->probXmitted));
+
+        memcpy ( pbi->IsMvShortProb, DefaultIsShortProbs, sizeof(pbi->IsMvShortProb) );
+        memcpy ( pbi->MvShortProbs, DefaultMvShortProbs, sizeof(pbi->MvShortProbs) );
+        memcpy ( pbi->MvSignProbs, DefaultSignProbs, sizeof(pbi->MvSignProbs) );
+        memcpy ( pbi->MvSizeProbs, DefaultMvLongProbs, sizeof(pbi->MvSizeProbs) );
+
+        // Each table is filled using its own size (the second fill used
+        // to be sized with sizeof(pbi->MBModeProb)).
+        memset ( pbi->MBModeProb,128,sizeof(pbi->MBModeProb));
+        memset ( pbi->BModeProb,128,sizeof(pbi->BModeProb));
+        memset ( pbi->predictionMode,1,sizeof(char)*pbi->MacroBlocks );
+
+        // Set up default scan order banding
+        if( pbi->Configuration.Interlaced == 1 )
+            memcpy( pbi->ScanBands, DefaultInterlacedScanBands, sizeof(pbi->ScanBands) );
+        else
+            memcpy( pbi->ScanBands, DefaultNonInterlacedScanBands, sizeof(pbi->ScanBands) );
+
+        // Build the scan order
+        BuildScanOrder( pbi, pbi->ScanBands );
+    }
+
+    VP6_ConfigureEntropyDecoder( pbi, pbi->FrameType );
+
+    {
+        UINT32 i;
+
+        // Entries 0..63: scan order mapped through the quantizer's
+        // transIndex table.
+        for(i=0;i<64;i++)
+        {
+            pbi->MergedScanOrder[i] = pbi->quantizer->transIndex[pbi->ModifiedScanOrder[i]];
+        }
+
+        // Entries 64..128: coefficient position to band table, so the
+        // token readers can fetch the band with *(ptr + 64).
+        if ( pbi->UseHuffman )
+        {
+            // Create Huffman codes for tokens based on tree probabilities
+            ConvertBoolTrees ( pbi );
+
+            for(i = 64; i < 64+65; i++)
+            {
+                pbi->MergedScanOrder[i] = VP6_CoeffToHuffBand[i - 64];
+            }
+
+            // Reset Dc zero & Ac EOB run counters
+            pbi->CurrentDcRunLen[0] = 0;
+            pbi->CurrentDcRunLen[1] = 0;
+            pbi->CurrentAc1RunLen[0] = 0;
+            pbi->CurrentAc1RunLen[1] = 0;
+        }
+        else
+        {
+            for(i = 64; i < 64+65; i++)
+            {
+                pbi->MergedScanOrder[i] = VP6_CoeffToBand[i - 64];
+            }
+        }
+    }
+
+    if(pbi->Configuration.Interlaced == 1)
+        pbi->probInterlaced = ((UINT8)VP6_bitread( &pbi->br, 8 ));
+
+    // since we are on a new frame reset the above contexts
+    VP6_ResetAboveContext(pbi);
+
+    {
+        UINT32 MBrow;
+        UINT32 MBRows = pbi->MBRows;
+        UINT32 MBCols = pbi->MBCols;
+
+        MBCols -= BORDER_MBS;
+        MBRows -= BORDER_MBS;
+
+        // for each row of macroblocks
+        MBrow=BORDER_MBS;
+        do
+        {
+            MACROBLOCK_INFO *mbi = &pbi->mbi;
+            UINT32 MBcol;
+
+            VP6_ResetLeftContext(pbi);
+
+            // Point the per-block above contexts at the first decoded
+            // column: two luma contexts per macroblock (shared by the
+            // upper and lower block pair), one per chroma plane.
+            mbi->blockDxInfo[0].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+            mbi->blockDxInfo[1].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+            mbi->blockDxInfo[2].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+            mbi->blockDxInfo[3].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+            mbi->blockDxInfo[4].Above = &pbi->fc.AboveU[BORDER_MBS];
+            mbi->blockDxInfo[5].Above = &pbi->fc.AboveV[BORDER_MBS];
+
+            // Reconstruction offsets of the six blocks of the first
+            // decoded macroblock in this row.
+            mbi->blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + ((MBrow * pbi->Configuration.YStride) << 4) + (BORDER_MBS * 16);
+            mbi->blockDxInfo[1].thisRecon = mbi->blockDxInfo[0].thisRecon + 8;
+            mbi->blockDxInfo[2].thisRecon = mbi->blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3);
+            mbi->blockDxInfo[3].thisRecon = mbi->blockDxInfo[1].thisRecon + (pbi->Configuration.YStride << 3);
+
+            mbi->blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+            mbi->blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+
+            // for each macroblock within a row of macroblocks
+            MBcol=BORDER_MBS;
+            do
+            {
+                // Decode the macroblock
+                VP6_DecodeMacroBlock(pbi, MBrow, MBcol);
+
+                // Step contexts and offsets to the next macroblock.
+                mbi->blockDxInfo[0].Above += 2;
+                mbi->blockDxInfo[1].Above += 2;
+                mbi->blockDxInfo[2].Above += 2;
+                mbi->blockDxInfo[3].Above += 2;
+                mbi->blockDxInfo[4].Above += 1;
+                mbi->blockDxInfo[5].Above += 1;
+
+                mbi->blockDxInfo[0].thisRecon += 16;
+                mbi->blockDxInfo[1].thisRecon += 16;
+                mbi->blockDxInfo[2].thisRecon += 16;
+                mbi->blockDxInfo[3].thisRecon += 16;
+                mbi->blockDxInfo[4].thisRecon += 8;
+                mbi->blockDxInfo[5].thisRecon += 8;
+
+            } while(++MBcol < MBCols);
+
+        } while(++MBrow < MBRows);
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c
new file mode 100644
index 00000000..4d47f4da
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c
@@ -0,0 +1,656 @@
+/****************************************************************************
+*
+* Module Title : Decodemode.c
+*
+* Description : Functions for decoding modes and motionvectors
+*
+****************************************************************************/
+
+//************************************************************************************
+// Decoding the Modes:
+//
+// Decode Mode Tree Looks like this:
+//
+//
+//
+//
+// zz
+//
+// 0 Mode Same As Last
+//
+//
+// 1 2
+//
+// 3 4 5 6
+//
+// NoMV +MV Nest Near Intra FourMV 7 8
+//
+// 00Gold GoldMV GNrst GNear
+//
+//
+// 30 probability contexts are set up at each branch (in probMode) corresponding to
+//
+// 3 for what situation we are in at the mode level (all modes available,
+// no nearest mv found, and no near mv found)
+//
+// 10 one for each possible last mode
+//
+// Note: if the last mode was near then the probability of getting near at position 4
+// above is set to 0 (it would have been coded as same as last). Note also that the
+// probability of getting near when no near mv is available is also always set to 0.
+//
+// These probs are created from the 20 that can be xmitted in the bitstream (probXmitted)
+// For each mode 2 probabilities can be transmitted:
+// probability that the mode will appear if the last mode was the same
+// probability that the mode will appear if the last mode is not that mode
+//
+//************************************************************************************
+
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Implicit Imports
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+
+//*****************************************************************************
+// ModeVQ: This structure holds a table of probability vectors for encoding modes
+// To build this table a number of clips were run through and allowed to
+// select each of the probabilities that were best for them on each frame. These
+// choices were output and a vector quantizer was used to optimize the selection
+// of 16 vectors for each MODETYPE (allmodes available, nonearest, and no near)
+//
+// Layout: VP6_ModeVq[mode type][vector index][2*mode + n].
+// VP6_DecodeModeProbs reads a 4 bit vector index from the bitstream and copies
+// element 2*mode into probXmitted[type][1][mode] and element 2*mode+1 into
+// probXmitted[type][0][mode].
+// The initializer is a flat list (no inner braces): each source row is one
+// vector of 20 values and the blank lines separate the groups of 16 vectors.
+//*****************************************************************************
+const UINT8 VP6_ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2] =
+{
+ 9, 15, 32, 25, 7, 19, 9, 21, 1, 12, 14, 12, 3, 18, 14, 23, 3, 10, 0, 4,
+ 48, 39, 1, 2, 11, 27, 29, 44, 7, 27, 1, 4, 0, 3, 1, 6, 1, 2, 0, 0,
+ 21, 32, 1, 2, 4, 10, 32, 43, 6, 23, 2, 3, 1, 19, 1, 6, 12, 21, 0, 7,
+ 69, 83, 0, 0, 0, 2, 10, 29, 3, 12, 0, 1, 0, 3, 0, 3, 2, 2, 0, 0,
+ 11, 20, 1, 4, 18, 36, 43, 48, 13, 35, 0, 2, 0, 5, 3, 12, 1, 2, 0, 0,
+ 70, 44, 0, 1, 2, 10, 37, 46, 8, 26, 0, 2, 0, 2, 0, 2, 0, 1, 0, 0,
+ 8, 15, 0, 1, 8, 21, 74, 53, 22, 42, 0, 1, 0, 2, 0, 3, 1, 2, 0, 0,
+141, 42, 0, 0, 1, 4, 11, 24, 1, 11, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0,
+ 8, 19, 4, 10, 24, 45, 21, 37, 9, 29, 0, 3, 1, 7, 11, 25, 0, 2, 0, 1,
+ 46, 42, 0, 1, 2, 10, 54, 51, 10, 30, 0, 2, 0, 2, 0, 1, 0, 1, 0, 0,
+ 28, 32, 0, 0, 3, 10, 75, 51, 14, 33, 0, 1, 0, 2, 0, 1, 1, 2, 0, 0,
+100, 46, 0, 1, 3, 9, 21, 37, 5, 20, 0, 1, 0, 2, 1, 2, 0, 1, 0, 0,
+ 27, 29, 0, 1, 9, 25, 53, 51, 12, 34, 0, 1, 0, 3, 1, 5, 0, 2, 0, 0,
+ 80, 38, 0, 0, 1, 4, 69, 33, 5, 16, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
+ 16, 20, 0, 0, 2, 8,104, 49, 15, 33, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0,
+194, 16, 0, 0, 1, 1, 1, 9, 1, 3, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
+
+ 41, 22, 1, 0, 1, 31, 0, 0, 0, 0, 0, 1, 1, 7, 0, 1, 98, 25, 4, 10,
+123, 37, 6, 4, 1, 27, 0, 0, 0, 0, 5, 8, 1, 7, 0, 1, 12, 10, 0, 2,
+ 26, 14, 14, 12, 0, 24, 0, 0, 0, 0, 55, 17, 1, 9, 0, 36, 5, 7, 1, 3,
+209, 5, 0, 0, 0, 27, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
+ 2, 5, 4, 5, 0,121, 0, 0, 0, 0, 0, 3, 2, 4, 1, 4, 2, 2, 0, 1,
+175, 5, 0, 1, 0, 48, 0, 0, 0, 0, 0, 2, 0, 1, 0, 2, 0, 1, 0, 0,
+ 83, 5, 2, 3, 0,102, 0, 0, 0, 0, 1, 3, 0, 2, 0, 1, 0, 0, 0, 0,
+233, 6, 0, 0, 0, 8, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
+ 34, 16,112, 21, 1, 28, 0, 0, 0, 0, 6, 8, 1, 7, 0, 3, 2, 5, 0, 2,
+159, 35, 2, 2, 0, 25, 0, 0, 0, 0, 3, 6, 0, 5, 0, 1, 4, 4, 0, 1,
+ 75, 39, 5, 7, 2, 48, 0, 0, 0, 0, 3, 11, 2, 16, 1, 4, 7, 10, 0, 2,
+212, 21, 0, 1, 0, 9, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 2, 2, 0, 0,
+ 4, 2, 0, 0, 0,172, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 2, 0, 0, 0,
+187, 22, 1, 1, 0, 17, 0, 0, 0, 0, 3, 6, 0, 4, 0, 1, 4, 4, 0, 1,
+133, 6, 1, 2, 1, 70, 0, 0, 0, 0, 0, 2, 0, 4, 0, 3, 1, 1, 0, 0,
+251, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 2, 3, 2, 3, 0, 2, 0, 2, 0, 0, 11, 4, 1, 4, 0, 2, 3, 2, 0, 4,
+ 49, 46, 3, 4, 7, 31, 42, 41, 0, 0, 2, 6, 1, 7, 1, 4, 2, 4, 0, 1,
+ 26, 25, 1, 1, 2, 10, 67, 39, 0, 0, 1, 1, 0, 14, 0, 2, 31, 26, 1, 6,
+103, 46, 1, 2, 2, 10, 33, 42, 0, 0, 1, 4, 0, 3, 0, 1, 1, 3, 0, 0,
+ 14, 31, 9, 13, 14, 54, 22, 29, 0, 0, 2, 6, 4, 18, 6, 13, 1, 5, 0, 1,
+ 85, 39, 0, 0, 1, 9, 69, 40, 0, 0, 0, 1, 0, 3, 0, 1, 2, 3, 0, 0,
+ 31, 28, 0, 0, 3, 14,130, 34, 0, 0, 0, 1, 0, 3, 0, 1, 3, 3, 0, 1,
+171, 25, 0, 0, 1, 5, 25, 21, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
+ 17, 21, 68, 29, 6, 15, 13, 22, 0, 0, 6, 12, 3, 14, 4, 10, 1, 7, 0, 3,
+ 51, 39, 0, 1, 2, 12, 91, 44, 0, 0, 0, 2, 0, 3, 0, 1, 2, 3, 0, 1,
+ 81, 25, 0, 0, 2, 9,106, 26, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0,
+140, 37, 0, 1, 1, 8, 24, 33, 0, 0, 1, 2, 0, 2, 0, 1, 1, 2, 0, 0,
+ 14, 23, 1, 3, 11, 53, 90, 31, 0, 0, 0, 3, 1, 5, 2, 6, 1, 2, 0, 0,
+123, 29, 0, 0, 1, 7, 57, 30, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0,
+ 13, 14, 0, 0, 4, 20,175, 20, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0,
+202, 23, 0, 0, 1, 3, 2, 9, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0
+};
+
+// These are the probabilities that we reset to after each keyframe.
+// It was created as the average probabilities of the trees.
+// The initializer is flat (no inner braces): each source row below supplies the
+// two 10-entry vectors ([n][0][..] then [n][1][..]) for one first-index value n.
+// NOTE(review): the table is not referenced in this part of the file; presumably
+// it is copied into pbi->probXmitted on key frames - confirm against the caller.
+const UINT8 VP6_BaselineXmittedProbs[4][2][10] =
+{
+ 42, 2, 7, 42, 22, 3, 2, 5, 1, 0, 69, 1, 1, 44, 6, 1, 0, 1, 0, 0,
+ 8, 1, 8, 0, 0, 2, 1, 0, 1, 0, 229, 1, 0, 0, 0, 1, 0, 0, 1, 0,
+ 35, 1, 6, 34, 0, 2, 1, 1, 1, 0, 122, 1, 1, 46, 0, 1, 0, 0, 1, 0,
+ 64, 0, 64, 64, 64, 0, 0, 0, 0, 0, 64, 0, 64, 64, 64, 0, 0, 0, 0, 0,
+};
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_ModeBranchProb
+ *
+ *  INPUTS        : unsigned int taken : Summed count of the modes reached by
+ *                                       one side of a tree branch.
+ *                  unsigned int all   : Summed count of every mode reachable
+ *                                       below that branch.
+ *
+ *  RETURNS       : 1 + 255 * taken / (1 + all), using unsigned integer
+ *                  arithmetic (the +1 in the divisor avoids division by zero).
+ *
+ ****************************************************************************/
+static unsigned int VP6_ModeBranchProb ( unsigned int taken, unsigned int all )
+{
+    return 1 + 255 * taken / ( 1 + all );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_BuildModeTree
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : For every (last mode, mode type) pair derives, from
+ *                  pbi->probXmitted:
+ *                    - pbi->probModeSame : probability used for the
+ *                      "same mode as last" decision;
+ *                    - pbi->probMode[..][0..8] : probability for each of
+ *                      the nine branches of the mode tree.
+ *
+ *  SPECIAL NOTES : The count of the last mode itself is forced to zero
+ *                  before the branch probabilities are computed, because
+ *                  that case is handled by probModeSame.
+ *
+ ****************************************************************************/
+void VP6_BuildModeTree ( PB_INSTANCE *pbi )
+{
+    int last, type, m;
+
+    for ( last=0; last<10; last++ )
+    {
+        unsigned int Counts[MAX_MODES];
+
+        for ( type=0; type<MODETYPES; type++ )
+        {
+            unsigned int total = 0;
+            unsigned int noMvPair;      // no-MV + plus-MV
+            unsigned int nearPair;      // nearest + near
+            unsigned int intraPair;     // intra + four-MV
+            unsigned int goldPair;      // golden no-MV + golden MV
+            unsigned int goldNearPair;  // golden nearest + golden near
+
+            // Weighted counts of every mode except the last one.
+            for ( m=0; m<10; m++ )
+            {
+                if ( m != last )
+                    Counts[m] = 100*pbi->probXmitted[type][0][m];
+                else
+                    Counts[m] = 0;
+
+                total += Counts[m];
+            }
+
+            pbi->probModeSame[type][last] = 255-
+                255 * pbi->probXmitted[type][1][last]
+                /
+                ( 1 +
+                  pbi->probXmitted[type][1][last] +
+                  pbi->probXmitted[type][0][last]
+                );
+
+            noMvPair     = Counts[CODE_INTER_NO_MV]      + Counts[CODE_INTER_PLUS_MV];
+            nearPair     = Counts[CODE_INTER_NEAREST_MV] + Counts[CODE_INTER_NEAR_MV];
+            intraPair    = Counts[CODE_INTRA]            + Counts[CODE_INTER_FOURMV];
+            goldPair     = Counts[CODE_USING_GOLDEN]     + Counts[CODE_GOLDEN_MV];
+            goldNearPair = Counts[CODE_GOLD_NEAREST_MV]  + Counts[CODE_GOLD_NEAR_MV];
+
+            // Each branch probability is the share of the counts below the
+            // branch that belongs to one of its sides.
+            pbi->probMode[type][last][0] = VP6_ModeBranchProb( noMvPair + nearPair, total );
+            pbi->probMode[type][last][1] = VP6_ModeBranchProb( noMvPair, noMvPair + nearPair );
+            pbi->probMode[type][last][2] = VP6_ModeBranchProb( intraPair, intraPair + goldPair + goldNearPair );
+            pbi->probMode[type][last][3] = VP6_ModeBranchProb( Counts[CODE_INTER_NO_MV], noMvPair );
+            pbi->probMode[type][last][4] = VP6_ModeBranchProb( Counts[CODE_INTER_NEAREST_MV], nearPair );
+            pbi->probMode[type][last][5] = VP6_ModeBranchProb( Counts[CODE_INTRA], intraPair );
+            pbi->probMode[type][last][6] = VP6_ModeBranchProb( goldPair, goldPair + goldNearPair );
+            pbi->probMode[type][last][7] = VP6_ModeBranchProb( Counts[CODE_USING_GOLDEN], goldPair );
+            pbi->probMode[type][last][8] = VP6_ModeBranchProb( Counts[CODE_GOLD_NEAREST_MV], goldNearPair );
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_decodeModeDiff
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : a signed probability difference value decoded from the
+ *                  bitstream.
+ *
+ *  FUNCTION      : Walks a fixed tree with hardcoded branch probabilities.
+ *                  The result is one of:
+ *                    - 0 when the first flag is clear (no change);
+ *                    - +/-4 or +/-8;
+ *                    - +/-12, +/-16, +/-20 or +/-24;
+ *                    - sign * 4 * n, where n is the value returned by
+ *                      VP6_bitread(..., 7).  Assuming that call yields a
+ *                      7 bit value, this reaches +/-508, i.e. beyond the
+ *                      +/-256 a single probability can actually move.
+ *
+ *  SPECIAL NOTES : The hard coded probabilities for the difference tree
+ *                  were calculated by taking the average number of times a
+ *                  branch was taken on some sample material ie
+ *                  (bond,bike,beautifulmind)
+ *
+ ****************************************************************************/
+int VP6_decodeModeDiff ( PB_INSTANCE *pbi )
+{
+    int sign;
+
+    // First flag clear: this probability is not adjusted.
+    if ( VP6_DecodeBool(&pbi->br, 205) == 0 )
+        return 0;
+
+    // +1 or -1
+    sign = 1 + -2 * VP6_DecodeBool128(&pbi->br);
+
+    if ( !VP6_DecodeBool(&pbi->br,171) )
+    {
+        // Magnitude 8 (flag clear) or 4 (flag set).  The shift is applied to a
+        // positive constant and the sign multiplied in afterwards, because
+        // left-shifting a negative value is undefined behaviour in C.
+        int magnitude = 1 << ( 3 - VP6_DecodeBool( &pbi->br,83) );
+        return sign * magnitude;
+    }
+
+    if ( !VP6_DecodeBool( &pbi->br,199) )
+    {
+        if ( VP6_DecodeBool( &pbi->br,140) )
+            return sign * 12;
+
+        if ( VP6_DecodeBool( &pbi->br,125) )
+            return sign * 16;
+
+        if ( VP6_DecodeBool( &pbi->br,104) )
+            return sign * 20;
+
+        return sign * 24;
+    }
+    else
+    {
+        // Escape: magnitude sent explicitly, in units of 4.
+        int diff = VP6_bitread(&pbi->br,7);
+        return sign * diff * 4;
+    }
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_ClampModeProb
+ *
+ *  INPUTS        : int value : Adjusted probability.
+ *
+ *  RETURNS       : value limited to the range 0..255.
+ *
+ ****************************************************************************/
+static int VP6_ClampModeProb ( int value )
+{
+    if ( value < 0 )
+        return 0;
+
+    if ( value > 255 )
+        return 255;
+
+    return value;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_DecodeModeProbs
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Updates pbi->probXmitted from the bitstream and then
+ *                  rebuilds the mode tree.  For each mode type the stream
+ *                  may (a) select one of the VP6_ModeVq vectors as the new
+ *                  baseline and/or (b) carry a per-probability difference
+ *                  that is added to the current value and clamped to 0..255.
+ *                  When neither flag is set the existing values are kept.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_DecodeModeProbs ( PB_INSTANCE *pbi )
+{
+    int type, m, slot;
+
+    // One pass per mode type (all modes available, no nearest, no near mode)
+    for ( type=0; type<MODETYPES; type++ )
+    {
+        // Optional: replace the baseline with a table vector
+        if ( VP6_DecodeBool( &pbi->br, PROBVECTORXMIT ) )
+        {
+            int vq = VP6_bitread(&pbi->br, 4);
+
+            for ( m=0; m<MAX_MODES; m++ )
+            {
+                pbi->probXmitted[type][1][m] = VP6_ModeVq[type][vq][m*2];
+                pbi->probXmitted[type][0][m] = VP6_ModeVq[type][vq][m*2+1];
+            }
+        }
+
+        // Optional: per-probability corrections
+        if ( VP6_DecodeBool( &pbi->br, PROBIDEALXMIT) )
+        {
+            for ( m=0; m<10; m++ )
+            {
+                // The difference for slot 1 precedes the one for slot 0
+                // in the bitstream.
+                for ( slot=1; slot>=0; slot-- )
+                {
+                    int adjusted = VP6_decodeModeDiff(pbi);
+
+                    adjusted += pbi->probXmitted[type][slot][m];
+                    pbi->probXmitted[type][slot][m] = VP6_ClampModeProb(adjusted);
+                }
+            }
+        }
+    }
+
+    VP6_BuildModeTree(pbi);
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_DecodeBlockMode
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : Decoded coding mode (as a CODING_MODE)
+ *
+ *  FUNCTION      : Reads two values with VP6_DecodeBool128 (most
+ *                  significant first) and maps the resulting 2 bit code
+ *                  to one of the four inter coding modes.
+ *
+ *  SPECIAL NOTES : Any code outside 0..3 yields (CODING_MODE)0.
+ *
+ ****************************************************************************/
+CODING_MODE VP6_DecodeBlockMode ( PB_INSTANCE *pbi )
+{
+    static const CODING_MODE BlockModes[4] =
+    {
+        CODE_INTER_NO_MV,       // code 0
+        CODE_INTER_PLUS_MV,     // code 1
+        CODE_INTER_NEAREST_MV,  // code 2
+        CODE_INTER_NEAR_MV      // code 3
+    };
+    int highBit = VP6_DecodeBool128(&pbi->br);
+    int lowBit  = VP6_DecodeBool128(&pbi->br);
+    int code    = (highBit<<1) + lowBit;
+
+    if ( code < 0 || code > 3 )
+        return (CODING_MODE)0;
+
+    return BlockModes[code];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_DecodeMode
+ *
+ *  INPUTS        : PB_INSTANCE *pbi     : Pointer to decoder instance.
+ *                  CODING_MODE lastmode : Mode of the last coded macroblock.
+ *                  UINT32 type          : Mode type (all modes available,
+ *                                         no nearest macroblock, no near macroblock).
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : Decoded coding mode (as a CODING_MODE)
+ *
+ *  FUNCTION      : First decodes a "same as last mode" flag with the
+ *                  probability in probModeSame[type][lastmode].  If it is
+ *                  clear, walks the mode tree using the nine branch
+ *                  probabilities in probMode[type][lastmode].
+ *
+ *  SPECIAL NOTES : Leaves that share a final branch rely on their
+ *                  CODING_MODE values being adjacent (or two apart for
+ *                  no-MV / plus-MV): the decoded bit is added to the
+ *                  lower mode value.
+ *
+ ****************************************************************************/
+CODING_MODE VP6_DecodeMode ( PB_INSTANCE *pbi, CODING_MODE lastmode, UINT32 type )
+{
+    UINT8 *branchProb;
+
+    if ( VP6_DecodeBool(&pbi->br,pbi->probModeSame[type][lastmode]) )
+        return lastmode;
+
+    branchProb = pbi->probMode[type][lastmode];
+
+    // Branch 0: inter (last frame) modes versus everything else
+    if ( !VP6_DecodeBool(&pbi->br,branchProb[0]) )
+    {
+        // Branch 1: {no MV, plus MV} versus {nearest, near}
+        if ( VP6_DecodeBool(&pbi->br,branchProb[1]) )
+            return (CODING_MODE)(CODE_INTER_NEAREST_MV + VP6_DecodeBool(&pbi->br,branchProb[4]));
+
+        return (CODING_MODE)(CODE_INTER_NO_MV + 2 * VP6_DecodeBool(&pbi->br,branchProb[3]));
+    }
+
+    // Branch 2: {intra, four MV} versus the golden frame modes
+    if ( !VP6_DecodeBool(&pbi->br,branchProb[2]) )
+    {
+        if ( VP6_DecodeBool(&pbi->br,branchProb[5]) )
+            return CODE_INTER_FOURMV;
+
+        return CODE_INTRA;
+    }
+
+    // Branch 6: {golden no MV, golden MV} versus {golden nearest, golden near}
+    if ( VP6_DecodeBool(&pbi->br,branchProb[6]) )
+        return (CODING_MODE)(CODE_GOLD_NEAREST_MV + VP6_DecodeBool(&pbi->br,branchProb[8]));
+
+    return (CODING_MODE)(CODE_USING_GOLDEN + VP6_DecodeBool(&pbi->br,branchProb[7]));
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_decodeModeAndMotionVector
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                  UINT32 MBrow     : Row number for MB.
+ *                  UINT32 MBcol     : Col number for MB.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Decodes the coding mode of one macroblock and the motion
+ *                  vector(s) that go with it.  Results are written to
+ *                  pbi->LastMode, pbi->predictionMode[], pbi->MBMotionVector[]
+ *                  and pbi->mbi (Mode, BlockMode[0..5], Mv[0..5]).
+ *
+ *  SPECIAL NOTES : In four-MV mode each of the four luma blocks gets its own
+ *                  mode and vector; blocks 4 and 5 receive the rounded
+ *                  average of the four, and the vector of block 3 is the one
+ *                  stored in pbi->MBMotionVector[].
+ *
+ ****************************************************************************/
+void VP6_decodeModeAndMotionVector ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol )
+{
+    int           type;
+    int           x, y;
+    UINT32        blk;
+    UINT32        mbIndex = MBOffset(MBrow,MBcol);
+    CODING_MODE   mode;
+    MOTION_VECTOR mv;
+
+    // Candidate vectors from the neighbourhood (frame 1) and the mode type
+    VP6_FindNearestandNextNearest(pbi,MBrow,MBcol,1,&type);
+
+    mode = VP6_DecodeMode(pbi,pbi->LastMode,type);
+
+    pbi->LastMode                = mode;
+    pbi->predictionMode[mbIndex] = mode;
+    pbi->mbi.Mode                = mode;
+
+    if ( mode != CODE_INTER_FOURMV )
+    {
+        // The three golden modes need the golden frame candidates first.
+        if ( mode == CODE_GOLD_NEAREST_MV ||
+             mode == CODE_GOLD_NEAR_MV    ||
+             mode == CODE_GOLDEN_MV )
+        {
+            VP6_FindNearestandNextNearest(pbi, MBrow, MBcol, 2, &type);
+        }
+
+        switch ( mode )
+        {
+        case CODE_INTER_NEAREST_MV:
+            x = pbi->mbi.NearestInterMVect.x;
+            y = pbi->mbi.NearestInterMVect.y;
+            break;
+        case CODE_INTER_NEAR_MV:
+            x = pbi->mbi.NearInterMVect.x;
+            y = pbi->mbi.NearInterMVect.y;
+            break;
+        case CODE_GOLD_NEAREST_MV:
+            x = pbi->mbi.NearestGoldMVect.x;
+            y = pbi->mbi.NearestGoldMVect.y;
+            break;
+        case CODE_GOLD_NEAR_MV:
+            x = pbi->mbi.NearGoldMVect.x;
+            y = pbi->mbi.NearGoldMVect.y;
+            break;
+        case CODE_INTER_PLUS_MV:
+        case CODE_GOLDEN_MV:
+            // Explicitly coded vector; mode selects which origin is used.
+            VP6_decodeMotionVector(pbi,&mv,mode);
+            x = mv.x;
+            y = mv.y;
+            break;
+        default:
+            x = 0;
+            y = 0;
+            break;
+        }
+
+        pbi->MBMotionVector[mbIndex].x = x;
+        pbi->MBMotionVector[mbIndex].y = y;
+
+        // Every block of the macroblock shares the mode and the vector.
+        for ( blk=0; blk<6; blk++ )
+        {
+            pbi->mbi.Mv[blk].x      = x;
+            pbi->mbi.Mv[blk].y      = y;
+            pbi->mbi.BlockMode[blk] = mode;
+        }
+        return;
+    }
+
+    // Four-MV: all four block modes are read before any vector.
+    for ( blk=0; blk<4; blk++ )
+        pbi->mbi.BlockMode[blk] = VP6_DecodeBlockMode(pbi);
+
+    pbi->mbi.BlockMode[4] = CODE_INTER_FOURMV;
+    pbi->mbi.BlockMode[5] = CODE_INTER_FOURMV;
+
+    x = 0;
+    y = 0;
+    for ( blk=0; blk<4; blk++ )
+    {
+        switch ( pbi->mbi.BlockMode[blk] )
+        {
+        case CODE_INTER_NO_MV:
+            pbi->mbi.Mv[blk].x = 0;
+            pbi->mbi.Mv[blk].y = 0;
+            break;
+        case CODE_INTER_NEAREST_MV:
+            pbi->mbi.Mv[blk].x = pbi->mbi.NearestInterMVect.x;
+            pbi->mbi.Mv[blk].y = pbi->mbi.NearestInterMVect.y;
+            x += pbi->mbi.NearestInterMVect.x;
+            y += pbi->mbi.NearestInterMVect.y;
+            break;
+        case CODE_INTER_NEAR_MV:
+            pbi->mbi.Mv[blk].x = pbi->mbi.NearInterMVect.x;
+            pbi->mbi.Mv[blk].y = pbi->mbi.NearInterMVect.y;
+            x += pbi->mbi.NearInterMVect.x;
+            y += pbi->mbi.NearInterMVect.y;
+            break;
+        case CODE_INTER_PLUS_MV:
+            VP6_decodeMotionVector(pbi,&mv,CODE_INTER_PLUS_MV);
+            pbi->mbi.Mv[blk].x = mv.x;
+            pbi->mbi.Mv[blk].y = mv.y;
+            x += mv.x;
+            y += mv.y;
+            break;
+        default:
+            // Any other block mode leaves Mv[blk] untouched.
+            break;
+        }
+    }
+
+    // Average of the four vectors, rounded
+    x = (x+1+(x>=0))>>2;
+    y = (y+1+(y>=0))>>2;
+
+    pbi->MBMotionVector[mbIndex].x = pbi->mbi.Mv[3].x;
+    pbi->MBMotionVector[mbIndex].y = pbi->mbi.Mv[3].y;
+
+    for ( blk=4; blk<6; blk++ )
+    {
+        pbi->mbi.Mv[blk].x = x;
+        pbi->mbi.Mv[blk].y = y;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c
new file mode 100644
index 00000000..ff116871
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c
@@ -0,0 +1,339 @@
+/****************************************************************************
+*
+* Module Title : Decodemv.c
+*
+* Description : Functions for decoding modes and motion vectors.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+// Probabilities used by VP6_ConfigureMvEntropyDecoder when it decodes the
+// "an update follows" flag for each motion vector probability. The first index
+// is the same index that VP6_decodeMotionVector uses for x (0) and y (1).
+// Second index: 0 = is-short probability, 1 = sign probability,
+// 2..8 = the seven short vector tree nodes, 9 onward = the long vector bits.
+const UINT8 VP6_MvUpdateProbs[2][MV_NODES] =
+{
+ { 237, 246, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 250, 250, 252 },
+ { 231, 243, 245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 251, 251, 254 }
+};
+
+// NOTE(review): the four Default* tables below are not referenced in this part
+// of the file; presumably they are the initial values of pbi->MvShortProbs,
+// pbi->MvSizeProbs, pbi->IsMvShortProb and pbi->MvSignProbs - confirm.
+const UINT8 DefaultMvShortProbs[2][7] =
+{
+ { 225, 146, 172, 147, 214, 39, 156 },
+ { 204, 170, 119, 235, 140, 230, 228 }
+};
+
+const UINT8 DefaultMvLongProbs[2][LONG_MV_BITS] =
+{
+ { 247, 210, 135, 68, 138, 220, 239, 246 },
+ { 244, 184, 201, 44, 173, 221, 239, 253 }
+};
+
+const UINT8 DefaultIsShortProbs[2] = { 162, 164 };
+const UINT8 DefaultSignProbs[2] = { 128, 128 };
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_ConfigureMvEntropyDecoder
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                  UINT8 FrameType  : Type of the frame (not used here).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads optional updates to the motion vector
+ *                  probabilities.  Every probability is preceded by a flag
+ *                  decoded with the matching VP6_MvUpdateProbs entry; when
+ *                  the flag is set the new value is read as
+ *                  PROB_UPDATE_BASELINE_COST bits, doubled, and forced to 1
+ *                  if the result is 0.
+ *
+ *  SPECIAL NOTES : Stream order: is-short and sign for both components,
+ *                  then the 7 short tree nodes for both components, then
+ *                  the LONG_MV_BITS long vector bits for both components.
+ *
+ ***************************************************************************/
+void VP6_ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+    int    comp;
+    UINT32 node;
+
+    // This function is not called at all for a BASE_FRAME
+
+    // Is-short and sign probabilities
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[comp][0]) )
+        {
+            pbi->IsMvShortProb[comp] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( !pbi->IsMvShortProb[comp] )
+                pbi->IsMvShortProb[comp] = 1;
+        }
+
+        if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[comp][1]) )
+        {
+            pbi->MvSignProbs[comp] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( !pbi->MvSignProbs[comp] )
+                pbi->MvSignProbs[comp] = 1;
+        }
+    }
+
+    // Short vector tree nodes: update flags use entries 2..8
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        for ( node = 0; node < 7; node++ )
+        {
+            if ( !VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[comp][2 + node]) )
+                continue;
+
+            pbi->MvShortProbs[comp][node] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( !pbi->MvShortProbs[comp][node] )
+                pbi->MvShortProbs[comp][node] = 1;
+        }
+    }
+
+    // Long vector bits: update flags use entries 9 onward
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        for ( node = 0; node < LONG_MV_BITS; node++ )
+        {
+            if ( !VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[comp][2 + 7 + node]) )
+                continue;
+
+            pbi->MvSizeProbs[comp][node] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( !pbi->MvSizeProbs[comp][node] )
+                pbi->MvSizeProbs[comp][node] = 1;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_decodeMotionVector
+ *
+ *  INPUTS        : PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                  CODING_MODE Mode  : MV coding mode.
+ *
+ *  OUTPUTS       : MOTION_VECTOR *mv : Returned motion vector.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Decodes the x and then the y component of a motion
+ *                  vector.  Each component is a magnitude (short form: a
+ *                  3 bit tree, long form: 8 bits) followed by a sign when
+ *                  the magnitude is non-zero.  The decoded value is added
+ *                  to an origin: the nearest inter vector for
+ *                  CODE_INTER_PLUS_MV, the nearest golden vector for any
+ *                  other mode, or (0,0) when the corresponding nearest
+ *                  index is not below MAX_NEAREST_ADJ_INDEX.
+ *
+ *  SPECIAL NOTES : Long form bits arrive in the order 0,1,2,7,6,5,4 and
+ *                  bit 3 last; bit 3 is only read when one of bits 4..7 is
+ *                  set, otherwise it is implied to be 1.
+ *
+ ****************************************************************************/
+void VP6_decodeMotionVector
+(
+    PB_INSTANCE *pbi,
+    MOTION_VECTOR *mv,
+    CODING_MODE Mode
+)
+{
+    static const UINT8 LongBitOrder[7] = { 0, 1, 2, 7, 6, 5, 4 };
+    UINT32 comp;
+    UINT32 n;
+    INT32  Magnitude;
+    INT32  Origin[2] = { 0, 0 };    // [0] = x, [1] = y
+
+    // Pick the origin the coded vector is relative to
+    if ( Mode == CODE_INTER_PLUS_MV )
+    {
+        if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+        {
+            Origin[0] = pbi->mbi.NearestInterMVect.x;
+            Origin[1] = pbi->mbi.NearestInterMVect.y;
+        }
+    }
+    else if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+    {
+        Origin[0] = pbi->mbi.NearestGoldMVect.x;
+        Origin[1] = pbi->mbi.NearestGoldMVect.y;
+    }
+
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        if ( VP6_DecodeBool(&pbi->br, pbi->IsMvShortProb[comp]) )
+        {
+            // Large magnitude vector
+            Magnitude = 0;
+            for ( n = 0; n < 7; n++ )
+                Magnitude += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[comp][LongBitOrder[n]] ) << LongBitOrder[n]);
+
+            // Bit 3 is implicit unless a higher order bit is set
+            if ( Magnitude & 0xF0 )
+                Magnitude += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[comp][3] ) << 3);
+            else
+                Magnitude += 0x08;
+        }
+        else
+        {
+            // Small magnitude vector: 3 bit tree, most significant bit first
+            if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][0]) )
+            {
+                if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][4]) )
+                    Magnitude = 6 + VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][6]);
+                else
+                    Magnitude = 4 + VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][5]);
+            }
+            else
+            {
+                if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][1]) )
+                    Magnitude = 2 + VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][3]);
+                else
+                    Magnitude = VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[comp][2]);
+            }
+        }
+
+        // The sign is only present for a non-zero magnitude
+        if ( Magnitude != 0 && VP6_DecodeBool(&pbi->br, pbi->MvSignProbs[comp]) )
+            Magnitude = -Magnitude;
+
+        if ( comp == 0 )
+            mv->x = Magnitude + Origin[0];
+        else
+            mv->y = Magnitude + Origin[1];
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP6_FindNearestandNextNearest
+ *
+ *  INPUTS        : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                  UINT32 MBrow     : Row of macroblock to check.
+ *                  UINT32 MBcol     : Col of macroblock to check.
+ *                  UINT8 Frame      : Frame type which MV should come
+ *                                     from (1 selects the inter results,
+ *                                     any other value the golden results).
+ *
+ *  OUTPUTS       : int *type        : Type of the vector returned. Only
+ *                                     written when Frame == 1.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Scans up to 12 neighbouring macroblocks (offsets in
+ *                  pbi->mvNearOffset) whose prediction mode maps to Frame.
+ *                  The first non-zero vector found becomes "nearest"; the
+ *                  first later non-zero vector that differs from it becomes
+ *                  "next nearest".  Vectors that are not found stay (0,0).
+ *                  Results go to pbi->mbi.NearestInterMVect /
+ *                  NearInterMVect / NearestMvIndex when Frame == 1, and to
+ *                  the corresponding golden fields otherwise.
+ *
+ *  SPECIAL NOTES : Vectors are compared and copied through their x and y
+ *                  members.  The previous implementation read each vector
+ *                  through an (unsigned int *) cast, which breaks the C
+ *                  effective type (strict aliasing) rule and only worked if
+ *                  the structure was exactly two 16 bit values, 32 bit
+ *                  aligned.  For that layout the results are unchanged.
+ *                  The nearest index is 12 when no nearest vector exists.
+ *
+ ****************************************************************************/
+void VP6_FindNearestandNextNearest
+(
+    PB_INSTANCE *pbi,
+    UINT32 MBrow,
+    UINT32 MBcol,
+    UINT8 Frame,
+    int *type
+)
+{
+    int    i;
+    UINT32 OffsetMB;
+    UINT32 BaseMB = MBOffset(MBrow,MBcol);
+    INT32  NearestX = 0;
+    INT32  NearestY = 0;
+    INT32  NextNearestX = 0;
+    INT32  NextNearestY = 0;
+    INT32  nearestIndex;
+    INT32  thisX;
+    INT32  thisY;
+    INT32  typet;
+
+    typet = NONEAREST_MACROBLOCK;
+
+    // Pass 1: first non-zero vector that refers to the requested frame
+    for ( i=0; i<12; i++ )
+    {
+        OffsetMB = pbi->mvNearOffset[i] + BaseMB;
+
+        if ( VP6_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame )
+        {
+            continue;
+        }
+
+        thisX = pbi->MBMotionVector[OffsetMB].x;
+        thisY = pbi->MBMotionVector[OffsetMB].y;
+
+        if ( thisX != 0 || thisY != 0 )
+        {
+            NearestX = thisX;
+            NearestY = thisY;
+            typet = NONEAR_MACROBLOCK;
+            break;
+        }
+    }
+
+    nearestIndex = i;
+
+    // Pass 2: continue after the nearest hit, looking for a different
+    // non-zero vector
+    for ( i=i+1; i<12; i++ )
+    {
+        OffsetMB = pbi->mvNearOffset[i] + BaseMB;
+
+        if ( VP6_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame )
+        {
+            continue;
+        }
+
+        thisX = pbi->MBMotionVector[OffsetMB].x;
+        thisY = pbi->MBMotionVector[OffsetMB].y;
+
+        if ( thisX == NearestX && thisY == NearestY )
+        {
+            continue;
+        }
+
+        if ( thisX != 0 || thisY != 0 )
+        {
+            NextNearestX = thisX;
+            NextNearestY = thisY;
+            typet = MACROBLOCK;
+            break;
+        }
+    }
+
+    // Only update type if normal frame
+    if ( Frame == 1 )
+    {
+        *type = typet;
+        pbi->mbi.NearestMvIndex = nearestIndex;
+        pbi->mbi.NearestInterMVect.x = NearestX;
+        pbi->mbi.NearestInterMVect.y = NearestY;
+        pbi->mbi.NearInterMVect.x = NextNearestX;
+        pbi->mbi.NearInterMVect.y = NextNearestY;
+    }
+    else
+    {
+        pbi->mbi.NearestGMvIndex = nearestIndex;
+        pbi->mbi.NearestGoldMVect.x = NearestX;
+        pbi->mbi.NearestGoldMVect.y = NearestY;
+        pbi->mbi.NearGoldMVect.x = NextNearestX;
+        pbi->mbi.NearGoldMVect.y = NextNearestY;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c
new file mode 100644
index 00000000..12f4bd01
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c
@@ -0,0 +1,330 @@
+#include "type_aliases.h"
+
+
+// Table of 9*4*4*4 = 576 byte values. The initializer is a flat list (no inner
+// braces) laid out as 36 groups of four rows; each group fills one [4][4] slice
+// in declaration order.
+// NOTE(review): nothing in this part of the file shows how the table is indexed
+// or which code reads it; the file name suggests mode statistics - confirm.
+UINT8 Stats[9][4][4][4]=
+{
+
+ 32 , 56 , 78 ,161 ,
+ 105 ,129 ,182 ,241 ,
+ 78 ,132 ,189 ,212 ,
+ 112 ,169 ,203 ,226 ,
+
+
+ 100 ,122 ,178 ,217 ,
+ 200 ,175 ,239 ,247 ,
+ 183 ,153 ,239 ,237 ,
+ 201 ,192 ,242 ,244 ,
+
+
+ 75 ,127 ,181 ,205 ,
+ 183 ,178 ,238 ,249 ,
+ 192 ,226 ,243 ,241 ,
+ 190 ,205 ,239 ,244 ,
+
+
+ 98 ,150 ,195 ,224 ,
+ 219 ,189 ,243 ,244 ,
+ 177 ,215 ,240 ,248 ,
+ 190 ,207 ,241 ,247 ,
+
+
+ 19 , 26 , 14 , 33 ,
+ 113 ,121 , 97 , 87 ,
+ 11 , 14 , 8 , 14 ,
+ 21 , 14 , 14 , 20 ,
+
+
+ 100 , 88 ,112 , 73 ,
+ 188 ,169 ,158 ,140 ,
+ 70 , 81 , 46 , 58 ,
+ 100 , 82 , 49 , 55 ,
+
+
+ 11 , 20 , 9 , 21 ,
+ 76 , 91 , 68 , 74 ,
+ 6 , 6 , 5 , 11 ,
+ 15 , 18 , 10 , 14 ,
+
+
+ 17 , 24 , 16 , 16 ,
+ 87 , 68 , 64 , 61 ,
+ 9 , 8 , 9 , 15 ,
+ 17 , 23 , 10 , 18 ,
+
+
+ 186 ,157 ,180 ,162 ,
+ 184 ,208 ,206 ,189 ,
+ 176 ,128 ,203 ,179 ,
+ 184 ,196 ,192 ,176 ,
+
+
+ 164 ,149 ,155 ,141 ,
+ 133 ,130 ,156 ,116 ,
+ 145 ,154 ,175 ,144 ,
+ 124 ,150 ,149 ,126 ,
+
+
+ 194 ,113 ,173 ,160 ,
+ 191 ,208 ,205 ,164 ,
+ 210 ,179 ,197 ,174 ,
+ 175 ,154 ,190 ,182 ,
+
+
+ 144 ,111 ,183 ,165 ,
+ 123 ,118 ,186 ,144 ,
+ 189 ,128 ,168 ,141 ,
+ 99 ,164 ,174 ,145 ,
+
+
+ 196 ,160 ,197 ,102 ,
+ 156 ,125 ,173 , 83 ,
+ 219 ,208 ,226 ,137 ,
+ 189 ,148 ,191 ,100 ,
+
+
+ 173 ,122 ,146 , 76 ,
+ 111 ,128 ,124 , 81 ,
+ 177 ,176 ,203 ,116 ,
+ 118 ,103 ,145 , 82 ,
+
+
+ 223 ,201 ,228 ,129 ,
+ 181 ,137 ,199 , 99 ,
+ 236 ,227 ,236 ,159 ,
+ 200 ,177 ,204 ,121 ,
+
+
+ 184 ,157 ,194 ,103 ,
+ 128 ,121 ,135 , 81 ,
+ 204 ,177 ,207 ,121 ,
+ 158 ,127 ,173 , 88 ,
+
+
+ 81 , 46 , 70 , 84 ,
+ 135 ,107 , 81 , 73 ,
+ 128 ,128 ,113 , 94 ,
+ 109 ,128 , 84 , 81 ,
+
+
+ 122 ,128 , 62 ,111 ,
+ 184 ,171 ,145 ,131 ,
+ 172 ,174 ,151 ,180 ,
+ 160 ,114 ,131 , 91 ,
+
+
+ 111 ,128 , 81 , 84 ,
+ 157 ,147 ,127 ,133 ,
+ 113 ,148 ,107 ,135 ,
+ 140 ,140 ,104 ,139 ,
+
+
+ 112 ,128 , 92 , 86 ,
+ 146 ,128 ,143 ,137 ,
+ 110 ,128 , 83 ,133 ,
+ 128 ,136 ,144 ,120 ,
+
+
+ 108 ,117 ,109 ,108 ,
+ 101 ,107 ,112 ,105 ,
+ 71 , 67 , 49 , 82 ,
+ 66 , 86 , 54 , 68 ,
+
+
+ 117 , 93 ,122 ,111 ,
+ 78 ,106 ,152 ,113 ,
+ 49 , 64 , 54 , 96 ,
+ 67 , 62 , 68 , 62 ,
+
+
+ 59 , 59 , 43 , 85 ,
+ 70 , 60 , 72 , 72 ,
+ 55 , 65 , 62 , 92 ,
+ 93 , 98 , 92 , 87 ,
+
+
+ 54 , 65 , 51 , 64 ,
+ 56 , 31 , 56 , 54 ,
+ 137 ,144 ,147 ,161 ,
+ 110 ,113 ,128 ,134 ,
+
+
+ 39 , 46 , 29 , 61 ,
+ 76 , 86 , 98 , 81 ,
+ 47 , 53 , 39 , 63 ,
+ 61 , 69 , 45 , 92 ,
+
+
+ 104 , 89 , 76 , 63 ,
+ 97 ,128 , 88 ,108 ,
+ 64 , 56 , 34 , 82 ,
+ 135 , 93 , 82 , 98 ,
+
+
+ 43 , 51 , 36 , 56 ,
+ 71 , 64 , 50 , 79 ,
+ 40 , 32 , 33 , 58 ,
+ 73 , 75 , 53 , 85 ,
+
+
+ 59 , 64 , 47 , 71 ,
+ 123 , 49 , 61 , 96 ,
+ 70 , 58 , 47 , 78 ,
+ 100 ,105 , 56 ,102 ,
+
+
+ 18 ,128 ,128 , 16 ,
+ 175 ,128 ,162 ,128 ,
+ 53 ,128 , 40 ,128 ,
+ 49 ,128 , 41 , 1 ,
+
+
+ 122 ,128 ,128 ,128 ,
+ 181 ,189 ,193 ,100 ,
+ 162 ,128 ,166 ,133 ,
+ 110 ,128 , 89 ,128 ,
+
+
+ 23 ,128 , 12 ,128 ,
+ 171 ,149 ,182 ,135 ,
+ 36 , 56 , 47 , 16 ,
+ 32 ,128 , 39 , 26 ,
+
+
+ 128 ,128 , 37 ,128 ,
+ 106 ,128 ,131 ,134 ,
+ 128 ,128 , 19 , 18 ,
+ 128 ,128 , 12 , 9 ,
+
+
+ 128 ,128 ,128 ,128 ,
+ 9 ,128 , 52 ,128 ,
+ 128 ,128 ,128 ,128 ,
+ 128 ,128 ,128 ,128 ,
+
+
+ 128 ,128 ,128 ,128 ,
+ 52 , 57 , 68 ,128 ,
+ 13 ,128 , 7 ,128 ,
+ 128 ,128 ,128 ,128 ,
+
+
+ 128 ,128 ,128 ,128 ,
+ 11 ,128 , 12 , 25 ,
+ 128 ,128 , 1 ,128 ,
+ 128 ,128 ,128 ,128 ,
+
+
+ 128 ,128 ,128 ,128 ,
+ 128 ,128 , 45 ,128 ,
+ 128 ,128 ,128 ,128 ,
+ 128 ,128 ,128 ,128 ,
+
+};
+UINT32 MBBitCosts[4][4][4][10]=
+{
+
+ 11 , 524 , 351 , 293 , 324 , 523 , 644 , 458 , 815 , 815 , 19 , 394 , 275 , 286 , 286 , 511 , 575 , 351 , 639 , 639 , 32 , 462 , 266 , 200 , 236 , 431 , 571 , 371 , 635 , 635 , 96 , 319 , 131 , 189 , 249 , 322 , 449 , 194 , 513 , 513 ,
+ 48 , 265 , 258 , 246 , 287 , 252 , 360 , 294 , 296 , 482 , 62 , 262 , 178 , 237 , 288 , 233 , 330 , 333 , 394 , 394 , 116 , 261 , 189 , 176 , 236 , 155 , 362 , 322 , 261 , 540 , 227 , 195 , 137 , 235 , 235 , 159 , 310 , 170 , 374 , 374 ,
+ 31 , 485 , 311 , 162 , 267 , 509 , 548 , 433 , 671 , 671 , 62 , 418 , 254 , 110 , 236 , 472 , 536 , 347 , 600 , 600 , 129 , 459 , 258 , 53 , 223 , 439 , 499 , 382 , 746 , 746 , 163 , 379 , 121 , 119 , 197 , 345 , 399 , 199 , 653 , 653 ,
+ 51 , 492 , 237 , 145 , 250 , 404 , 458 , 285 , 573 , 573 , 82 , 408 , 181 , 135 , 244 , 318 , 498 , 225 , 562 , 562 , 140 , 440 , 184 , 74 , 202 , 369 , 391 , 304 , 693 , 693 , 201 , 330 , 114 , 139 , 267 , 336 , 381 , 130 , 618 , 618 ,
+
+
+ 38 , 219 , 270 , 303 , 311 , 285 , 392 , 287 , 456 , 456 , 53 , 206 , 209 , 317 , 317 , 264 , 328 , 280 , 392 , 392 , 106 , 161 , 196 , 198 , 238 , 193 , 377 , 292 , 441 , 441 , 198 , 189 , 119 , 210 , 236 , 214 , 369 , 170 , 433 , 433 ,
+ 155 , 115 , 211 , 314 , 302 , 203 , 250 , 298 , 214 , 305 , 101 , 136 , 243 , 279 , 279 , 213 , 285 , 243 , 349 , 349 , 268 , 131 , 206 , 245 , 277 , 148 , 291 , 229 , 210 , 293 , 363 , 107 , 201 , 242 , 335 , 177 , 267 , 150 , 350 , 350 ,
+ 109 , 225 , 200 , 116 , 254 , 313 , 311 , 276 , 430 , 430 , 81 , 233 , 249 , 144 , 261 , 281 , 345 , 239 , 409 , 409 , 283 , 244 , 199 , 64 , 223 , 233 , 376 , 278 , 310 , 560 , 268 , 215 , 122 , 143 , 230 , 262 , 279 , 179 , 374 , 374 ,
+ 144 , 178 , 183 , 161 , 254 , 289 , 294 , 177 , 379 , 379 , 105 , 219 , 198 , 228 , 218 , 245 , 309 , 177 , 373 , 373 , 250 , 212 , 138 , 100 , 240 , 316 , 299 , 214 , 392 , 392 , 291 , 196 , 115 , 156 , 315 , 278 , 382 , 130 , 446 , 446 ,
+
+
+ 27 , 503 , 341 , 166 , 290 , 510 , 622 , 472 , 686 , 686 , 61 , 355 , 242 , 115 , 258 , 468 , 532 , 335 , 596 , 596 , 118 , 439 , 262 , 55 , 235 , 472 , 490 , 388 , 824 , 824 , 149 , 333 , 128 , 125 , 206 , 359 , 474 , 194 , 538 , 538 ,
+ 93 , 256 , 234 , 149 , 237 , 233 , 302 , 247 , 392 , 392 , 89 , 272 , 180 , 178 , 254 , 217 , 362 , 243 , 426 , 426 , 283 , 296 , 183 , 86 , 186 , 206 , 336 , 266 , 261 , 517 , 363 , 220 , 120 , 187 , 236 , 196 , 294 , 156 , 278 , 488 ,
+ 147 , 496 , 297 , 47 , 193 , 476 , 516 , 388 , 667 , 667 , 215 , 444 , 231 , 46 , 162 , 454 , 453 , 374 , 643 , 643 , 309 , 477 , 261 , 30 , 166 , 497 , 501 , 407 , 784 , 784 , 275 , 410 , 125 , 96 , 153 , 424 , 419 , 200 , 746 , 746 ,
+ 125 , 448 , 198 , 90 , 162 , 400 , 483 , 299 , 547 , 547 , 157 , 357 , 181 , 104 , 148 , 411 , 418 , 221 , 482 , 482 , 275 , 443 , 185 , 65 , 134 , 448 , 430 , 282 , 773 , 773 , 283 , 366 , 109 , 119 , 181 , 430 , 384 , 152 , 765 , 765 ,
+
+
+ 41 , 467 , 241 , 154 , 271 , 494 , 566 , 324 , 630 , 630 , 75 , 318 , 179 , 129 , 284 , 475 , 539 , 245 , 603 , 603 , 140 , 369 , 178 , 72 , 212 , 398 , 417 , 304 , 993 , 993 , 192 , 339 , 100 , 130 , 247 , 373 , 506 , 166 , 570 , 570 ,
+ 165 , 190 , 178 , 147 , 280 , 241 , 259 , 170 , 463 , 463 , 136 , 209 , 164 , 160 , 273 , 229 , 329 , 187 , 393 , 393 , 275 , 237 , 135 , 108 , 234 , 249 , 287 , 220 , 314 , 415 , 384 , 190 , 103 , 178 , 269 , 261 , 365 , 134 , 429 , 429 ,
+ 115 , 472 , 218 , 124 , 114 , 485 , 549 , 278 , 613 , 613 , 176 , 358 , 170 , 132 , 103 , 430 , 494 , 230 , 558 , 558 , 291 , 451 , 185 , 111 , 73 , 464 , 446 , 288 , 827 , 827 , 332 , 342 , 103 , 172 , 131 , 406 , 419 , 152 , 656 , 656 ,
+ 128 , 300 , 165 , 146 , 162 , 403 , 446 , 188 , 510 , 510 , 161 , 361 , 135 , 162 , 158 , 340 , 404 , 164 , 468 , 468 , 275 , 397 , 154 , 110 , 108 , 390 , 500 , 209 , 564 , 564 , 320 , 313 , 106 , 169 , 185 , 362 , 420 , 112 , 747 , 747 ,
+
+};
+UINT32 NNMBBitCosts[4][4][4][10]=
+{
+
+ 2 , 572 , 404 , 0 , 0 , 557 , 648 , 594 , 871 , 871 , 4 , 466 , 332 , 0 , 0 , 474 , 757 , 559 , 821 , 821 , 6 , 512 , 293 , 0 , 0 , 461 , 613 , 531 , 677 , 677 , 49 , 338 , 124 , 0 , 0 , 252 , 405 , 285 , 775 , 775 ,
+ 35 , 231 , 289 , 0 , 0 , 213 , 346 , 413 , 283 , 459 , 46 , 176 , 268 , 0 , 0 , 248 , 312 , 268 , 376 , 376 , 65 , 167 , 195 , 0 , 0 , 166 , 342 , 465 , 406 , 406 , 184 , 100 , 131 , 0 , 0 , 162 , 319 , 296 , 383 , 383 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 16 , 297 , 297 , 0 , 0 , 361 , 425 , 297 , 489 , 489 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+
+
+ 33 , 218 , 241 , 0 , 0 , 244 , 362 , 421 , 335 , 483 , 37 , 171 , 268 , 0 , 0 , 259 , 394 , 323 , 458 , 458 , 125 , 156 , 144 , 0 , 0 , 119 , 350 , 357 , 414 , 414 , 215 , 156 , 84 , 0 , 0 , 144 , 361 , 245 , 425 , 425 ,
+ 231 , 110 , 334 , 0 , 0 , 146 , 213 , 334 , 152 , 272 , 111 , 121 , 289 , 0 , 0 , 107 , 394 , 289 , 458 , 458 , 245 , 76 , 302 , 0 , 0 , 115 , 416 , 302 , 239 , 308 , 299 , 78 , 219 , 0 , 0 , 129 , 281 , 219 , 345 , 345 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+
+
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+
+
+ 14 , 312 , 312 , 0 , 0 , 376 , 440 , 312 , 504 , 504 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+ 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 , 64 , 192 , 192 , 0 , 0 , 256 , 320 , 192 , 384 , 384 ,
+
+};
+UINT32 NN2MBBitCosts[4][4][4][10]=
+{
+
+ 7 , 556 , 377 , 291 , 0 , 551 , 569 , 523 , 813 , 813 , 12 , 411 , 317 , 243 , 0 , 562 , 626 , 497 , 690 , 690 , 22 , 478 , 280 , 178 , 0 , 490 , 579 , 447 , 643 , 643 , 68 , 325 , 129 , 183 , 0 , 298 , 445 , 275 , 509 , 509 ,
+ 47 , 249 , 250 , 221 , 0 , 272 , 346 , 415 , 231 , 501 , 48 , 198 , 293 , 229 , 0 , 262 , 326 , 293 , 390 , 390 , 90 , 210 , 237 , 193 , 0 , 130 , 349 , 338 , 413 , 413 , 195 , 193 , 134 , 225 , 0 , 139 , 304 , 185 , 368 , 368 ,
+ 32 , 542 , 349 , 128 , 0 , 493 , 583 , 539 , 647 , 647 , 107 , 462 , 363 , 43 , 0 , 526 , 590 , 363 , 654 , 654 , 122 , 520 , 289 , 37 , 0 , 461 , 576 , 508 , 640 , 640 , 113 , 364 , 143 , 87 , 0 , 428 , 492 , 250 , 556 , 556 ,
+ 54 , 431 , 231 , 106 , 0 , 495 , 559 , 364 , 623 , 623 , 81 , 477 , 159 , 103 , 0 , 541 , 605 , 255 , 669 , 669 , 112 , 417 , 195 , 63 , 0 , 481 , 545 , 278 , 609 , 609 , 132 , 372 , 120 , 104 , 0 , 436 , 500 , 186 , 564 , 564 ,
+
+
+ 46 , 228 , 230 , 218 , 0 , 247 , 420 , 410 , 318 , 508 , 54 , 161 , 273 , 250 , 0 , 285 , 349 , 273 , 413 , 413 , 121 , 203 , 199 , 180 , 0 , 129 , 294 , 323 , 358 , 358 , 215 , 187 , 107 , 172 , 0 , 163 , 397 , 208 , 461 , 461 ,
+ 129 , 126 , 283 , 341 , 0 , 201 , 238 , 283 , 170 , 329 , 76 , 112 , 316 , 252 , 0 , 255 , 319 , 316 , 383 , 383 , 235 , 125 , 192 , 255 , 0 , 181 , 281 , 232 , 160 , 346 , 275 , 98 , 184 , 214 , 0 , 183 , 264 , 203 , 328 , 328 ,
+ 122 , 208 , 319 , 75 , 0 , 262 , 326 , 319 , 390 , 390 , 60 , 196 , 260 , 196 , 0 , 260 , 324 , 260 , 388 , 388 , 227 , 178 , 253 , 56 , 0 , 285 , 349 , 253 , 413 , 413 , 211 , 168 , 169 , 126 , 0 , 232 , 296 , 169 , 360 , 360 ,
+ 116 , 184 , 204 , 140 , 0 , 248 , 312 , 204 , 376 , 376 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 211 , 373 , 132 , 89 , 0 , 437 , 501 , 132 , 565 , 565 , 283 , 205 , 162 , 86 , 0 , 269 , 333 , 162 , 397 , 397 ,
+
+
+ 27 , 531 , 343 , 144 , 0 , 495 , 525 , 498 , 779 , 779 , 67 , 445 , 247 , 80 , 0 , 509 , 573 , 433 , 637 , 637 , 163 , 430 , 236 , 29 , 0 , 449 , 630 , 492 , 694 , 694 , 153 , 393 , 130 , 72 , 0 , 313 , 503 , 291 , 567 , 567 ,
+ 116 , 299 , 257 , 81 , 0 , 247 , 311 , 257 , 375 , 375 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 240 , 246 , 153 , 70 , 0 , 204 , 337 , 318 , 401 , 401 , 140 , 160 , 189 , 166 , 0 , 224 , 288 , 189 , 352 , 352 ,
+ 184 , 500 , 324 , 19 , 0 , 499 , 563 , 525 , 627 , 627 , 198 , 588 , 303 , 17 , 0 , 652 , 716 , 458 , 780 , 780 , 268 , 496 , 315 , 10 , 0 , 535 , 591 , 513 , 655 , 655 , 198 , 375 , 143 , 51 , 0 , 439 , 503 , 267 , 567 , 567 ,
+ 140 , 434 , 254 , 42 , 0 , 498 , 562 , 254 , 626 , 626 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 181 , 489 , 240 , 33 , 0 , 553 , 617 , 240 , 681 , 681 , 165 , 356 , 99 , 98 , 0 , 420 , 484 , 212 , 548 , 548 ,
+
+
+ 51 , 427 , 230 , 114 , 0 , 394 , 536 , 368 , 600 , 600 , 93 , 362 , 182 , 82 , 0 , 426 , 490 , 289 , 554 , 554 , 137 , 408 , 183 , 49 , 0 , 472 , 536 , 363 , 600 , 600 , 163 , 302 , 95 , 110 , 0 , 329 , 434 , 226 , 498 , 498 ,
+ 134 , 149 , 221 , 157 , 0 , 213 , 277 , 221 , 341 , 341 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 219 , 207 , 175 , 86 , 0 , 271 , 335 , 175 , 399 , 399 , 227 , 212 , 141 , 118 , 0 , 276 , 340 , 141 , 404 , 404 ,
+ 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 ,
+ 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 , 64 , 192 , 256 , 192 , 0 , 256 , 320 , 256 , 384 , 384 ,
+
+};
+UINT32 BBitCosts[4][4][4][10]=
+{
+
+ 144 , 0 , 25 , 418 , 418 , 0 , 0 , 0 , 0 , 0 , 121 , 0 , 38 , 308 , 308 , 0 , 0 , 0 , 0 , 0 , 144 , 0 , 32 , 294 , 294 , 0 , 0 , 0 , 0 , 0 , 151 , 0 , 26 , 300 , 387 , 0 , 0 , 0 , 0 , 0 ,
+ 142 , 0 , 34 , 278 , 278 , 0 , 0 , 0 , 0 , 0 , 97 , 0 , 104 , 168 , 168 , 0 , 0 , 0 , 0 , 0 , 129 , 0 , 43 , 253 , 253 , 0 , 0 , 0 , 0 , 0 , 144 , 0 , 34 , 278 , 278 , 0 , 0 , 0 , 0 , 0 ,
+ 186 , 0 , 39 , 162 , 302 , 0 , 0 , 0 , 0 , 0 , 179 , 0 , 47 , 190 , 190 , 0 , 0 , 0 , 0 , 0 , 195 , 0 , 35 , 180 , 272 , 0 , 0 , 0 , 0 , 0 , 195 , 0 , 31 , 196 , 288 , 0 , 0 , 0 , 0 , 0 ,
+ 184 , 0 , 21 , 283 , 375 , 0 , 0 , 0 , 0 , 0 , 195 , 0 , 21 , 268 , 323 , 0 , 0 , 0 , 0 , 0 , 181 , 0 , 29 , 226 , 295 , 0 , 0 , 0 , 0 , 0 , 201 , 0 , 23 , 241 , 303 , 0 , 0 , 0 , 0 , 0 ,
+
+
+ 105 , 0 , 57 , 247 , 247 , 0 , 0 , 0 , 0 , 0 , 131 , 0 , 30 , 373 , 373 , 0 , 0 , 0 , 0 , 0 , 94 , 0 , 55 , 284 , 284 , 0 , 0 , 0 , 0 , 0 , 161 , 0 , 26 , 313 , 313 , 0 , 0 , 0 , 0 , 0 ,
+ 121 , 0 , 34 , 368 , 368 , 0 , 0 , 0 , 0 , 0 , 64 , 0 , 128 , 192 , 192 , 0 , 0 , 0 , 0 , 0 , 144 , 0 , 36 , 265 , 265 , 0 , 0 , 0 , 0 , 0 , 179 , 0 , 31 , 245 , 245 , 0 , 0 , 0 , 0 , 0 ,
+ 227 , 0 , 31 , 212 , 212 , 0 , 0 , 0 , 0 , 0 , 179 , 0 , 56 , 171 , 171 , 0 , 0 , 0 , 0 , 0 , 195 , 0 , 33 , 221 , 221 , 0 , 0 , 0 , 0 , 0 , 176 , 0 , 39 , 215 , 215 , 0 , 0 , 0 , 0 , 0 ,
+ 235 , 0 , 18 , 280 , 280 , 0 , 0 , 0 , 0 , 0 , 176 , 0 , 27 , 274 , 274 , 0 , 0 , 0 , 0 , 0 , 186 , 0 , 27 , 235 , 298 , 0 , 0 , 0 , 0 , 0 , 227 , 0 , 21 , 239 , 290 , 0 , 0 , 0 , 0 , 0 ,
+
+
+ 192 , 0 , 30 , 206 , 285 , 0 , 0 , 0 , 0 , 0 , 134 , 0 , 52 , 217 , 217 , 0 , 0 , 0 , 0 , 0 , 157 , 0 , 34 , 238 , 282 , 0 , 0 , 0 , 0 , 0 , 181 , 0 , 31 , 225 , 260 , 0 , 0 , 0 , 0 , 0 ,
+ 176 , 0 , 38 , 218 , 218 , 0 , 0 , 0 , 0 , 0 , 134 , 0 , 89 , 153 , 153 , 0 , 0 , 0 , 0 , 0 , 222 , 0 , 48 , 133 , 242 , 0 , 0 , 0 , 0 , 0 , 256 , 0 , 34 , 179 , 214 , 0 , 0 , 0 , 0 , 0 ,
+ 245 , 0 , 39 , 155 , 233 , 0 , 0 , 0 , 0 , 0 , 320 , 0 , 44 , 169 , 157 , 0 , 0 , 0 , 0 , 0 , 250 , 0 , 39 , 166 , 201 , 0 , 0 , 0 , 0 , 0 , 291 , 0 , 39 , 165 , 187 , 0 , 0 , 0 , 0 , 0 ,
+ 186 , 0 , 31 , 204 , 292 , 0 , 0 , 0 , 0 , 0 , 250 , 0 , 29 , 210 , 210 , 0 , 0 , 0 , 0 , 0 , 283 , 0 , 25 , 201 , 226 , 0 , 0 , 0 , 0 , 0 , 275 , 0 , 29 , 196 , 215 , 0 , 0 , 0 , 0 , 0 ,
+
+
+ 171 , 0 , 24 , 288 , 331 , 0 , 0 , 0 , 0 , 0 , 151 , 0 , 36 , 249 , 257 , 0 , 0 , 0 , 0 , 0 , 181 , 0 , 31 , 238 , 248 , 0 , 0 , 0 , 0 , 0 , 186 , 0 , 25 , 262 , 284 , 0 , 0 , 0 , 0 , 0 ,
+ 167 , 0 , 35 , 244 , 234 , 0 , 0 , 0 , 0 , 0 , 176 , 0 , 35 , 232 , 232 , 0 , 0 , 0 , 0 , 0 , 192 , 0 , 43 , 169 , 224 , 0 , 0 , 0 , 0 , 0 , 219 , 0 , 27 , 216 , 256 , 0 , 0 , 0 , 0 , 0 ,
+ 219 , 0 , 32 , 170 , 292 , 0 , 0 , 0 , 0 , 0 , 227 , 0 , 42 , 151 , 226 , 0 , 0 , 0 , 0 , 0 , 250 , 0 , 35 , 162 , 230 , 0 , 0 , 0 , 0 , 0 , 262 , 0 , 30 , 184 , 235 , 0 , 0 , 0 , 0 , 0 ,
+ 204 , 0 , 23 , 254 , 290 , 0 , 0 , 0 , 0 , 0 , 201 , 0 , 28 , 222 , 268 , 0 , 0 , 0 , 0 , 0 , 235 , 0 , 28 , 206 , 250 , 0 , 0 , 0 , 0 , 0 , 256 , 0 , 22 , 224 , 256 , 0 , 0 , 0 , 0 , 0 ,
+
+};
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c
new file mode 100644
index 00000000..95b0a650
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c
@@ -0,0 +1,248 @@
+/****************************************************************************
+*
+* Module Title : PB_Globals.c
+*
+* Description : Video CODEC Demo: playback dll global declarations
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+static UINT32 VP6_DCQuantScaleP[Q_TABLE_SIZE]; // Filled by VP6_VPInitLibrary with VP6_DcQuant[i]/2 + 2, then passed to InitPostProcessing
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern unsigned long VP6_GetProcessorFrequency(); // Result is stored in CPUFrequency by VP6_VPInitLibrary
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+unsigned int CPUFrequency; // Processor frequency; assigned in VP6_VPInitLibrary unless __POWERPC__ is defined
+
+// Truth table, one entry per mode, indicating if the given mode uses motion estimation ("MC" in the name suggests motion compensation -- TODO confirm wording)
+BOOL VP6_ModeUsesMC[MAX_MODES] = { FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE };
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DeleteTmpBuffers
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocate buffers used during decoding.
+ *
+ * SPECIAL NOTES : ReconDataBuffer[1..5] point into the ReconDataBuffer[0]
+ * allocation, so all six entries are cleared together with it.
+ ****************************************************************************/
+void VP6_DeleteTmpBuffers ( PB_INSTANCE *pbi )
+{
+    int i;
+    if ( pbi->ReconDataBuffer[0] )
+        duck_free(pbi->ReconDataBuffer[0]);
+    if ( pbi->LoopFilteredBlock )
+        duck_free(pbi->LoopFilteredBlock);
+    if ( pbi->TmpDataBuffer )
+        duck_free(pbi->TmpDataBuffer);
+    if ( pbi->TmpReconBuffer )
+        duck_free(pbi->TmpReconBuffer);
+    if ( pbi->ScaleBuffer )
+        duck_free(pbi->ScaleBuffer);
+
+    for ( i = 0; i < 6; i++ ) pbi->ReconDataBuffer[i] = 0; // [1..5] alias [0]; never leave them dangling
+    pbi->LoopFilteredBlock = 0;
+    pbi->TmpDataBuffer = 0;
+    pbi->TmpReconBuffer = 0;
+    pbi->ScaleBuffer = 0;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_AllocateTmpBuffers
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : BOOL: TRUE on success, FALSE if any allocation failed.
+ *
+ * FUNCTION : Allocates buffers required during decoding.
+ *
+ * SPECIAL NOTES : Every buffer is requested through duck_memalign(32, ...).
+ * On failure VP6_DeleteTmpBuffers is called before returning.
+ *
+ ****************************************************************************/
+BOOL VP6_AllocateTmpBuffers ( PB_INSTANCE *pbi )
+{
+    int blk;
+
+    // Release anything still held from a previous allocation.
+    VP6_DeleteTmpBuffers ( pbi );
+
+    pbi->ReconDataBuffer[0] = (INT16 *)duck_memalign(32, 6*64*sizeof(INT16), DMEM_GENERAL);
+    if ( !pbi->ReconDataBuffer[0] ) goto fail;
+    for ( blk = 1; blk < 6; blk++ ) // six consecutive 64-entry blocks in one allocation
+        pbi->ReconDataBuffer[blk] = pbi->ReconDataBuffer[blk-1] + 64;
+
+    pbi->TmpDataBuffer = (INT16 *)duck_memalign(32, 64 * sizeof(INT16), DMEM_GENERAL);
+    if ( !pbi->TmpDataBuffer ) goto fail;
+    pbi->LoopFilteredBlock = (UINT8 *)duck_memalign(32, 256 * sizeof(UINT8), DMEM_GENERAL);
+    if ( !pbi->LoopFilteredBlock ) goto fail;
+    pbi->TmpReconBuffer = (INT16 *)duck_memalign(32, 64 * sizeof(INT16), DMEM_GENERAL);
+    if ( !pbi->TmpReconBuffer ) goto fail;
+
+    return TRUE;
+
+fail:
+    VP6_DeleteTmpBuffers(pbi);
+    return FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DeletePBInstance
+ *
+ * INPUTS : PB_INSTANCE **pbi : Pointer to the pointer to the
+ * decoder instance.
+ *
+ * OUTPUTS : PB_INSTANCE **pbi : Pointer to the pointer to the
+ * decoder instance. Set to 0 on exit.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocates the decoder instance data structure.
+ *
+ * SPECIAL NOTES : A NULL pbi or a NULL *pbi is a no-op.
+ *
+ ****************************************************************************/
+void VP6_DeletePBInstance ( PB_INSTANCE **pbi )
+{
+    // Nothing to tear down; this also keeps NULL away from duck_free.
+    if ( !pbi || !*pbi )
+        return;
+
+    // Release everything the instance owns before the instance itself.
+    VP6_DeleteTmpBuffers(*pbi);
+    VP6_DeleteQuantizer(&(*pbi)->quantizer);
+    DeletePostProcInstance(&(*pbi)->postproc);
+    // Deallocate and reset the caller's pointer to NULL.
+    duck_free ( *pbi );
+    *pbi = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_CreatePBInstance
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : PB_INSTANCE *: New decoder instance, or 0 if allocation failed.
+ *
+ * FUNCTION : Allocates a zeroed decoder instance and sets its defaults.
+ *
+ * SPECIAL NOTES : Release the result with VP6_DeletePBInstance.
+ *
+ ****************************************************************************/
+PB_INSTANCE *VP6_CreatePBInstance ( void )
+{
+    CONFIG_TYPE DefaultConfig = { 0,0,0,0,8,8,0,0,0,0,0,0,0,0 };
+    int InstanceBytes = sizeof(PB_INSTANCE);
+    PB_INSTANCE *NewPbi = (PB_INSTANCE *) duck_malloc ( InstanceBytes, DMEM_GENERAL );
+
+    if ( NewPbi )
+    {
+        // Zero the entire instance, then install the default configuration.
+        memset ( NewPbi, 0, InstanceBytes );
+        memcpy ( &NewPbi->Configuration, &DefaultConfig, sizeof(CONFIG_TYPE) );
+
+        if ( VP6_AllocateTmpBuffers(NewPbi) )
+        {
+            NewPbi->CPUFree = 70;
+            NewPbi->idct = idct;
+
+            // Entropy probability tables start out cleared.
+            memset( NewPbi->DcProbs, 0, sizeof(NewPbi->DcProbs) );
+            memset( NewPbi->AcProbs, 0, sizeof(NewPbi->AcProbs) );
+        }
+        else
+        {
+            duck_free(NewPbi);
+            NewPbi = 0;
+        }
+    }
+    return NewPbi;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_VPInitLibrary
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Fully initializes the playback library.
+ *
+ * SPECIAL NOTES : Writes the globals CPUFrequency and VP6_DCQuantScaleP.
+ *
+ ****************************************************************************/
+void VP6_VPInitLibrary(void)
+{
+    int q = 0;
+
+#if !defined(__POWERPC__)
+    CPUFrequency = VP6_GetProcessorFrequency();
+#endif
+
+
+    VP6_DMachineSpecificConfig();
+
+    // Build the per-Q-index scale table handed to the post-processor:
+    // half of the DC quantizer value, plus 2.
+    while ( q < Q_TABLE_SIZE )
+    {
+        VP6_DCQuantScaleP[q] = (INT32)(VP6_DcQuant[q]/2 + 2);
+        q++;
+    }
+
+    // The same table is passed for each of the three table arguments.
+    InitPostProcessing ( VP6_DCQuantScaleP, VP6_DCQuantScaleP, VP6_DCQuantScaleP,
+                         CURRENT_DECODE_VERSION );
+
+    InitVPUtil();
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_VPDeInitLibrary
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-initializes the playback library.
+ *
+ * SPECIAL NOTES : Currently nothing to be done; the empty body is kept as the
+ * counterpart of VP6_VPInitLibrary.
+ ****************************************************************************/
+void VP6_VPDeInitLibrary(void)
+{ // intentionally empty
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c
new file mode 100644
index 00000000..1916c69f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c
@@ -0,0 +1,769 @@
+/****************************************************************************
+*
+* Module Title : Quantise
+*
+* Description : Quantisation and dequantisation of an 8x8 DCT block.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "quantize.h"
+#include "duck_mem.h"
+#include <stddef.h>
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+#define MIN16 ((1<<16)-1) // Evaluates to 65535 (2^16 - 1) despite the "MIN" in the name; not referenced in the code visible here
+
+// Scale factors used to improve precision of DCT/IDCT
+#define IDCT_SCALE_FACTOR 2 // Shift left bits to improve IDCT precision; applied to every dequant coefficient in VP6_init_dequantizer
+
+// AC Quantizer Tables
+static const UINT32 VP6_QThreshTable[Q_TABLE_SIZE] = // Indexed by FrameQIndex; read by VP6_InitQTables and, for dequant_coeffs[0] AC entries, by VP6_init_dequantizer
+{ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1
+};
+
+static const UINT32 VP6_UvQThreshTable[Q_TABLE_SIZE] = // Read by VP6_init_dequantizer for dequant_coeffs[1] AC entries; values are identical to VP6_QThreshTable
+{ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1
+};
+
+// AC Zero Bin and Rounding Tables (include fdct normalisation)
+static const UINT32 VP6_ZBinTable[Q_TABLE_SIZE] = // NOTE(review): no reader in the code visible here -- presumably used by VP6_init_quantizer; confirm
+{
+ 330,314,298,284,264,246,228,213,
+ 201,190,178,167,156,153,149,146,
+ 144,141,138,135,132,130,127,124,
+ 121,115,110,104, 99, 96, 94, 90,
+ 85, 82, 79, 76, 74, 71, 69, 66,
+ 63, 61, 58, 55, 53, 50, 47, 45,
+ 43, 40, 38, 36, 33, 31, 28, 24,
+ 21, 18, 16, 13, 10, 7, 4, 2
+};
+
+static const UINT32 VP6_UvZBinTable[Q_TABLE_SIZE] = // Values are identical to VP6_ZBinTable; no reader in the code visible here
+{
+ 330,314,298,284,264,246,228,213,
+ 201,190,178,167,156,153,149,146,
+ 144,141,138,135,132,130,127,124,
+ 121,115,110,104, 99, 96, 94, 90,
+ 85, 82, 79, 76, 74, 71, 69, 66,
+ 63, 61, 58, 55, 53, 50, 47, 45,
+ 43, 40, 38, 36, 33, 31, 28, 24,
+ 21, 18, 16, 13, 10, 7, 4, 2
+};
+
+static const UINT32 VP6_RTable[Q_TABLE_SIZE] = // NOTE(review): no reader in the code visible here -- presumably the AC rounding values used by VP6_init_quantizer; confirm
+{
+ 48, 56, 64, 70, 78, 82, 86, 88,
+ 91, 92, 94, 94, 99,103,102,100,
+ 99, 97, 95, 93, 91, 89, 87, 85,
+ 83, 79, 77, 73, 71, 69, 67, 65,
+ 64, 62, 60, 58, 56, 54, 52, 50,
+ 48, 46, 44, 42, 40, 38, 36, 34,
+ 32, 30, 28, 26, 24, 22, 20, 18,
+ 16, 14, 12, 10, 8, 6, 4, 2
+};
+
+static const UINT32 VP6_UvRTable[Q_TABLE_SIZE] = // Values are identical to VP6_RTable; no reader in the code visible here
+{
+ 48, 56, 64, 70, 78, 82, 86, 88,
+ 91, 92, 94, 94, 99,103,102,100,
+ 99, 97, 95, 93, 91, 89, 87, 85,
+ 83, 79, 77, 73, 71, 69, 67, 65,
+ 64, 62, 60, 58, 56, 54, 52, 50,
+ 48, 46, 44, 42, 40, 38, 36, 34,
+ 32, 30, 28, 26, 24, 22, 20, 18,
+ 16, 14, 12, 10, 8, 6, 4, 2
+};
+
+// DC Quantizer tables
+const Q_LIST_ENTRY VP6_DcQuant[ Q_TABLE_SIZE ] = // Not static: besides VP6_init_dequantizer/VP6_init_quantizer it is read by VP6_VPInitLibrary in pb_globals.c
+{
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2
+};
+
+static const Q_LIST_ENTRY VP6_UvDcQuant[ Q_TABLE_SIZE ] = // Read by VP6_init_dequantizer for dequant_coeffs[1][0]; values are identical to VP6_DcQuant
+{
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2
+};
+
+// DC Zero Bin and Rounding Tables (include fdct normalisation)
+static const UINT32 VP6_DcZBinTable[Q_TABLE_SIZE] = // Indexed by FrameQIndex; VP6_init_quantizer copies the selected entry into qi->ZeroBinSize[0][0]
+{
+ 170,162,152,150,140,130,125,121,
+ 121,118,113,111,110,108,108,106,
+ 105,96, 93, 87, 86, 83, 83, 83,
+ 83, 78, 78, 78, 66, 66, 63, 63,
+ 61, 61, 58, 58, 56, 56, 46, 46,
+ 46, 46, 43, 43, 41, 38, 38, 38,
+ 38, 38, 35, 24, 24, 24, 23, 23,
+ 20, 19, 16, 13, 6, 6, 4, 4
+};
+
+static const UINT32 VP6_UvDcZBinTable[Q_TABLE_SIZE] = // Values are identical to VP6_DcZBinTable; no reader in the code visible here
+{
+ 170,162,152,150,140,130,125,121,
+ 121,118,113,111,110,108,108,106,
+ 105,96, 93, 87, 86, 83, 83, 83,
+ 83, 78, 78, 78, 66, 66, 63, 63,
+ 61, 61, 58, 58, 56, 56, 46, 46,
+ 46, 46, 43, 43, 41, 38, 38, 38,
+ 38, 38, 35, 24, 24, 24, 23, 23,
+ 20, 19, 16, 13, 6, 6, 4, 4
+};
+
+static const UINT32 VP6_DcRTable[Q_TABLE_SIZE] = // Indexed by FrameQIndex; VP6_init_quantizer copies the selected entry into qi->QuantRound[0][0]
+{
+ 20, 28, 38, 40, 44, 46, 50, 50,
+ 51, 57, 59, 61, 62, 64, 66, 67,
+ 67, 62, 63, 64, 64, 62, 62, 62,
+ 62, 62, 62, 62, 54, 54, 52, 52,
+ 50, 50, 48, 48, 46, 46, 38, 38,
+ 38, 38, 36, 36, 34, 32, 32, 32,
+ 32, 32, 30, 22, 22, 22, 20, 20,
+ 18, 16, 14, 10, 6, 6, 4, 4
+};
+
+static const UINT32 VP6_UvDcRTable[Q_TABLE_SIZE] = // NOTE(review): differs from VP6_DcRTable only at index 1 (30 here, 28 there) -- confirm this is intentional
+{
+ 20, 30, 38, 40, 44, 46, 50, 50,
+ 51, 57, 59, 61, 62, 64, 66, 67,
+ 67, 62, 63, 64, 64, 62, 62, 62,
+ 62, 62, 62, 62, 54, 54, 52, 52,
+ 50, 50, 48, 48, 46, 46, 38, 38,
+ 38, 38, 36, 36, 34, 32, 32, 32,
+ 32, 32, 30, 22, 22, 22, 20, 20,
+ 18, 16, 14, 10, 6, 6, 4, 4
+};
+
+
+// Correction factors for ZBin size.based upon zero run length leading up to the current coef
+// The factor is A % of the bin width to be added to the existing zero bin.
+static const INT32 VP6_ZlrZbinCorrection[Q_TABLE_SIZE] = // NOTE(review): dimensioned by Q_TABLE_SIZE although the comment above describes indexing by zero run length -- confirm
+{
+ -8, 0, 5, 10, 10, 10, 10, 10,
+ 15, 15, 15, 15, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20,
+ 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25,
+ 30, 30, 30, 30, 30, 30, 30, 30,
+};
+
+
+/****************************************************************************
+ *
+ * Inverse fast DCT index:
+ *
+ * This contains the offsets needed to convert zigzag order into x, y order
+ * for decoding. It is generated from the input zigzag index at run time.
+ *
+ * For maximum speed during both quantisation and dequantisation we maintain
+ * separate quantisation and zigzag tables for each operation.
+ *
+ * qi->quant_index : zigzag index used during quantisation
+ * dequant_index : zigzag index used during dequantisation
+ *
+ * qi->quant_index is the inverse of dequant_index and is calculated during
+ * initialisation.
+ *
+ ****************************************************************************/
+static const UINT32 dequant_index[64] = // Entry i is the x,y-order offset of zigzag position i; inverted into qi->quant_index by VP6_BuildQuantIndex_Generic
+{ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static const UINT32 transIndexC[64] = // Identity mapping 0..63; installed as qi->transIndex by VP6_BuildQuantIndex_Generic
+{
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63
+};
+
+static const UINT32 quant_indexC[64] = // Appears to be the precomputed inverse of dequant_index (spot-checked, not exhaustively); no reader in the code visible here
+{
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+void (*VP6_BuildQuantIndex)( QUANTIZER * qi); // Function pointer defined here (no extern); called by VP6_UpdateQ. Not assigned in the code visible here -- presumably set during machine-specific configuration
+void (*VP6_quantize)( QUANTIZER *qi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp ); // Function pointer defined here (no extern); neither assigned nor called in the code visible here
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+const UINT8 VP6_QTableSelect[6] = { 0,0,0,0,1,1 }; // Controls selection of Q table, rounding, zero bin etc. for Y, U & V blocks: first four entries select set 0, last two select set 1
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_InitQTables
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * UINT8 Vp3VersionNo : Decoder version number (NOT USED).
+ *
+ * OUTPUTS : qi->QThreshTable receives a copy of VP6_QThreshTable.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initialises Q table.
+ *
+ * SPECIAL NOTES : The copy length is sizeof(qi->QThreshTable); this assumes the
+ * destination is no larger than VP6_QThreshTable -- TODO confirm.
+ ****************************************************************************/
+void VP6_InitQTables ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+ memcpy ( qi->QThreshTable, VP6_QThreshTable, sizeof(qi->QThreshTable) );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_BuildQuantIndex_Generic
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ * OUTPUTS : qi->transIndex and qi->quant_index are written.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Builds quant_index as the inverse of dequant_index.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_Generic ( QUANTIZER *qi )
+{
+    INT32 zz;
+
+    // transIndexC is the identity mapping 0..63.
+    qi->transIndex = (UINT32 *)transIndexC;
+
+    // quant_index[x,y-order offset] = zigzag position (inverse of dequant_index).
+    for ( zz=0; zz<BLOCK_SIZE; zz++ )
+    {
+        qi->quant_index[dequant_index[zz]] = zz;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_init_dequantizer
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * UINT8 Vp3VersionNo : Decoder version number (NOT USED)
+ *
+ * OUTPUTS : qi->dequant_coeffs[0] (Y) and [1] (UV) are filled in.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Sets up the dequantizer coefficients for qi->FrameQIndex.
+ *
+ * SPECIAL NOTES : qi->quant_index must already have been built.
+ *
+ ****************************************************************************/
+void VP6_init_dequantizer ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    int zz, pos;
+
+    // Within one plane every AC coefficient gets the same step for the
+    // current Q index, so both steps are looked up once, already scaled
+    // by IDCT_SCALE_FACTOR.
+    const UINT32 YAcStep = VP6_QThreshTable[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+    const UINT32 UvAcStep = VP6_UvQThreshTable[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+
+    // *************** AC (Y and UV) ******************/
+
+    // Store the step at each position named by quant_index[1..63]:
+    // row 0 of dequant_coeffs is Y, row 1 is UV.
+    for ( zz=1; zz<64; zz++ )
+    {
+        pos = qi->quant_index[zz];
+        qi->dequant_coeffs[0][pos] = YAcStep;
+        qi->dequant_coeffs[1][pos] = UvAcStep;
+    }
+
+    // *************** DC (Y and UV) ******************/
+
+    // Entry 0 of each row is written after the AC pass so that it always
+    // ends up holding the DC step.
+    qi->dequant_coeffs[0][0] = VP6_DcQuant[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+    qi->dequant_coeffs[1][0] = VP6_UvDcQuant[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_UpdateQ
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * UINT8 Vp3VersionNo : Decoder version number.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates the quantisation tables for a new Q.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_UpdateQ ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    // Nothing to rebuild unless the frame Q index differs from the last one seen.
+    if ( qi->FrameQIndex != qi->LastFrameQIndex )
+    {
+        // Remember the Q index the tables are being built for.
+        qi->LastFrameQIndex = qi->FrameQIndex;
+
+        // Invert the dequant index into the quant index, then rebuild
+        // the dequantizer tables.
+        VP6_BuildQuantIndex(qi);
+        VP6_init_dequantizer ( qi, Vp3VersionNo );
+    }
+}
+
+/********************* COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_init_quantizer
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * UINT8 Vp3VersionNo : Decoder version number (NOT USED).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates the quantisation tables for a new Q.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+#define SHIFT16 (1<<16)
+void VP6_init_quantizer ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    int n;
+    double reciprocal;
+    INT32 acCoeff, acRound, acZbin;
+    INT32 yStep4, uvStep4;
+
+    // Every quantizer step below is multiplied by 4: the normalisation
+    // factor for the forward DCT. Multipliers are stored as reciprocals
+    // scaled by SHIFT16 (1<<16), rounded to nearest.
+
+    // ******************* Y *********************
+
+    // DC multiplier, rounding and zero bin
+    reciprocal = 1.0 / (double)( VP6_DcQuant[qi->FrameQIndex] * 4 );
+    qi->QuantCoeffs[0][0] = (INT32) (0.5 + SHIFT16 * reciprocal);
+    qi->QuantRound[0][0] = VP6_DcRTable[qi->FrameQIndex];
+    qi->ZeroBinSize[0][0] = VP6_DcZBinTable[qi->FrameQIndex];
+
+    // AC: all 63 entries are identical, so derive the values once.
+    reciprocal = 1.0 / (double)( VP6_QThreshTable[qi->FrameQIndex] * 4 );
+    acCoeff = (INT32) (0.5 + SHIFT16 * reciprocal);
+    acRound = VP6_RTable[qi->FrameQIndex];
+    acZbin = VP6_ZBinTable[qi->FrameQIndex];
+    for ( n = 1; n < 64; n++ )
+    {
+        qi->QuantCoeffs[0][n] = acCoeff;
+        qi->QuantRound[0][n] = acRound;
+        qi->ZeroBinSize[0][n] = acZbin;
+    }
+
+    // ******************* UV *********************
+
+    // DC multiplier, rounding and zero bin
+    reciprocal = 1.0 / (double)( VP6_UvDcQuant[qi->FrameQIndex] * 4 );
+    qi->QuantCoeffs[1][0] = (INT32) (0.5 + SHIFT16 * reciprocal);
+    qi->QuantRound[1][0] = VP6_UvDcRTable[qi->FrameQIndex];
+    qi->ZeroBinSize[1][0] = VP6_UvDcZBinTable[qi->FrameQIndex];
+
+    // AC: again one set of values shared by all 63 entries.
+    reciprocal = 1.0 / (double)( VP6_UvQThreshTable[qi->FrameQIndex] * 4 );
+    acCoeff = (INT32) (0.5 + SHIFT16 * reciprocal);
+    acRound = VP6_UvRTable[qi->FrameQIndex];
+    acZbin = VP6_UvZBinTable[qi->FrameQIndex];
+    for ( n = 1; n < 64; n++ )
+    {
+        qi->QuantCoeffs[1][n] = acCoeff;
+        qi->QuantRound[1][n] = acRound;
+        qi->ZeroBinSize[1][n] = acZbin;
+    }
+
+    // 8-entry copies of the Y AC rounding, multiplier and (zero bin - 1).
+    for ( n = 0; n < 8; n++ )
+    {
+        qi->round[n] = qi->QuantRound[0][1];
+        qi->mult[n] = qi->QuantCoeffs[0][1];
+        qi->zbin[n] = qi->ZeroBinSize[0][1]-1;
+    }
+
+    // Work out the ZRL correction factors for ZBIN:
+    // (AC step * 4) scaled by VP6_ZlrZbinCorrection[n] percent.
+    yStep4 = (INT32)VP6_QThreshTable[qi->FrameQIndex] * 4;
+    uvStep4 = (INT32)VP6_UvQThreshTable[qi->FrameQIndex] * 4;
+    for ( n = 0; n < 64; n++ )
+    {
+        qi->ZlrZbinCorrections[0][n] = (yStep4 * VP6_ZlrZbinCorrection[n]) / 100;
+        qi->ZlrZbinCorrections[1][n] = (uvStep4 * VP6_ZlrZbinCorrection[n]) / 100;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_UpdateQC (compressor's update q)
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * UINT8 Vp3VersionNo : Decoder version number.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Updates the quantisation tables for a new Q
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_UpdateQC ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    // Tables are only rebuilt when the frame Q index has changed.
+    if ( qi->FrameQIndex != qi->LastFrameQIndex )
+    {
+        // Remember the Q index the tables are being built for.
+        qi->LastFrameQIndex = qi->FrameQIndex;
+
+        // Invert the dequant index into the quant index (generic C version).
+        VP6_BuildQuantIndex_Generic(qi);
+
+        // Rebuild the forward (quantizer) tables, then the reverse (dequantizer) tables.
+        VP6_init_quantizer ( qi, Vp3VersionNo );
+        VP6_init_dequantizer ( qi, Vp3VersionNo );
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_quantize_c
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * INT16 *DCT_block : List of 64 DCT coefficients.
+ * UINT8 bp : Position of block within MB.
+ *
+ * OUTPUTS : Q_LIST_ENTRY *quantized_list : List of 64 quantized DCT coefficients.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Quantizes the DCT coefficients wrt the current
+ * quantization level.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+#define HIGHBITDUPPED(X) (((signed short) X) >> 15)
+
+void VP6_quantize_c( QUANTIZER *qi, INT16 *DCT_block, Q_LIST_ENTRY *quantized_list, UINT8 bp )
+{
+    // Per-plane tables, selected from the block position.
+    const UINT32 plane = VP6_QTableSelect[bp];
+    INT32 *const roundTbl = qi->QuantRound[plane];
+    INT32 *const multTbl = qi->QuantCoeffs[plane];
+    INT32 *const zbinTbl = qi->ZeroBinSize[plane];
+    INT32 *const zrlTbl = qi->ZlrZbinCorrections[plane];
+
+    UINT8 zeroRun = 0;      // Count of consecutive coefficients quantized to zero
+    UINT32 scan;
+    INT32 scaled;
+
+    // Everything that falls inside a zero bin stays 0.
+    memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
+
+    // DC: compared against its zero bin directly, with no run-length correction.
+    if ( DCT_block[0] >= zbinTbl[0] )
+    {
+        scaled = multTbl[0] * ( DCT_block[0] + roundTbl[0] );
+        quantized_list[0] = (Q_LIST_ENTRY) (scaled>>16);
+    }
+    else if ( DCT_block[0] <= -zbinTbl[0] )
+    {
+        scaled = multTbl[0] * ( DCT_block[0] - roundTbl[0] ) + MIN16;
+        quantized_list[0] = (Q_LIST_ENTRY) (scaled>>16);
+    }
+    else
+        zeroRun++;
+
+    // AC: visited in zig-zag order. The zero bin is adjusted by a correction
+    // indexed by the current run of zeros.
+    for ( scan = 1; scan < 64; scan++ )
+    {
+        const UINT32 src = dequant_index[scan];
+        const INT32 value = DCT_block[src];
+        const INT32 deadZone = zbinTbl[src] + zrlTbl[zeroRun];
+
+        if ( value >= deadZone )
+        {
+            scaled = multTbl[src] * ( value + roundTbl[src] );
+            quantized_list[scan] = (Q_LIST_ENTRY) (scaled>>16);
+            zeroRun = 0;
+        }
+        else if ( value <= -deadZone )
+        {
+            scaled = multTbl[src] * ( value - roundTbl[src] ) + MIN16;
+            quantized_list[scan] = (Q_LIST_ENTRY) (scaled>>16);
+            zeroRun = 0;
+        }
+        else
+            zeroRun++;
+    }
+
+}
+/**************************** END COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ *
+ * ROUTINE : DeleteQuantizerBuffers
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocates buffers associated with the quantizer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+static void DeleteQuantizerBuffers ( QUANTIZER *qi )
+{
+    int plane;
+
+    // Plane 0 then plane 1: free the raw allocation (if any) and clear
+    // both the raw pointer and the aligned pointer derived from it.
+    for ( plane = 0; plane < 2; plane++ )
+    {
+        if ( qi->dequant_coeffsAlloc[plane] )
+            duck_free(qi->dequant_coeffsAlloc[plane]);
+
+        qi->dequant_coeffsAlloc[plane] = 0;
+        qi->dequant_coeffs[plane] = 0;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AllocateQuantizerBuffers
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: TRUE on success, FALSE if either buffer allocation fails.
+ *
+ * FUNCTION : Allocates buffers associated with quantization.
+ *
+ * SPECIAL NOTES : Uses ROUNDUP32 to ensure that allocated buffers are
+ * aligned on 32-byte boundaries to improve cache performance.
+ *
+ ****************************************************************************/
+
+// TODO: benski> need better checks for other compilers
+// ROUNDUP32(X): the address X rounded up to the next multiple of 32, as an
+// unsigned integer. The pointer-width branch is taken on every 64-bit target
+// we can detect (_WIN64 covers all 64-bit Windows targets, not just AMD64),
+// so the address is never truncated to 32 bits there. The argument is
+// parenthesized so that expressions can be passed safely.
+#if defined(_WIN64) || defined(_M_AMD64) || defined(__LP64__)
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 ) & ~( (uintptr_t) 31 ) )
+#else //#elif //defined(_M_IX86)
+#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~31UL )
+#endif
+
+
+static INT32 AllocateQuantizerBuffers ( QUANTIZER *qi )
+{
+    int plane;
+
+    // Release anything left over from a previous allocation.
+    DeleteQuantizerBuffers(qi);
+
+    for ( plane = 0; plane < 2; plane++ )
+    {
+        // 64 INT16 entries plus 32 spare bytes, so that the rounded-up
+        // pointer still has room for the whole table.
+        qi->dequant_coeffsAlloc[plane] = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+        if ( !qi->dequant_coeffsAlloc[plane] )
+        {
+            // Undo any partial allocation before reporting failure.
+            DeleteQuantizerBuffers(qi);
+            return FALSE;
+        }
+
+        qi->dequant_coeffs[plane] = (INT16 *)ROUNDUP32(qi->dequant_coeffsAlloc[plane]);
+    }
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DeleteQuantizer
+ *
+ * INPUTS : QUANTIZER **qi : Pointer to pointer to quantizer instance.
+ *
+ * OUTPUTS : QUANTIZER **qi : Pointer to pointer to quantizer instance,
+ * set to NULL on exit.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : De-allocates memory associated with the quantizer.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_DeleteQuantizer ( QUANTIZER **qi )
+{
+    // Nothing to do when there is no instance.
+    if ( !*qi )
+        return;
+
+    // Delete any other dynamically allocated temporary buffers
+    DeleteQuantizerBuffers(*qi);
+
+    // De-allocate the quantizer itself and clear the caller's pointer.
+    duck_free(*qi);
+    *qi=0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_CreateQuantizer
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Pointer to allocated quantizer instance.
+ *
+ * FUNCTION : Allocates memory for and initializes a quantizer instance.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+QUANTIZER *VP6_CreateQuantizer ( void )
+{
+    const int bytes = sizeof(QUANTIZER);
+    QUANTIZER *instance = (QUANTIZER *) duck_malloc(bytes, DMEM_GENERAL);
+
+    if ( instance )
+    {
+        // Start from an all-zero structure.
+        memset ( (unsigned char *)instance, 0, bytes );
+
+        // If the coefficient buffers cannot be allocated, tear the instance
+        // down again; VP6_DeleteQuantizer also resets the pointer to 0.
+        if ( !AllocateQuantizerBuffers(instance) )
+            VP6_DeleteQuantizer(&instance);
+    }
+
+    // 0 when any allocation failed.
+    return instance;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : GetQuantizedCoeffsMSE_RD
+ *
+ * INPUTS : INT16 * DctCodes : Result of forward DCT
+ * INT16 * Coeffs : Quantized coefficients
+ * INT16 * DequantMatrix : Dequantization matrix
+ *
+ *
+ *
+ * OUTPUTS : UINT32 *MSE : Sum of squared errors over the 64 coefficients, multiplied by 4
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Computes the squared error in the transform domain.
+ *
+ * SPECIAL NOTES : Based on the argument that the MSE in the frequency domain
+ * is the same as the MSE in the spatial domain, this routine
+ * calculates the error in the transform domain, saving the
+ * IDCT and recon operations for distortion measurement.
+ *
+ ****************************************************************************/
+void GetQuantizedCoeffsMSE_RD
+(
+    INT16 * DctCodes,
+    INT16 * Coeffs,
+    INT16 * DequantMatrix,
+    UINT32 *MSE
+)
+{
+    UINT32 Error=0;
+    INT32 i;
+
+    // Accumulate the squared difference between each dequantized coefficient
+    // (Coeffs[i] * DequantMatrix[i]) and the original DCT value it was
+    // quantized from (DctCodes[dequant_index[i]]).
+    for(i=0;i<64;i++)
+    {
+        int j = dequant_index[i];
+        INT32 diff = Coeffs[i] * DequantMatrix [i] - DctCodes[j];
+
+        // Square in unsigned arithmetic: a signed diff*diff overflows
+        // (undefined behaviour) once |diff| exceeds 46340. The unsigned
+        // product wraps modulo 2^32, so the value added to Error matches
+        // what the signed form produced whenever that form was well defined.
+        UINT32 magnitude = (UINT32)diff;
+
+        Error += magnitude * magnitude;
+    }
+
+    // The reported value is 4x the accumulated squared error.
+    *MSE = (Error<<2);
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c
new file mode 100644
index 00000000..99f0318f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c
@@ -0,0 +1,603 @@
+/****************************************************************************
+*
+* Module Title : recon.c
+*
+* Description : Frame reconstruction functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking. */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h>
+#include "pbdll.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define TMAX 6
+#define TMIN 1
+
+#define Mod8(a) ((a) & 7)
+
+/***************************************************************************
+ *
+ * ROUTINE : Var16Point
+ *
+ * INPUTS : UINT8 *DataPtr : Pointer to data block.
+ * INT32 SourceStride : Block stride.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Calculated 16-point variance (no scaling).
+ *
+ * FUNCTION : Calculates variance for the 8x8 block *BUT* only samples
+ * every second pixel in every second row of the block. In
+ * other words for the 8x8 block only 16 sample points are used.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 Var16Point ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32 sum = 0;         // Sum of the sampled pixels
+    UINT32 sumSq = 0;       // Sum of their squares
+    const UINT8 *row = DataPtr;
+    UINT32 y, x;
+
+    // Visit every second row ...
+    for ( y = 0; y < BLOCK_HEIGHT_WIDTH; y += 2 )
+    {
+        // ... and columns 0, 2, 4 and 6 within it.
+        for ( x = 0; x < 8; x += 2 )
+        {
+            const UINT32 pel = row[x];
+
+            sum += pel;
+            sumSq += pel * pel;
+        }
+
+        // Skip one row.
+        row += (SourceStride << 1);
+    }
+
+    // (16 * sum of squares - square of sum) / 256
+    return ( (sumSq << 4) - sum * sum ) >> 8;
+}
+
+/***************************************************************************
+ *
+ * ROUTINE : DiffVar16Point
+ *
+ * INPUTS : UINT8 *DataPtr : Pointer to data block.
+ * INT32 SourceStride : Block stride.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : UINT32: Calculated 16-point variance (no scaling).
+ *
+ * FUNCTION : Calculates a variance for 16 data values.
+ * Each data value is the absolute difference between a pair of samples
+ * one line and one column apart
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 DiffVar16Point ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32 sum = 0;         // Sum of the absolute differences
+    UINT32 sumSq = 0;       // Sum of their squares
+    const UINT8 *rowA = DataPtr;
+    const UINT8 *rowB = DataPtr + SourceStride + 1;    // One line down, one column right
+    UINT32 y, x;
+
+    // Visit every second row ...
+    for ( y = 0; y < BLOCK_HEIGHT_WIDTH; y += 2 )
+    {
+        // ... and columns 0, 2, 4 and 6 within it.
+        for ( x = 0; x < 8; x += 2 )
+        {
+            const INT32 delta = abs( rowA[x] - rowB[x] );
+
+            sum += delta;
+            sumSq += delta * delta;
+        }
+
+        // Skip one row in both sample streams.
+        rowA += (SourceStride << 1);
+        rowB += (SourceStride << 1);
+    }
+
+    // (16 * sum of squares - square of sum) / 256
+    return ( (sumSq << 4) - sum * sum ) >> 8;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitLoopDeringThresholds
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initialise thresholds used in the prediction/loop
+ * deringing filter.
+ *
+ * SPECIAL NOTES :
+ *
+ *****************************************************************************/
+void InitLoopDeringThresholds ( PB_INSTANCE *pbi )
+{
+    UINT32 i;
+
+    pbi->DrCutOff = 64;
+
+    // Entries 255 down to (256 - DrCutOff): ramp linearly from TMAX towards TMIN.
+    for ( i=0; i<pbi->DrCutOff; i++ )
+        pbi->DrThresh[255 - i] = ((TMAX * pbi->DrCutOff) - ((TMAX - TMIN) * i)) / pbi->DrCutOff;
+
+    // All remaining entries, down to and including entry 0, get TMIN.
+    // The bound is inclusive so that DrThresh[0] is initialised as well:
+    // LoopDeringBlock reads DrThresh[255 - Min] and DrThresh[Max], both of
+    // which are index 0 for blocks that are entirely 255 or entirely 0.
+    for ( i=pbi->DrCutOff; i<=255; i++ )
+        pbi->DrThresh[255 - i] = TMIN;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LoopDeringBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * UINT8 *SrcPtr : Pointer to block to be deringed.
+ * UINT32 Stride : Stride for input block data.
+ * UINT32 Width : Block width.
+ * UINT32 Height : Block height.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a thresholded dering/smoothing filter to a block
+ * of data.
+ *
+ * SPECIAL NOTES :
+ *
+ *****************************************************************************/
+void LoopDeringBlock
+(
+    PB_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT32 Stride,
+    UINT32 Width,
+    UINT32 Height
+)
+{
+    UINT32 i,j;
+
+    UINT8 *DataPtr0;
+    UINT8 *DataPtr1;
+    UINT8 *DataPtr2;
+    UINT8 TmpBuffer[16];    // Holds one filtered row (Width entries) or column (Height entries)
+
+    INT32 ADiff1;
+    INT32 ADiff2;
+
+    INT32 Sum;
+    INT32 Thresh;
+
+    UINT8 Min = 255;
+    UINT8 Max = 0;
+
+    // TmpBuffer is indexed by column in the horizontal pass and by row in the
+    // vertical pass; refuse dimensions that would overrun it.
+    if ( Width > sizeof(TmpBuffer) || Height > sizeof(TmpBuffer) )
+        return;
+
+    // Look for the min and max value in the block
+    DataPtr1 = SrcPtr;
+    for ( i=0; i<Height; i++ )
+    {
+        for ( j=0; j<Width; j++ )
+        {
+            if ( *DataPtr1 < Min )
+                Min = *DataPtr1;
+            if ( *DataPtr1 > Max )
+                Max = *DataPtr1;
+
+            DataPtr1++;
+        }
+        DataPtr1 = (DataPtr1 - Width) + Stride;
+    }
+
+    // Now choose the dering threshold: the larger of the two table entries
+    // selected by the block's darkest and brightest pixel.
+    if ( pbi->DrThresh[255 - Min] > pbi->DrThresh[Max] )
+        Thresh = pbi->DrThresh[255 - Min];
+    else
+        Thresh = pbi->DrThresh[Max];
+
+    // Threshold bigger for bigger range
+    Thresh += ((Max - Min) >> 5);
+
+    // Horizontal dering.
+    // Each output is (2*centre + left' + right' + 2) >> 2, where a neighbour
+    // is replaced by the centre value when it differs by more than Thresh.
+    // NOTE: columns j-1 and j+1 are read, so the caller must provide one
+    // valid pixel to the left and right of the block.
+    DataPtr1 = SrcPtr;
+    for ( i=0; i<Height; i++ )
+    {
+        for ( j=0; j<Width; j++ )
+        {
+            ADiff1 = abs( (INT32)DataPtr1[j] - (INT32)DataPtr1[j-1] );
+            ADiff2 = abs( (INT32)DataPtr1[j] - (INT32)DataPtr1[j+1] );
+
+            Sum = DataPtr1[j] + DataPtr1[j];
+
+            if ( ADiff1 <= Thresh )
+                Sum += DataPtr1[j-1];
+            else
+                Sum += DataPtr1[j];
+
+            if ( ADiff2 <= Thresh )
+                Sum += DataPtr1[j+1];
+            else
+                Sum += DataPtr1[j];
+
+            Sum = (Sum + 2) >> 2;
+
+            TmpBuffer[j] = Sum;
+        }
+
+        // Copy back the filtered line only once the whole line has been
+        // computed, so every tap sees unfiltered neighbours.
+        memcpy ( DataPtr1, TmpBuffer, Width );
+
+        // Next line
+        DataPtr1 += Stride;
+    }
+
+    // Vertical dering: same filter applied down each column.
+    // NOTE: the rows above and below the block are read as well.
+    for ( i=0; i<Width; i++ )
+    {
+        DataPtr1 = SrcPtr + i;
+        DataPtr0 = DataPtr1 - Stride;
+        DataPtr2 = DataPtr1 + Stride;
+
+        for ( j=0; j<Height; j++ )
+        {
+            ADiff1 = abs( (INT32)*DataPtr1 - (INT32)*DataPtr0 );
+            ADiff2 = abs( (INT32)*DataPtr1 - (INT32)*DataPtr2 );
+
+            Sum = *DataPtr1 + *DataPtr1;
+
+            if ( ADiff1 <= Thresh )
+                Sum += *DataPtr0;
+            else
+                Sum += *DataPtr1;
+
+            if ( ADiff2 <= Thresh )
+                Sum += *DataPtr2;
+            else
+                Sum += *DataPtr1;
+
+            Sum = (Sum + 2) >> 2;
+
+            TmpBuffer[j] = Sum;
+
+            DataPtr0 += Stride;
+            DataPtr1 += Stride;
+            DataPtr2 += Stride;
+        }
+
+        // Copy back the filtered data
+        DataPtr1 = SrcPtr + i;
+        for ( j=0; j<Height; j++ )
+        {
+            *DataPtr1 = TmpBuffer[j];
+            DataPtr1 += Stride;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PredictFiltered
+ *
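+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * UINT8 *SrcPtr : Pointer to block to be filtered.
+ * INT32 mx : Motion vector x component (fractional-pel units).
+ * INT32 my : Motion vector y component (fractional-pel units).
+ * UINT32 bp : Position of block within MB.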
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Build an 8x8 motion prediction block. If the block is
+ * copied across a block boundary, attempt to eliminate
+ * the internal border by applying the loop filter internally.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *****************************************************************************/
+// Copies a 12x12 region around the motion-compensated block position into
+// pbi->LoopFilteredBlock (stride 16) and loop-filters any 8-pixel block
+// boundary that falls inside it. Nothing is returned; the caller reads the
+// result from pbi->LoopFilteredBlock.
+void VP6_PredictFiltered
+(
+ PB_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ INT32 mx,
+ INT32 my,
+ UINT32 bp
+)
+{
+ INT32 mVx, mVy;
+ INT32 ReconIndex;
+ MACROBLOCK_INFO *mbi=&pbi->mbi;
+
+ UINT8 *TempBuffer = pbi->LoopFilteredBlock;
+
+ INT32 BoundaryX, BoundaryY;
+
+ // Calculate full pixel motion vector position
+ // (shifting the magnitude, so the result is rounded towards zero)
+ if(mx > 0 )
+ mVx = (mx >> pbi->mbi.blockDxInfo[bp].MvShift);
+ else
+ mVx = -((-mx) >> pbi->mbi.blockDxInfo[bp].MvShift);
+
+ if(my > 0 )
+ mVy = (my >> pbi->mbi.blockDxInfo[bp].MvShift);
+ else
+ mVy = -((-my) >> pbi->mbi.blockDxInfo[bp].MvShift);
+
+ // calculate offset in last frame matching motion vector
+ ReconIndex = mbi->blockDxInfo[bp].FrameReconStride * mVy + mVx;
+
+ // Give our selves a border of 2 extra pixel on all sides (for loop filter and half pixel moves)
+ // NOTE(review): the offset above uses FrameReconStride while the border and
+ // the copy use CurrentReconStride -- presumably equal here; confirm.
+ ReconIndex -= 2 * mbi->blockDxInfo[bp].CurrentReconStride;
+ ReconIndex -= 2;
+
+ // copy the 12x12 region starting from reconpixel index into our temp buffer.
+ Copy12x12( SrcPtr + ReconIndex, TempBuffer, mbi->blockDxInfo[bp].CurrentReconStride, 16);
+
+ // What sort of loop filtering are we doing
+ // Dering loop filter is mandated to OFF in the current bitstream#
+ //if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+ if ( FALSE )
+ {
+ // Apply prediction.loop dering filter
+ // (unreachable while the condition above is hard-wired to FALSE)
+ LoopDeringBlock( pbi, &TempBuffer[16+1], 16, 10, 10 );
+ }
+ else
+ {
+ // calculate block border position for x
+ // (0 when mVx is a multiple of 8, i.e. no boundary is crossed in x)
+ BoundaryX = (8 - Mod8(mVx))&7;
+
+ // calculate block border position for y
+ BoundaryY = (8 - Mod8(mVy))&7;
+
+ // apply the loop filter at the horizontal boundary we selected
+ // (the +2 skips the 2-pixel left border of the temp buffer)
+ if(BoundaryX)
+ FilteringHoriz_12(
+ pbi->quantizer->FrameQIndex,
+ TempBuffer + 2 + BoundaryX,
+ 16);
+
+ // apply the loop filter at the vertical boundary we selected
+ // (the 2 * 16 skips the 2-row top border of the temp buffer)
+ if (BoundaryY)
+ FilteringVert_12(
+ pbi->quantizer->FrameQIndex,
+ TempBuffer + 2 * 16 + BoundaryY * 16,
+ 16);
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PredictFilteredBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * INT16 *OutputPtr : Pointer to output data.
+ * UINT32 bp : Position of block within MB.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Build an 8x8 motion prediction block. If the block is
+ * copied across a block boundary, attempt to eliminate
+ * the internal border by applying the loop filter internally.
+ *
+ * SPECIAL NOTES :
+ *
+ *****************************************************************************/
+void VP6_PredictFilteredBlock
+(
+    PB_INSTANCE *pbi,
+    INT16 *OutputPtr,
+    UINT32 bp
+)
+{
+    UINT8 *SrcPtr;
+    UINT8 *TempBuffer;
+    // Offsets into TempBuffer of the two blocks to be filtered together.
+    // They are signed: on the unfiltered path TempPtr1 is 0 and TempPtr2 can be
+    // -1 or -Stride. As UINT32 those wrapped to ~4G, so &TempBuffer[TempPtr2]
+    // pointed far outside the frame on 64-bit builds.
+    INT32 TempPtr1;
+    INT32 TempPtr2;
+    INT32 ModX, ModY;
+    UINT32 IVar;
+    UINT32 BicMvSizeLimit;
+    UINT32 Stride;
+
+    UINT32 MvShift = pbi->mbi.blockDxInfo[bp].MvShift;     //pbi->mbi.MvShift;
+    UINT32 MvModMask = pbi->mbi.blockDxInfo[bp].MvModMask; //pbi->mbi.MvModMask;
+
+    // Which buffer are we working on?
+    SrcPtr = pbi->LastFrameRecon;
+    if ( VP6_Mode2Frame[pbi->mbi.Mode] == 2 )
+    {
+        SrcPtr = pbi->GoldenFrame;
+    }
+
+    // No loop filtering in simple profile
+    if ( pbi->VpProfile == SIMPLE_PROFILE || (pbi->UseLoopFilter == NO_LOOP_FILTER) )
+    {
+        INT32 mVx, mVy;
+        INT32 ReconOffset;
+        INT32 mx = pbi->mbi.Mv[bp].x;
+        INT32 my = pbi->mbi.Mv[bp].y;
+
+        // Mask off fractional pel bits.
+        ModX = (mx & MvModMask);
+        ModY = (my & MvModMask);
+
+        // Calculate full pixel motion vector position
+        // (negative vectors are biased first so the shift rounds towards zero)
+        mx += (MvModMask&(mx>>31));
+        my += (MvModMask&(my>>31));
+
+        mVx = (mx >> MvShift);
+        mVy = (my >> MvShift);
+
+        // Set up a pointer into the recon buffer. The motion offset may be
+        // negative, so it goes through a signed 32-bit value before being added
+        // to the pointer (as VP6_PredictFiltered does with ReconIndex).
+        ReconOffset = (INT32)(pbi->mbi.blockDxInfo[bp].FrameReconStride * mVy + mVx);
+        TempBuffer = SrcPtr + pbi->mbi.blockDxInfo[bp].thisRecon + ReconOffset;
+        Stride = pbi->mbi.blockDxInfo[bp].CurrentReconStride;
+        TempPtr1 = TempPtr2 = 0;
+    }
+    else
+    {
+        // Loop filter the block
+        VP6_PredictFiltered( pbi, SrcPtr + pbi->mbi.blockDxInfo[bp].thisRecon, pbi->mbi.Mv[bp].x, pbi->mbi.Mv[bp].y, bp );
+        TempBuffer = pbi->LoopFilteredBlock;
+        Stride = 16;
+        TempPtr1 = 2*16+2;      // Offset into the 12x12 loop filtered buffer
+        TempPtr2 = TempPtr1;
+
+        // Mask off fractional pel bits.
+        ModX = (pbi->mbi.Mv[bp].x & MvModMask);
+        ModY = (pbi->mbi.Mv[bp].y & MvModMask);
+    }
+
+    // determine if we have a fractional pixel move in the x direction:
+    // the second block is one pixel right (positive vector) or left of the first
+    if ( ModX )
+    {
+        TempPtr2 += ( pbi->mbi.Mv[bp].x > 0 )*2 -1;
+    }
+
+    // handle fractional pixel motion in Y: one line below (positive vector)
+    // or above the first. Stride is converted to signed before multiplying by -1.
+    if ( ModY )
+    {
+        TempPtr2 += (( pbi->mbi.Mv[bp].y > 0 ) * 2 - 1) * (INT32)Stride;
+    }
+
+    // put the results back into the real reconstruction buffer
+    if ( TempPtr1 != TempPtr2 )
+    {
+        // The FilterBlock selects a filter based upon a ModX and ModY value that are at 1/8 point
+        // precision. Because U and V are subsampled the vector is already at the right precision
+        // for U and V but for Y we have to multiply by 2.
+        if ( bp < 4 )
+        {
+            // Filterblock expects input at 1/8 pel resolution (hence << 1 for Y)
+            ModX = ModX << 1;
+            ModY = ModY << 1;
+
+            // Select the filtering mode
+            if ( pbi->VpProfile == SIMPLE_PROFILE )
+            {
+                // Simple profile always uses bilinear filtering for speed
+                FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, 14 );
+            }
+            else if ( pbi->PredictionFilterMode == AUTO_SELECT_PM )
+            {
+                // Work out the Mv size limit for selecting bicubic
+                if ( pbi->PredictionFilterMvSizeThresh > 0 )
+                    BicMvSizeLimit = (1 << (pbi->PredictionFilterMvSizeThresh - 1)) << 2;   // Convert to a value in 1/4 pel units
+                else
+                    BicMvSizeLimit = ((MAX_MV_EXTENT >> 1) + 1) << 2;                       // Unrestricted
+
+                // Only use bicubic on shortish vectors
+                if ( ( pbi->PredictionFilterMvSizeThresh != 0 ) &&
+                     ( ( (UINT32)abs(pbi->mbi.Mv[bp].x) > BicMvSizeLimit ) || ( (UINT32)abs(pbi->mbi.Mv[bp].y) > BicMvSizeLimit ) ) )
+                {
+                    FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, pbi->PredictionFilterAlpha);
+                }
+                // Should we use a variance test for bicubic as well
+                else if ( pbi->PredictionFilterVarThresh != 0 )
+                {
+                    IVar = Var16Point( &TempBuffer[TempPtr1], Stride );
+                    FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, (IVar >= pbi->PredictionFilterVarThresh), pbi->PredictionFilterAlpha );
+                }
+                else
+                {
+                    FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, TRUE, pbi->PredictionFilterAlpha );
+                }
+            }
+            else
+                FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, (pbi->PredictionFilterMode == BICUBIC_ONLY_PM), pbi->PredictionFilterAlpha );
+        }
+        else
+        {
+            // U and V blocks: the bicubic selector is always FALSE here.
+            FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, pbi->PredictionFilterAlpha );
+        }
+    }
+    // No fractional pels
+    else
+        UnpackBlock(&TempBuffer[TempPtr1], OutputPtr, Stride );
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ReconstructBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * BLOCK_POSITION bp : Position of block within MB.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Reconstructs the coded block depending on coding mode.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_ReconstructBlock ( PB_INSTANCE *pbi, BLOCK_POSITION bp )
+{
+    // Offset of this block within the frame buffers, and where the
+    // reconstructed pixels are written.
+    UINT32 thisRecon = pbi->mbi.blockDxInfo[bp].thisRecon;
+    UINT8 *dest = (UINT8 *)&pbi->ThisFrameRecon[thisRecon];
+
+    // Action depends on decode mode. The tests are evaluated in this order.
+    if ( pbi->mbi.Mode == CODE_INTER_NO_MV )
+    {
+        // Inter with no motion vector: predict from the co-located block
+        // of the last frame.
+        ReconInter( pbi->TmpDataBuffer,
+                    dest,
+                    (UINT8 *)&pbi->LastFrameRecon[thisRecon],
+                    (INT16 *)pbi->ReconDataBuffer[bp],
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride);
+
+    }
+    else if ( VP6_ModeUsesMC[pbi->mbi.Mode] )
+    {
+        // The mode uses a motion vector: build the prediction into
+        // TmpDataBuffer, then combine it with the block's data.
+        // For the compressor we did this already ( possible optimization).
+        VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+        ReconBlock( pbi->TmpDataBuffer,
+                    (INT16 *)pbi->ReconDataBuffer[bp],
+                    dest,
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+    }
+    else if ( pbi->mbi.Mode == CODE_USING_GOLDEN )
+    {
+        // Predict from the co-located block of the golden frame.
+        ReconInter( pbi->TmpDataBuffer,
+                    dest,
+                    (UINT8 *)&pbi->GoldenFrame[thisRecon],
+                    (INT16 *)pbi->ReconDataBuffer[bp],
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+    }
+    else
+    {
+        // Simple Intra coding: no reference block is involved.
+        ReconIntra( pbi->TmpDataBuffer,
+                    dest,
+                    (UINT16 *)pbi->ReconDataBuffer[bp],
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c
new file mode 100644
index 00000000..b0aa83f3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c
@@ -0,0 +1,605 @@
+/****************************************************************************
+*
+* Module Title : vfwpbdll_if.c
+*
+* Description : Video codec playback dll interface
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h>
+#include "pbdll.h"
+#include "vp60dversion.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+// Non-MSVC builds: define __try away so the "__try { ... }" blocks in this file
+// compile as plain compound statements (the matching __except handlers are
+// already guarded by _MSC_VER checks).
+#ifndef _MSC_VER
+#define __try
+// Version banner string built from VP60DVERSION.
+#define CommentString "\nON2.COM VERSION VP60D " VP60DVERSION "\n"
+// NOTE(review): "#pragma comment(exestr, ...)" looks like an MSVC-specific pragma,
+// yet it sits in the non-MSVC branch -- confirm whether an #else was intended.
+#pragma comment(exestr,CommentString)
+
+#endif
+
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+extern void VP6_DecodeFrameMbs(PB_INSTANCE *pbi);
+extern void VP6_InitialiseConfiguration(PB_INSTANCE *pbi);
+extern void InitHeaderBuffer ( FRAME_HEADER *Header, unsigned char *Buffer );
+extern void SetAddNoiseMode(POSTPROC_INST , int);
+
+#include <stdio.h>
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+static const char vp31dVersion[] = VP60DVERSION;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+#ifdef PBSTATS1
+static INT32 TotQ = 0; // TEMP diagnostic variables
+static INT32 PBFrameNumber = 0;
+#endif
+
+/****************************************************************************
+ *
+ * ROUTINE : VP60D_GetVersionNumber
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : const char *: Pointer to decoder version string.
+ *
+ * FUNCTION : Returns a pointer to the decoder version string.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+const char * CCONV VP60D_GetVersionNumber ( void )
+{
+    // The string lives in a module-level static array, so the returned
+    // pointer stays valid for the lifetime of the module.
+    const char *version = vp31dVersion;
+
+    return version;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StartDecoder
+ *
+ * INPUTS : PB_INSTANCE **pbi : Pointer to pointer to decoder instance.
+ * UINT32 ImageWidth : Width of the image.
+ * UINT32 ImageHeight : Height of the image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : TRUE if succeeds, FALSE otherwise.
+ *
+ * FUNCTION : Creates and initializes the decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+BOOL CCONV VP6_StartDecoder( PB_INSTANCE **pbi, UINT32 ImageWidth, UINT32 ImageHeight )
+{
+    __try
+    {
+        // Allocate the structure holding all formerly global information
+        // about a playback instance.
+        *pbi = VP6_CreatePBInstance();
+
+        // Everything below dereferences the instance, so fail cleanly when
+        // it could not be created.
+        if ( *pbi == NULL )
+            return FALSE;
+
+        // Scaling and output sizes start out equal to the requested image size.
+        (*pbi)->ScaleWidth   = ImageWidth;
+        (*pbi)->ScaleHeight  = ImageHeight;
+        (*pbi)->OutputWidth  = ImageWidth;
+        (*pbi)->OutputHeight = ImageHeight;
+
+        // Record the frame dimensions in the decoder configuration.
+        (*pbi)->Configuration.VideoFrameWidth  = ImageWidth;
+        (*pbi)->Configuration.VideoFrameHeight = ImageHeight;
+
+        (*pbi)->postproc           = CreatePostProcInstance(&(*pbi)->Configuration);
+        (*pbi)->quantizer          = VP6_CreateQuantizer();
+        (*pbi)->ProcessorFrequency = CPUFrequency;
+
+        // Fills in fragment counts as well
+        if ( !VP6_InitFrameDetails(*pbi) )
+        {
+            // Release the helper objects created above before the instance
+            // itself, mirroring VP6_StopDecoder(), so they are not leaked.
+            VP6_DeleteQuantizer(&(*pbi)->quantizer);
+            DeletePostProcInstance(&(*pbi)->postproc);
+            VP6_DeletePBInstance(pbi);
+            return FALSE;
+        }
+
+        // Set LastFrameQIndex to an illegal value to make sure the
+        // Q tables are initialised for the new video sequence.
+        (*pbi)->quantizer->LastFrameQIndex = 0xFFFFFFFF;
+
+        // Set up various configuration parameters.
+        VP6_InitialiseConfiguration(*pbi);
+
+        return TRUE;
+    }
+#if defined(_MSC_VER)
+    __except( TRUE )
+    {
+        VP6_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_GetPbParam
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * PB_COMMAND_TYPE Command : Command action specifier.
+ *
+ * OUTPUTS : UINT32 *Parameter : Command dependent value requested.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Generalised command interface to decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void CCONV VP6_GetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 *Parameter )
+{
+    // Only the post-processing level can be queried, and only in builds with
+    // POSTPROCESS defined; any other command leaves *Parameter untouched.
+    switch ( Command )
+    {
+#if defined(POSTPROCESS)
+    case PBC_SET_POSTPROC:
+        *Parameter = pbi->PostProcessingLevel;
+        break;      // explicit: do not rely on falling through into default
+#endif
+
+    default:
+        break;
+    }
+}
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PickPostProcessingLevel
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : int: Selected post-processing level.
+ *
+ * FUNCTION : Select the post-processing level to be used based
+ * on how fast we're decoding.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+// Time budgets scaled by the pbi->CPUFree percentage. These macros expand to an
+// expression that needs a variable named "pbi" in scope.
+// NOTE(review): the comments below talk about 1/30th of a second, so the
+// constants are presumably microseconds -- confirm against VP6_readTSC users.
+#define CRITICALWATERMARK (int) (31000 * pbi->CPUFree / 100)
+#define DOWNWATERMARK (int) (30000 * pbi->CPUFree / 100)
+#define UPWATERMARK (int) (28000 * pbi->CPUFree / 100)
+
+int VP6_PickPostProcessingLevel ( PB_INSTANCE *pbi )
+{
+    // VP6_GetYUVConfig() accepts levels above 100 / 200 and files its timings
+    // under avgPPTime[level % 10]; index the table the same way here so such
+    // levels do not read outside the table.
+    int currentSlot = pbi->PostProcessingLevel % 10;
+
+    int minimumTime = pbi->thisDecodeTime + pbi->avgBlitTime + pbi->avgPPTime[8];
+    int thisTime    = minimumTime + pbi->avgPPTime[currentSlot];
+    int avgTime     = pbi->avgDecodeTime + pbi->avgBlitTime;
+
+    // estimate the times of all of our unknown postprocessors
+    if(pbi->avgPPTime[6]==0)
+        pbi->avgPPTime[6] = avgTime>>1;
+
+    if(pbi->avgPPTime[5]==0)
+        pbi->avgPPTime[5] = avgTime>>1;
+
+    if(pbi->avgPPTime[4]==0)
+        pbi->avgPPTime[4] = (avgTime ) >> 2;
+
+    if(pbi->avgPPTime[8]==0)
+        pbi->avgPPTime[8] = avgTime>>3;
+
+    // A CPUFree of 0 disables automatic selection: keep the current level.
+    if(pbi->CPUFree == 0 )
+        return pbi->PostProcessingLevel;
+
+    // automatically select a postprocessing level based on the amount
+    // of time taken to decode blit and postprocess etc
+
+    // more than 1/30 of a second no postprocessing at all (its better to show an
+    // ugly frame than none at all). We use 1/30th of a second because nothing
+    // tells us the actual framerate
+    if ( thisTime > (int)(CRITICALWATERMARK) )
+    {
+        // this frame's taking too long try to make up time on the subsequent frames
+        pbi->avgDecodeTime = pbi->thisDecodeTime;
+
+        // pick the first level (tried in the order 6, 5, 4, 8) whose estimated
+        // cost still fits under the critical watermark
+        if(pbi->avgPPTime[6] + minimumTime < CRITICALWATERMARK )
+            return 6;
+
+        if(pbi->avgPPTime[5] + minimumTime < CRITICALWATERMARK )
+            return 5;
+
+        if(pbi->avgPPTime[4] + minimumTime < CRITICALWATERMARK )
+            return 4;
+
+        if(pbi->avgPPTime[8] + minimumTime < CRITICALWATERMARK )
+            return 8;
+
+        return 0;
+    }
+
+    // Between the two watermarks: leave the level alone to avoid oscillating.
+    if(thisTime < DOWNWATERMARK && thisTime > UPWATERMARK)
+        return pbi->PostProcessingLevel;
+
+    // pick the first level (tried in the order 6, 5, 4, 8) whose estimated
+    // cost, on top of the average decode+blit time, fits under the up watermark
+    if(pbi->avgPPTime[6] + avgTime < UPWATERMARK )
+        return 6;
+
+    if(pbi->avgPPTime[5] + avgTime < UPWATERMARK )
+        return 5;
+
+    if(pbi->avgPPTime[4] + avgTime < UPWATERMARK )
+        return 4;
+
+    if(pbi->avgPPTime[8] + avgTime < UPWATERMARK )
+        return 8;
+
+    return 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_GetYUVConfig
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * YUV_BUFFER_CONFIG *YuvConfig : Pointer to configuration
+ * data-structure.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Gets details of the reconstruction buffer
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void CCONV VP6_GetYUVConfig ( PB_INSTANCE *pbi, YUV_BUFFER_CONFIG *YuvConfig )
+{
+    __try
+    {
+#ifdef _MSC_VER
+        unsigned int duration;
+        unsigned int starttsc,endtsc;
+        VP6_readTSC(&starttsc);
+
+        // Not every path below runs a post-processor and stamps endtsc. Seed it
+        // here so the elapsed-time computation at the end never reads an
+        // uninitialised value (it then yields a duration of 0).
+        endtsc = starttsc;
+
+        pbi->PostProcessingLevel = VP6_PickPostProcessingLevel(pbi);
+#endif
+        // Post-process when a level is selected, or when an interlaced stream
+        // is to be de-interlaced.
+        if( pbi->PostProcessingLevel || (pbi->Configuration.Interlaced && pbi->DeInterlaceMode) )
+        {
+#ifdef _MSC_VER
+            extern void vp6_showinfo2(PB_INSTANCE *pbi);
+            extern void vp6_showinfo(PB_INSTANCE *pbi);
+
+            // Levels above 200 / 100 run post-processing at (level - 200) /
+            // (level - 100) and then call one of the vp6_showinfo routines.
+            if ( pbi->PostProcessingLevel > 200 )
+            {
+                PostProcess (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel-200,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+                VP6_readTSC(&endtsc);
+                vp6_showinfo(pbi);
+            }
+            else if ( pbi->PostProcessingLevel > 100 )
+            {
+                PostProcess (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel-100,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+                VP6_readTSC(&endtsc);
+                vp6_showinfo2(pbi);
+            }
+            else
+#endif
+            {
+//              pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+
+                PostProcess (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+#ifdef _MSC_VER
+                VP6_readTSC(&endtsc);
+#endif
+            }
+        }
+
+        // Optional black/white level clamping, done in place on the
+        // post-processing buffer.
+        if(pbi->BlackClamp)
+            ClampLevels( pbi->postproc,pbi->BlackClamp,pbi->WhiteClamp,pbi->PostProcessBuffer, pbi->PostProcessBuffer);
+
+        if( pbi->Configuration.VideoFrameWidth < pbi->OutputWidth &&
+            pbi->Configuration.VideoFrameHeight == pbi->OutputHeight )
+        {
+            // Coded frame is narrower than the output: go through ScaleBuffer,
+            // which is described with a 32-pixel margin in each dimension.
+            YuvConfig->YWidth  = pbi->OutputWidth+32;
+            YuvConfig->YHeight = pbi->OutputHeight+32;
+            YuvConfig->YStride = YuvConfig->YWidth;
+
+            YuvConfig->UVWidth  = YuvConfig->YWidth / 2;
+            YuvConfig->UVHeight = YuvConfig->YHeight / 2;
+            YuvConfig->UVStride = YuvConfig->YStride / 2;
+
+            // Planes are laid out back to back in ScaleBuffer: Y, then U, then V.
+            YuvConfig->YBuffer = (char *)pbi->ScaleBuffer;
+            YuvConfig->UBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight;
+            YuvConfig->VBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight+YuvConfig->UVWidth*YuvConfig->UVHeight;
+
+            if(pbi->PostProcessingLevel)
+                ScaleOrCenter( pbi->postproc, pbi->PostProcessBuffer, YuvConfig );
+            else
+                ScaleOrCenter( pbi->postproc, pbi->LastFrameRecon, YuvConfig );
+
+            // Step the plane pointers past the margin and report the visible size.
+            YuvConfig->YBuffer +=
+                (YuvConfig->YHeight - pbi->OutputHeight ) / 2 * YuvConfig->YStride
+                +(YuvConfig->YWidth - pbi->OutputWidth) / 2;
+            YuvConfig->YWidth  = pbi->OutputWidth;
+            YuvConfig->YHeight = pbi->OutputHeight;
+
+            YuvConfig->UBuffer +=
+                (YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride
+                +(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->VBuffer +=
+                (YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride
+                +(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->UVWidth  = pbi->OutputWidth / 2;
+            YuvConfig->UVHeight = pbi->OutputHeight / 2;
+        }
+        else
+        {
+            // No scaling: point directly into the post-processed or the
+            // reconstructed frame, skipping the UMV border.
+            YuvConfig->YWidth  = pbi->Configuration.VideoFrameWidth;
+            YuvConfig->YHeight = pbi->Configuration.VideoFrameHeight;
+            YuvConfig->YStride = pbi->Configuration.YStride;
+
+            YuvConfig->UVWidth  = pbi->Configuration.VideoFrameWidth / 2;
+            YuvConfig->UVHeight = pbi->Configuration.VideoFrameHeight / 2;
+            YuvConfig->UVStride = pbi->Configuration.UVStride;
+
+            if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+            {
+                YuvConfig->YBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconYDataOffset+(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+            else
+            {
+                YuvConfig->YBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconYDataOffset+ (pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+        }
+
+#if defined(_MSC_VER)
+        // Fold this call's elapsed time into the running average kept for the
+        // level that was used (first sample seeds it, later ones weigh 1/8).
+        duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ;
+
+        if( pbi->avgPPTime[pbi->PostProcessingLevel%10] == 0)
+            pbi->avgPPTime[pbi->PostProcessingLevel%10] = duration;
+        else
+            pbi->avgPPTime[pbi->PostProcessingLevel%10] = ( 7 * pbi->avgPPTime[pbi->PostProcessingLevel%10] + duration ) >> 3;
+#endif
+    }
+#if defined(_MSC_VER)
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( pbi, GEN_EXCEPTIONS );
+    }
+#endif
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DecodeFrameToYUV
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * char *VideoBufferPtr : Pointer to compressed data buffer.
+ * unsigned int ByteCount : Size in bytes of compressed data buffer.
+ * (The image width and height are not passed to this routine;
+ * they are supplied to VP6_StartDecoder().)
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : int: 0 for success, negative value for error.
+ *
+ * FUNCTION : Decodes a frame into the internal YUV reconstruction buffer.
+ * Details of this buffer can be obtained by calling GetYUVConfig().
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+// Number of frames decoded so far by any instance (only read by the disabled
+// diagnostic code inside VP6_DecodeFrameToYUV).
+int CurrentFrame = 0;
+int CCONV VP6_DecodeFrameToYUV(PB_INSTANCE *pbi, char *VideoBufferPtr, unsigned int ByteCount)
+{
+    unsigned char *tmp;
+
+    __try
+    {
+#ifdef _MSC_VER
+        unsigned int duration;
+        unsigned int starttsc,endtsc;
+        VP6_readTSC(&starttsc);
+#endif
+        pbi->CurrentFrameSize = ByteCount;
+
+        // Initialise the bit reader used to read the fixed raw part of the header
+        InitHeaderBuffer ( &pbi->Header, (unsigned char*)VideoBufferPtr );
+
+        // decode the frame header
+        if ( !VP6_LoadFrame(pbi) )
+            return -1;
+
+        // Start the second boolean decoder
+        if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+        {
+            // Buff2Offset is added to the caller's buffer pointer below. Reject
+            // a frame whose second partition would start beyond the supplied
+            // data instead of reading past the end of the buffer.
+            if ( (unsigned int)pbi->Buff2Offset > ByteCount )
+                return -2;
+
+            pbi->mbi.br = &pbi->br2;
+
+            if ( pbi->UseHuffman )
+            {
+                // Initialise BITREADER for second bitstream partition
+                pbi->br3.bitsinremainder = 0;
+                pbi->br3.remainder = 0;
+                pbi->br3.position = ((unsigned char*)VideoBufferPtr)+pbi->Buff2Offset;
+            }
+            else
+                VP6_StartDecode(&pbi->br2,((unsigned char*)VideoBufferPtr)+pbi->Buff2Offset);
+        }
+        else
+        {
+            pbi->mbi.br = &pbi->br;
+        }
+
+        // decode and reconstruct frame
+        VP6_DecodeFrameMbs(pbi);
+
+        // switch pointers so lastframe recon is this frame
+        tmp = pbi->LastFrameRecon;
+        pbi->LastFrameRecon = pbi->ThisFrameRecon;
+        pbi->ThisFrameRecon = tmp;
+
+        // update the border
+        UpdateUMVBorder(pbi->postproc, pbi->LastFrameRecon);
+
+        // Update the golden frame buffer
+        if( (pbi->FrameType == BASE_FRAME) || pbi->RefreshGoldenFrame )
+            memcpy(pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize);
+
+#if defined(_MSC_VER)
+        ClearSysState();
+#endif
+
+#ifdef PBSTATS1
+        // Update PB stats
+        TotQ += pbi->quantizer->ThisFrameQualityValue;
+        PBFrameNumber += 1;
+#endif
+
+        // Track a running average of the frame Q index: reset on base frames,
+        // otherwise a rounded 3:1 blend of the old average and this frame.
+        if(pbi->FrameType == BASE_FRAME )
+            pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+        else
+            pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;
+
+#ifdef _MSC_VER
+        // Record this frame's decode time and fold it into the running average
+        // consumed by VP6_PickPostProcessingLevel().
+        VP6_readTSC(&endtsc);
+        duration = (endtsc-starttsc)/ (pbi->ProcessorFrequency);
+        pbi->thisDecodeTime = duration;
+
+        if( pbi->avgDecodeTime == 0)
+            pbi->avgDecodeTime = duration;
+        else
+            pbi->avgDecodeTime = (7*pbi->avgDecodeTime + duration)>>3;
+#endif
+
+
+#if 0
+        if (pbi->br.pos>pbi->CurrentFrameSize)
+        {
+            FILE *f = fopen("badframes.stt","a");
+            fprintf(f,"%8d %8d %8d \n", CurrentFrame,pbi->br.pos,pbi->CurrentFrameSize);
+            fclose(f);
+        }
+#endif
+
+        CurrentFrame++;
+    }
+#if defined(_MSC_VER)
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( pbi, GEN_EXCEPTIONS );
+        return -2;
+    }
+#endif
+    return 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_StopDecoder
+ *
+ * INPUTS : PB_INSTANCE **pbi : Pointer to pointer to decoder instance.
+ *
+ * OUTPUTS : PB_INSTANCE **pbi : Pointer to pointer to decoder instance,
+ * set to NULL on return.
+ *
+ * RETURNS : int: TRUE on success, FALSE otherwise.
+ *
+ * FUNCTION : Destroys the decoder instance.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int CCONV VP6_StopDecoder ( PB_INSTANCE **pbi )
+{
+    __try
+    {
+        PB_INSTANCE *instance = *pbi;
+
+        // Nothing to tear down when the caller holds no instance.
+        if ( instance != NULL )
+        {
+            // Release the helper objects first, then the frame storage,
+            // and the instance itself last.
+            VP6_DeleteQuantizer(&instance->quantizer);
+            DeletePostProcInstance(&instance->postproc);
+            VP6_DeleteFragmentInfo(instance);
+            VP6_DeleteFrameInfo(instance);
+            VP6_DeletePBInstance(pbi);
+        }
+    }
+
+#if defined(_MSC_VER)
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+    // Reached both after a successful teardown and when there was no instance.
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_ErrorTrap
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * int ErrorCode : Error code to report.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Called when a fatal error is detected.
+ *
+ * SPECIAL NOTES : Currently does nothing.
+ *
+ ****************************************************************************/
+void VP6_ErrorTrap ( PB_INSTANCE *pbi, int ErrorCode )
+{
+    // Deliberately a no-op hook; both arguments are ignored.
+    (void) pbi;
+    (void) ErrorCode;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c
new file mode 100644
index 00000000..1e460f3e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+* Module Title : vp60dxv.c
+*
+* Description : VP60 interface to DXV.
+*
+* AUTHOR : SJL
+*
+*****************************************************************************
+* Revision History
+*
+* 1.03 SJL 17/10/02 Up the version to 1.0.0.3, added new dxv interface
+* 1.02 YWX 30/09/02 Up the version to 1.0.0.2, added support of scaling
+* 1.01 YWX 19/09/02 Fixed bug in blit and up the version to 1.0.0.1
+* 1.00 SJL 17/06/02 Base
+*
+*****************************************************************************
+*/
+//#include <stdlib.h>
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+
+#include "pbdll.h"
+
+
+const char* VP6LIBVERSION="ON2 VP6 Decode Library for MAC Version 1.0.0.3";
+
+typedef unsigned int FourCC;
+
+#define VP60_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '0')
+#define VP61_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '1')
+
+
+/* Table of bit depths: DXYV12 followed by DXRGBNULL.
+   NOTE(review): DXRGBNULL looks like a list terminator, and nothing in the
+   visible part of this file references the table -- confirm it is still needed. */
+static dxvBitDepth bitDepths[] =
+{
+ DXYV12,DXRGBNULL
+};
+
+
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned int Parameter );
+
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+
+#include "duck_dxl.h"
+
+/* Disabled code (compiled out by "#if 0"): helper that peeks at the first
+   bytes of a compressed frame and fills in a FrameInfo summary. */
+#if 0
+typedef struct tFrameInfo
+{
+ int KeyFrame;
+ int Version;
+ int Quality;
+ int vp30Flag;
+} FrameInfo;
+
+void
+vp60_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+
+ // Is the frame an inter frame or a key frame:
+ // key frame when the top bit of the first byte is clear.
+ frameInfo->KeyFrame = !(source[0] > 0x7f);
+ frameInfo->Quality = source[0] >> 2;
+ // Version is only read (5 bits of the third byte) for key frames.
+ if(frameInfo->KeyFrame)
+ frameInfo->Version = ((source[2]>>3) & 0x1f );
+ else
+ frameInfo->Version = 0;
+
+ frameInfo->vp30Flag = (int)source[1];
+
+}
+#endif
+
+// YUV buffer configuration structure.
+// vp60_decompress() passes a pointer to this struct cast to YUV_BUFFER_CONFIG *,
+// both to VP6_GetYUVConfig() and to the blitter.
+// NOTE(review): that cast presumably relies on the leading fields matching
+// YUV_BUFFER_CONFIG's layout -- confirm against its declaration.
+typedef struct
+{
+ // Luma plane dimensions and stride.
+ int YWidth;
+ int YHeight;
+ int YStride;
+
+ // Chroma plane dimensions and stride (shared by U and V).
+ int UVWidth;
+ int UVHeight;
+ int UVStride;
+
+ // Plane base pointers.
+ char * YBuffer;
+ char * UBuffer;
+ char * VBuffer;
+
+ // Destination chroma placement, filled in by vp60_decompress() when the
+ // target bit depth is DXYV12 or DXI420.
+ char * uvStart;
+ int uvDstArea;
+ int uvUsedArea;
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an algorithm base container: per-xImage state of this decoder glue */
+typedef struct tXImageCODEC
+{
+ FourCC myFourCC; /* FourCC this xImage was created / re-created for */
+ DXV_YUV_BUFFER_CONFIG FrameBuffer; /* filled by VP6_GetYUVConfig() before each blit */
+ PB_INSTANCE *myPBI; /* decoder instance created by VP6_StartDecoder() */
+ int owned; /* set to 1 once VP6_StartDecoder() succeeds; vp60_xImageDestroy() only stops the decoder when set */
+} vp60_XIMAGE, *vp60_XIMAGE_HANDLE;
+
+
+/* Blitter signature as called from vp60_decompress(): destination pointer,
+   destination pitch, source YUV buffer description. */
+typedef void ((*VP6BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp6_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_decompress
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION :
+ *
+ * SPECIAL NOTES :
+ *
+ ****************************************************************************/
+#include "huffman.h"
+
+static int
+vp60_decompress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+    /* Decode the xImage's compressed frame (if it carries one) and, when a
+       vScreen is supplied, blit the resulting YUV frame to it.
+       Returns DXL_OK, DXL_VERSION_CONFLICT, DXL_BAD_DATA or DXL_INVALID_BLIT. */
+    int retVal;
+    vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    unsigned char *cAddr;
+    int cSize;
+    int w, h;
+
+    /* h is used below for the planar chroma layout; w is only fetched */
+    DXL_GetXImageXYWH(src, NULL, NULL, &w, &h);
+    (void) w;
+
+    // if we have a compressed frame decompress it ( otherwise we'll just redo
+    // the scaling and postprocessing from the last frame )
+    cAddr = DXL_GetXImageCDataAddr(src);
+    cSize = DXL_GetXImageCSize(src);
+
+    if(cAddr)
+    {
+        /* The first three bytes are inspected, so require at least that many
+           before touching them; an all-zero start is treated as "no frame". */
+        if((cSize >= 3) && (cAddr[0]>=1 || cAddr[1]>=1 || cAddr[2] >=1))
+        {
+            // decode the frame (VP6_DecodeFrameToYUV takes the instance, the
+            // data pointer and the byte count only)
+            retVal = VP6_DecodeFrameToYUV(thisAlgorithmBase->myPBI, (char *)cAddr, cSize);
+            if(retVal != 0 )
+            {
+                /* -1 means the frame header could not be loaded */
+                if(retVal == -1)
+                    return DXL_VERSION_CONFLICT;
+                else
+                    return DXL_BAD_DATA;
+            }
+        }
+    }
+
+    if (vScreen) /* if there is a vScreen, blit to it */
+    {
+        unsigned char * ptrScrn;
+        short thisPitch, vsHeight;
+        dxvBlitQuality bq;
+        dxvBitDepth bd;
+        VP6BLIT_FUNC blitter;
+
+        DXL_GetVScreenAttributes(vScreen, (void **)&ptrScrn, &bq, &bd, &thisPitch, &vsHeight);
+
+        if(ptrScrn)
+        {
+            int x, y, pSize;
+            int viewX, viewY;
+
+            DXL_GetVScreenView(vScreen, &viewX, &viewY, NULL, NULL);
+
+            /* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+            VP6_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &(thisAlgorithmBase->FrameBuffer));
+
+            pSize = VPX_GetSizeOfPixel(bd);
+
+            DXL_GetXImageXYWH(src, &x, &y, NULL, NULL);
+
+            /* remember to offset if requested */
+            y += viewY;
+            x += viewX;
+
+            ptrScrn += (x * pSize) + (y * thisPitch);
+
+            /* setup ptrs so we can work backwards through Paul's frame buffers:
+               each plane pointer is moved to the start of its last row */
+            #if 1
+            thisAlgorithmBase->FrameBuffer.YBuffer = thisAlgorithmBase->FrameBuffer.YBuffer +
+                ((thisAlgorithmBase->FrameBuffer.YHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.YStride));
+
+            thisAlgorithmBase->FrameBuffer.UBuffer = thisAlgorithmBase->FrameBuffer.UBuffer +
+                ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.UVStride));
+
+            thisAlgorithmBase->FrameBuffer.VBuffer = thisAlgorithmBase->FrameBuffer.VBuffer +
+                ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.UVStride));
+            #endif
+
+            /* stretch blits to non-YUV targets use a doubled pitch */
+            if((bd != DXYUY2) && (bd != DXYV12))
+            {
+                if(bq == DXBLIT_STRETCH)
+                {
+                    thisPitch *= 2;
+                }
+            }
+
+            /* planar targets: tell the blitter where the chroma data goes */
+            if(bd == DXYV12 || bd == DXI420)
+            {
+                if(thisPitch < 0)
+                {
+                    thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 );
+                    thisAlgorithmBase->FrameBuffer.uvDstArea = abs((thisPitch * h)/4);
+                    thisAlgorithmBase->FrameBuffer.uvUsedArea = 0;
+                }
+                else
+                {
+                    thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + (thisPitch * h));
+                    thisAlgorithmBase->FrameBuffer.uvDstArea = ((thisPitch * h)/4);
+                    thisAlgorithmBase->FrameBuffer.uvUsedArea = ((thisPitch * thisAlgorithmBase->FrameBuffer.UVHeight)/2);
+                }
+
+            }
+
+            blitter = (VP6BLIT_FUNC)VPX_GetBlitter(bq, bd);
+
+            /* (void *)-1 is the "no blitter available" marker checked here */
+            if ((void *)blitter != (void *)-1)
+            {
+                blitter(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(&thisAlgorithmBase->FrameBuffer));
+            }
+            else
+            {
+                return DXL_INVALID_BLIT;
+            }
+        }
+    }
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_xImageDestroy
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : close down a decompressor, releasing the wilk decompressor,
+ * the xImage (decompressor), and the intermediate vScreen (surface)
+ *
+ * SPECIAL NOTES :
+ *
+ ****************************************************************************/
+static int
+vp60_xImageDestroy(DXL_XIMAGE_HANDLE src)
+{
+    vp60_XIMAGE_HANDLE base = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    /* nothing attached to this xImage: nothing to release */
+    if ( base == NULL )
+        return DXL_OK;
+
+    /* only stop a decoder this container created itself */
+    if ( base->owned )
+        VP6_StopDecoder ( &(base->myPBI) );
+
+    duck_free ( base );
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_xImageReCreate
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION :
+ *
+ * SPECIAL NOTES :
+ * called during initialization and/or when xImage (decompressor)
+ * attributes change, note that nImage and src are actually
+ * synonymous and should be cleared out a bit (to say the least!)
+ *
+ *
+ * !!!!!!
+ * This function should be prepared to get data that is NOT of the
+ * type native to the decoder, It should do it's best to verify it
+ * as valid data and should clean up after itself and return NULL
+ * if it doesn't recognize the format of the data
+ *
+ ****************************************************************************/
+static void *
+vp60_xImageReCreate(DXL_XIMAGE_HANDLE src, unsigned char *data, int type, enum BITDEPTH bitDepth, int w, int h)
+{
+    /* (Re)start the VP6 decoder behind this xImage for a w x h image.
+       Returns the algorithm base on success, or NULL when the FourCC is not
+       one of VP60 / VP61 / VP62, there is no algorithm base, or the decoder
+       could not be started. */
+    vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    (void) data;
+    (void) bitDepth;
+
+    if (
+        (type != VP60_FOURCC) &&
+        (type != VP61_FOURCC) &&
+        (type != DXL_MKFOURCC( 'V', 'P', '6', '2'))
+       )
+        return NULL;
+
+    /* without a container there is nowhere to keep the decoder instance */
+    if ( thisAlgorithmBase == NULL )
+        return NULL;
+
+    /* VP6_StartDecoder() overwrites myPBI unconditionally, so a decoder left
+       over from an earlier (re)create must be stopped first or it is leaked */
+    if ( thisAlgorithmBase->owned )
+    {
+        VP6_StopDecoder ( &(thisAlgorithmBase->myPBI) );
+        thisAlgorithmBase->owned = 0;
+    }
+
+    thisAlgorithmBase->myFourCC = type;
+
+    /* create new PBI */
+    if ( !VP6_StartDecoder( &(thisAlgorithmBase->myPBI), w, h ) )
+    {
+        vp60_xImageDestroy ( src );
+        thisAlgorithmBase = NULL;
+    }
+    else
+    {
+        thisAlgorithmBase->owned = 1;
+    }
+
+    return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_xImageCreate
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION :
+ *
+ * SPECIAL NOTES : in this "glue" case, just calls through to the create function.
+ *
+ ****************************************************************************/
+static DXL_HANDLE
+vp60_xImageCreate (DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    /* Allocate this decoder's per-xImage container and register the
+       recreate / destroy / decompress / set-parameter callbacks on src.
+       Returns the new container, or NULL when the allocation fails. */
+//  return vp60_xImageReCreate(src, data, VP60_FOURCC, (enum BITDEPTH ) 0, 320, 240);
+    vp60_XIMAGE_HANDLE thisAlgorithmBase;
+
+    (void) data;
+
+    /* create a new xImage, specific to this type of decoder,
+       (see "vp60_XIMAGE" struct above and dxl_main.h);
+       zero-filled, so myPBI starts NULL and owned starts 0 */
+    thisAlgorithmBase = (vp60_XIMAGE_HANDLE)duck_calloc ( 1, sizeof(vp60_XIMAGE), DMEM_GENERAL );
+    if (thisAlgorithmBase == NULL)
+        return NULL;
+
+    DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp60_xImageReCreate);
+
+    DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp60_xImageDestroy);
+
+    DXL_RegisterXImageDx(src, (DX_FUNC) vp60_decompress);
+
+    DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp60_SetParameter);
+
+    /* default FourCC; vp60_xImageReCreate() stores the actual one */
+    thisAlgorithmBase->myFourCC = VP60_FOURCC;
+
+    return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_Init
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION :
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int
+vp60_Init(void)
+{
+    /* FourCCs served by this decoder, registered in this order */
+    const FourCC handled[3] =
+    {
+        VP60_FOURCC,
+        VP61_FOURCC,
+        DXL_MKFOURCC( 'V', 'P', '6', '2')
+    };
+    int i;
+
+    /* the same create routine is registered for every FourCC */
+    for ( i = 0; i < 3; i++ )
+        DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, handled[i]);
+
+    vp3SetBlit();
+
+    /* initialize all the global variables */
+    VP6_VPInitLibrary();
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp60_Exit
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : main exit routine, called during DXL_ExitVideo()
+ * clean up any global information if necessary
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int
+vp60_Exit(void)
+{
+    /* this routine always reports success */
+    int status = DXL_OK;
+
+    /* counterpart of the VP6_VPInitLibrary() call made in vp60_Init() */
+    VP6_VPDeInitLibrary();
+
+    return status;
+}
+/****************************************************************************
+ *
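+ * ROUTINE : vp60_SetParameter
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Forwards a command and its parameter to VP6_SetPbParam() for the decoder instance attached to the xImage.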
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void
+vp60_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter)
+{
+    /* Parameter is declared "unsigned int" to match this file's forward
+       declaration of vp60_SetParameter; the previous spelling "uinptr_t" is
+       not a defined type name and conflicted with that declaration. */
+    vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    /* hand the command to the decoder instance behind this xImage */
+    VP6_SetPbParam(thisAlgorithmBase->myPBI, (PB_COMMAND_TYPE) Command, Parameter );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile
new file mode 100644
index 00000000..7cf2436d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile
@@ -0,0 +1,69 @@
+## Target to build
+
+TARGET =libvp6d
+
+## TOOLS
+CC = ecc
+LD = ecc
+AR = ar
+OBJDUMP = objdump
+RM = rm -f
+
+## Directories
+TOPDIR =C:\DuckSoft
+PRIVATEINCLUDE =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
+VP6INCLUDE =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\include
+CXGENERIC =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\cx\generic
+OBJDIR =${TOPDIR}\ObjectCode\bspvp6e
+CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\vp60\vp60
+LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
+
+## Compile Flags
+ALLINCLUDES =-I${VP6INCLUDE} -I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2}
+VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES =-DMAPCA
+ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
+DEBUG =-O2
+CFLAGS =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+ -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+ -magen_interroutine_padding
+ALLFLAGS = $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS = bsp\boolhuff.o \
+ generic\decodembs.o \
+ generic\decodemode.o \
+ generic\decodemv.o \
+ generic\DFrameR.o \
+ generic\FrameIni.o \
+ generic\Huffman.o \
+ generic\pb_globals.o \
+ generic\quantize.o \
+ generic\recon.o \
+ generic\TokenEntropy.o \
+ bsp\bspQuantize.o \
+ bsp\DSystemDependant.o \
+ bsp\duck_mem.o \
+ generic\vfwpbdll_if.o
+
+SRCS = $(OBJS:.o=.c)
+
+ARTARGET = ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+ ${AR} -cr ${ARTARGET} ${OBJS}
+ mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+ $(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+ ${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c
new file mode 100644
index 00000000..0ac90ff1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c
@@ -0,0 +1,315 @@
+/****************************************************************************
+*
+* Module Title : OptFunctions.c
+*
+* Description : MMX or otherwise processor specific
+* optimised versions of functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+ * 1.08 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.07 JBB 26/01/01 Removed unused function
+* 1.06 YWX 23/05/00 Remove the clamping in MmxReconPostProcess()
+* 1.05 YWX 15/05/00 Added MmxReconPostProcess()
+* 1.04 SJL 03/14/00 Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2.
+* 1.03 PGW 12/10/99 Changes to reduce unnecessary dependencies.
+* 1.02 PGW 30/08/99 Minor changes to MmxReconInterHalfPixel2().
+* 1.01 PGW 13/07/99 Changes to keep reconstruction data to 16 bit
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/*
+ Use Tim's optimized version.
+*/
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT // Strict type checking.
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports.
+*****************************************************************************
+*/
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+INT16 Ones[4] = {1,1,1,1};
+INT16 OneTwoEight[4] = {128,128,128,128};
+UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+/****************************************************************************
+ * 
+ * ROUTINE : ClearSysStateC()
+ *
+ * 
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ *
+ * FUNCTION : Does nothing (the body is intentionally empty).
+ *
+ * SPECIAL NOTES : Presumably the plain-C counterpart of ClearMmx() for
+ * builds/paths that have no MMX state to clear - confirm
+ * against the code that selects between the two.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearSysStateC(void)
+{
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : ClearMmx()
+ *
+ * 
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ *
+ * FUNCTION : Clears down the MMX state by executing a single EMMS
+ * instruction.
+ *
+ * SPECIAL NOTES : The MMX routines in this file do not execute EMMS
+ * themselves (warning 4799 is disabled above for that
+ * reason), so this is the routine that has to be called
+ * before floating point code runs after them.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+ __asm
+ {
+ emms ; Clear the MMX state.
+ }
+}
+
+
+
+/****************************************************************************
+ * 
+ * ROUTINE : MMXReconIntra
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Not referenced by this routine.
+ *
+ * INT16 * idct
+ * Pointer to the output from the idct for this block
+ * (128 bytes are read, 16 bytes per output row)
+ *
+ * INT32 stride
+ * Line Length in pixels in recon and reference images;
+ * added to the destination pointer after each row
+ *
+ *
+ *
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer (8 bytes written per row)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - MMX version.
+ * Per row: eight 16 bit values are packed to bytes with
+ * signed saturation (packsswb) and then XORed with 0x80,
+ * i.e. offset by +128, before being stored.
+ *
+ * SPECIAL NOTES : Tim Murphy's optimized version.
+ * The loop ends when the source pointer reaches idct+128
+ * bytes. The closing jc tests the carry flag produced by
+ * "cmp eax,ecx"; the movq and lea between them do not
+ * modify the flags.
+ * The "Signed 8 bit outputs" remark on the dest load
+ * describes the packed value before the XOR; the bytes
+ * actually stored have had 128 added.
+ * No EMMS is executed here.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MMXReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+ __asm
+ {
+ // u pipe
+ // v pipe
+ mov eax,[idct] ; Signed 16 bit inputs
+ mov edx,[dest] ; Signed 8 bit outputs
+ movq mm0,[Eight128s] ; Set mm0 to 0x8080808080808080
+ ;
+ mov ebx,[stride] ; Line stride in output buffer
+ lea ecx,[eax+128] ; Endpoint in input buffer
+loop_label: ;
+ movq mm2,[eax] ; First four input values
+ ;
+ packsswb mm2,[eax+8] ; pack with next(high) four values
+ por mm0,mm0 ; stall
+ pxor mm2,mm0 ; Convert result to unsigned (same as add 128)
+ lea eax,[eax + 16] ; Step source buffer
+ cmp eax,ecx ; are we done
+ ;
+ movq [edx],mm2 ; store results
+ ;
+ lea edx,[edx+ebx] ; Step output buffer
+ jc loop_label ; Loop back if we are not done
+ }
+ // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : MmxReconInter
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Not used (explicitly cast to void).
+ *
+ * UINT8 * RefPtr
+ * The last frame reference (8 bytes read per row)
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data (128 bytes are read,
+ * 16 bytes per row)
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images; the
+ * same step is applied to RefPtr and ReconPtr
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction (8 bytes written per row)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change.
+ * Per row: the 8 reference bytes are zero-extended to
+ * 16 bit, the changes are added with signed saturation
+ * (paddsw) and the sums are packed back to bytes with
+ * unsigned saturation (packuswb).
+ *
+ * SPECIAL NOTES : The loop ends when the change pointer reaches
+ * ChangePtr+128 bytes; the closing jc uses the carry flag
+ * from "cmp ecx, edi".
+ * Only edi is saved/restored explicitly.
+ * NOTE(review): ebx is overwritten as well - presumably
+ * the compiler preserves it for __asm blocks; confirm.
+ * No EMMS is executed here.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MmxReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ (void) pbi;
+
+ _asm {
+ push edi
+;; mov ebx, [ref]
+;; mov ecx, [diff]
+;; mov eax, [dest]
+;; mov edx, [stride]
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+ pxor mm0, mm0
+ lea edi, [ecx + 128]
+ ;
+ L:
+ movq mm2, [ebx] ; (+3 misaligned) 8 reference pixels
+ ;
+ movq mm4, [ecx] ; first 4 changes
+ movq mm3, mm2
+ movq mm5, [ecx + 8] ; last 4 changes
+ punpcklbw mm2, mm0 ; turn first 4 refs into positive 16-bit #s
+ paddsw mm2, mm4 ; add in first 4 changes
+ punpckhbw mm3, mm0 ; turn last 4 refs into positive 16-bit #s
+ paddsw mm3, mm5 ; add in last 4 changes
+ add ebx, edx ; next row of reference pixels
+ packuswb mm2, mm3 ; pack result to unsigned 8-bit values
+ lea ecx, [ecx + 16] ; next row of changes
+ cmp ecx, edi ; are we done?
+ ;
+ movq [eax], mm2 ; store result
+ ;
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+ pop edi
+ }
+}
+
+
+
+
+
+/****************************************************************************
+ * 
+ * ROUTINE : CopyBlockMMX
+ *
+ * INPUTS : unsigned char *src
+ * First byte of the block to copy
+ *
+ * unsigned int srcstride
+ * Distance in bytes between consecutive rows; the same
+ * value is used for both src and dest
+ *
+ * OUTPUTS : unsigned char *dest
+ * Receives 8 rows of 8 bytes
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies a block from source to destination: 8 rows of
+ * 8 bytes, done as two groups of 4 rows using mm0-mm3.
+ *
+ * SPECIAL NOTES : The arguments are copied into locals (s, d, stride)
+ * before the asm block reads them. edx holds 3*stride so
+ * that the fourth row of each group can be addressed
+ * directly. No EMMS is executed here.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+ unsigned char *s = src;
+ unsigned char *d = dest;
+ unsigned int stride = srcstride;
+ // recon copy 
+ _asm
+ {
+ mov ecx, [stride]
+ mov eax, [s]
+ mov ebx, [d]
+ lea edx, [ecx + ecx * 2]
+
+ movq mm0, [eax] 
+ movq mm1, [eax + ecx] 
+ movq mm2, [eax + ecx*2] 
+ movq mm3, [eax + edx] 
+
+ lea eax, [eax + ecx*4] 
+
+ movq [ebx], mm0 
+ movq [ebx + ecx], mm1 
+ movq [ebx + ecx*2], mm2 
+ movq [ebx + edx], mm3 
+
+ lea ebx, [ebx + ecx * 4]
+
+ movq mm0, [eax] 
+ movq mm1, [eax + ecx] 
+ movq mm2, [eax + ecx*2] 
+ movq mm3, [eax + edx] 
+
+ movq [ebx], mm0 
+ movq [ebx + ecx], mm1 
+ movq [ebx + ecx*2], mm2 
+ movq [ebx + edx], mm3 
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c
new file mode 100644
index 00000000..5ad15136
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c
@@ -0,0 +1,208 @@
+ /****************************************************************************
+ *
+ * Module Title : WmtOptFunctions.c
+ *
+ * Description : willamette processor specific
+ * optimised versions of functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ * Special Note:
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ *
+ * 1.04 JBB 13 Jun 01 VP4 Code Clean Out
+ * 1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * Added push and pop ebx in WmtReconIntra
+ * 1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ * 1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ * 1.00 YWX 14/06/00 Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+
+/*
+ Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+#define STRICT // Strict type checking.
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Imports.
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128}; /* 0x80 in each byte: XOR mask loaded by WmtReconIntra. NOTE(review): #pragma pack governs struct member packing, so it presumably does not align this array - confirm; the only load visible here is an 8 byte movq */
+#pragma pack()
+#else
+_declspec(align(16)) static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128}; /* same mask, explicitly 16 byte aligned */
+#endif
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+ * 
+ * ROUTINE : WmtReconIntra
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Not referenced by this routine.
+ *
+ * INT16 * idct
+ * Pointer to the output from the idct for this block.
+ * 128 bytes are read with movdqa, so the pointer must
+ * be 16 byte aligned.
+ *
+ * INT32 stride
+ * Line Length in pixels in recon and reference images;
+ * added to the destination pointer after each row
+ *
+ *
+ *
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer (8 bytes written per row)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - wmt version.
+ * Per row: eight 16 bit values are packed to bytes with
+ * signed saturation (packsswb against a zero register),
+ * XORed with 0x80 (i.e. offset by +128) and the low
+ * 8 bytes are stored.
+ *
+ * SPECIAL NOTES : xmm0 is loaded with movq, so only its low 8 bytes hold
+ * 0x80; the upper 8 bytes are zero.
+ * The loop ends when the source pointer reaches idct+128
+ * bytes; the closing jc uses the carry flag from
+ * "cmp eax,ecx". ebx is saved/restored explicitly.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+ __asm
+ {
+ 
+ push ebx
+ 
+ mov eax,[idct] ; Signed 16 bit inputs
+ mov edx,[dest] ; Unsigned 8 bit outputs
+ 
+ movq xmm0,QWORD PTR [Eight128s] ; Set xmm0 to 0x000000000000008080808080808080
+ pxor xmm3, xmm3 ; set xmm3 to 0
+ ;
+ mov ebx,[stride] ; Line stride in output buffer
+ lea ecx,[eax+128] ; Endpoint in input buffer
+ 
+loop_label: 
+ 
+ movdqa xmm2,XMMWORD PTR [eax] ; Read the eight inputs
+ packsswb xmm2,xmm3 ; 
+ 
+ pxor xmm2,xmm0 ; Convert result to unsigned (same as add 128)
+ lea eax,[eax + 16] ; Step source buffer
+ 
+ cmp eax,ecx ; are we done
+ movq QWORD PTR [edx],xmm2 ; store results
+ 
+ lea edx,[edx+ebx] ; Step output buffer
+ jc loop_label ; Loop back if we are not done
+ 
+ pop ebx
+ }
+ 
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : WmtReconInter
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Not used (explicitly cast to void).
+ *
+ * UINT8 * RefPtr
+ * The last frame reference (8 bytes read per row)
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data. 128 bytes are read with
+ * movdqa, so the pointer must be 16 byte aligned.
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images; the
+ * same step is applied to RefPtr and ReconPtr
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction (8 bytes written per row)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change.
+ * Per row: the 8 reference bytes are zero-extended to
+ * 16 bit, all 8 changes are added in one paddsw (signed
+ * saturation) and the sums are packed back to bytes with
+ * unsigned saturation.
+ *
+ * SPECIAL NOTES : Two inline remarks were carried over from the MMX
+ * version and do not describe this code: "add in first
+ * 4 changes" (all 8 are added here) and the cycle count
+ * on the closing jc.
+ * Only edi is saved/restored explicitly.
+ * NOTE(review): ebx is overwritten as well - presumably
+ * the compiler preserves it for __asm blocks; confirm.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ (void) pbi;
+
+ _asm {
+ push edi
+ 
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+ 
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+ 
+ pxor xmm0, xmm0
+ lea edi, [ecx + 128]
+ L:
+ movq xmm2, QWORD ptr [ebx] ; (+3 misaligned) 8 reference pixels
+ movdqa xmm4, XMMWORD ptr [ecx] ; 8 changes
+ 
+ punpcklbw xmm2, xmm0 ;
+ 
+ add ebx, edx ; next row of reference pixels
+ paddsw xmm2, xmm4 ; add in first 4 changes
+ 
+ lea ecx, [ecx + 16] ; next row of changes
+ packuswb xmm2, xmm0 ; pack result to unsigned 8-bit values
+ 
+ cmp ecx, edi ; are we done?
+ movq QWORD PTR [eax], xmm2 ; store result
+ 
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+ 
+ pop edi
+ }
+
+}
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c
new file mode 100644
index 00000000..3ec9dcb0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c
@@ -0,0 +1,334 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <windows.h>
+#include "pbdll.h"
+#include "math.h"
+
+#include "vp60dversion.h"
+
+#include "quantize.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#pragma warning(disable:4115)
+
+#define MMX_ENABLED 1
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi );
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_SetPbParam
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * PB_COMMAND_TYPE Command : Command action specifier.
+ * uintptr_t Parameter : Command dependent value. For
+ * PBC_SET_REFERENCEFRAME it is
+ * cast to YUV_BUFFER_CONFIG *;
+ * otherwise it is stored as a
+ * plain setting.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Generalised command interface to decoder.
+ *
+ * SPECIAL NOTES : The body is compiled only when POSTPROCESS is defined;
+ * otherwise the call does nothing. Commands that are not
+ * listed in the switch are ignored.
+ *
+ ****************************************************************************/
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+    {
+        int Level;
+#if defined(_MSC_VER)
+        /* Load figure: 100 * sqrt(frame area) divided by the share of the
+           processor frequency (Parameter percent) that is declared free. */
+        double FrameArea = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+        double SpareMhz = pbi->ProcessorFrequency * Parameter / 100;
+        double Load = 100 * sqrt(1.0*FrameArea) / SpareMhz;
+#else
+        /* Fixed figure for other compilers. */
+        double Load = 100 *10;
+#endif
+
+        /* Map the load figure onto a post-processing level. */
+        if ( Load > 150 )
+            Level = 0;
+        else if ( Load > 100 )
+            Level = 8;
+        else if ( Load > 90 )
+            Level = 4;
+        else if ( Load > 80 )
+            Level = 5;
+        else
+            Level = 6;
+
+        pbi->CPUFree = Parameter;
+        pbi->PostProcessingLevel = Level;
+        break;
+    }
+
+    case PBC_SET_ADDNOISE:
+        pbi->AddNoiseMode = Parameter;
+        //SetAddNoiseMode(pbi->postproc, Parameter);
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+    {
+        /* The supplied frame is copied to both reference buffers. */
+        YUV_BUFFER_CONFIG *NewRef = (YUV_BUFFER_CONFIG *) Parameter;
+
+        CopyFrame( pbi->postproc, NewRef, pbi->LastFrameRecon);
+        CopyFrame( pbi->postproc, NewRef, pbi->GoldenFrame);
+        break;
+    }
+
+    case PBC_SET_POSTPROC:
+        if( Parameter != 9 )
+        {
+            /* Explicit level: switch off the CPU-driven selection. */
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        else
+        {
+            /* Level 9 selects the level automatically from 70% free CPU. */
+            VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        break;
+    }
+#endif
+}
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_readTSC
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : unsigned long *tsc : Pointer to returned counter value.
+ * Only the low 32 bits of the counter (EAX after RDTSC)
+ * are returned; EDX is discarded.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Reads the cpu time stamp counter.
+ *
+ * SPECIAL NOTES : Since this function uses RDTSC instruction, which is
+ * introduced in Pentium processor, this routine is only
+ * expected to work on Pentium and above.
+ * CPUID is executed immediately before RDTSC, presumably
+ * as a serializing instruction - EAX is not set up first,
+ * so its output is not meaningful and is discarded.
+ * PUSHAD/POPAD preserve the general registers that CPUID
+ * and RDTSC overwrite; the value is moved to the local
+ * "time" before POPAD restores EAX.
+ *
+ ****************************************************************************/
+void VP6_readTSC ( unsigned long *tsc )
+{
+ int time;
+ 
+ __asm 
+ {
+ pushad
+ cpuid
+ rdtsc
+ mov time,eax
+ popad
+ }
+ *tsc = time;
+ return;
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_GetProcessorFrequency
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : unsigned long: The processor's frequency (in MHz), rounded
+ * to the nearer of a multiple of 50 MHz or a multiple of
+ * 66.67 MHz. 0 if the high resolution performance counter
+ * is not available or no interval could be measured.
+ *
+ * FUNCTION : Check the Processor's working frequency by counting
+ * time stamp counter ticks over a short busy-wait that is
+ * timed with the performance counter.
+ *
+ * SPECIAL NOTES : This function should only be used here. Limited tests
+ * have verified it works till 166MHz Pentium with MMX.
+ * Only the low 32 bits of the counters are used; the
+ * unsigned differences remain correct across a single
+ * wrap of any of them.
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency( void )
+{
+    LARGE_INTEGER pf;                       // Performance Counter Frequency
+    LARGE_INTEGER startcount, endcount;
+    unsigned long tsc1, tsc2;
+    unsigned long time1;
+    unsigned long elapsed;
+    unsigned long cpufreq = 0;              // stays 0 if nothing can be measured
+    unsigned long Nearest66Mhz, Nearest50Mhz;
+    unsigned long Delta66, Delta50;
+
+    // If the cpu does not support the high resolution counter, cpufreq stays 0
+    if ( QueryPerformanceFrequency( &pf ) )
+    {
+        // read the counter and TSC at start
+        QueryPerformanceCounter ( &startcount );
+        VP6_readTSC ( &tsc1 );
+
+        // busy-wait for 5 ms to get enough accuracy; comparing the elapsed
+        // difference (rather than absolute times) keeps the wait correct
+        // when timeGetTime() wraps round to zero
+        time1 = timeGetTime();
+        while ( (unsigned long)(timeGetTime() - time1) < 5 )
+            ;
+
+        // read the counter and TSC at end
+        QueryPerformanceCounter ( &endcount );
+        VP6_readTSC ( &tsc2 );
+
+        // calculate the frequency; skip the division if the performance
+        // counter did not advance
+        elapsed = endcount.LowPart - startcount.LowPart;
+        if ( elapsed != 0 )
+        {
+            cpufreq = (unsigned long )( (double)(tsc2-tsc1)
+                                        * (double)pf.LowPart
+                                        / (double)elapsed
+                                        / 1000000 );
+        }
+    }
+
+    // Candidate roundings: nearest multiple of 200/3 MHz and of 50 MHz
+    Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+    Nearest50Mhz = ((cpufreq + 25)/50 *50);
+
+    // Absolute distances, computed without passing unsigned values to abs()
+    Delta66 = ( Nearest66Mhz > cpufreq ) ? ( Nearest66Mhz - cpufreq ) : ( cpufreq - Nearest66Mhz );
+    Delta50 = ( Nearest50Mhz > cpufreq ) ? ( Nearest50Mhz - cpufreq ) : ( cpufreq - Nearest50Mhz );
+
+    if ( Delta50 < Delta66 )
+        cpufreq = Nearest50Mhz;
+    else
+    {
+        cpufreq = Nearest66Mhz;
+        if ( cpufreq == 666 )   // integer division truncates 666.67; report it as 667
+            cpufreq = 667;
+    }
+    return cpufreq;
+}
+
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_DMachineSpecificConfig
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Queries the processor feature flags and points
+ * VP6_BuildQuantIndex at the matching implementation:
+ * the WMT version if Willamette support is reported,
+ * else the MMX version if MMX is reported, else the
+ * generic version.
+ *
+ * SPECIAL NOTES : The XMM flag is fetched but not consulted here.
+ *
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+    INT32 HasMmx;
+    INT32 HasXmm;
+    INT32 HasWmt;
+
+    GetProcessorFlags ( &HasMmx, &HasXmm, &HasWmt );
+
+    // Most capable instruction set first
+    if ( HasWmt )
+    {
+        VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForWMT;   // Willamette
+        return;
+    }
+
+    if ( HasMmx )
+    {
+        VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForMMX;   // MMX
+        return;
+    }
+
+    // No instruction set support
+    VP6_BuildQuantIndex = VP6_BuildQuantIndex_Generic;
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_IssueWarning
+ *
+ * INPUTS : char *WarningMessage : Pointer to warning message text.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Issues a warning message by showing it in a message box
+ * with the exclamation icon.
+ *
+ * SPECIAL NOTES : The box has no owner window and no caption (both NULL)
+ * and is task modal (MB_TASKMODAL). The return value of
+ * MessageBox is ignored.
+ *
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+ MessageBox ( NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_PauseProcess
+ *
+ * INPUTS : unsigned int SleepMs : Time (in milli-seconds) to wait.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Pause/Sleep for specified time(in milli-seconds) by
+ * passing SleepMs straight to the Win32 Sleep() call.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_PauseProcess ( unsigned int SleepMs )
+{
+ Sleep ( SleepMs );
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_SytemGlobalAlloc
+ *
+ * INPUTS : unsigned int Size : Size of block of memory (in bytes).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : char *: Pointer to allocated block of memory, exactly as
+ * returned by GlobalAlloc (no check is made here, so a
+ * failed allocation is passed back to the caller).
+ *
+ * FUNCTION : Allocates a block of memory of specified size with
+ * GlobalAlloc( GPTR, ... ).
+ *
+ * SPECIAL NOTES : Release the block with VP6_SystemGlobalFree.
+ * The routine name is spelt "Sytem" (sic); callers depend
+ * on that spelling.
+ *
+ ****************************************************************************/
+char *VP6_SytemGlobalAlloc ( unsigned int Size )
+{
+ return GlobalAlloc( GPTR, Size );
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_SystemGlobalFree
+ *
+ * INPUTS : char *MemPtr : Pointer to block of memory, presumably one
+ * obtained from VP6_SytemGlobalAlloc.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocates a block of memory by passing the pointer,
+ * cast to HGLOBAL, to GlobalFree.
+ *
+ * SPECIAL NOTES : The return value of GlobalFree is ignored.
+ *
+ ****************************************************************************/
+void VP6_SystemGlobalFree ( char* MemPtr )
+{
+ GlobalFree ( (HGLOBAL)MemPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c
new file mode 100644
index 00000000..0bc11412
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c
@@ -0,0 +1,381 @@
+/****************************************************************************
+*
+* Module Title : quantindexmmx.c
+*
+* Description :
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "quantize.h"
+#include "math.h"
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define MIN16 ((1<<16)-1)
+
+/****************************************************************************
+* Module Statics
+*
+* All four tables are permutations of 0..63 (positions in an 8x8 block,
+* numbered row*8+column).
+*
+* dequant_index : zig-zag scan table; entry i is the block position
+* of the i-th coefficient in scan order
+* (0, 1, 8, 16, 9, 2, ...).
+* dequant_indexMMX : inverse permutation of dequant_index, i.e.
+* dequant_indexMMX[dequant_index[i]] == i.
+* transIndexMMX : coefficient order required by the MMX idct (see
+* the comment on the table).
+* transIndexWMT : dequant_index with row and column swapped
+* (0, 8, 1, 2, 9, 16, ...), i.e. the transposed
+* zig-zag scan.
+****************************************************************************/ 
+static UINT32 dequant_index[64] = 
+{ 
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static UINT32 dequant_indexMMX[64] =
+{
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/* Used to unravel the coeffs in the proper order required */
+/* by MMX_idct (see mmxidct.cxx) */
+static UINT32 transIndexMMX[64] =
+{
+ 0, 8, 1, 2, 9, 16, 24, 17,
+ 10, 3, 32, 11, 18, 25, 4, 12,
+ 5, 26, 19, 40, 33, 34, 41, 48,
+ 27, 6, 13, 20, 28, 21, 14, 7,
+
+ 56, 49, 42, 35, 43, 50, 57, 36,
+ 15, 22, 29, 30, 23, 44, 37, 58,
+ 51, 59, 38, 45, 52, 31, 60, 53,
+ 46, 39, 47, 54, 61, 62, 55, 63
+};
+
+static UINT32 transIndexWMT[64] = 
+{
+ 0, 8, 1, 2, 9, 16, 24, 17,
+ 10, 3, 4, 11, 18, 25, 32, 40,
+ 33, 26, 19, 12, 5, 6, 13, 20,
+ 27, 34, 41, 48, 56, 49, 42, 35,
+ 28, 21, 14, 7, 15, 22, 29, 36,
+ 43, 50, 57, 58, 51, 44, 37, 30,
+ 23, 31, 38, 45, 52, 59, 60, 53,
+ 46, 39, 47, 54, 61, 62, 55, 63
+};
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_BuildQuantIndex_ForMMX
+ *
+ * INPUTS : QUANTIZER *pbi : Pointer to quantizer instance.
+ * 
+ * OUTPUTS : pbi->transIndex is pointed at transIndexMMX and
+ * pbi->quant_index is filled in.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Builds the quant_index table in a transposed order:
+ * for every n in 0..BLOCK_SIZE-1,
+ * quant_index[ transIndexMMX[ dequant_indexMMX[n] ] ] = n.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi )
+{
+    INT32 Entry = 0;
+
+    pbi->transIndex = transIndexMMX;
+
+    // invert the (re-ordered) dequant index into the quant index
+    while ( Entry < BLOCK_SIZE )
+    {
+        INT32 Slot = transIndexMMX[dequant_indexMMX[Entry]];
+
+        pbi->quant_index[Slot] = Entry;
+        Entry++;
+    }
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_BuildQuantIndex_ForWMT
+ *
+ * INPUTS : QUANTIZER *pbi : Pointer to quantizer instance.
+ * 
+ * OUTPUTS : pbi->transIndex is pointed at transIndexWMT and
+ * pbi->quant_index is filled in.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Builds the quant_index table in a transposed order:
+ * for every n in 0..BLOCK_SIZE-1,
+ * quant_index[ transIndexWMT[ dequant_indexMMX[n] ] ] = n.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi )
+{
+    INT32 Entry = 0;
+
+    pbi->transIndex = transIndexWMT;
+
+    // invert the (re-ordered) dequant index into the quant index
+    while ( Entry < BLOCK_SIZE )
+    {
+        INT32 Slot = transIndexWMT[dequant_indexMMX[Entry]];
+
+        pbi->quant_index[Slot] = Entry;
+        Entry++;
+    }
+}
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_quantize_wmt
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * INT16 *DCT_block : Pointer to block of DCT coeffs. Read with
+ * movdqa, so it must be 16 byte aligned.
+ * UINT8 bp : Position of block in MB; selects the
+ * colour plane via VP6_QTableSelect[bp].
+ * 
+ * OUTPUTS : Q_LIST_ENTRY *quantized_list : Pointer to block of quantized DCT coeffs.
+ * 128 bytes are zeroed with movdqa, so it must be 16 byte
+ * aligned. (Presumably Q_LIST_ENTRY is a 16 bit type, so
+ * that this covers all 64 entries - confirm.)
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Quantizes an 8x8 block of DCT coefficients.
+ * 1) asm loop: for all 64 coefficients computes
+ * sign(c) * (((|c| + round) * mult) >> 16) into the local
+ * array xyw and clears quantized_list.
+ * 2) The DC entry is quantized in C using the per-plane
+ * zero bin, rounding and coefficient values.
+ * 3) The remaining 63 entries are visited in zig-zag
+ * order; a value from xyw is stored only when the
+ * coefficient's magnitude reaches the zero bin plus the
+ * correction for the current run of zeros.
+ *
+ * SPECIAL NOTES : Only the first 8 entries of qi->round and qi->mult are
+ * loaded; the same 8 values are applied to every group
+ * of 8 coefficients.
+ * DCT_block is not modified.
+ * xyw is unsigned short, so negative results are read
+ * back as values >= 32768 before being assigned to
+ * quantized_list.
+ *
+ ****************************************************************************/
+void VP6_quantize_wmt
+( 
+ QUANTIZER *qi, 
+ INT16 *DCT_block, 
+ Q_LIST_ENTRY *quantized_list,
+ UINT8 bp 
+)
+{
+ UINT32 i, j; 
+ INT32 temp;
+ INT32 ThisDC;
+
+ UINT32 ColourPlane = VP6_QTableSelect[bp];
+ INT32 *QuantRoundPtr = qi->QuantRound[ColourPlane];
+ INT32 QuantCoeffsDC = qi->QuantCoeffs[ColourPlane][0];
+ INT32 *ZBinPtr = qi->ZeroBinSize[ColourPlane];
+ INT16 *round = &qi->round[0];
+ INT16 *mult = &qi->mult[0];
+ INT32 Zrl = 0; // length of the current run of coefficients left at zero
+ INT32 * ZrlCorrection = qi->ZlrZbinCorrections[ColourPlane];
+__declspec(align(16)) unsigned short xyw[64]; // quantized values in block (not zig-zag) order
+
+
+ // the asm below leaves its results in xyw (DCT_block is only read) and zeroes quantized_list
+ __asm
+ {
+ // setup and collect registers
+ mov esi, DCT_block 
+ xor ecx, ecx // byte offset into the blocks
+ mov edi, round
+ movdqu xmm2, [edi] // get the round values
+ mov edi, mult
+ movdqu xmm3, [edi] // get the quantizer values
+ lea edi, xyw
+ 
+ mov eax, quantized_list
+ pxor xmm7, xmm7
+
+ // 8 coefficients at a time loop
+next8:
+ movdqa xmm0, [esi+ecx] // get source values
+ movdqa xmm1, xmm0 // sign bits of the abs values
+ psraw xmm1, 15 // negative all 1's postive all 0's
+
+ // get the absolute value of the input values
+ pxor xmm0, xmm1 // one's complement of negatives
+ psubw xmm0, xmm1 // xmm0 = abs coeffs
+
+ // calculate & round quantizer
+ paddw xmm0, xmm2 // Coeff + Quant Round
+ pmulhuw xmm0, xmm3 // *QuantCoeffs >> 16
+
+
+ // get back the sign bit 
+ pxor xmm0, xmm1 // ones complement of negatives
+ psubw xmm0, xmm1 // negatives are back as negative
+
+ // output the results
+ movdqa [edi+ecx], xmm0 // quantized values -> xyw
+ movdqa [eax+ecx], xmm7 // zero the matching 16 bytes of quantized_list
+
+ // loop back to the next set 
+ add ecx, 16
+ cmp ecx, 128
+ jl next8
+ }
+
+
+ // DC quantization
+ ThisDC = DCT_block[0];
+ if ( ThisDC >= ZBinPtr[0] )
+ {
+ temp = QuantCoeffsDC * ( ThisDC + QuantRoundPtr[0] );
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+ }
+ else if ( ThisDC <= -ZBinPtr[0] )
+ {
+ temp = QuantCoeffsDC * ( ThisDC - QuantRoundPtr[0] ) + MIN16;
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+ }
+ else
+ {
+ Zrl++; // DC falls in the zero bin: entry stays 0 and starts the zero run
+ }
+ 
+
+ // zig-zagify
+ for ( i=1; i<64; i++ )
+ {
+ INT32 x;
+ INT32 y;
+ // Zig Zag order
+ j = dequant_index[i];
+ x = xyw[j];
+ y= abs( DCT_block[j]);
+ if(y<ZBinPtr[j] + ZrlCorrection[Zrl]) // below zero bin + run-length correction: leave entry at 0
+ {
+ Zrl ++;
+ }
+ else
+ {
+ Zrl = 0;
+ quantized_list[i] = x;
+ }
+ }
+
+}
+
+
+/****************************************************************************
+ * 
+ * ROUTINE : VP6_quantize_mmx
+ *
+ * INPUTS : QUANTIZER *qi : Pointer to quantizer instance.
+ * INT16 *DCT_block : Pointer to block of DCT coeffs.
+ * UINT8 bp : Position of block in MB; selects the
+ * colour plane via VP6_QTableSelect[bp].
+ * 
+ * OUTPUTS : Q_LIST_ENTRY *quantized_list : Pointer to block of quantized DCT coeffs.
+ * 128 bytes are zeroed by the asm loop. (Presumably
+ * Q_LIST_ENTRY is a 16 bit type, so that this covers all
+ * 64 entries - confirm.)
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Quantizes an 8x8 block of DCT coefficients.
+ * 1) asm loop: for all 64 coefficients computes
+ * sign(c) * (((|c| + round) * mult) >> 16) into the local
+ * array xyw and clears quantized_list.
+ * 2) The DC entry is quantized in C using the per-plane
+ * zero bin, rounding and coefficient values.
+ * 3) The remaining 63 entries are visited in zig-zag
+ * order; a value from xyw is stored only when the
+ * coefficient's magnitude reaches the zero bin plus the
+ * correction for the current run of zeros.
+ *
+ * SPECIAL NOTES : Only the first 4 entries of qi->round and qi->mult are
+ * loaded; the same 4 values are applied to every group
+ * of 4 coefficients.
+ * pmulhuw on MMX registers is not part of the original
+ * MMX instruction set (it was added with SSE).
+ * No EMMS is executed in this routine.
+ * DCT_block is not modified.
+ * xyw is unsigned short, so negative results are read
+ * back as values >= 32768 before being assigned to
+ * quantized_list.
+ *
+ ****************************************************************************/
+void VP6_quantize_mmx
+( 
+ QUANTIZER *qi, 
+ INT16 *DCT_block, 
+ Q_LIST_ENTRY *quantized_list,
+ UINT8 bp 
+)
+{
+ UINT32 i, j; 
+ INT32 temp;
+ INT32 ThisDC;
+
+ UINT32 ColourPlane = VP6_QTableSelect[bp];
+ INT32 *QuantRoundPtr = qi->QuantRound[ColourPlane];
+ INT32 QuantCoeffsDC = qi->QuantCoeffs[ColourPlane][0];
+ INT32 *ZBinPtr = qi->ZeroBinSize[ColourPlane];
+ INT16 *round = &qi->round[0];
+ INT16 *mult = &qi->mult[0];
+ INT32 Zrl = 0; // length of the current run of coefficients left at zero
+ INT32 * ZrlCorrection = qi->ZlrZbinCorrections[ColourPlane];
+__declspec(align(16)) unsigned short xyw[64]; // quantized values in block (not zig-zag) order
+
+
+ // the asm below leaves its results in xyw (DCT_block is only read) and zeroes quantized_list
+ __asm
+ {
+ // setup and collect registers
+ mov esi, DCT_block 
+ xor ecx, ecx // byte offset into the blocks
+ mov edi, round
+ movq mm2, [edi] // get the round values
+ mov edi, mult
+ movq mm3, [edi] // get the quantizer values
+ lea edi, xyw
+ 
+ mov eax, quantized_list
+ pxor mm7, mm7
+
+ // 4 coefficients (8 bytes) at a time loop
+next8:
+ movq mm0, [esi+ecx] // get source values
+ movq mm1, mm0 // sign bits of the abs values
+ psraw mm1, 15 // negative all 1's postive all 0's
+
+ // get the absolute value of the input values
+ pxor mm0, mm1 // one's complement of negatives
+ psubw mm0, mm1 // mm0 = abs coeffs
+
+ // calculate & round quantizer
+ paddw mm0, mm2 // Coeff + Quant Round
+ pmulhuw mm0, mm3 // *QuantCoeffs >> 16
+
+
+ // get back the sign bit 
+ pxor mm0, mm1 // ones complement of negatives
+ psubw mm0, mm1 // negatives are back as negative
+
+ // output the results
+ movq [edi+ecx], mm0 // quantized values -> xyw
+ movq [eax+ecx], mm7 // zero the matching 8 bytes of quantized_list
+
+ // loop back to the next set 
+ add ecx, 8
+ cmp ecx, 128
+ jl next8
+ }
+
+
+ // DC quantization
+ ThisDC = DCT_block[0];
+ if ( ThisDC >= ZBinPtr[0] )
+ {
+ temp = QuantCoeffsDC * ( ThisDC + QuantRoundPtr[0] );
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+ }
+ else if ( ThisDC <= -ZBinPtr[0] )
+ {
+ temp = QuantCoeffsDC * ( ThisDC - QuantRoundPtr[0] ) + MIN16;
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+ }
+ else
+ {
+ Zrl++; // DC falls in the zero bin: entry stays 0 and starts the zero run
+ }
+ 
+
+ // zig-zagify
+ for ( i=1; i<64; i++ )
+ {
+ INT32 x;
+ INT32 y;
+ // Zig Zag order
+ j = dequant_index[i];
+ x = xyw[j];
+ y= abs( DCT_block[j]);
+ if(y<ZBinPtr[j] + ZrlCorrection[Zrl]) // below zero bin + run-length correction: leave entry at 0
+ {
+ Zrl ++;
+ }
+ else
+ {
+ Zrl = 0;
+ quantized_list[i] = x;
+ }
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c
new file mode 100644
index 00000000..8df2b37f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c
@@ -0,0 +1,147 @@
+/****************************************************************************
+*
+* Module Title : Timer.C
+*
+* Description : Video CODEC timer module
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 PGW 09/07/99 Added code to support profile timing
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#define INC_WIN_HEADER 1
+#include <windows.h>
+
+#include "type_aliases.h"
+#include <mmsystem.h>
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+
+// Used for calculation of elapsed time
+UINT32 LastCPUTime;
+
+/****************************************************************************
+ *
+ * ROUTINE : MyInitTimer
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Timer initialisation hook; the body performs no work.
+ *
+ * SPECIAL NOTES : Neither MyGetTime() nor MyGetElapsedCpuTime() in this
+ * module reads any state that this routine would have to prepare.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MyInitTimer( void )
+{
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : MyGetTime
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : The value of timeGetTime(), a millisecond count.
+ *
+ * FUNCTION : Provides a model independent interface for getting times.
+ *
+ * SPECIAL NOTES : The result is returned unmodified from the system call.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 MyGetTime( void )
+{
+/* Only the Win32 mechanism is present in this file: the value comes from timeGetTime().
+* Original note: the win16 method is accurate to 1ms whilst the Win32 one is not guaranteed to better than 16ms
+*/
+ return timeGetTime();
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MyGetElapsedCpuTime
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Time stamp counter ticks elapsed since the previous call,
+ *           expressed in units of 256 ticks (bits 8..39 of the counter).
+ *
+ * FUNCTION : Samples the counter with RDTSC, reduces it to 32 bits and
+ *            returns the difference from the value kept in LastCPUTime,
+ *            which is then replaced by the new sample.
+ *
+ * SPECIAL NOTES : The first call measures from LastCPUTime's initial value.
+ *                 The difference is taken with unsigned arithmetic, which is
+ *                 modulo 2^32, so one wrap of the reduced counter between
+ *                 two calls still yields the true elapsed count.
+ *                 Assumes UINT32 is an unsigned 32 bit type.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 MyGetElapsedCpuTime( void )
+{
+    UINT32 CurrCPUTime[2];  // Raw 64 bit counter: [0] = low dword, [1] = high dword
+    UINT32 CurrentCpuTime;  // Counter bits 8..39 packed into 32 bits
+    UINT32 ElapsedTime;
+
+    __asm
+    {
+        rdtsc                                   ; Get CPU time into EDX:EAX
+
+        mov dword ptr [CurrCPUTime], eax        ; Low dword into the local buffer
+        mov dword ptr [CurrCPUTime+4], edx      ; High dword into the local buffer
+    }
+
+    // Discard the low 8 bits and bring in the low 8 bits of the high dword
+    CurrentCpuTime = (CurrCPUTime[0] >> 8) | ((CurrCPUTime[1] & 0x000000FF) << 24);
+
+    // No wrap-around branch is needed: unsigned subtraction already wraps correctly
+    ElapsedTime = CurrentCpuTime - LastCPUTime;
+    LastCPUTime = CurrentCpuTime;
+
+    return ElapsedTime;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c
new file mode 100644
index 00000000..99130f88
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c
@@ -0,0 +1,420 @@
+/****************************************************************************
+*
+* Module Title : vp60dxv.c
+*
+* Description : Defines the entry point for the console application.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For Abs()
+#include "pbdll.h"
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+#include "duck_dxl.h"
+
+#include <stddef.h>
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#ifdef _MSC_VER
+#pragma warning(disable:4055)
+#endif
+
+#define VP60_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '0')
+#define VP61_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '1')
+#define VP62_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '2')
+extern int VPX_GetSizeOfPixel(dxvBitDepth bd);
+extern void *VPX_GetBlitter(dxvBlitQuality bq, dxvBitDepth bd);
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+typedef unsigned long FourCC;
+
+typedef struct // YUV buffer configuration; vp60_decompress() passes it to VP6_GetYUVConfig() cast to YUV_BUFFER_CONFIG *
+{
+ int YWidth; // Y plane width, height and row stride
+ int YHeight;
+ int YStride; // exported as y.rowBytes by GetImageBufs()
+
+ int UVWidth; // one width, height and stride shared by the U and V planes
+ int UVHeight;
+ int UVStride; // exported as both u.rowBytes and v.rowBytes by GetImageBufs()
+
+ char *YBuffer; // plane base addresses, exported by GetImageBufs()
+ char *UBuffer;
+ char *VBuffer;
+
+ char *uvStart; // not referenced elsewhere in vp60dxv.c
+ int uvDstArea; // not referenced elsewhere in vp60dxv.c
+ int uvUsedArea; // not referenced elsewhere in vp60dxv.c
+} DXV_YUV_BUFFER_CONFIG;
+
+/* Per-xImage decoder state for this codec; allocated zeroed by vp60_xImageCreate() (see also the core xImage struct) */
+typedef struct tXImageCODEC
+{
+ FourCC myFourCC; // always set to VP60_FOURCC by vp60_xImageCreate()
+ DXV_YUV_BUFFER_CONFIG FrameBuffer; // refreshed by VP6_GetYUVConfig() at the end of vp60_decompress()
+ PB_INSTANCE *myPBI; // decoder instance: started by vp60_xImageReCreate() or supplied via PBC_SET_PBSTRUCT
+ int owned; // 1 when VP6_StartDecoder() created myPBI (we must stop it), 0 when it was supplied from outside
+ int decompressedOnce; // set to 1 after the first successful decode, cleared on create/recreate
+
+} vp60_XIMAGE, *vp60_XIMAGE_HANDLE;
+
+typedef void ((*vp6BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp6_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+* Modul Statics
+****************************************************************************/
+
+/****************************************************************************
+* Forward declarationss
+****************************************************************************/
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src,int Command, uintptr_t Parameter );
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+extern void VP6_readTSC(unsigned long *tsc);
+
+/* Writes the frame width and height held in the decoder instance's
+   Configuration to *w and *h. Always returns DXL_OK. */
+int vp60_getWH(DXL_XIMAGE_HANDLE src, int *w, int *h)
+{
+    vp60_XIMAGE_HANDLE xim = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    PB_INSTANCE *decoder = xim->myPBI;
+
+    *w = decoder->Configuration.VideoFrameWidth;
+    *h = decoder->Configuration.VideoFrameHeight;
+
+    return DXL_OK;
+}
+#if 0
+/****************************************************************************
+*
+* ROUTINE : vp60_GetInfo
+*
+* INPUTS : unsigned char *source : Frame bytes; 0 and 1 are read, plus 2 on key frames.
+*
+* OUTPUTS : FrameInfo *frameInfo : Receives KeyFrame, Quality, Version and vp30Flag.
+*
+* RETURNS : void
+*
+* FUNCTION : Fills frameInfo from the leading bytes of a frame.
+*
+* SPECIAL NOTES : Compiled out by the surrounding #if 0.
+*
+****************************************************************************/
+void vp60_GetInfo ( unsigned char *source, FrameInfo *frameInfo )
+{
+ // Is the frame an inter frame or a key frame? (key frame when byte 0 is <= 0x7f)
+ frameInfo->KeyFrame = !(source[0] > 0x7f);
+ frameInfo->Quality = source[0] >> 2;
+ if ( frameInfo->KeyFrame )
+ frameInfo->Version = ((source[2]>>3) & 0x1f ); // upper five bits of byte 2
+ else
+ frameInfo->Version = 0;
+
+ frameInfo->vp30Flag = (int)source[1];
+}
+#endif
+
+/****************************************************************************
+*
+* ROUTINE : vp60_decompress
+*
+* INPUTS : DXL_XIMAGE_HANDLE src : xImage whose compressed data is decoded.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: DXL_OK on success or when there was nothing to decode,
+*           DXL_VERSION_CONFLICT when the decoder returns -1,
+*           DXL_BAD_DATA for any other non-zero decoder result.
+*
+* FUNCTION : Decodes the xImage's compressed frame (if one is present) and
+*            refreshes FrameBuffer from the decoder via VP6_GetYUVConfig().
+*
+* SPECIAL NOTES : A frame is treated as present when any of its first three
+*                 bytes is non-zero. Only bytes that lie inside the buffer
+*                 are examined, so a 1 or 2 byte buffer is never over-read,
+*                 and a zero or negative size is treated as "no frame".
+*
+****************************************************************************/
+int vp60_decompress ( DXL_XIMAGE_HANDLE src)
+{
+    vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    unsigned char *cAddr = DXL_GetXImageCDataAddr(src);
+    int cSize = DXL_GetXImageCSize(src);
+    int hasFrame = 0;
+    int i;
+
+    // look at no more than three leading bytes, and never beyond cSize
+    if ( cAddr )
+    {
+        for ( i = 0; i < 3 && i < cSize; i++ )
+        {
+            if ( cAddr[i] >= 1 )
+            {
+                hasFrame = 1;
+                break;
+            }
+        }
+    }
+
+    // if we have a compressed frame decompress it ( otherwise the buffers
+    // from the last decoded frame are simply reported again )
+    if ( hasFrame )
+    {
+        int retVal = VP6_DecodeFrameToYUV (
+            thisAlgorithmBase->myPBI,
+            (char *)cAddr,
+            cSize);
+
+        if ( retVal != 0 )
+            return ( retVal == -1 ) ? DXL_VERSION_CONFLICT : DXL_BAD_DATA;
+
+        thisAlgorithmBase->decompressedOnce = 1;
+    }
+
+    //CT>removed blit for size
+    VP6_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &thisAlgorithmBase->FrameBuffer);
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_xImageDestroy
+*
+* INPUTS : DXL_XIMAGE_HANDLE src : xImage whose codec state is released.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: always DXL_OK.
+*
+* FUNCTION : Stops the decoder if this xImage owns it, then frees the
+*            codec state attached to the xImage.
+*
+* SPECIAL NOTES : Does nothing when no codec state is attached.
+*
+****************************************************************************/
+static int vp60_xImageDestroy ( DXL_XIMAGE_HANDLE src )
+{
+    vp60_XIMAGE_HANDLE xim = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    // no codec state attached: nothing to release
+    if ( !xim )
+        return DXL_OK;
+
+    // a decoder supplied from outside (owned == 0) is left running
+    if ( xim->owned )
+        VP6_StopDecoder ( &xim->myPBI );
+
+    duck_free ( xim );
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_xImageReCreate
+*
+* INPUTS : DXL_XIMAGE_HANDLE src : xImage being (re)initialised.
+*          unsigned char *data : Not used by this routine.
+*          int type : FourCC of the stream.
+*          enum BITDEPTH bitDepth : Not used by this routine.
+*          int w, int h : Frame size handed to VP6_StartDecoder().
+*
+* OUTPUTS : None.
+*
+* RETURNS : DXL_HANDLE: the xImage's codec state on success, NULL when the
+*           type is not VP60/VP61/VP62 or the decoder could not be started.
+*
+* FUNCTION : Starts a decoder instance for the xImage and marks it as owned.
+*
+* SPECIAL NOTES : Called during initialization and/or when xImage
+*                 (decompressor) attributes change.
+*
+*                 NOTE:
+*                 This function should be prepared to get data that is
+*                 NOT of the type native to the decoder. It should do
+*                 its best to verify it as valid data and should clean
+*                 up after itself and return NULL if it doesn't recognize
+*                 the format of the data.
+*
+*                 When the decoder fails to start, vp60_xImageDestroy() is
+*                 called, which frees the codec state.
+*
+****************************************************************************/
+static DXL_HANDLE vp60_xImageReCreate
+(
+    DXL_XIMAGE_HANDLE src,
+    unsigned char *data,
+    int type,
+    enum BITDEPTH bitDepth,
+    int w,
+    int h
+)
+{
+    vp60_XIMAGE_HANDLE xim = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    const int isVp6Type = ( type == VP60_FOURCC ) || ( type == VP61_FOURCC ) || ( type == VP62_FOURCC );
+
+    (void) bitDepth;
+
+    // refuse streams this decoder does not handle
+    if ( !isVp6Type )
+        return NULL;
+
+    /* create new PBI */
+    if ( VP6_StartDecoder( &xim->myPBI, w, h ) )
+    {
+        xim->owned = 1;
+        xim->decompressedOnce = 0;
+        return (DXL_HANDLE)xim;
+    }
+
+    // start-up failed: release the codec state and report failure
+    vp60_xImageDestroy ( src );
+    xim = NULL;
+
+    return (DXL_HANDLE)xim;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_xImageCreate
+*
+* INPUTS : DXL_XIMAGE_HANDLE src : xImage that receives the callbacks.
+*          unsigned char *data : Not used by this routine.
+*
+* OUTPUTS : None.
+*
+* RETURNS : DXL_HANDLE: newly allocated codec state, or NULL when the
+*           allocation fails.
+*
+* FUNCTION : Allocates zeroed codec state and registers this codec's
+*            recreate, destroy, decompress and set-parameter callbacks
+*            on the xImage.
+*
+* SPECIAL NOTES : No decoder is started here; that happens in
+*                 vp60_xImageReCreate(). myFourCC is always set to
+*                 VP60_FOURCC, whichever FourCC the creator was invoked for.
+*
+****************************************************************************/
+static DXL_HANDLE vp60_xImageCreate (DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    vp60_XIMAGE_HANDLE thisAlgorithmBase;
+
+    (void) data;
+
+    /* create a new xImage, specific to this type of decoder,
+       (see "vp60_XIMAGE" struct above and dxl_main.h) */
+    thisAlgorithmBase = (vp60_XIMAGE_HANDLE)duck_calloc ( 1, sizeof(*thisAlgorithmBase), DMEM_GENERAL );
+    if ( thisAlgorithmBase == NULL )
+        return NULL;
+
+    DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp60_xImageReCreate);
+
+    DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp60_xImageDestroy);
+
+    DXL_RegisterXImageDx(src, (DX_FUNC) vp60_decompress);
+
+    DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp60_SetParameter);
+
+    thisAlgorithmBase->myFourCC = VP60_FOURCC;
+
+    thisAlgorithmBase->decompressedOnce = 0;
+    return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_Init
+*
+* INPUTS : None.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: always DXL_OK.
+*
+* FUNCTION : Registers vp60_xImageCreate for the VP60, VP61 and VP62 FourCCs, then calls VP6_VPInitLibrary().
+*
+* SPECIAL NOTES : Results of the DXL_RegisterXImage() calls are not examined.
+*
+****************************************************************************/
+int vp60_Init ( void )
+{
+ DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP60_FOURCC);
+ DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP61_FOURCC);
+ DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP62_FOURCC);
+
+ /* initialize all the global variables */
+ VP6_VPInitLibrary();
+
+ return DXL_OK;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_Exit
+*
+* INPUTS : None.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: always DXL_OK.
+*
+* FUNCTION : Main exit routine, called during DXL_ExitVideo();
+* its only action is to call VP6_VPDeInitLibrary().
+*
+* SPECIAL NOTES : None.
+*
+****************************************************************************/
+int vp60_Exit(void)
+{
+ VP6_VPDeInitLibrary();
+
+ return DXL_OK;
+}
+
+/****************************************************************************
+*
+* ROUTINE : vp60_SetParameter
+*
+* INPUTS : DXL_XIMAGE_HANDLE src : xImage whose decoder is configured.
+*          int Command : PB_COMMAND_TYPE value.
+*          uintptr_t Parameter : Command dependent value.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : PBC_SET_PBSTRUCT installs the decoder instance passed in
+*            Parameter; every other command is forwarded to
+*            VP6_SetPbParam().
+*
+* SPECIAL NOTES : An installed instance is marked as not owned, so this
+*                 module will not stop it later.
+*
+****************************************************************************/
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src, int Command, uintptr_t Parameter)
+{
+    vp60_XIMAGE_HANDLE xim = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    // ordinary commands go straight through to the decoder
+    if ( Command != PBC_SET_PBSTRUCT )
+    {
+        VP6_SetPbParam( xim->myPBI, (PB_COMMAND_TYPE)Command, Parameter );
+        return;
+    }
+
+    // stop a decoder we started ourselves before replacing it
+    if ( xim->owned )
+        VP6_StopDecoder ( &xim->myPBI );
+
+    xim->myPBI = (PB_INSTANCE *) Parameter;
+    xim->owned = 0;
+}
+
+//CT: one image plane as handed out by GetImageBufs()
+typedef struct {
+ unsigned char* baseAddr; // plane base address (Y/U/VBuffer of the frame buffer)
+ long rowBytes; // row stride taken from YStride or UVStride
+} YV12_PLANE;
+
+typedef struct { // the three planes filled in by GetImageBufs()
+ YV12_PLANE y; // from YBuffer / YStride
+ YV12_PLANE u; // from UBuffer / UVStride
+ YV12_PLANE v; // from VBuffer / UVStride
+} YV12_PLANES;
+
+/* Copies the plane base addresses and row strides held in the xImage's
+   FrameBuffer into *p. The U and V planes both report UVStride. */
+void GetImageBufs(DXL_XIMAGE_HANDLE x, YV12_PLANES *p)
+{
+    vp60_XIMAGE_HANDLE xim = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(x);
+    const DXV_YUV_BUFFER_CONFIG *fb = &xim->FrameBuffer;
+
+    p->y.baseAddr = (unsigned char *)fb->YBuffer;
+    p->y.rowBytes = fb->YStride;
+
+    p->u.baseAddr = (unsigned char *)fb->UBuffer;
+    p->u.rowBytes = fb->UVStride;
+
+    p->v.baseAddr = (unsigned char *)fb->VBuffer;
+    p->v.rowBytes = fb->UVStride;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c
new file mode 100644
index 00000000..d4c0dfe2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c
@@ -0,0 +1,348 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <windows.h>
+#include "pbdll.h"
+#include "math.h"
+
+#include "vp60dversion.h"
+
+#include "quantize.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#pragma warning(disable:4115)
+
+#define MMX_ENABLED 1
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi );
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_SetPbParam
+ *
+ * INPUTS : PB_INSTANCE *pbi : Pointer to decoder instance.
+ * PB_COMMAND_TYPE Command : Command action specifier.
+ * uintptr_t Parameter : Command dependent value.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Generalised command interface to decoder.
+ *
+ * SPECIAL NOTES : Commands are only compiled in when POSTPROCESS is defined; otherwise the call does nothing.
+ *
+ ****************************************************************************/
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+ switch ( Command )
+ {
+ case PBC_SET_CPUFREE:
+ {
+#if defined(_MSC_VER)
+ double Pixels = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+ double FreeMhz = pbi->ProcessorFrequency * Parameter / 100; // Parameter is used as a percentage of ProcessorFrequency
+ double PixelsPerMhz = 100 * sqrt(1.0*Pixels) / FreeMhz;
+#else
+ double PixelsPerMhz = 100 *10; // fixed 1000, which always selects level 0 below
+#endif
+ pbi->CPUFree = Parameter;
+
+ if( PixelsPerMhz > 150 )
+ pbi->PostProcessingLevel = 0;
+ else if( PixelsPerMhz > 100 )
+ pbi->PostProcessingLevel = 8;
+ else if( PixelsPerMhz > 90 )
+ pbi->PostProcessingLevel = 4;
+ else if( PixelsPerMhz > 80 )
+ pbi->PostProcessingLevel = 5;
+ else
+ pbi->PostProcessingLevel = 6;
+ break;
+ }
+
+ case PBC_SET_ADDNOISE:
+ pbi->AddNoiseMode = Parameter;
+ //SetAddNoiseMode(pbi->postproc, Parameter);
+ break;
+
+ case PBC_SET_REFERENCEFRAME: // Parameter is interpreted as a YUV_BUFFER_CONFIG pointer
+ CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+ CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+ break;
+
+ case PBC_SET_POSTPROC:
+ if( Parameter == 9 ) // level 9 is redirected to the PBC_SET_CPUFREE selection with a value of 70
+ VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+ else
+ {
+ pbi->CPUFree = 0;
+ pbi->PostProcessingLevel = Parameter;
+ }
+ break;
+
+ case PBC_SET_DEINTERLACEMODE:
+ pbi->DeInterlaceMode = Parameter;
+ break;
+
+ case PBC_SET_BLACKCLAMP:
+ pbi->BlackClamp = Parameter;
+ break;
+
+ case PBC_SET_WHITECLAMP:
+ pbi->WhiteClamp = Parameter;
+ break;
+
+ default: // unrecognised commands are ignored
+ break;
+ }
+#endif
+}
+/****************************************************************************
+ *
+ * ROUTINE : VP6_readTSC
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : unsigned long *tsc : Pointer to returned counter value.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Reads the cpu time stamp counter.
+ *
+ * SPECIAL NOTES : Since this function uses RDTSC instruction, which is
+ * introduced in Pentium processor, this routine is only
+ * expected to work on Pentium and above.
+ *
+ ****************************************************************************/
+#ifdef _M_AMD64 // x64 builds: read the counter through the __rdtsc intrinsic
+unsigned __int64 __rdtsc(void);
+#pragma intrinsic(__rdtsc)
+#define _RDTSC __rdtsc
+#else // other builds: read the counter with inline assembly
+
+#define _RDTSC_STACK(ts) \
+ __asm rdtsc \
+ __asm mov DWORD PTR [ts], eax \
+ __asm mov DWORD PTR [ts+4], edx
+
+__inline unsigned __int64 _inl_rdtsc32() {
+ unsigned __int64 t; // receives EAX at offset 0 and EDX at offset 4
+ _RDTSC_STACK(t);
+ return t;
+}
+#define _RDTSC _inl_rdtsc32
+#endif
+
+
+/* Reads the time stamp counter and stores its low 32 bits in *tsc. */
+void VP6_readTSC(unsigned long *tsc)
+{
+    const unsigned __int64 fullCount = _RDTSC();
+
+    // the upper half of the counter is deliberately dropped
+    *tsc = (unsigned long)( fullCount & 0xFFFFFFFF );
+}
+/****************************************************************************
+ *
+ * ROUTINE : VP6_GetProcessorFrequency
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : unsigned long: The processor's frequency (in MHz), or 0 when
+ *           it could not be measured.
+ *
+ * FUNCTION : Measures how many time stamp counter ticks elapse during a
+ *            short busy-wait timed by the performance counter, then snaps
+ *            the result to the nearer of a multiple of 50 MHz or a
+ *            multiple of 66.67 MHz.
+ *
+ * SPECIAL NOTES : This function should only be used here. Limited tests
+ *                 have verified it works till 166MHz Pentium with MMX.
+ *                 Only the low 32 bits of the counters are used; the
+ *                 differences are taken with unsigned arithmetic so a wrap
+ *                 of either counter during the wait does no harm.
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency()
+{
+    LARGE_INTEGER pf;                       // Performance Counter Frequency
+    LARGE_INTEGER startcount, endcount;
+    unsigned long tsc1, tsc2;
+    unsigned long time1;
+    unsigned long counterDelta;
+    unsigned long cpufreq = 0;              // stays 0 when no measurement is possible
+    unsigned long Nearest66Mhz, Nearest50Mhz;
+    unsigned long Delta66, Delta50;
+
+    // without a high resolution counter there is nothing to measure against
+    if ( QueryPerformanceFrequency( &pf ) )
+    {
+        // read the counter and TSC at start
+        QueryPerformanceCounter ( &startcount );
+        VP6_readTSC ( &tsc1 );
+
+        // busy-wait for 5 ms; comparing the elapsed time (not absolute values)
+        // keeps the loop correct when timeGetTime() wraps around
+        time1 = timeGetTime();
+        while ( (unsigned long)( timeGetTime() - time1 ) < 5 )
+            ;
+
+        // read the counter and TSC at end
+        QueryPerformanceCounter ( &endcount );
+        VP6_readTSC ( &tsc2 );
+
+        // calculate the frequency, refusing to divide by a zero interval
+        counterDelta = endcount.LowPart - startcount.LowPart;
+        if ( counterDelta != 0 )
+        {
+            cpufreq = (unsigned long )( (double)(tsc2-tsc1)
+                                        * (double)pf.LowPart
+                                        / (double)counterDelta
+                                        / 1000000 );
+        }
+    }
+
+    // distances are computed on unsigned values, so order the operands by hand
+    Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+    Delta66 = ( Nearest66Mhz > cpufreq ) ? ( Nearest66Mhz - cpufreq ) : ( cpufreq - Nearest66Mhz );
+    Nearest50Mhz = ((cpufreq + 25)/50 *50);
+    Delta50 = ( Nearest50Mhz > cpufreq ) ? ( Nearest50Mhz - cpufreq ) : ( cpufreq - Nearest50Mhz );
+
+    if ( Delta50 < Delta66 )
+        cpufreq = Nearest50Mhz;
+    else
+    {
+        cpufreq = Nearest66Mhz;
+        if ( cpufreq == 666 )               // integer division truncates 666.67
+            cpufreq = 667;
+    }
+    return cpufreq;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_DMachineSpecificConfig
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Selects the VP6_BuildQuantIndex implementation. The check for
+ * machine specific features is compiled out, so the generic one is always used.
+ *
+ * SPECIAL NOTES : The disabled block ends in an "else" that binds to the statement after #endif.
+ *
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+#if 0
+ INT32 MmxEnabled;
+ INT32 XmmEnabled;
+ INT32 WmtEnabled;
+
+ GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+
+ // If MMX supported use MMX version of functions, else use C versions
+ if ( WmtEnabled ) // Willamette
+ VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForWMT;
+ else if ( MmxEnabled ) // MMX
+ VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForMMX;
+ else // No instruction set support
+#endif
+ VP6_BuildQuantIndex = VP6_BuildQuantIndex_Generic; // the only statement compiled while the block above is disabled
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_IssueWarning
+ *
+ * INPUTS : char *WarningMessage : Pointer to warning message text.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Issues a warning message in a task-modal message box.
+ *
+ * SPECIAL NOTES : No owner window and no caption are supplied (both NULL).
+ *
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+ MessageBox ( NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_PauseProcess
+ *
+ * INPUTS : unsigned int SleepMs : Time (in milli-seconds) to wait.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Pause/Sleep for specified time (in milli-seconds) via Sleep().
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_PauseProcess ( unsigned int SleepMs )
+{
+ Sleep ( SleepMs );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_SytemGlobalAlloc
+ *
+ * INPUTS : unsigned int Size : Size of block of memory (in bytes).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : char *: Whatever GlobalAlloc() returns; failure is not checked here.
+ *
+ * FUNCTION : Allocates a block of memory of specified size with GlobalAlloc(GPTR, ...).
+ *
+ * SPECIAL NOTES : The matching release routine in this file is VP6_SystemGlobalFree().
+ *
+ ****************************************************************************/
+char *VP6_SytemGlobalAlloc ( unsigned int Size )
+{
+ return GlobalAlloc( GPTR, Size );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP6_SystemGlobalFree
+ *
+ * INPUTS : char *MemPtr : Pointer to block of memory.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocates a block of memory by passing it to GlobalFree().
+ *
+ * SPECIAL NOTES : The result of GlobalFree() is discarded.
+ *
+ ****************************************************************************/
+void VP6_SystemGlobalFree ( char* MemPtr )
+{
+ GlobalFree ( (HGLOBAL)MemPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj
new file mode 100644
index 00000000..73a81a5a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj
@@ -0,0 +1,385 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{8666A681-2E07-49A5-B23E-EC28D165C63B}</ProjectGuid>
+ <RootNamespace>vp6d</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_DEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_DEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>NDEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>Full</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>NDEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="dx\Generic\boolhuff.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\debug.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodembs.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemode.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemv.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DFrameR.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DSystemDependant.c">
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\FrameIni.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\Huffman.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\pb_globals.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\quantize.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\recon.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\TokenEntropy.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\dsystemdependant.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\quantindexmmx.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\vp60dxv.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win64\dsystemdependant.c">
+ <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+ <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+ <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+ <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+ </ClCompile>
+ <ClCompile Include="xprintf\xprintf.cpp">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters
new file mode 100644
index 00000000..e46cdc7b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Decompress">
+ <UniqueIdentifier>{95dad006-2a54-48a1-baf0-500b01d2a44e}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Win32">
+ <UniqueIdentifier>{ca73ff2b-c2ff-4b2d-8dcf-b2fe83464e0f}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Win64">
+ <UniqueIdentifier>{a92da48f-df62-48b1-9ba1-ffaa42119481}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="dx\Generic\boolhuff.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\debug.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodembs.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemode.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemv.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DFrameR.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DSystemDependant.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\FrameIni.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\Huffman.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\pb_globals.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\quantize.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\recon.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\TokenEntropy.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\dsystemdependant.c">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\quantindexmmx.c">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\vp60dxv.c">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="xprintf\xprintf.cpp">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win64\dsystemdependant.c">
+ <Filter>Win64</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..ee05bb81
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj
@@ -0,0 +1,257 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 0C02406A0BB7912C00AE885C /* boolhuff.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240690BB7912C00AE885C /* boolhuff.c */; };
+ 0C02406D0BB7913500AE885C /* debug.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02406C0BB7913500AE885C /* debug.c */; };
+ 0C0240840BB7916D00AE885C /* DSystemDependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240760BB7916D00AE885C /* DSystemDependant.c */; };
+ 0C0240850BB7916D00AE885C /* FrameIni.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240770BB7916D00AE885C /* FrameIni.c */; };
+ 0C0240880BB7916D00AE885C /* modestats.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407A0BB7916D00AE885C /* modestats.c */; };
+ 0C0240890BB7916D00AE885C /* pb_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407B0BB7916D00AE885C /* pb_globals.c */; };
+ 0C02408A0BB7916D00AE885C /* Huffman.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407C0BB7916D00AE885C /* Huffman.c */; };
+ 0C02408B0BB7916D00AE885C /* quantize.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407D0BB7916D00AE885C /* quantize.c */; };
+ 0C02408C0BB7916D00AE885C /* recon.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407E0BB7916D00AE885C /* recon.c */; };
+ 0C02408D0BB7916D00AE885C /* TokenEntropy.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407F0BB7916D00AE885C /* TokenEntropy.c */; };
+ 0C0240B30BB791FF00AE885C /* vp60dxv.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240B20BB791FF00AE885C /* vp60dxv.c */; };
+ 0C1423C30BB819EB00FDDAB7 /* vfwpbdll_if.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */; };
+ 0C1423D90BB81A1200FDDAB7 /* decodembs.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423D80BB81A1200FDDAB7 /* decodembs.c */; };
+ 0C1423E00BB81A3000FDDAB7 /* decodemode.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DD0BB81A3000FDDAB7 /* decodemode.c */; };
+ 0C1423E10BB81A3000FDDAB7 /* decodemv.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DE0BB81A3000FDDAB7 /* decodemv.c */; };
+ 0C1423E20BB81A3000FDDAB7 /* DFrameR.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 0C0240690BB7912C00AE885C /* boolhuff.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = boolhuff.c; path = dx/Generic/boolhuff.c; sourceTree = "<group>"; };
+ 0C02406C0BB7913500AE885C /* debug.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = debug.c; path = dx/Generic/debug.c; sourceTree = "<group>"; };
+ 0C0240760BB7916D00AE885C /* DSystemDependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DSystemDependant.c; path = dx/Generic/DSystemDependant.c; sourceTree = "<group>"; };
+ 0C0240770BB7916D00AE885C /* FrameIni.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = FrameIni.c; path = dx/Generic/FrameIni.c; sourceTree = "<group>"; };
+ 0C02407A0BB7916D00AE885C /* modestats.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = modestats.c; path = dx/Generic/modestats.c; sourceTree = "<group>"; };
+ 0C02407B0BB7916D00AE885C /* pb_globals.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = pb_globals.c; path = dx/Generic/pb_globals.c; sourceTree = "<group>"; };
+ 0C02407C0BB7916D00AE885C /* Huffman.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = Huffman.c; path = dx/Generic/Huffman.c; sourceTree = "<group>"; };
+ 0C02407D0BB7916D00AE885C /* quantize.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = quantize.c; path = dx/Generic/quantize.c; sourceTree = "<group>"; };
+ 0C02407E0BB7916D00AE885C /* recon.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = recon.c; path = dx/Generic/recon.c; sourceTree = "<group>"; };
+ 0C02407F0BB7916D00AE885C /* TokenEntropy.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = TokenEntropy.c; path = dx/Generic/TokenEntropy.c; sourceTree = "<group>"; };
+ 0C0240B20BB791FF00AE885C /* vp60dxv.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vp60dxv.c; path = dx/Win32/vp60dxv.c; sourceTree = "<group>"; };
+ 0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vfwpbdll_if.c; path = dx/Generic/vfwpbdll_if.c; sourceTree = "<group>"; };
+ 0C1423D80BB81A1200FDDAB7 /* decodembs.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodembs.c; path = dx/Generic/decodembs.c; sourceTree = "<group>"; };
+ 0C1423DD0BB81A3000FDDAB7 /* decodemode.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodemode.c; path = dx/Generic/decodemode.c; sourceTree = "<group>"; };
+ 0C1423DE0BB81A3000FDDAB7 /* decodemv.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodemv.c; path = dx/Generic/decodemv.c; sourceTree = "<group>"; };
+ 0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DFrameR.c; path = dx/Generic/DFrameR.c; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libvp6d.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvp6d.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* vp60 */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = vp60;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 0C0240B20BB791FF00AE885C /* vp60dxv.c */,
+ 0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */,
+ 0C1423D80BB81A1200FDDAB7 /* decodembs.c */,
+ 0C1423DD0BB81A3000FDDAB7 /* decodemode.c */,
+ 0C1423DE0BB81A3000FDDAB7 /* decodemv.c */,
+ 0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */,
+ 0C0240690BB7912C00AE885C /* boolhuff.c */,
+ 0C02406C0BB7913500AE885C /* debug.c */,
+ 0C0240760BB7916D00AE885C /* DSystemDependant.c */,
+ 0C0240770BB7916D00AE885C /* FrameIni.c */,
+ 0C02407A0BB7916D00AE885C /* modestats.c */,
+ 0C02407B0BB7916D00AE885C /* pb_globals.c */,
+ 0C02407C0BB7916D00AE885C /* Huffman.c */,
+ 0C02407D0BB7916D00AE885C /* quantize.c */,
+ 0C02407E0BB7916D00AE885C /* recon.c */,
+ 0C02407F0BB7916D00AE885C /* TokenEntropy.c */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libvp6d.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* vp60 */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vp60" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = vp60;
+ productName = vp60;
+ productReference = D2AAC046055464E500DB518D /* libvp6d.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vp6d" */;
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* vp60 */;
+ projectDirPath = "";
+ targets = (
+ D2AAC045055464E500DB518D /* vp60 */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 0C02406A0BB7912C00AE885C /* boolhuff.c in Sources */,
+ 0C02406D0BB7913500AE885C /* debug.c in Sources */,
+ 0C0240840BB7916D00AE885C /* DSystemDependant.c in Sources */,
+ 0C0240850BB7916D00AE885C /* FrameIni.c in Sources */,
+ 0C0240880BB7916D00AE885C /* modestats.c in Sources */,
+ 0C0240890BB7916D00AE885C /* pb_globals.c in Sources */,
+ 0C02408A0BB7916D00AE885C /* Huffman.c in Sources */,
+ 0C02408B0BB7916D00AE885C /* quantize.c in Sources */,
+ 0C02408C0BB7916D00AE885C /* recon.c in Sources */,
+ 0C02408D0BB7916D00AE885C /* TokenEntropy.c in Sources */,
+ 0C0240B30BB791FF00AE885C /* vp60dxv.c in Sources */,
+ 0C1423C30BB819EB00FDDAB7 /* vfwpbdll_if.c in Sources */,
+ 0C1423D90BB81A1200FDDAB7 /* decodembs.c in Sources */,
+ 0C1423E00BB81A3000FDDAB7 /* decodemode.c in Sources */,
+ 0C1423E10BB81A3000FDDAB7 /* decodemv.c in Sources */,
+ 0C1423E20BB81A3000FDDAB7 /* DFrameR.c in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1DEB91EC08733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vp6d;
+ ZERO_LINK = YES;
+ };
+ name = Debug;
+ };
+ 1DEB91ED08733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ARCHS = (
+ ppc,
+ i386,
+ );
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vp6d;
+ };
+ name = Release;
+ };
+ 1DEB91F008733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../../include ../../../include ../../../../include ../../../../include/vp60";
+ };
+ name = Debug;
+ };
+ 1DEB91F108733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../../include ../../../include ../../../../include ../../../../include/vp60";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vp60" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91EC08733DB70010E9CD /* Debug */,
+ 1DEB91ED08733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vp6d" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91F008733DB70010E9CD /* Debug */,
+ 1DEB91F108733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln
new file mode 100644
index 00000000..3e63ab8f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln
@@ -0,0 +1,23 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vp6e", "vp6e.vcproj", "{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfiguration) = preSolution
+ Debug = Debug
+ Release = Release
+ EndGlobalSection
+ GlobalSection(ProjectDependencies) = postSolution
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+ {9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Debug.ActiveCfg = Debug|Win32
+ {9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Debug.Build.0 = Debug|Win32
+ {9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Release.ActiveCfg = Release|Win32
+ {9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Release.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj
new file mode 100644
index 00000000..91ff6d1a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj
@@ -0,0 +1,626 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="7.10"
+ Name="vp6e"
+ ProjectGUID="{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}"
+ SccProjectName=""
+ SccLocalPath="">
+ <Platforms>
+ <Platform
+ Name="Win32"/>
+ </Platforms>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory=".\..\..\Lib\Win32\Debug"
+ IntermediateDirectory=".\..\..\..\..\ObjectCode\vp6e\debug"
+ ConfigurationType="4"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ OptimizeForProcessor="2"
+ AdditionalIncludeDirectories=".\include,..\include,..\..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\Include\vp60,..\..\..\..\include\vp60"
+ PreprocessorDefinitions="vp6E_EXPORTS;_DEBUG;WIN32;_WINDOWS;_USRDLL;PREDICT_2D;VFW_COMP;COMPDLL;POSTPROCESS;CPUISLITTLEENDIAN;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ PrecompiledHeaderFile=".\..\..\..\..\ObjectCode\vp6e\debug/vp6e.pch"
+ AssemblerListingLocation=".\..\..\..\..\ObjectCode\vp6e\debug/"
+ ObjectFile=".\..\..\..\..\ObjectCode\vp6e\debug/"
+ ProgramDataBaseFileName=".\..\..\..\..\ObjectCode\vp6e\debug/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ DebugInformationFormat="3"
+ CompileAs="0"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\Lib\Win32\Debug\s_vp60e.lib"
+ SuppressStartupBanner="TRUE"/>
+ <Tool
+ Name="VCMIDLTool"/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCXMLDataGeneratorTool"/>
+ <Tool
+ Name="VCManagedWrapperGeneratorTool"/>
+ <Tool
+ Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory=".\..\..\Lib\Win32\Release"
+ IntermediateDirectory=".\..\..\..\..\ObjectCode\vp6e\Release"
+ ConfigurationType="4"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="FALSE"
+ CharacterSet="2">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ OptimizeForProcessor="2"
+ AdditionalIncludeDirectories=".\include,..\include,..\..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\Include\vp60"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;vp6E_EXPORTS;PREDICT_2D;VFW_COMP;COMPDLL;POSTPROCESS;CPUISLITTLEENDIAN;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline"
+ StringPooling="TRUE"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="TRUE"
+ PrecompiledHeaderFile=".\..\..\..\..\ObjectCode\vp6e\Release/vp6e.pch"
+ AssemblerListingLocation=".\..\..\..\..\ObjectCode\vp6e\Release/"
+ ObjectFile=".\..\..\..\..\ObjectCode\vp6e\Release/"
+ ProgramDataBaseFileName=".\..\..\..\..\ObjectCode\vp6e\Release/"
+ WarningLevel="3"
+ SuppressStartupBanner="TRUE"
+ CompileAs="0"/>
+ <Tool
+ Name="VCCustomBuildTool"/>
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\Lib\Win32\Release\s_vp60e.lib"
+ SuppressStartupBanner="TRUE"/>
+ <Tool
+ Name="VCMIDLTool"/>
+ <Tool
+ Name="VCPostBuildEventTool"/>
+ <Tool
+ Name="VCPreBuildEventTool"/>
+ <Tool
+ Name="VCPreLinkEventTool"/>
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"/>
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"/>
+ <Tool
+ Name="VCXMLDataGeneratorTool"/>
+ <Tool
+ Name="VCManagedWrapperGeneratorTool"/>
+ <Tool
+ Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Compress"
+ Filter="">
+ <File
+ RelativePath="CX\Generic\Comp_Globals.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\CSystemDependant.c">
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="TRUE">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="TRUE">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\Encode.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\encodembs.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\encodemode.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\encodemv.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\fullframefdct.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="CX\Generic\mcomp.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="CX\Generic\misc_common.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\PackVideo.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\PickModes.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\RawBuffer.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\Tokenize.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\Transform.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Generic\twopass.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="CX\Generic\vfwcomp.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="CX\Generic\vfwcomp_if.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Win32"
+ Filter="">
+ <File
+ RelativePath="cx\Win32\COptFunctions.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\csystemdependant.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\CWmtFunctions.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\MmxEncodeMath.asm">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+ Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)/$(InputName).obj"/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\WmtTransform.c">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\XmmGetError.asm">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+ Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)/$(InputName).obj"/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="CX\Win32\XmmGetSAD8.asm">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+ Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)/$(InputName).obj"/>
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="cx\Win32\XmmSAD.asm">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+ Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)/$(InputName).obj"/>
+ </FileConfiguration>
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp b/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp
new file mode 100644
index 00000000..d775bf72
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp
@@ -0,0 +1,139 @@
+/****************************************************************************
+*
+* Module Title : xprintf.cpp
+*
+* Description : Display a printf style message on the current video frame.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdarg.h>
+#include <windows.h>
+#include "xprintf.h"
+
+/****************************************************************************
+ *
+ *  ROUTINE       : vp6_xprintf
+ *
+ *  INPUTS        : const PB_INSTANCE *ppbi : Pointer to decoder instance.
+ *                  long nPixel             : Offset into buffer to write text.
+ *                  const char *format      : Format string for print.
+ *                  ...                     : Variable length argument list.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : int: Length (in characters) of the formatted text, or 0
+ *                  if the text could not be drawn. Always 0 when built
+ *                  with _WIN32_WCE defined, because nothing is drawn there.
+ *
+ *  FUNCTION      : Display a printf style message on the current video frame.
+ *                  The text is rendered into a temporary 1 bit-per-pixel GDI
+ *                  bitmap (8 rows, 8 columns per character) and every
+ *                  non-zero pixel is written as 255 into the post-processing
+ *                  buffer, starting at offset nPixel and advancing one
+ *                  YStride per row.
+ *
+ *  SPECIAL NOTES : The formatted text is truncated to 255 characters.
+ *                  No bounds check is made against the destination buffer;
+ *                  the caller must make sure 8 rows of 8 * strlen(text)
+ *                  bytes fit at nPixel.
+ *
+ ****************************************************************************/
+int vp6_xprintf ( const PB_INSTANCE *ppbi, long nPixel, const char *format, ... )
+{
+    BOOL    bRC;
+    va_list arglist;
+
+    // Both handles start out NULL so that the cleanup code at Exit never
+    // tests an uninitialised value when an early failure jumps there.
+    HFONT   hfont  = NULL;
+    HFONT   hfonto = NULL;
+
+    int     rc = 0;
+    int     nChars;
+    long    nStride = ppbi->Configuration.YStride;
+    char    szFormatted[256] = "";
+    UINT8  *pDest = &ppbi->PostProcessBuffer[nPixel];
+
+    // Format text. _vsnprintf does not write a terminator when the output
+    // fills the buffer, so keep the last byte back and terminate explicitly.
+    va_start ( arglist, format );
+    _vsnprintf ( szFormatted, sizeof(szFormatted) - 1, format, arglist );
+    va_end ( arglist );
+    szFormatted[sizeof(szFormatted) - 1] = '\0';
+
+    nChars = (int) strlen ( szFormatted );
+
+#if defined (_WIN32_WCE)
+#else
+    // Set up temporary bitmap
+    HDC     hdcMemory = NULL;
+    HBITMAP hbmTemp   = NULL;
+    HBITMAP hbmOrig   = NULL;
+    long    x;
+    long    y;
+
+    RECT rect;
+    rect.left   = 0;
+    rect.top    = 0;
+    rect.right  = 8 * nChars;
+    rect.bottom = 8;
+
+    hdcMemory = CreateCompatibleDC ( NULL );
+    if ( hdcMemory == NULL )
+        goto Exit;
+
+    hbmTemp = CreateBitmap ( rect.right, rect.bottom, 1, 1, NULL );
+    if ( hbmTemp == NULL )
+        goto Exit;
+
+    hbmOrig = static_cast<HBITMAP>(SelectObject(hdcMemory, hbmTemp));
+    if ( !hbmOrig )
+        goto Exit;
+
+    // Write text into bitmap
+    hfont = CreateFont ( 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "" );
+    if ( hfont == NULL )
+        goto Exit;
+
+    // Select the new font (not the bitmap a second time) and keep the font
+    // that was in the DC before, so that it can be put back during cleanup.
+    hfonto = static_cast<HFONT>(SelectObject(hdcMemory, hfont));
+    if ( !hfonto )
+        goto Exit;
+
+    SetTextColor ( hdcMemory, 1 );
+    SetBkColor ( hdcMemory, 0 );
+    SetBkMode ( hdcMemory, TRANSPARENT );
+
+    // Clear the bitmap before drawing
+    bRC = BitBlt ( hdcMemory, rect.left, rect.top, rect.right, rect.bottom, hdcMemory, rect.left, rect.top, BLACKNESS );
+    if ( !bRC )
+        goto Exit;
+
+    bRC = ExtTextOut ( hdcMemory, 0, 0, ETO_CLIPPED, &rect, szFormatted, nChars, NULL );
+    if ( !bRC )
+        goto Exit;
+
+    // Copy bitmap to video frame. Bitmap rows are read from the bottom up
+    // while the destination advances one stride per row.
+    // NOTE(review): presumably the frame buffer is stored bottom-up - confirm.
+    for ( y=rect.top; y<rect.bottom; ++y )
+    {
+        for ( x=rect.left; x<rect.right; ++x )
+        {
+            if ( GetPixel( hdcMemory, x, rect.bottom - 1 - y ) )
+                pDest[x] = 255;
+        }
+        pDest += nStride;
+    }
+
+    rc = nChars;
+
+Exit:
+
+    // Put the DC's original objects back before deleting ours.
+    if ( hbmTemp != NULL )
+    {
+        if ( hbmOrig != NULL )
+            SelectObject ( hdcMemory, hbmOrig );
+        DeleteObject ( hbmTemp );
+    }
+    if ( hfont != NULL )
+    {
+        if ( hfonto != NULL )
+            SelectObject ( hdcMemory, hfonto );
+        DeleteObject ( hfont );
+    }
+
+    if ( hdcMemory != NULL )
+        DeleteDC ( hdcMemory );
+#endif
+
+    return rc;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj
new file mode 100644
index 00000000..413368c7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj
@@ -0,0 +1,308 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{9E8FD088-3571-4BCD-896D-8DBFEC3042FC}</ProjectGuid>
+ <RootNamespace>dxv</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>16.0.32002.118</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Vcpkg">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Vcpkg">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4013;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4013;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <WarningLevel>Level4</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\dxlvinfd.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\dxl_attr.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\dxl_main.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\dxl_reg.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\dxv_init.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\vscreen.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\ximage.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\dxAccurateTime.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\icmdxv.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters
new file mode 100644
index 00000000..e5519660
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="generic">
+ <UniqueIdentifier>{12c2bb0c-53fa-442f-812f-1e57762a8b14}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{16f6450f-7013-49fe-a82b-a2a13821ec8c}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\dxl_attr.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\dxl_main.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\dxl_reg.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\dxlvinfd.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\dxv_init.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\icmdxv.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\vscreen.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\ximage.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\dxAccurateTime.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c
new file mode 100644
index 00000000..59cf2d61
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c
@@ -0,0 +1,33 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "dxl_main.h"
+
+/*
+ * Store a new blit quality in a virtual screen.
+ *
+ * dst         - virtual screen to update; passed through validate() before
+ *               it is dereferenced (validate's failure behaviour is defined
+ *               outside this file).
+ * blitquality - value written to dst->bq.
+ *
+ * Returns the value dst->bq held before the call.
+ */
+int DXL_SetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst, enum BLITQUALITY blitquality)
+{
+    int previous;
+
+    validate(dst);
+
+    previous = dst->bq;
+    dst->bq = blitquality;
+    return previous;
+}
+
+/*
+ * Read the blit quality of a virtual screen.
+ *
+ * Returns dst->bq, or DXBLIT_SAME when dst is NULL.
+ */
+enum BLITQUALITY DXL_GetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst)
+{
+    if (!dst)
+        return DXBLIT_SAME;
+
+    return dst->bq;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c
new file mode 100644
index 00000000..14adb534
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c
@@ -0,0 +1,69 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"
+
+static DXL_VSCREEN_HANDLE vScreens = NULL;
+static int maxScreens;
+
+/*
+ * Optionally pre-allocate a pool of virtual screens.
+ *
+ * When PRE_ALLOCATE is enabled, a zeroed array of lmaxScreens DXL_VSCREEN
+ * entries is allocated and remembered in vScreens/maxScreens. Otherwise the
+ * function does nothing.
+ *
+ * Returns DXL_OK, or DXL_ALLOC_FAILED if the pool could not be allocated.
+ */
+int preallocVScreens(int lmaxScreens)
+{
+    (void) lmaxScreens; /* unused when PRE_ALLOCATE is off */
+#if PRE_ALLOCATE
+    maxScreens = lmaxScreens;
+    vScreens = (DXL_VSCREEN_HANDLE)duck_calloc(maxScreens,sizeof(DXL_VSCREEN),DMEM_GENERAL);
+
+    if (!vScreens)
+        return DXL_ALLOC_FAILED;
+#endif
+    return DXL_OK;
+}
+
+/*
+ * Release the pre-allocated virtual screen pool (PRE_ALLOCATE builds only).
+ *
+ * Every pool entry is handed to DXL_DestroyVScreen, then the pool itself is
+ * freed. The pool pointer and its size are cleared afterwards so that a
+ * later vScreenCreate() or a second freeVScreens() cannot touch the freed
+ * memory.
+ */
+void freeVScreens(void)
+{
+#if PRE_ALLOCATE
+    int i;
+
+    if (vScreens)
+    {
+        for (i = 0; i < maxScreens; i++)
+            DXL_DestroyVScreen(&vScreens[i]);
+
+        duck_free(vScreens);
+        vScreens = NULL;
+        maxScreens = 0;
+    }
+#endif
+}
+
+/*
+ * Obtain a virtual screen structure.
+ *
+ * In PRE_ALLOCATE builds with a pool present, the first pool entry whose
+ * dkFlags.inUse is clear is returned as-is. Otherwise (or when the pool is
+ * full) a zeroed DXL_VSCREEN is allocated and its dkFlags.allocated bit set.
+ *
+ * Returns the screen, or NULL if the allocation failed.
+ */
+DXL_VSCREEN_HANDLE vScreenCreate(void)
+{
+    DXL_VSCREEN_HANDLE screen;
+
+#if PRE_ALLOCATE
+    if (vScreens != NULL)
+    {
+        int slot;
+
+        for (slot = 0; slot < maxScreens; slot++)
+        {
+            if (!vScreens[slot].dkFlags.inUse)
+                return &vScreens[slot];
+        }
+    }
+#endif
+
+    screen = (DXL_VSCREEN_HANDLE)duck_calloc(1,sizeof(DXL_VSCREEN),DMEM_GENERAL);
+    if (screen != NULL)
+        screen->dkFlags.allocated = 1;
+
+    return screen;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c
new file mode 100644
index 00000000..38d1c28c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c
@@ -0,0 +1,236 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/********
+
+ DXL_REG.C - functions for registration of "Blit" functions
+ (C)1996 The Duck Corporation
+
+********/
+#include <assert.h>
+#include <dxl_main.h>
+#include <stdio.h>
+#include <string.h>
+#include "duck_mem.h"
+
+/* The three callbacks registered for one (blit format, internal format) pair. */
+/* Note: DXL_BLITTER_HANDLE is declared as a second name for the struct */
+/* itself, not as a pointer to it. */
+typedef struct tBlitStruct {
+ blitFunc setup, blit, exit;
+} DXL_BLITTER, DXL_BLITTER_HANDLE;
+
+/* Next blit format value DXL_ReserveBlitter() will hand out. */
+/* NOTE(review): starts at 1 here but resetBlitters() sets it to 0 - confirm */
+/* which starting value is intended. */
+static int nextBlitter = 1;
+ /**************/
+/* Callbacks indexed by [blit format][internal format]; 32 blit formats max. */
+static DXL_BLITTER blitTable[32][DXL_MAX_IFORMATS];
+/* Blit format chosen for each [blit quality][bit depth]; -1 means unset. */
+static signed char blitTranslateTable[MAX_BQUALITIES][MAX_CDEPTHS];
+
+/* Do-nothing function returned by DXL_GetBlitFunc() for DXL_NULL_IFORMAT. */
+static void nullBlitter(void){}
+
+/* Internal format per algorithm handle (8 entries), all unset initially. */
+/* Filled in by dxl_RegisterInternalFormat(). */
+static DXL_INTERNAL_FORMAT iFormats[] = {
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT,
+ DXL_NULL_IFORMAT
+};
+/*
+ * Look up the internal format registered for a FOURCC.
+ *
+ * The FOURCC is translated to an algorithm handle with dxl_GetAlgHandle();
+ * the handle indexes iFormats[]. Handles outside the table (including the
+ * -1 "not found" value) yield DXL_NULL_IFORMAT instead of reading past the
+ * array.
+ */
+DXL_INTERNAL_FORMAT dxl_GetFOURCCInternalFormat(unsigned long fourcc)
+{
+    const int formatCount = (int)(sizeof(iFormats) / sizeof(iFormats[0]));
+    int aHandle = dxl_GetAlgHandle(fourcc);
+
+    if (aHandle >= 0 && aHandle < formatCount)
+        return iFormats[aHandle];
+
+    return DXL_NULL_IFORMAT;
+}
+
+/*
+ * Record the internal format used by an algorithm handle.
+ *
+ * xHandle - index into iFormats[]; must lie inside the table.
+ * xFormat - format stored for that handle.
+ *
+ * Returns DXL_OK on success, or -1 when xHandle is outside the table (the
+ * table is left untouched in that case rather than written out of bounds).
+ */
+int dxl_RegisterInternalFormat(int xHandle, DXL_INTERNAL_FORMAT xFormat)
+{
+    const int formatCount = (int)(sizeof(iFormats) / sizeof(iFormats[0]));
+
+    if (xHandle < 0 || xHandle >= formatCount)
+        return -1;
+
+    iFormats[xHandle] = xFormat;
+
+    return DXL_OK;
+}
+
+/*
+ * Hand out the next unused blit format value.
+ *
+ * Returns the reserved value and advances the counter, or -1 once all 32
+ * values have been handed out.
+ */
+DXL_BLIT_FORMAT DXL_ReserveBlitter(void)
+{
+    int reserved;
+
+    if (nextBlitter >= 32)
+        return -1; /*DXL_EXCEEDED_MAX_BLITTERS;*/
+
+    reserved = nextBlitter;
+    nextBlitter = reserved + 1;
+
+    return reserved;
+}
+
+/*
+ * Return the blit format mapped to a (blit quality, bit depth) pair,
+ * reserving a new one with DXL_ReserveBlitter() the first time the pair is
+ * requested (i.e. while its table entry is still -1).
+ */
+DXL_BLIT_FORMAT DXL_OverrideBlitter(enum BLITQUALITY bq,enum BITDEPTH bd)
+{
+    signed char *entry = &blitTranslateTable[bq][bd];
+
+    if (*entry == (signed char)-1)
+        *entry = (char)DXL_ReserveBlitter();
+
+    return *entry;
+}
+
+/*
+ * Register the setup/blit/exit callbacks for one (destination blit format,
+ * source internal format) pair.
+ *
+ * dFormat - blit format previously obtained from DXL_ReserveBlitter().
+ * sFormat - internal format of the source, below DXL_MAX_IFORMATS.
+ *
+ * Returns 0 on success, or -1 when either index is out of range. Negative
+ * indices are rejected as well, e.g. the -1 that DXL_ReserveBlitter()
+ * returns on failure, so the table is never written out of bounds.
+ */
+int DXL_RegisterBlitter(DXL_BLIT_FORMAT dFormat, DXL_INTERNAL_FORMAT sFormat,
+    blitFunc blit, blitFunc setup, blitFunc exit)
+{
+    if (((int)dFormat < 0) || (dFormat >= nextBlitter) ||
+        ((int)sFormat < 0) || (sFormat >= DXL_MAX_IFORMATS))
+        return -1; /*DXL_INVALID_BLIT_FORMAT;*/
+
+    blitTable[dFormat][sFormat].setup = setup;
+    blitTable[dFormat][sFormat].exit = exit;
+    blitTable[dFormat][sFormat].blit = blit;
+
+    return 0; /*DXL_OK;*/
+}
+
+/*
+ * Determine the internal format of an xImage for a given virtual screen.
+ *
+ * The image's own internalFormat callback is asked first; if it answers
+ * DXL_NULL_IFORMAT, the format registered for the image's FOURCC is used.
+ */
+DXL_INTERNAL_FORMAT DXL_GetXImageInternalFormat(DXL_XIMAGE_HANDLE xImage,
+    DXL_VSCREEN_HANDLE vScreen)
+{
+    int format = xImage->internalFormat(xImage,vScreen);
+
+    if (format != DXL_NULL_IFORMAT)
+        return (DXL_INTERNAL_FORMAT ) format;
+
+    return (DXL_INTERNAL_FORMAT )
+        dxl_GetFOURCCInternalFormat(DXL_GetXImageFOURCC(xImage));
+}
+
+/*
+ * Map a virtual screen's bit depth to the internal line format used when
+ * the screen is a blit source: DXRGB16 gives DXL_LINE16, DXRGB8 and
+ * DXHALFTONE8 give DXL_LINE8, anything else gives -1.
+ */
+DXL_INTERNAL_FORMAT DXL_GetVScreenInternalFormat(DXL_VSCREEN_HANDLE vScreen)
+{
+    if (vScreen->bd == DXRGB16)
+        return DXL_LINE16;
+
+    if (vScreen->bd == DXRGB8 || vScreen->bd == DXHALFTONE8)
+        return DXL_LINE8;
+
+    return (DXL_INTERNAL_FORMAT) -1;
+}
+
+/*
+ * Fetch the blit callback registered for copying virtual screen src onto
+ * virtual screen dst. Neither index is range-checked here.
+ */
+blitFunc DXL_GetVBlitFunc(DXL_VSCREEN_HANDLE src,DXL_VSCREEN_HANDLE dst)
+{
+    DXL_BLIT_FORMAT dstFormat = DXL_GetVScreenBlitFormat(dst);
+    DXL_INTERNAL_FORMAT srcFormat = DXL_GetVScreenInternalFormat(src);
+
+    return blitTable[dstFormat][srcFormat].blit;
+}
+
+/*
+ * Fetch the setup callback registered for copying virtual screen src onto
+ * virtual screen dst. Neither index is range-checked here.
+ */
+blitFunc DXL_GetVBlitSetupFunc(DXL_VSCREEN_HANDLE src,DXL_VSCREEN_HANDLE dst)
+{
+    DXL_BLIT_FORMAT dstFormat = DXL_GetVScreenBlitFormat(dst);
+    DXL_INTERNAL_FORMAT srcFormat = DXL_GetVScreenInternalFormat(src);
+
+    return blitTable[dstFormat][srcFormat].setup;
+}
+
+/*
+ * Fetch the blit callback for drawing xImage onto vScreen.
+ *
+ * Returns (blitFunc)-1 when the screen has no blit format, nullBlitter when
+ * the image has no internal format, and the registered callback otherwise.
+ * Both formats are looked up before either is tested.
+ */
+blitFunc DXL_GetBlitFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{
+    DXL_BLIT_FORMAT i = DXL_GetVScreenBlitFormat(vScreen);
+    DXL_INTERNAL_FORMAT j = DXL_GetXImageInternalFormat(xImage,vScreen);
+
+    if (i == -1)
+        return (blitFunc)-1;
+
+    if (j != DXL_NULL_IFORMAT)
+        return blitTable[i][j].blit;
+
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+    return (blitFunc)nullBlitter;
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+}
+
+/* Fetch the setup callback registered for drawing xImage onto vScreen. */
+/* The table is indexed by the screen's blit format and the image's */
+/* internal format; neither index is range-checked here (compare */
+/* DXL_GetBlitFunc, which tests for -1 and DXL_NULL_IFORMAT first). */
+void *DXL_GetBlitSetupFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{
+ return blitTable[DXL_GetVScreenBlitFormat(vScreen)]
+ [DXL_GetXImageInternalFormat(xImage,vScreen)].setup;
+}
+
+/* Fetch the exit callback registered for drawing xImage onto vScreen. */
+/* The table is indexed by the screen's blit format and the image's */
+/* internal format; neither index is range-checked here (compare */
+/* DXL_GetBlitFunc, which tests for -1 and DXL_NULL_IFORMAT first). */
+void *DXL_GetBlitExitFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{
+ return blitTable[DXL_GetVScreenBlitFormat(vScreen)]
+ [DXL_GetXImageInternalFormat(xImage,vScreen)].exit;
+}
+
+/*
+ * Return the blit format of a virtual screen.
+ *
+ * An explicit vScreen->blitFormat (anything other than -1) wins; otherwise
+ * the format is taken from blitTranslateTable using the screen's blit
+ * quality and bit depth.
+ */
+DXL_BLIT_FORMAT DXL_GetVScreenBlitFormat(DXL_VSCREEN_HANDLE vScreen)
+{
+    if (vScreen->blitFormat == (signed char)-1)
+    {
+        enum BLITQUALITY quality = DXL_GetVScreenBlitQuality(vScreen);
+
+        return blitTranslateTable[quality][vScreen->bd];
+    }
+
+    return vScreen->blitFormat;
+}
+
+/*
+ * Forget every registered blitter: the reservation counter goes back to 0
+ * and both lookup tables are filled with 0xFF bytes, so every translate
+ * entry reads as -1 (unset).
+ * NOTE(review): the counter is initialised to 1 at file scope but reset to
+ * 0 here - confirm which value is intended.
+ */
+void resetBlitters(void)
+{
+    nextBlitter = 0;
+
+    duck_memset(blitTranslateTable,-1,sizeof(blitTranslateTable));
+    duck_memset(blitTable,-1,sizeof(blitTable));
+}
+
+
+
+/*
+ * Check whether images of a given FOURCC can be blitted to a screen of the
+ * given bit depth and blit quality.
+ *
+ * A temporary xImage and a temporary virtual screen are created, checked
+ * with DXL_CheckdxImageToVScreen() and destroyed again.
+ *
+ * Returns the result of that check, or DXL_INVALID_BLIT if either temporary
+ * object could not be created (which also trips an assert in debug builds).
+ */
+int DXL_CheckFCCToVScreenFormat(unsigned long FCC,enum BITDEPTH format, enum BLITQUALITY bq)
+{
+    DXL_XIMAGE_HANDLE src;
+    DXL_VSCREEN_HANDLE dst;
+    int ret = DXL_INVALID_BLIT;
+
+    src = DXL_CreateXImageOfType(NULL,FCC);
+    assert(src != NULL);
+
+    if (!src)
+        return ret;
+
+    /* Placeholder address and size for the throw-away screen. */
+    /* NOTE(review): presumably never dereferenced by the check - confirm. */
+    dst = DXL_CreateVScreen(
+        (unsigned char *)0xDEADBEEF, format, 1280,480);
+    assert(dst != NULL);
+
+    if (dst)
+    {
+        dst->bq = bq;
+        ret = DXL_CheckdxImageToVScreen(src, dst);
+        DXL_DestroyVScreen(dst);
+    }
+
+    DXL_DestroyXImage(src);
+    return ret;
+}
+
+/* Check whether xImage src can be blitted onto virtual screen dst. */
+/* */
+/* Returns -1 when src->dx or dst is missing. If the image supplies a */
+/* verify callback, its result is returned. Otherwise, when the image has */
+/* an internalFormat callback, the blit function is looked up and stored in */
+/* dst->blitter (even if the check then fails); DXL_OK is returned unless */
+/* the lookup produced the (blitFunc)-1 sentinel or nullBlitter. In every */
+/* remaining case the result is DXL_INVALID_BLIT. */
+int DXL_CheckVScreenXImageBlit(DXL_VSCREEN_HANDLE dst,DXL_XIMAGE_HANDLE src)
+{
+ /* validate() is defined elsewhere; its failure behaviour is not visible here */
+ validate(src);
+
+ if (!src->dx)
+ return -1;
+
+ if (!dst) return -1;
+
+ if (src->verify != NULL)
+ return(src->verify(src,dst));
+
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+ if((void *)(src->internalFormat) != NULL){
+ dst->blitter = DXL_GetBlitFunc(src, dst);
+
+ if ((dst->blitter != (void *) -1) && (dst->blitter != nullBlitter))
+ return DXL_OK;
+ }
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+ return DXL_INVALID_BLIT;
+}
+
+/*
+ * Check whether images of the given FOURCC can be blitted to dst, using the
+ * screen's own bit depth and blit quality. dst must not be NULL.
+ */
+int DXL_CheckVScreenBlit(DXL_VSCREEN_HANDLE dst,unsigned long fourcc)
+{
+    enum BITDEPTH depth = dst->bd;
+    enum BLITQUALITY quality = dst->bq;
+
+    return DXL_CheckFCCToVScreenFormat(fourcc, depth, quality);
+}
+
+/* Same check as DXL_CheckVScreenXImageBlit, with the arguments in */
+/* (source image, destination screen) order. */
+int DXL_CheckdxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+ return DXL_CheckVScreenXImageBlit( dst, src);
+} \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c
new file mode 100644
index 00000000..3e004911
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c
@@ -0,0 +1,76 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/*/////////////////////////////////////////////////////////////////////////
+//
+// dxlvinfd.c
+//
+// Purpose: A list of helper functions to the quick time codec code
+//
+///////////////////////////////////////////////////////////////////////*/
+
+//#include <stdio.h>
+//#include <math.h>
+//#include <string.h>
+#include "dxl_main.h"
+
+/* Five long-sized pattern words describing one display setting.
+ * NOTE(review): what each "dot" stands for is not visible in this file -
+ * confirm against the code that consumes these patterns. */
+struct DisplaySetting {
+ long dotOne;
+ long dotTwo;
+ long dotThree;
+ long dotFour;
+ long dotFive;
+};
+
+/* Signature patterns, one per pixel format (names suggest RGB24, RGB32, ...).
+ * Initialisers are positional: dotOne, dotTwo, dotThree, dotFour, dotFive. */
+static struct DisplaySetting id_RGB24 ={0x00000000,0x00000000,0xffffffff,0x00000000,0xffffffff};
+static struct DisplaySetting id_RGB32 ={0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff};
+static struct DisplaySetting id_RGB555={0xffffffff,0x00000000,0xffffffff,0x00000000,0xffffffff};
+static struct DisplaySetting id_RGB565={0xffffffff,0x00000000,0x00000000,0x00000000,0xffffffff};
+static struct DisplaySetting id_UYVY ={0xff80ff80,0x00800080,0xff80ff80,0x00800080,0x00800080};
+static struct DisplaySetting id_YUY2 ={0x80ff80ff,0x80008000,0x80008000,0x80008000,0x80008000};
+static struct DisplaySetting id_YVU9 ={0x80008000,0x80008000,0xff80ff80,0xff80ff80,0xff80ff80};
+static struct DisplaySetting id_RGB8 ={0x00000000,0xffffffff,0x00000000,0xffffffff,0x00000000};
+
+
+/* Patterns named after stretch modes. */
+static struct DisplaySetting id_STRETCH ={0x00000000,0xffffffff,0x00000000,0x00000000,0x00000000};
+static struct DisplaySetting id_STRETCH_BRIGHT ={0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000};
+static struct DisplaySetting id_STRETCH_SAME ={0xffffffff,0x00000000,0x00000000,0x00000000,0x00000000};
+
+/* Patterns named after keying on/off. */
+static struct DisplaySetting id_KEY = {0x00000000,0x00000000,0xffffffff,0x00000000,0x00000000};
+static struct DisplaySetting id_NOTKEY = {0x00000000,0x00000000,0x00000000,0x00000000,0x00000000};
+
+/* All-zero pattern. */
+static struct DisplaySetting id_CLEAR_ME = {0x00000000,0x00000000,0x00000000,0x00000000,0x00000000};
+
+
+/*
+ * Store the field-by-field bitwise OR of *src1 and *src2 into *dst.
+ * Does nothing when dst is NULL; src1 and src2 are not checked.
+ */
+static void OrSettings(struct DisplaySetting *src1,struct DisplaySetting *src2, struct DisplaySetting *dst)
+{
+    if (!dst)
+        return;
+
+    dst->dotOne   = src1->dotOne   | src2->dotOne;
+    dst->dotTwo   = src1->dotTwo   | src2->dotTwo;
+    dst->dotThree = src1->dotThree | src2->dotThree;
+    dst->dotFour  = src1->dotFour  | src2->dotFour;
+    dst->dotFive  = src1->dotFive  | src2->dotFive;
+}
+
+
+/*
+ * Copy all five pattern words of *src into *dst.
+ * Does nothing when dst is NULL; src is not checked.
+ */
+static void SetSettings(struct DisplaySetting *dst,struct DisplaySetting *src)
+{
+    if (!dst)
+        return;
+
+    /* the struct holds nothing but the five words, so assign it whole */
+    *dst = *src;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c
new file mode 100644
index 00000000..4126daf8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c
@@ -0,0 +1,43 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"
+
+
+#if defined(DISPLAYDIB)
+#if DISPLAYDIB
+int globalIsDIB,globalDIBWidth,globalDIBHeight;
+#endif
+#endif
+
+extern int preallocVScreens(int lmaxScreens);
+extern void freeVScreens(void);
+
+
+/*
+ * Library start-up: clears the blitter registry and pre-allocates
+ * lmaxScreens virtual screens.  lmaxImages is accepted but unused.
+ * Always returns DXL_OK; the result of preallocVScreens is not inspected.
+ */
+int DXL_InitVideo(int lmaxScreens,int lmaxImages)
+{
+    (void)lmaxImages;   /* not used */
+
+    /* registerDuckBlitters(); */
+    resetBlitters();
+
+    /* DXL_RegisterXImage(NULL,0L,(DXL_INTERNAL_FORMAT ) 0); */
+
+    preallocVScreens(lmaxScreens);
+
+    return DXL_OK;
+}
+
+
+/* Library shutdown: hands off to freeVScreens(). */
+void DXL_ExitVideo(void)
+{
+    freeVScreens();
+    return;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c
new file mode 100644
index 00000000..704040b7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c
@@ -0,0 +1,175 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"
+#include <assert.h>
+/***********************************************/
+
+/*
+ * Bytes occupied by one pixel of the screen's bit depth (vSc->bd):
+ * 1, 2, 3 or 4 for the formats listed below, -1 for any other value.
+ */
+int DXL_GetVScreenSizeOfPixel(DXL_VSCREEN_HANDLE vSc)
+{
+    int bytesPerPixel = -1;
+
+    switch (vSc->bd)
+    {
+    case DXRGB8:
+    case DXHALFTONE8:
+    case DXRGB8VESA:
+        bytesPerPixel = 1;
+        break;
+
+    case DXRGB16_555:
+    case DXRGB16_565:
+    case DXRGB16VESA:
+    case DXYUY2:
+    case DXUYVY:
+        bytesPerPixel = 2;
+        break;
+
+    case DXRGB24:
+        bytesPerPixel = 3;
+        break;
+
+    case DXRGB32:
+        bytesPerPixel = 4;
+        break;
+
+    default:
+        break;
+    }
+
+    return bytesPerPixel;
+}
+
+/*
+ * Release a virtual screen.  The handle is marked unused and its address is
+ * cleared; the structure itself is passed to duck_free only when its
+ * `allocated` flag is set.  A NULL handle is ignored.
+ */
+void DXL_DestroyVScreen(DXL_VSCREEN_HANDLE dst)
+{
+    if (dst == NULL)
+        return;
+
+    dst->addr = NULL;
+    dst->dkFlags.inUse = 0;
+
+    if (dst->dkFlags.allocated)
+    {
+        duck_free(dst);
+    }
+}
+
+/*
+ * Selectively update a virtual screen.  Each argument has a "leave as is"
+ * value: NULL for addr, DXRGBNULL for bd, -1 for pitch p and height h.
+ * Pitch and height are stored as short.  Returns DXL_OK once `validate`
+ * has accepted dst.
+ */
+int DXL_AlterVScreen(DXL_VSCREEN_HANDLE dst, unsigned char *addr,enum BITDEPTH bd, int p,int h)
+{
+    validate(dst);
+
+    if (addr != NULL)
+    {
+        dst->addr = addr;
+    }
+    if (bd != DXRGBNULL)
+    {
+        dst->bd = bd;
+    }
+    if (p != -1)
+    {
+        dst->pitch = (short) p;
+    }
+    if (h != -1)
+    {
+        dst->height = (short) h;
+    }
+
+    return DXL_OK;
+}
+
+/*
+ * Update the view rectangle of a virtual screen.  Any negative argument
+ * leaves the corresponding field untouched; accepted values are stored as
+ * short.  Returns DXL_OK once `validate` has accepted dst.
+ */
+int DXL_AlterVScreenView(DXL_VSCREEN_HANDLE dst,int x,int y,int w,int h)
+{
+    validate(dst);
+
+    if (x >= 0)
+        dst->viewX = (short)x;      /* & 0xfffe; */
+
+    if (y >= 0)
+        dst->viewY = (short)y;
+
+    if (w >= 0)
+        dst->viewW = (short)w;      /* & 0xfffe; */
+
+    if (h >= 0)
+        dst->viewH = (short)h;
+
+    return DXL_OK;
+}
+
+/*
+ * Obtain a virtual screen from vScreenCreate() and initialise it: marked in
+ * use, no cached blit format (-1), address / bit depth / pitch / height taken
+ * from the arguments via DXL_AlterVScreen, background cleared, and blit
+ * quality DXBLIT_SAME.  Returns NULL when vScreenCreate() yields nothing.
+ */
+DXL_VSCREEN_HANDLE DXL_CreateVScreen(unsigned char *addr, enum BITDEPTH bd, short p,short h)
+{
+#pragma warning(disable: 4210) // nonstandard extension used : function given file scope
+    DXL_VSCREEN_HANDLE vScreenCreate(void);
+#pragma warning(default: 4210) // nonstandard extension used : function given file scope
+
+    DXL_VSCREEN_HANDLE screen = vScreenCreate();
+
+    if (screen == NULL)
+        return NULL;
+
+    /* flags first: DXL_AlterVScreen runs `validate` on the handle */
+    screen->dkFlags.inUse = 1;
+    screen->blitFormat = -1;
+
+    DXL_AlterVScreen(screen, addr, bd, p, h);
+
+    screen->by = 0;
+    screen->bx = 0;
+    screen->bAddr = NULL;
+    screen->bq = DXBLIT_SAME;
+
+    return screen;
+}
+
+/*
+ * Read back the view rectangle of dst into *x, *y, *w and *h.
+ * The four output pointers are written unconditionally.
+ * Returns DXL_OK once `validate` has accepted dst.
+ */
+int DXL_GetVScreenView(DXL_VSCREEN_HANDLE dst,int *x,int *y,int *w,int *h)
+{
+    validate(dst);
+
+    *h = dst->viewH;
+    *w = dst->viewW;
+    *y = dst->viewY;
+    *x = dst->viewX;
+
+    return DXL_OK;
+}
+
+
+
+/*
+ * Copy the address, blit quality, bit depth, pitch and height of vScreen
+ * into the supplied output locations.  A NULL output pointer trips
+ * assert(0) (debug builds) and is otherwise skipped.  vScreen itself is not
+ * checked.  Always returns 0.
+ */
+int DXL_GetVScreenAttributes(
+    DXL_VSCREEN_HANDLE vScreen,
+    void **addr,
+    dxvBlitQuality *bq,
+    dxvBitDepth *bd,
+    short *pitch,
+    short *height
+    )
+{
+    if (!addr)
+    {
+        assert(0);
+    }
+    else
+    {
+        *addr = (void *) (vScreen->addr);
+    }
+
+    if (!bq)
+    {
+        assert(0);
+    }
+    else
+    {
+        *bq = vScreen->bq;
+    }
+
+    if (!bd)
+    {
+        assert(0);
+    }
+    else
+    {
+        *bd = vScreen->bd;
+    }
+
+    if (!pitch)
+    {
+        assert(0);
+    }
+    else
+    {
+        *pitch = vScreen->pitch;
+    }
+
+    if (!height)
+    {
+        assert(0);
+    }
+    else
+    {
+        *height = vScreen->height;
+    }
+
+    return 0;
+} /* end get attributes */
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c
new file mode 100644
index 00000000..c4e9cffc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c
@@ -0,0 +1,353 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"
+
+
+
+/* Store `temp` in src->fSize and hand the same value back to the caller. */
+int DXL_SetXImageCSize(DXL_XIMAGE_HANDLE src, int temp)
+{
+    int newSize = temp;
+
+    src->fSize = newSize;
+    return newSize;
+}
+
+/*
+ * Destroy an XImage.
+ *
+ * The end-of-life timestamp is recorded in src->prof.profileEnd, then the
+ * image's own destroy callback is invoked if the image is marked in use.
+ * A NULL handle is ignored.
+ *
+ * The NULL check now comes before the first dereference of src.  The former
+ * clocks-per-frame ratios were computed into locals that were never read, and
+ * they divided by prof.frameCount, which is still 0 for an image that was
+ * created and destroyed without decoding a frame; that dead arithmetic has
+ * been removed.
+ */
+void DXL_DestroyXImage(DXL_XIMAGE_HANDLE src)
+{
+#pragma warning(disable:4210) //nonstandard extension used : function given file scope
+    void DXL_AccurateTime(UINT64* time);
+#pragma warning(default:4210) //nonstandard extension used : function given file scope
+
+    if (src == NULL)
+        return;
+
+    /* close the profiling interval opened by the first dx call */
+    DXL_AccurateTime(&src->prof.profileEnd);
+
+    if (src->dkFlags.inUse)
+    {
+        src->destroy(src);
+    }
+}
+
+/*
+ * Reposition an XImage.  With mode DXL_RELATIVE, (x, y) is added to the
+ * current position; with any other mode, (x, y) becomes the position.
+ * Coordinates are stored as short.  Returns DXL_OK once `validate` has
+ * accepted src.
+ */
+int DXL_MoveXImage(DXL_XIMAGE_HANDLE src,enum OFFSETXY mode, int x,int y)
+{
+    validate(src);
+
+    if (mode == DXL_RELATIVE)
+    {
+        src->x = (short)(src->x + x);
+        src->y = (short)(src->y + y);
+    }
+    else
+    {
+        src->x = (short)x;
+        src->y = (short)y;
+    }
+
+    return DXL_OK;
+}
+
+/*
+ * Point an XImage at a new compressed-data buffer and clear its DXed flag.
+ * With data == NULL nothing further happens and DXL_OK is returned.
+ * Otherwise the image's seedData callback, if it has one, is run and its
+ * result returned; without a callback the result is 0.
+ */
+int DXL_AlterXImageData(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    validate(src);
+
+    src->dkFlags.DXed = 0;
+    src->addr = data;
+
+    if (data == NULL)
+        return DXL_OK;
+
+    return src->seedData ? src->seedData(src) : 0;
+}
+
+/*
+ * Read the position and size of an XImage into *x, *y, *w and *h.
+ * The output pointers are written unconditionally.
+ * Returns DXL_OK once `validate` has accepted src.
+ */
+int DXL_GetXImageXYWH(DXL_XIMAGE_HANDLE src,int *x,int *y,int *w, int *h)
+{
+    validate(src);
+
+    *w = src->w;
+    *h = src->h;
+    *x = src->x;
+    *y = src->y;
+
+    return DXL_OK;
+}
+
+/* Value of the image's keyFrame flag (after `validate` has accepted src). */
+int DXL_IsXImageKeyFrame(DXL_XIMAGE_HANDLE src)
+{
+    int isKey;
+
+    validate(src);
+
+    isKey = src->dkFlags.keyFrame;
+    return isKey;
+}
+
+/* typedef DXL_XIMAGE_HANDLE (*createFunc)(unsigned char *data); */
+/* Codec registry: slot i pairs a FOURCC (fourCC[i]) with the function that
+ * creates images for it (creator[i]).  A zero FOURCC marks a free slot.
+ * Filled by DXL_RegisterXImage, at most NUM_ALG entries. */
+#define NUM_ALG 16
+static createFunc creator[NUM_ALG];
+static unsigned long fourCC[NUM_ALG];
+
+/*
+ * Create an XImage "blind": every registered creator is offered `data` in
+ * registration order until one returns an image.  The search stops at the
+ * first free registry slot.  On success the image is marked in use, bound to
+ * `data`, and remembers which creator produced it.  Returns NULL when no
+ * creator accepts the data.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImage(unsigned char *data)
+{
+    DXL_XIMAGE_HANDLE nImage = NULL;
+    int slot;
+
+    for (slot = 0; slot < NUM_ALG && fourCC[slot]; slot++)
+    {
+        nImage = creator[slot](data);
+        if (nImage)
+            break;
+    }
+
+    if (!nImage)
+        return NULL;
+
+    nImage->dkFlags.inUse = 1;
+    nImage->addr = data;
+    nImage->create = (struct tXImage *(__cdecl *)(void *))creator[slot];
+
+    return nImage;
+}
+
+
+
+
+/*
+ * Create an XImage using the creator(s) registered for FOURCC `type`.
+ * On success the image is marked in use, bound to `data`, and its profiling
+ * counters (profileStart, dxClocks, frameCount) are zeroed.  Returns NULL
+ * when no matching creator yields an image.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(unsigned char *data,unsigned long type)
+{
+    DXL_XIMAGE_HANDLE nImage = NULL;
+    int slot;
+
+    for (slot = 0; slot < NUM_ALG; slot++)
+    {
+        if (fourCC[slot] != type)
+            continue;
+
+        nImage = creator[slot](data);
+        if (nImage)
+            break;
+    }
+
+    if (!nImage)
+        return NULL;
+
+    nImage->dkFlags.inUse = 1;
+    nImage->addr = data;
+
+    nImage->prof.frameCount = 0;
+    nImage->prof.dxClocks = 0;
+    nImage->prof.profileStart = 0;
+
+    return nImage;
+}
+
+
+
+
+/*
+ * Create an XImage for FOURCC `fcc`, passing the bitmap-header pointer
+ * srcAndDest (rather than `data`) to the creator.  On success the image is
+ * marked in use, bound to `data`, and its whole profiling pack is zeroed.
+ * Returns NULL when no matching creator yields an image.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageFromBMI(
+    unsigned char *data,
+    unsigned long fcc,
+    DK_BITMAPINFOHEADER *srcAndDest /* There will always be two Obiwan */
+    )
+{
+    DXL_XIMAGE_HANDLE nImage = NULL;
+    int slot;
+
+    for (slot = 0; slot < NUM_ALG; slot++)
+    {
+        if (fourCC[slot] != fcc)
+            continue;
+
+        nImage = creator[slot]((unsigned char *) srcAndDest);
+        if (nImage)
+            break;
+    }
+
+    if (!nImage)
+        return NULL;
+
+    nImage->dkFlags.inUse = 1;
+    nImage->addr = data;
+    duck_memset(&nImage->prof,0,sizeof(DXL_PROFILEPACK)); /* probably redundant */
+
+    return nImage;
+}
+
+
+
+
+
+
+/*
+ * Register a codec.
+ *
+ * fourcc == 0 is a reset request: both registry tables are zeroed and 0 is
+ * returned.  Otherwise the first free slot receives myCreator / fourcc, the
+ * internal format is registered under the same index, and that index is
+ * returned.  Returns -1 when every slot is taken.
+ */
+int DXL_RegisterXImage(createFunc myCreator,unsigned long fourcc, DXL_INTERNAL_FORMAT xFormat)
+{
+    int slot;
+
+    if (fourcc == 0)
+    {
+        duck_memset(creator,0,sizeof(creator));
+        duck_memset(fourCC,0,sizeof(fourCC));
+        return 0;
+    }
+
+    for (slot = 0; slot < NUM_ALG; slot++)
+    {
+        if (fourCC[slot] != 0)
+            continue;       /* occupied */
+
+        creator[slot] = myCreator;
+        fourCC[slot] = fourcc;
+        dxl_RegisterInternalFormat(slot, xFormat);
+        return slot;
+    }
+
+    return -1;
+}
+
+/*
+ * Expose the registry's FOURCC table (NUM_ALG entries, unused slots are 0).
+ * The pointer refers to the live internal array, not a copy.
+ */
+unsigned long *DXL_GetFourCCList(void)
+{
+    unsigned long *list = fourCC;
+
+    return list;
+}
+
+
+/*
+ * Look up the registry index of the codec registered under `fourcc`.
+ * Returns the first matching slot, or -1 when there is none.
+ */
+int dxl_GetAlgHandle(unsigned long fourcc)
+{
+    int slot = 0;
+
+    while (slot < NUM_ALG)
+    {
+        if (fourCC[slot] == fourcc)
+            return slot;
+        slot++;
+    }
+
+    return -1;
+}
+
+
+/*
+ * Determine an image's FOURCC by matching its `create` pointer against the
+ * registered creator functions.  Returns the FOURCC of the first matching
+ * slot, or 0 when nothing matches.
+ */
+unsigned long DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src)
+{
+    int slot = 0;
+
+    while (slot < NUM_ALG)
+    {
+        if (creator[slot] == (createFunc)src->create)
+            return fourCC[slot];
+        slot++;
+    }
+
+    return 0L;
+}
+
+/*
+ * Compute where in the virtual screen the image starts: the screen base
+ * address advanced by (viewX + src->x) pixels and (viewY + src->y) rows of
+ * `pitch` bytes.  The pixel size comes from DXL_GetVScreenSizeOfPixel and is
+ * used as returned, without checking for its -1 "unknown" result.
+ */
+unsigned char *DXL_GetDestAddress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+    int col = dst->viewX + src->x;
+    int row = dst->viewY + src->y;
+    unsigned char *target = (unsigned char *) dst->addr;
+
+    target += (col * DXL_GetVScreenSizeOfPixel(dst)) + (row * dst->pitch);
+
+    return target;
+}
+
+/*
+ * Decompress src and render it to dst through the image's dx callback.
+ *
+ * Returns -1 when src has no dx callback, DXL_INVALID_BLIT when a blitter
+ * lookup yields (void *)-1, otherwise whatever src->dx returns.
+ * Each call also accumulates profiling data in src->prof (first-call
+ * timestamp, total clocks spent in dx, frame count).
+ */
+int DXL_dxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+ int dxvCode;
+ validate(src);
+
+ if (!src->dx)
+ return -1;
+
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+ /* blitters are only resolved when a destination exists and the image has an internal format */
+ if(dst && ((void *)(src->internalFormat) != NULL)) {
+ /* get your hamdy damdy((c)1997 Duck North) registered blitter setup */
+ dst->blitSetup = DXL_GetBlitSetupFunc(src,dst);
+ dst->blitExit = DXL_GetBlitExitFunc(src,dst);
+ dst->blitter = DXL_GetBlitFunc(src, dst);
+
+ if (dst->blitter == (void *) -1)
+ return DXL_INVALID_BLIT;
+ }
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+
+ // if (!src->addr)
+ // return 1;
+
+#if 1 /* we want to profile ... this should constitute no performance hit to profile */
+ {
+ UINT64 timerStart;
+ UINT64 timerEnd;
+
+ void DXL_AccurateTime(UINT64* time);
+ DXL_AccurateTime(&timerStart);
+
+ /* profileStart == 0 means this is the first frame decoded by this image */
+ if (src->prof.profileStart == 0)
+ src->prof.profileStart = timerStart;
+ dxvCode = src->dx(src,dst);
+ DXL_AccurateTime(&timerEnd);
+ src->prof.dxClocks += (timerEnd - timerStart);
+ src->prof.frameCount += 1;
+ }
+#else
+ dxvCode = src->dx(src,dst);
+#endif
+
+
+ return dxvCode;
+}
+
+
+/*
+ * Query the image's size through its GetXImageCSize callback.
+ * Returns -1 for a NULL handle and -2 when the image has no such callback.
+ */
+long DXL_GetXImageCSize(DXL_XIMAGE_HANDLE src)
+{
+    if (src == NULL)
+    {
+        return -1;
+    }
+
+    if (src->GetXImageCSize)
+    {
+        return(src->GetXImageCSize(src));
+    }
+
+    return -2;
+}
+
+/***********************************************/
+
+/*
+ * Create-or-reconfigure entry point.
+ *
+ * With src == NULL an image is created first: by FOURCC when `type` is
+ * non-zero, and if that yields nothing, blind from `data`.  NULL is returned
+ * when both attempts fail.  The image's recreate callback, when present, is
+ * then run with the remaining arguments and its result returned; without a
+ * recreate callback the image is returned as is.
+ */
+DXL_XIMAGE_HANDLE DXL_AlterXImage(DXL_XIMAGE_HANDLE src,
+    unsigned char *data,int type,
+    enum BITDEPTH bitDepth,int width,int height)
+{
+    if (src == NULL)
+    {
+        /* a specified type is tried as the fourcc first */
+        if (type)
+            src = DXL_CreateXImageOfType(data,type);
+
+        /* still nothing: try creating it blind from the data */
+        if (src == NULL)
+            src = DXL_CreateXImage(data);
+
+        /* still nothing: give up */
+        if (src == NULL)
+            return NULL;
+    }
+
+    if (src->recreate)
+        return(src->recreate(src,data,type,bitDepth,width,height));
+
+    /* no way to recreate, assume create is good enough */
+    return src;
+}
+
+
+/*
+ * Forward a (Command, Parameter) pair to the image's setParameter callback.
+ * Neither src nor the callback pointer is checked.
+ */
+void DXL_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned long Parameter )
+{
+    src->setParameter(src, Command, Parameter);
+    return;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c b/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c
new file mode 100644
index 00000000..a4bf4098
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c
@@ -0,0 +1,40 @@
+#include "dxl_main.h"
+#include "duck_dxl.h"
+
+
+/*-------------------------------------------------------------------
+
+File : dxv_util.c
+
+Any extra functions whose lifespan/utility might be "questionable".
+Functions that are not part of the "core", but yet are not really
+anything but Dxv specific.
+
+-------------------------------------------------------------------*/
+
+
+/* This function used during the development of ICM wrapper */
+/*----------------------------------------------------------*/
+char *DXL_DumpRegistry(char *buf);
+/*
+ * Write one line per registered FOURCC ("fourCC[i] = ABCD\n") into buf and
+ * return buf.  Entries are appended one after another; the walk stops at the
+ * first zero (unused) slot or at the end of the table.  buf receives an empty
+ * string when nothing is registered.
+ *
+ * buf must be large enough for every line; its size is not known here.
+ * A NULL buf is returned untouched.
+ *
+ * The previous loop, `while(g)`, tested a pointer that is never NULL, so it
+ * never ended: it ran past the table and rewrote the start of buf on every
+ * pass.
+ */
+char *DXL_DumpRegistry(char *buf)
+{
+    int sprintf( char *buffer, const char *format, ...);
+
+    /* size of the table behind DXL_GetFourCCList (NUM_ALG in ximage.c) */
+    enum { REGISTRY_SLOTS = 16 };
+
+    unsigned long *g = DXL_GetFourCCList();
+    char *out = buf;
+    int i;
+
+    if (buf == 0)
+        return buf;
+
+    *out = '\0';
+
+    for (i = 0; g != 0 && i < REGISTRY_SLOTS && g[i] != 0; i++)
+    {
+        /* %c takes an int, so each extracted byte is converted explicitly */
+        int written = sprintf(out,"fourCC[%d] = %c%c%c%c\n",i,
+            (int)((g[i] & 0xFF000000) >> 24),
+            (int)((g[i] & 0xFF0000) >> 16),
+            (int)((g[i] & 0xFF00) >> 8),
+            (int)((g[i] & 0xFF) >> 0)
+            );
+
+        if (written < 0)
+            break;
+
+        out += written;
+    }
+
+    return buf;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c
new file mode 100644
index 00000000..7cb5b11f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c
@@ -0,0 +1,104 @@
+/***********************************************\
+*   dkprof.c
+*   profiling functions
+*   also see perf.asm and pentium.asm
+\***********************************************/
+#include "duck_mem.h"
+#include "dkprof.h"
+
+#define MAX_PROFILE 15
+
+int profStarted = 0;
+
+/* Identifies one profiled code section; the value is used directly as an
+ * index into pSectionArray (MAX_PROFILE entries). */
+enum PROFILESECTION {
+ LOSSLESSDX = 0,
+ PLANARDX,
+ BLITME,
+ RD_FRAME_DESC,
+ RASTER_CONFIG,
+ DELTA_TABLES,
+ HANDLER_CONFIG,
+ STRING_DECODER,
+ STRING_DATA,
+ TSC0,
+ TSC1,
+ TSC2,
+ TSC3
+};
+
+PSECTION pSectionArray[MAX_PROFILE];
+
+unsigned long pentiumKiloCycles(void);
+
+#if 1
+/***********************************************/
+/* Record the current pentiumKiloCycles() reading as the start of section
+ * `sel`.  Does nothing while profiling is not started. */
+void tscStart(enum PROFILESECTION sel)
+{
+    if (!profStarted)
+        return;
+
+    pSectionArray[sel].pkc1 = pentiumKiloCycles();
+}
+
+/***********************************************/
+/*
+ * Close the timing interval of section `sel`: the elapsed count since
+ * tscStart is added to the running total (avgKc), the sample counter is
+ * bumped, and the min / max trackers are updated.  Does nothing while
+ * profiling is not started.
+ */
+void tscEnd(enum PROFILESECTION sel)
+{
+    PSECTION *section;
+
+    if (!profStarted)
+        return;
+
+    section = &pSectionArray[sel];
+
+    /* pkc2 first holds the end reading, then the elapsed delta */
+    section->pkc2 = pentiumKiloCycles();
+    section->pkc2 = (section->pkc2 - section->pkc1);
+
+    section->avgKc += section->pkc2;
+    section->numTimes += 1;
+
+    if (section->pkc2 < section->minKc)
+    {
+        section->minKc = section->pkc2;
+    }
+
+    if (section->pkc2 > section->maxKc)
+    {
+        section->maxKc = section->pkc2;
+    }
+}
+
+/***********************************************/
+/* Zero every profile section, prime each minimum with 0xffffffff so the
+ * first sample always replaces it, and switch profiling on. */
+void tscInit()
+{
+    PSECTION *section = pSectionArray;
+    int remaining = MAX_PROFILE;
+
+    while (remaining-- > 0)
+    {
+        duck_memset(section,0,sizeof(PSECTION));
+        section->minKc = 0xffffffff;
+        section++;
+    }
+
+    profStarted = 1;
+}
+
+/***********************************************/
+/* Switch profiling off; collected section data is left untouched. */
+void tscUninit()
+{
+    profStarted = 0;
+    return;
+}
+
+/***********************************************/
+/*
+ * Harvest the statistics of section `sel`.
+ *
+ * *cnt receives the number of samples and the return value is the total
+ * divided by that count; the section is then reset for the next run.
+ * When profiling is off or the section has no samples, *cnt and the return
+ * value are both 0 and the section is left alone.
+ */
+unsigned long tscProcessCounts(unsigned long *cnt, enum PROFILESECTION sel)
+{
+    PSECTION *section = &pSectionArray[sel];
+    unsigned long rv = 0;
+
+    *cnt = 0;
+
+    if (!profStarted || !section->numTimes)
+        return (rv);
+
+    section->avgKc /= section->numTimes;
+    rv = section->avgKc;
+    *cnt = section->numTimes;
+
+    /* reset all vars */
+    duck_memset(section,0,sizeof(PSECTION));
+    section->minKc = 0xffffffff;
+
+    return (rv);
+}
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c
new file mode 100644
index 00000000..aa9e42a1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c
@@ -0,0 +1,30 @@
+#include "dkpltfrm.h"
+
+
+/* Read the CPU time-stamp counter with RDTSC (MSVC inline assembly) and
+ * store the 64-bit value, EDX:EAX, in *bigOne. */
+static void readTSC(UINT64 *bigOne)
+{
+ unsigned long xhigh;
+ unsigned long xlow;
+
+ __asm
+ {
+
+ rdtsc
+
+ mov [xlow],EAX;
+ mov [xhigh],edx;
+
+ }
+
+ /* assemble high:low into one 64-bit value */
+ *bigOne = xhigh ;
+ *bigOne <<= 32;
+ *bigOne |= xlow;
+
+ return;
+}
+
+
+/* Public timer entry point: fills *temp with the current time-stamp
+ * counter value obtained from readTSC. */
+void DXL_AccurateTime(UINT64 *temp)
+{
+    readTSC(temp);
+    return;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c
new file mode 100644
index 00000000..1bda2ef4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c
@@ -0,0 +1,20 @@
+#include "dkpltfrm.h"
+#include "dxl_main.h"
+
+/* Raw CPU feature bits; written elsewhere (not in this file). */
+unsigned long cpuFeatures;
+
+/* Masks tested against cpuFeatures.  Only CHECK_MMX is used in this file.
+ * NOTE(review): values look like CPUID feature-flag bit positions - confirm
+ * against the code that fills cpuFeatures. */
+#define CHECK_MMX 0x800000
+#define CHECK_TSC 0x10
+#define CHECK_CMOV 0x8000
+#define CHECK_FCMOV 0x10000
+
+/*
+ * Translate the raw cpuFeatures word into CPU_FEATURES flags.
+ * Only MMX is reported: MMX_SUPPORTED is set when the CHECK_MMX bit is
+ * present, otherwise the result is NO_FEATURES.
+ */
+CPU_FEATURES DXL_GetCPUFeatures(void)
+{
+    enum CPU_FEATURES detected = NO_FEATURES;
+
+    if ((cpuFeatures & CHECK_MMX) != 0)
+    {
+        detected |= MMX_SUPPORTED;
+    }
+
+    return detected;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp
new file mode 100644
index 00000000..342717ce
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <windows.h>
+#include <windowsx.h>
+
+
+// ************************************************************************
+// FUNCTION : DllMain( HINSTANCE, DWORD, LPVOID )
+// PURPOSE : DllMain is called by the C run-time library from the
+// _DllMainCRTStartup entry point. The DLL entry point gets
+// called (entered) on the following events: "Process Attach",
+// "Thread Attach", "Thread Detach" or "Process Detach".
+// COMMENTS : No initialization is needed here so this entry point simply
+// returns TRUE.
+// ************************************************************************
+BOOL WINAPI
+DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpvReserved )
+{
+    /* nothing to set up or tear down for any attach/detach reason */
+    UNREFERENCED_PARAMETER( lpvReserved );
+    UNREFERENCED_PARAMETER( fdwReason );
+    UNREFERENCED_PARAMETER( hInstDLL );
+
+    return TRUE;
+}
+
+/* Library version as a packed constant (0x0365). */
+int DXV_GetVersion()
+{
+    const int version = 0x0365;
+
+    return version;
+}
+
+extern "C" {
+
+/* Announcement buffer state, set up by ErrorBuffer():
+ *   announcestart - start of the caller-supplied buffer
+ *   pannounce     - position where the next message will be written
+ * The code treats the buffer as ANNBUFSIZE bytes long. */
+char* pannounce;
+char *announcestart;
+#define ANNBUFSIZE 2048
+
+/* Debug-only log file opened lazily by Announcement(). */
+FILE* hf = NULL;
+
+/*
+ * Append a message to the announcement buffer registered with ErrorBuffer()
+ * and, in debug builds, to Announce.txt.
+ *
+ * Buffer policy (unchanged): room for twice the message length is kept free
+ * at the end of the ANNBUFSIZE-byte buffer; when that room is not available
+ * the write position is pulled back so messages "lock up at the end".
+ *
+ * Fixes over the previous version:
+ *  - the text is written with fputs, so '%' characters in it are no longer
+ *    interpreted as fprintf conversions;
+ *  - a NULL message, or a call before ErrorBuffer(), is ignored instead of
+ *    writing through a NULL pointer;
+ *  - messages whose doubled length exceeds ANNBUFSIZE no longer move the
+ *    write position before the start of the buffer; they are written from
+ *    the start and truncated to ANNBUFSIZE - 1 characters.
+ */
+void Announcement(const char* lpszString)
+{
+    size_t len;
+    size_t reserve;
+    size_t used;
+
+    if (lpszString == NULL)
+        return;
+
+#if _DEBUG
+    if (!hf) {
+        hf = fopen("Announce.txt","w");
+    }
+    if (hf) {
+        fputs(lpszString, hf);
+        fflush(hf);
+    }
+#endif
+
+    if (pannounce == NULL || announcestart == NULL)
+        return;                         /* no buffer registered yet */
+
+    len = strlen(lpszString);
+    if (len > ANNBUFSIZE - 1)
+        len = ANNBUFSIZE - 1;           /* leave room for the terminator */
+
+    reserve = 2 * len;
+    if (reserve > ANNBUFSIZE)
+        reserve = ANNBUFSIZE;
+
+    used = (size_t)(pannounce - announcestart);
+    if (used + reserve > ANNBUFSIZE) {
+        pannounce = announcestart + (ANNBUFSIZE - reserve); // lock up at end
+    }
+
+    memcpy(pannounce, lpszString, len); // copy and bump
+    pannounce[len] = '\0';
+    pannounce += len;
+}
+
+/*
+ * Debug builds: emit a closing announcement and close the log file.
+ * Release builds: no-op.
+ * NOTE(review): hf is intentionally left non-NULL after fclose (see the
+ * comment below), so a later Announcement() would write to a closed stream -
+ * confirm that no caller announces after AnnDone().
+ */
+void AnnDone()
+{
+#if _DEBUG
+    Announcement("Closing Announcements");
+    if (hf)
+    {
+        fclose(hf);
+    }
+//  hf = 0; // don't reset handle or file will re-open on next call
+#endif
+}
+
+/* Register errorbuf as the announcement buffer and rewind the write
+ * position to its start. */
+void ErrorBuffer(char *errorbuf)
+{
+    announcestart = errorbuf;
+    pannounce = errorbuf;
+}
+
+} \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def
new file mode 100644
index 00000000..a67094b2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def
@@ -0,0 +1,97 @@
+; -------------------------------------------------------------------------
+;
+; Duck Developer Support
+; Copyright (c) 1995 Duck Corporation
+; -------------------------------------------------------------------------
+; MODULE : Dukdll.DEF
+; PURPOSE : module-definition file for the Duck Player
+; COMMENTS :
+; -------------------------------------------------------------------------
+;LIBRARY DXV
+DESCRIPTION 'Duck Windows Player'
+
+EXPORTS
+ DXL_InitVideo @2
+ DXL_ExitVideo @3
+ DXL_CreateVScreen @4
+ DXL_DestroyVScreen @5
+ DXL_AlterVScreen @6
+ DXL_AlterVScreenClip @7
+ DXL_AlterVScreenView @8
+ DXL_AlterVScreenBackground @9
+ DXL_CreateXImage @10
+ DXL_DestroyXImage @11
+ DXL_AlterXImageData @12
+ DXL_MoveXImage @13
+
+; DXL_AlterXImageSpriteModes @14
+
+ DXL_GetXImageColorDepth @15
+; DXL_GetXImageDirtyXYWH @16
+ DXL_GetXImageXYWH @17
+ DXL_dxImageToVScreen @19
+ DXL_IsXImageKeyFrame @20
+ DXL_GetVScreenView @21
+
+; DXL_CreateDirtyBuffer @22
+; DXL_DeriveVScreenFromXImage @23
+; DXL_DisplayVScreen @24
+ DXL_SetVScreenBlitQuality @25
+ DXL_GetVScreenBlitQuality @26
+ DXL_GetVScreenAddr @27
+ DXL_CreateGenericXImage @28
+ DXL_EraseVScreen @29
+
+ DXL_BlitXImageToVScreen @30
+
+ DXL_BlitVScreenToVScreen @31
+
+ DXL_GetXImageFrameBuffer @32
+ DXL_AlterXImage @33
+ DXL_SetVScreenCLUTs @34
+ DXL_ResetVScreenCLUTs @35
+
+
+; BlackBox @81
+
+ AnnDone @86
+ Announcement @88
+
+; RestoreDirty @90
+; GetScreenAddr @91
+; resetDisplay @92
+; SetClutPtrs @93
+; MarkSkips @94
+
+ DXL_Decompress16BitAs565 @107
+
+ ErrorBuffer @108
+ DXL_SetXImageBlitAll @109
+
+ DXV_Setmalloc @111
+ DXV_Setcalloc @112
+ DXV_Setfree @113
+
+; DUCK_rdtsc_Start @114
+; DUCK_rdtsc_End @115
+; DUCK_InitProfile @116
+; DUCK_ProcessCounts @117
+; DUCK_UninitProfile @118
+; DUCK_ClearI @119
+; DUCK_SetI @120
+
+ DXL_CheckdxImageToVScreen @121
+ DXL_GetFourCCList @122
+ DXL_CreateXImageOfType @123
+ DXL_GetXImageCSize @124
+ DXL_GetBitDepthPalette @125
+
+ DXL_CheckVScreenBlit @126
+ DXL_CheckVScreenXImageBlit @127
+
+ DXL_BlackLineVScreen @128
+ DXL_GetCPUFeatures @129
+ DXL_GetXImageFOURCC @130
+
+ DXL_SetXImageCSize @131
+ DXL_InitVideoEx @132
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc
new file mode 100644
index 00000000..8175d7dc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc
@@ -0,0 +1,126 @@
+//Microsoft Developer Studio generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifndef MAJ
+#define MAJ 0
+#endif
+#ifndef MIN
+#define MIN 0
+#endif
+#ifndef PVER
+#define PVER 0
+#endif
+#ifndef BVER
+#define BVER ".b1"
+#endif
+
+#ifndef PROJ
+#define PROJ UNKNOWN!!
+#endif
+
+#define DT(x) #x
+#define VTEXT(x) DT(x)
+#define VERSTRING(w,x,y,z) DT(w) "." DT(x) "." DT(y) DT(z) "\0"
+
+#ifndef _MAC
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
+ FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x10004L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "CompanyName", "The Duck Corporation\0"
+ VALUE "FileDescription", "TruePlay SDK Library - " VTEXT(PROJ) ".dll\0"
+ VALUE "FileVersion", VERSTRING(MAJ,MIN,PVER,BVER)
+ VALUE "InternalName", "MPVE - " VTEXT(PROJ) "\0"
+ VALUE "LegalCopyright", "Copyright ©1998 The Duck Corp.\0"
+ VALUE "ProductName", "TruePlay SDK - " VTEXT(PROJ) " Library \0"
+ VALUE "ProductVersion", VERSTRING(MAJ,MIN,PVER,BVER)
+ VALUE "Providing", VTEXT(ALG)
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+#endif // !_MAC
+//resource
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE DISCARDABLE
+BEGIN
+ "resource.h\0"
+END
+
+2 TEXTINCLUDE DISCARDABLE
+BEGIN
+ "#include ""afxres.h""\r\n"
+ "\0"
+END
+
+3 TEXTINCLUDE DISCARDABLE
+BEGIN
+ "\r\n"
+ "\0"
+END
+
+#endif // APSTUDIO_INVOKED
+
+#endif // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif // not APSTUDIO_INVOKED
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c
new file mode 100644
index 00000000..a86d620b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c
@@ -0,0 +1,103 @@
+/*
+ Windows 95 Memory allocation abstraction functions
+ */
+
+#include <stdio.h>
+#include <windows.h>
+
+#include "dkpltfrm.h"
+#include "duck_mem.h"
+
+#define CHECK_FOR_MEMORY_LEAK 0
+
+/* Default allocator: `size` bytes of fixed memory from LocalAlloc. */
+void *malloc_32b(size_t size)
+{
+    void *block = LocalAlloc( LMEM_FIXED, size );
+
+    return block;
+}
+/*
+ * Default zeroing allocator: count * size bytes of zero-initialised fixed
+ * memory from LocalAlloc.
+ *
+ * Returns NULL when count * size does not fit in size_t; previously the
+ * product wrapped around silently and a too-small block was returned.
+ */
+void *calloc_32b(size_t count,size_t size)
+{
+    /* maybe we should rewrite this to use dwords
+       (who knows if they do it automatica like) */
+    if (size != 0 && count > ((size_t)-1) / size)
+        return NULL;
+
+    return LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, size*count );
+}
+
+/* Default deallocator: returns a LocalAlloc block via LocalFree. */
+void free_32b(void * hMem)
+{
+    LocalFree(hMem);
+    return;
+}
+
+/* Allocator hooks used by duck_malloc / duck_calloc / duck_free.  They start
+ * out pointing at the LocalAlloc-based defaults above and can be replaced
+ * through DXV_Setmalloc / DXV_Setcalloc / DXV_Setfree. */
+static void *(*ptr_malloc)(size_t size) = malloc_32b;
+static void *(*ptr_calloc)(size_t, size_t size) = calloc_32b;
+static void (*ptr_free)(void *) = free_32b;
+
+/*
+ * Allocate `size` bytes through the installed malloc hook.
+ * The memory-type argument `fred` is accepted but not used here.
+ *
+ * With CHECK_FOR_MEMORY_LEAK enabled every allocation is appended to
+ * c:\sjl.log.  The trace now prints the pointer with %p and the size with
+ * %lu; it used to pass them to %x and %d, which do not match those types.
+ */
+void *duck_malloc(size_t size, enum tmemtype fred)
+{
+    void *temp;
+
+    (void)fred;
+
+    temp = (*ptr_malloc)(size);
+
+#if CHECK_FOR_MEMORY_LEAK
+{
+    FILE * out;
+
+    if ((out = fopen("c:\\sjl.log","a")) != NULL) {
+        fprintf(out,"DXV duck_malloc:%p %lu\n", temp, (unsigned long)size);
+        fclose(out);
+    }
+}
+#endif
+
+    return temp;
+}
+
+/*
+ * Allocate n * size zeroed bytes through the installed calloc hook.
+ * The memory-type argument `fred` is accepted but not used here.
+ *
+ * With CHECK_FOR_MEMORY_LEAK enabled every allocation is appended to
+ * c:\sjl.log.  The trace now prints the pointer with %p and the counts with
+ * %lu; it used to pass them to %x and %d, which do not match those types.
+ */
+void *duck_calloc(size_t n,size_t size, enum tmemtype fred)
+{
+    void *temp;
+
+    (void)fred;
+
+    temp = (*ptr_calloc) (n, size);
+
+#if CHECK_FOR_MEMORY_LEAK
+{
+    FILE * out;
+
+    if ((out = fopen("c:\\sjl.log","a")) != NULL) {
+        fprintf(out,"DXV duck_calloc:%p %lu %lu \n", temp, (unsigned long)n, (unsigned long)size);
+        fclose(out);
+    }
+}
+#endif
+
+    return temp;
+}
+
+/*
+ * Release a block through the installed free hook.
+ *
+ * With CHECK_FOR_MEMORY_LEAK enabled every release is appended to
+ * c:\sjl.log.  The trace now prints the pointer with %p; it used to pass it
+ * to %x, which does not match a pointer argument.
+ */
+void duck_free(void *old_blk)
+{
+
+#if CHECK_FOR_MEMORY_LEAK
+{
+    FILE * out;
+
+    if ((out = fopen("c:\\sjl.log","a")) != NULL) {
+        fprintf(out,"DXV duck_free:%p\n", old_blk);
+        fclose(out);
+    }
+}
+#endif
+
+    (*ptr_free) (old_blk);
+}
+
+/* Install `ptr` as the malloc hook used by duck_malloc.  The pointer is
+ * stored as given, without a NULL check. */
+void DXV_Setmalloc(void *(*ptr)(size_t))
+{
+    ptr_malloc = ptr;
+    return;
+}
+
+/* Install `ptr` as the calloc hook used by duck_calloc.  The pointer is
+ * stored as given, without a NULL check. */
+void DXV_Setcalloc(void *(*ptr)(size_t, size_t))
+{
+    ptr_calloc = ptr;
+    return;
+}
+
+/* Install `ptr` as the free hook used by duck_free.  The pointer is stored
+ * as given, without a NULL check. */
+void DXV_Setfree(void (*ptr)(void *))
+{
+    ptr_free = ptr;
+    return;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk b/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk
new file mode 100644
index 00000000..e549d598
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk
@@ -0,0 +1,9 @@
+
+$(OBJDIR)sc_$(PROJECT).lib: $(LIBDIR)\sc_tm20.lib $(LIBDIR)\sc_torq.lib
+ LIB $(OBJS) /OUT:$@
+ LIB $@ $(LIBDIR)\s_tm1.lib /OUT:$@
+ LIB $@ $(LIBDIR)\sc_tm20.lib /OUT:$@
+ LIB $@ $(LIBDIR)\s_tmrt.lib /OUT:$@
+ LIB $@ $(LIBDIR)\sc_torq.lib /OUT:$@
+ copy $@ $(LIBDIR)
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c
new file mode 100644
index 00000000..85d9d04b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c
@@ -0,0 +1,717 @@
+// icmdxv.c : DXV glue that routes decompression through installed VFW (ICM) codecs.
+//
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "dkpltfrm.h" /* platform specifics */
+#include "duktypes.h" /* predefined general types used at duck */
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_main.h" /* interface to dxv */
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <vfw.h>
+
+
+
+/* Output formats that might be supported by both a codec and dxv; DXL_ReportBestBMIMatch() probes them in this order. */
+/* Columns appear to be FOURCC (0 on the plain RGB rows), bits per pixel, plane count, dxvBitDepth - confirm against the BMIMapping declaration. Original author's note: this table is meant to be public, i.e. declared in the header. */
+BMIMapping DXL_BMIMap[] =
+{
+ { DXL_MKFOURCC('Y','V','1','2'), 12, 3, DXYV12 },
+
+ { DXL_MKFOURCC('I','Y','U','V'), 12, 1, DXI420 },
+
+ { DXL_MKFOURCC('Y','U','Y','2'), 16, 1, DXYUY2 },
+
+ { DXL_MKFOURCC('Y','V','Y','U'), 16, 1, DXYVYU },
+
+ { DXL_MKFOURCC('U','Y','V','Y'), 16, 1, DXUYVY },
+
+ { 0, 24, 1, DXRGB24 },
+
+ { 0, 32, 1, DXRGB32 }
+
+};
+
+
+
+/*
+ * Render a FOURCC code as text.
+ *
+ * fcc  - the code; its least-significant byte becomes the first character.
+ * buff - caller-supplied storage of at least 5 bytes (4 characters + NUL).
+ *
+ * Returns buff.  A zero byte in fcc is written as a NUL character, so a
+ * code of 0 yields an empty string.
+ */
+static char *MakeFourCCString(unsigned long fcc, char *buff)
+{
+    /* %c consumes an int; the masked values are unsigned long, so narrow them explicitly */
+    sprintf(buff,"%c%c%c%c",
+        (int) ((fcc >>  0) & 0xFF),
+        (int) ((fcc >>  8) & 0xFF),
+        (int) ((fcc >> 16) & 0xFF),
+        (int) ((fcc >> 24) & 0xFF)
+        );
+    return buff;
+}
+
+
+/*
+ * Write the contents of DXL_BMIMap to `filename` as a small HTML table,
+ * one row per entry (FOURCC text and hex value, bit count, planes, dxv
+ * bit depth).  The file is created/truncated; nothing is written and no
+ * error is reported when it cannot be opened.
+ */
+void DXL_ReportBMIMapping(char *filename)
+{
+    int entries = sizeof(DXL_BMIMap) / sizeof(BMIMapping);
+    int row;
+    char fourcc[5];
+    FILE *html = fopen(filename,"w");
+
+    if (!html)
+        return;
+
+    fprintf(html, "<table BORDER=1>");
+    fprintf(html, "<tr><td>FOURCC</td><td>BitDepth</td><td>Planes</td><td>dxvBitDepth</td></tr>");
+
+    for (row = 0; row < entries; row++)
+    {
+        BMIMapping *entry = &DXL_BMIMap[row];
+
+        fprintf(html, "<tr><td>%s<br>%x</td> <td>%ld</td> <td>%ld</td> <td>%ld</td></tr>",
+            MakeFourCCString(entry->biCompression,fourcc),
+            entry->biCompression,
+            entry->biBitCount,
+            entry->biPlanes,
+            entry->bd
+            );
+    }
+
+    fprintf(html,"</table>");
+    fclose(html);
+}
+
+
+
+
+/********* USAGE
+
+In addition to regular DXV services, this library will invoke VFW
+codecs for decompression services.
+
+Because of a bug in the frame parsing, the library is compiled to
+decompress TM2X via its codec as well. So be certain to have a
+TM2X VFW codec installed.
+
+The library has only been tested with TM2X(2.5.1.8),
+Indeo 5.2 and MPEG4.2. Other codecs may work, but only if
+they support RGB32, RGB24, and RGB16 (555 and/or 565).
+MS-CRAM and Cinepak crashed painfully in tests so far.
+
+The library assumes support for all 4 RGB truecolor modes
+mentioned above, (NOTE: TM2X doesn't support RGB24 yet!)
+
+- 5/19/99 -
+We added black-lining blitters for stretched modes. Note that
+24 bit display uses a 32bit offscreen buffer which is blitted
+down to 24bit when stretched, this makes the asm code that much
+simpler.
+
+To use:
+
+in addition to the regular DXV,
+link s_icm.lib to your application and do the following
+
+substitute:
+DXL_InitVideoEx(int lmaxScreens,int lmaxImages);
+in place of:
+DXL_InitVideo(int lmaxScreens,int lmaxImages)
+
+use:
+movie->xImage =
+DXL_AlterXImage(movie->xImage,movie->vData,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.ulHandler,
+0,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.usWidth,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.usHeight);
+
+in place of:
+movie->xImage = DXL_CreateXImage(movie->vData);
+
+and, prior to any calls to:
+DXL_dxImageToVScreen(movie->xImage, movie->vScreen);
+
+you must call:
+DXL_SetXImageCSize(movie->xImage, movie->vLength);
+
+********/
+
+
+static dxvBitDepth bitDepths[] = /* installed as bdPrefs on every codec xImage by reCreateCodec() */
+{
+ DXRGB32,DXRGB24,DXRGB16,DXRGBNULL /* DXRGBNULL presumably terminates the list - confirm against the bdPrefs consumer */
+};
+
+/* define an xImage structure based on the core xImage struct */
+
+
+
+typedef struct tXImageCODEC{ /* xImage variant that decodes through an ICM (VFW) codec */
+ xImageBaseStruct; /* shared xImage members, declared elsewhere; handles are cast between DXL_XIMAGE_HANDLE and DXL_CODEC_HANDLE, so this presumably has to stay first */
+
+ DK_BITMAPINFOHEADER bihIn; /* compressed-stream header, copied from the caller's data in reCreateCodec() */
+ unsigned long bihInFields[3]; /* not referenced in this file; presumably room for colour masks after bihIn - TODO confirm */
+ DK_BITMAPINFOHEADER bihOut; /* output format, filled from the destination vScreen by DXL_VSCREEN_2_BMI() */
+ unsigned long bihOutFields[3]; /* not referenced in this file; presumably room for colour masks after bihOut - TODO confirm */
+ HIC hic; /* decompressor handle obtained from ICOpen()/ICLocate() */
+ int changeVScreen; /* non-zero: the decompress routines must (re)begin decompression for the destination */
+
+ BMIMapping* maps[20]; /* DXL_BMIMap entries the codec accepted, filled by DXL_ReportBestBMIMatch() */
+ int maxMaps; /* number of valid entries in maps[] */
+
+} DXL_CODEC,*DXL_CODEC_HANDLE;
+
+
+
+
+
+/*
+ * Copy the symbolic name of an ICM error code into `storage` and return
+ * `storage`.  Codes other than the four listed below produce the text
+ * "Defaulted to ICERR_ERROR".  `length` is accepted but not used, so the
+ * caller must supply storage large enough for the longest message.
+ */
+char* DXL_DecodeICERR(int err, char *storage, int length)
+{
+    const char *text;
+
+    (void)length; // not used
+
+    switch (err)
+    {
+    case ICERR_UNSUPPORTED : text = "ICERR_UNSUPPORTED";        break;
+    case ICERR_BADFORMAT :   text = "ICERR_BADFORMAT";          break;
+    case ICERR_MEMORY :      text = "ICERR_MEMORY";             break;
+    case ICERR_ERROR :       text = "ICERR_ERROR";              break;
+    default :                text = "Defaulted to ICERR_ERROR"; break;
+    }
+
+    strcpy(storage, text);
+
+    return storage;
+}
+
+
+
+
+/*
+ * Build the output bitmap header for decoding `xImage` onto `vScreen`.
+ *
+ * bmih is first loaded with a copy of the codec's input header, then the
+ * screen's bit depth is looked up among the mappings the codec accepted
+ * (src->maps, see DXL_ReportBestBMIMatch).  On a match the format fields
+ * and the dimensions derived from the screen's pitch and height are
+ * written into bmih, the result is traced to stderr and bmih is returned.
+ *
+ * Returns 0 when no accepted mapping has the screen's bit depth.
+ * *bd1 receives the screen's bit depth on every path, so callers never
+ * read an unset value.
+ */
+DK_BITMAPINFOHEADER* DXL_VSCREEN_2_BMI
+(
+    DXL_XIMAGE_HANDLE xImage,
+    DXL_VSCREEN_HANDLE vScreen,
+    DK_BITMAPINFOHEADER *bmih,
+    dxvBitDepth* bd1
+    )
+{
+    unsigned char *addr;
+    dxvBlitQuality bq;
+    dxvBitDepth bd;
+    short pitch;
+    short height;
+    int t;
+
+    DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+
+    duck_memcpy(bmih,&src->bihIn,sizeof(DK_BITMAPINFOHEADER));
+
+    DXL_GetVScreenAttributes(vScreen, (void **) &addr, &bq, &bd, &pitch, &height );
+
+    *bd1 = bd;
+
+    for(t = 0; t < src->maxMaps; t++)
+    {
+        if (src->maps[t]->bd == bd)
+        {
+            bmih->biBitCount = src->maps[t]->biBitCount;
+            bmih->biCompression = src->maps[t]->biCompression;
+            bmih->biPlanes = src->maps[t]->biPlanes;
+
+            /* width is derived from the pitch in whole bytes per pixel (12 bpp counts as 1) */
+            bmih->biWidth = pitch / (bmih->biBitCount / 8);
+            bmih->biHeight = height;
+            bmih->biSizeImage = pitch * bmih->biHeight;
+
+            /* values are widened to long so they match the %ld conversions */
+            fprintf(stderr,"\nBMI from VScreen attributes ...\n");
+            fprintf(stderr,"\t pitch = %ld\n", (long) pitch);
+            fprintf(stderr,"\t width = %ld\n", (long) bmih->biWidth);
+            fprintf(stderr,"\t height = %ld\n", (long) bmih->biHeight);
+            fprintf(stderr,"\t biCompression = %c%c%c%c\n",
+                ((char *) &bmih->biCompression)[0],
+                ((char *) &bmih->biCompression)[1],
+                ((char *) &bmih->biCompression)[2],
+                ((char *) &bmih->biCompression)[3]
+                );
+
+            fflush(stderr);
+
+            return bmih;
+        }
+    }
+
+    return 0;
+}
+
+
+
+
+/*
+ * Ask the codec which of the DXL_BMIMap output formats it can produce.
+ *
+ * Each table entry is offered to ICDecompressQuery() together with the
+ * codec's input header; the outcome is traced to stderr.  Accepted
+ * entries are recorded, in table order, in both map[] and src->maps[],
+ * limited by the caller's *maxMaps and by the capacity of src->maps[].
+ *
+ * On return *maxMaps and src->maxMaps hold the number of recorded
+ * entries, which is also the return value.  As side effects the input
+ * header's dxFlavor is set to 2 and the output header is cleared.
+ * doConsoleReport is unused.
+ */
+int DXL_ReportBestBMIMatch(DXL_XIMAGE_HANDLE xImage, BMIMapping** map, int *maxMaps, int doConsoleReport)
+{
+    int t;
+    int ret;
+    char buff[5];
+    int len = sizeof(DXL_BMIMap)/sizeof(BMIMapping);
+    int matches = 0;
+    DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+    int capacity = sizeof(src->maps)/sizeof(src->maps[0]);
+    DK_BITMAPINFOHEADER temp;
+
+    (void)doConsoleReport; //unused
+
+    src->bihIn.dxFlavor = 2; /* use the extended ICM functions */
+
+    duck_memcpy(&temp,&src->bihIn,sizeof(DK_BITMAPINFOHEADER));
+
+    for(t = 0; t < len; t++) /* for each one we support with out mapping */
+    {
+
+        temp.biBitCount = DXL_BMIMap[t].biBitCount;
+        temp.biCompression = DXL_BMIMap[t].biCompression;
+        temp.biPlanes = DXL_BMIMap[t].biPlanes;
+        temp.biSizeImage = temp.biBitCount * temp.biWidth * temp.biHeight / 8;
+        ret = ICDecompressQuery(src->hic, &(src->bihIn), &temp );
+
+        if (ret == ICERR_OK)
+        {
+            /* arguments are widened to long to match the %ld conversions */
+            fprintf(stderr,"format of %s supported, planes = %ld, rank = %ld\n",
+                MakeFourCCString(temp.biCompression, buff ), (long) temp.biPlanes, (long) (matches + 1));
+            fflush(stderr);
+
+            /* never store past the caller's limit or the end of src->maps[] */
+            if (matches < *maxMaps && matches < capacity)
+            {
+                src->maps[matches] = map[matches] = &DXL_BMIMap[t];
+                matches += 1;
+            }
+        }
+        else
+        {
+            fprintf(stderr,"format of %s NOT supported, planes = %ld\n",
+                MakeFourCCString(temp.biCompression, buff ), (long) temp.biPlanes);
+            fflush(stderr);
+        }
+    }
+
+    src->maxMaps = *maxMaps = matches;
+
+    /* This could be done somewhere else ! */
+    duck_memset(&src->bihOut,0,sizeof(DK_BITMAPINFOHEADER));
+
+    return matches;
+}
+
+
+
+/*
+ * Decode the current frame of `xImage` into `vScreen` using the basic ICM
+ * calls (ICDecompressBegin / ICDecompress / ICDecompressEnd).
+ *
+ * When the destination is flagged as changed, any earlier session is
+ * ended, the output header is rebuilt from the vScreen and a new session
+ * is begun; the flag is cleared only after a frame has decoded.
+ *
+ * Returns ICERR_OK on success, ICERR_BADFORMAT when the vScreen's bit
+ * depth is not one the codec accepted, otherwise the code returned by
+ * ICDecompressBegin or ICDecompress.  A failed ICDecompress is reported
+ * to the caller (after asserting in debug builds) instead of terminating
+ * the whole process from inside the library.
+ */
+static int decompress1(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen)
+{
+    DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+    DWORD ret;
+    dxvBitDepth bd;
+
+    if (src->changeVScreen)
+    {
+        /* should be cleared first time in so width zero ! */
+        if (src->bihOut.biWidth != 0)
+            ICDecompressEnd(src->hic);
+
+        if ( DXL_VSCREEN_2_BMI(xImage, vScreen, (DK_BITMAPINFOHEADER *) &(src->bihOut), &bd ) == 0)
+        {
+            /* user asks for unsupported surface FOURCC */
+            fprintf(stderr, "User asks for unsupported dxvBitDepth = %ld\n", (long) bd );
+            fflush(stderr);
+
+            return ICERR_BADFORMAT;
+        }
+
+        ret = ICDecompressBegin(src->hic, &src->bihIn, &src->bihOut);
+
+        if (ret != ICERR_OK)
+        {
+            return (int) ret;
+        }
+    }
+
+    src->bihIn.biSizeImage = src->fSize;
+
+    ret = ICDecompress( src->hic, 0,
+        (BITMAPINFOHEADER *) &src->bihIn, src->addr,
+        (BITMAPINFOHEADER *) &src->bihOut,
+        (char *) vScreen->addr);
+
+    if (ret != ICERR_OK)
+    {
+        fprintf(stderr,"Oh boy decompress may have failed !\n");
+        fflush(stderr);
+        assert(0);
+        return (int) ret;
+    }
+
+    src->changeVScreen = 0;
+
+    return ICERR_OK;
+}
+
+
+/*
+ * Decode the current frame of `xImage` into `vScreen` using the extended
+ * ICM calls (ICDecompressExBegin / ICDecompressEx / ICDecompressExEnd),
+ * with source and destination rectangles both set to the full input
+ * width and height.
+ *
+ * When the destination is flagged as changed, any earlier session is
+ * ended, the output header is rebuilt from the vScreen and a new session
+ * is begun.
+ *
+ * Returns 0 on success, ICERR_BADFORMAT when the vScreen's bit depth is
+ * not one the codec accepted, ICERR_UNSUPPORTED when the codec rejects
+ * the extended interface (the caller then falls back to decompress1),
+ * otherwise the failing ICM call's code.  Failures are returned to the
+ * caller (after asserting in debug builds); the library no longer carries
+ * on after a failed begin or terminates the process after a failed decode.
+ */
+static int decompress2(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen)
+{
+    DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+    DWORD dwFlags = 0;
+    DWORD ret;
+    dxvBitDepth bd;
+
+    if (src->changeVScreen)
+    {
+        /* should be cleared first time in so width zero ! */
+        if (src->bihOut.biWidth != 0)
+            ICDecompressExEnd(src->hic);
+
+        if ( DXL_VSCREEN_2_BMI(xImage, vScreen, (DK_BITMAPINFOHEADER *) &(src->bihOut), &bd ) == 0)
+        {
+            /* user asks for unsupported surface FOURCC */
+            fprintf(stderr, "User asks for unsupported dxvBitDepth = %ld\n", (long) bd );
+            fflush(stderr);
+
+            return ICERR_BADFORMAT;
+        }
+
+        ret = ICDecompressExBegin(
+            src->hic,
+            dwFlags,
+            (BITMAPINFOHEADER *) &(src->bihIn),
+            src->addr,
+            0,
+            0,
+            src->bihIn.biWidth,
+            src->bihIn.biHeight,
+            (BITMAPINFOHEADER *) &(src->bihOut),
+            (char *) vScreen->addr,
+            0,
+            0,
+            src->bihIn.biWidth,
+            src->bihIn.biHeight
+            );
+
+        if (ret == ICERR_UNSUPPORTED)
+        {
+            return ICERR_UNSUPPORTED;
+        }
+        if (ret != ICERR_OK)
+        {
+            /* fixed local buffer: no allocation that can fail, and sizeof gives the real capacity */
+            char storage[256];
+
+            fprintf(stderr,"ICDecompressExBegin returns error code = %ld\n", (long) ret);
+            fprintf(stderr,"Decoded as ... %s\n", DXL_DecodeICERR((int) ret, storage, (int) sizeof(storage) - 1));
+            fflush(stderr);
+
+            assert(0);
+
+            /* changeVScreen stays set so the next call tries to begin again */
+            return (int) ret;
+        }
+
+        src->changeVScreen = 0;
+    }
+
+    src->bihIn.biSizeImage = src->fSize;
+
+    ret = ICDecompressEx(
+        src->hic,
+        dwFlags,
+        (BITMAPINFOHEADER *) &src->bihIn,
+        src->addr,
+        0,
+        0,
+        src->bihIn.biWidth,
+        src->bihIn.biHeight,
+        (BITMAPINFOHEADER *) &src->bihOut,
+        (char *) vScreen->addr,
+        0,
+        0,
+        src->bihIn.biWidth,
+        src->bihIn.biHeight
+        );
+
+    if (ret != ICERR_OK)
+    {
+        fprintf(stderr,"Oh boy decompress may have failed !\n");
+        fflush(stderr);
+        assert(0);
+        return (int) ret;
+    }
+
+    return 0;
+}
+
+
+
+/*
+ * Decode entry point installed as the xImage's `dx` handler.
+ *
+ * While the input header's dxFlavor is 2 the extended path (decompress2)
+ * is used.  If that path reports ICERR_UNSUPPORTED the flavor is switched
+ * to 1 and the basic path (decompress1) is run in the same call; every
+ * later call then goes straight to decompress1.  Returns the result of
+ * the last path executed, or ICERR_OK when dxFlavor is neither 1 nor 2.
+ */
+static int decompress(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen2)
+{
+    DXL_CODEC_HANDLE codec = (DXL_CODEC_HANDLE) xImage;
+    int status = ICERR_OK;
+
+    if (codec->bihIn.dxFlavor == 2)
+    {
+        /* the extended interface copes with arbitrary pitch, so it is tried first */
+        status = decompress2(xImage, vScreen2);
+
+        if (status != ICERR_UNSUPPORTED)
+            return status;
+
+        /* codec lacks the extended interface: demote permanently */
+        codec->bihIn.dxFlavor = 1;
+    }
+
+    if (codec->bihIn.dxFlavor == 1)
+        status = decompress1(xImage, vScreen2);
+
+    return status;
+}
+
+
+
+
+
+/*
+Shut a decompressor down: when an ICM handle is open, end decompression
+on it and close it, then release the xImage itself through duck_free().
+A null xImage is accepted.  Always returns DXL_OK.
+*/
+
+static int destroyCodec(DXL_XIMAGE_HANDLE xImage)
+{
+    DXL_CODEC_HANDLE codec = (DXL_CODEC_HANDLE ) xImage;
+
+    if (!codec)
+        return DXL_OK;
+
+    if (codec->hic)
+    {
+        ICDecompressEnd(codec->hic);
+        ICClose(codec->hic);
+    }
+
+    duck_free(codec);
+
+    return DXL_OK;
+}
+
+
+
+
+/*
+ * Narrow a NUL-terminated string of 16-bit characters in place.
+ *
+ * `s` is treated as a sequence of two-byte units; the first byte of each
+ * unit is kept (the low byte on the little-endian targets this file is
+ * built for) and the second is dropped.  Conversion stops at the first
+ * unit whose two bytes are both zero.  The result overwrites the start
+ * of `s` and is NUL-terminated.
+ *
+ * Returns `s`, or 0 when `s` is a null pointer.
+ *
+ * The previous version tested for the terminator at byte offset t rather
+ * than 2*t (a misaligned read that also ran past the end of the string)
+ * and staged the result in a fixed 256-byte buffer.  Writing in place is
+ * safe because the write index never passes the read index.
+ */
+static char* duckToNarrow(char *s)
+{
+    int t = 0;
+
+    if (!s)
+        return 0;
+
+    /* a unit terminates the string only when both of its bytes are zero */
+    while (s[2*t] != '\0' || s[2*t + 1] != '\0')
+    {
+        s[t] = s[2*t];
+        t = t + 1;
+    }
+
+    s[t] = '\0';
+
+    return s;
+
+} /* end duckToNarrow */
+
+
+
+
+
+/*
+Called during initialization and/or when the xImage (decompressor)
+attributes change.  Note that nImage and src refer to the same object
+once it has been allocated.
+
+src      - existing decompressor to destroy first, or NULL
+data     - points at the DK_BITMAPINFOHEADER of the compressed stream; the
+           header is copied into the new object and `data` is kept as its addr
+type, bitDepth, w, h - unused
+
+Returns the new xImage, or NULL/0 when the allocation fails or no ICM
+decompressor could be opened (the new object is released in that case).
+The ICM handle is checked before it is queried, and details of the opened
+codec are traced to stderr.
+
+!!!!!!
+This function should be prepared to get data that is NOT of the
+type native to the decoder.  It should do its best to verify it
+as valid data and should clean up after itself and return NULL
+if it doesn't recognize the format of the data
+*/
+
+static DXL_XIMAGE_HANDLE reCreateCodec(DXL_CODEC_HANDLE src,unsigned char *data,
+    int type,enum BITDEPTH bitDepth,int w,int h)
+{
+#pragma warning(disable: 4210) // nonstandard extension used : function given file scope
+    DXL_XIMAGE_HANDLE createCodec(unsigned char *data);
+#pragma warning(default: 4210) // nonstandard extension used : function given file scope
+
+    DXL_XIMAGE_HANDLE nImage;
+    DK_BITMAPINFOHEADER *bmiHeader = (DK_BITMAPINFOHEADER *) data;
+    unsigned long fccHandler;
+
+    (void)h; // unused
+    (void)w; // unused
+    (void)bitDepth; //unused
+    (void)type; //unused
+
+    if (src != NULL) /* if an xImage/decompressor already exists, destroy it */
+        destroyCodec((DXL_XIMAGE_HANDLE ) src);
+
+    /* create a new xImage, specific to this type of decoder, (
+    see "DXL_CODEC" struct above and dxl_main.h) */
+
+    nImage = (DXL_XIMAGE_HANDLE)duck_calloc(1,sizeof(DXL_CODEC),DMEM_GENERAL);
+    src = (DXL_CODEC_HANDLE ) nImage;
+
+    if (!nImage) return NULL;
+
+    duck_memset(nImage,0,sizeof(DXL_CODEC));
+
+    src->changeVScreen = 1; /* True ... inform decompresss the dest has changed */
+
+    /* set up the "vtable" of interface calls */
+    src->create = (DXL_XIMAGE_HANDLE (*)(void *)) createCodec;
+    src->recreate = (DXL_XIMAGE_HANDLE (*)(DXL_XIMAGE_HANDLE,void *,int,int,int,int)) reCreateCodec;
+
+    nImage->destroy = destroyCodec;
+    nImage->dx = decompress;
+    nImage->blit = NULL; /* there is no interleaved blitter for codecs */
+
+    src->bdPrefs = bitDepths; /* plug in the list of prefered bit depths */
+
+    nImage->addr = data;
+    nImage->dkFlags.inUse = 1;
+
+    duck_memcpy(&src->bihIn, bmiHeader,sizeof(DK_BITMAPINFOHEADER));
+    duck_memset(&src->bihOut, 0, sizeof(DK_BITMAPINFOHEADER));
+
+    src->w = (short ) (src->bihIn.biWidth);
+    src->h = (short ) (src->bihIn.biHeight);
+
+    src->imWidth = (short) src->w;
+    src->imHeight = (short) src->h;
+
+    fccHandler = src->bihIn.fccHandler;
+
+    if (fccHandler == 0)
+    {
+        src->hic=ICLocate(ICTYPE_VIDEO, fccHandler, (BITMAPINFOHEADER *) &src->bihIn, 0, ICMODE_DECOMPRESS);
+    }
+    else
+    {
+        src->hic=ICOpen(ICTYPE_VIDEO, fccHandler, ICMODE_DECOMPRESS);
+    }
+
+    /* give up before touching the handle if no decompressor could be opened */
+    if (src->hic == 0)
+    {
+        char fccText[5];
+
+        destroyCodec((DXL_XIMAGE_HANDLE ) src);
+
+        /* same byte order as every other FOURCC report in this file */
+        fprintf(stderr, "codec for fourCC = %s, %lx won't open\n",
+            MakeFourCCString(fccHandler, fccText),
+            fccHandler
+            );
+        fflush(stderr);
+
+        return 0;
+    }
+
+    {
+        ICINFO i;
+
+        memset(&i,0,sizeof(ICINFO));
+        if (ICGetInfo( src->hic, &i, sizeof(ICINFO) ))
+        {
+            /* one buffer per FOURCC: both strings are consumed by the same fprintf call */
+            char compressionText[5];
+            char handlerText[5];
+
+            unsigned long biCompression = src->bihIn.biCompression;
+
+            fccHandler = src->bihIn.fccHandler = i.fccHandler;
+
+            /* version fields are narrowed to match the %d / %x conversions */
+            fprintf(stderr, "Short Name : %s\n"
+                "Driver : %s\n"
+                "driver version = %d %d or as hex = %x\n"
+                "Description : %s\n"
+                "Codec biCompression = %s\n"
+                "Codec fccHandler = %s\n",
+                duckToNarrow( (char *) i.szName),
+                duckToNarrow( (char *) i.szDriver),
+                (int) ((i.dwVersion & 0x0000FFFF) >> 0 ),
+                (int) ((i.dwVersion & 0xFFFF0000) >> 16 ),
+                (unsigned int) i.dwVersion,
+                duckToNarrow( (char *) i.szDescription),
+                MakeFourCCString(biCompression, compressionText),
+                MakeFourCCString(fccHandler, handlerText)
+                );
+            fflush(stderr);
+        }
+    }
+
+    return nImage;
+}
+
+/* "glue" constructor: builds a fresh codec xImage for the header at bmih by
+   delegating to reCreateCodec() with no previous object and all of the
+   remaining (unused) arguments zeroed */
+
+#pragma warning(disable:4211) //nonstandard extension used : redefined extern to static
+static DXL_XIMAGE_HANDLE createCodec(unsigned char *bmih)
+{
+    DXL_XIMAGE_HANDLE fresh;
+
+    fresh = reCreateCodec(NULL, bmih, 0, (enum BITDEPTH) 0, 0, 0);
+
+    return fresh;
+}
+#pragma warning(default:4211) //nonstandard extension used : redefined extern to static
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile b/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile
new file mode 100644
index 00000000..8547cbbc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile
@@ -0,0 +1,339 @@
+everything : all
+###### dxv standard project specs
+
+PROJECT = dxv
+# project name
+
+# standard modules for this library
+
+OBJS = \
+ $(OBJDIR)dxlcsize.$(OBJ) \
+ $(OBJDIR)dxv_init.$(OBJ) \
+ $(OBJDIR)dxv_initex.$(OBJ) \
+ $(OBJDIR)dxl_main.$(OBJ) \
+ $(OBJDIR)vscreen.$(OBJ) \
+ $(OBJDIR)ximage.$(OBJ) \
+ $(OBJDIR)ximgblit.$(OBJ) \
+ $(OBJDIR)getprefs.$(OBJ) \
+ $(OBJDIR)dxl_clip.$(OBJ) \
+ $(OBJDIR)dxl_bkgr.$(OBJ) \
+ $(OBJDIR)dxl_view.$(OBJ) \
+ $(OBJDIR)vgetview.$(OBJ) \
+ $(OBJDIR)vscerase.$(OBJ) \
+ $(OBJDIR)dxl_attr.$(OBJ) \
+ $(OBJDIR)dxl_reg.$(OBJ) \
+ $(OBJDIR)dxl_feat.$(OBJ) \
+ $(OBJDIR)genximg.$(OBJ) \
+ $(OBJDIR)svsbitdp.$(OBJ) \
+ $(OBJDIR)vscraddr.$(OBJ) \
+ $(OBJDIR)blitall.$(OBJ) \
+ $(OBJDIR)dxlvinfd.$(OBJ) \
+ $(OBJDIR)cpuid.$(OBJ) \
+ $(OBJDIR)count_tm.$(OBJ)
+
+!ifndef NOCLUT
+
+OBJS = $(OBJS) \
+ $(OBJDIR)dxl_clut.$(OBJ)
+
+!endif
+
+!ifndef NOLOGO
+
+OBJS = $(OBJS) \
+ $(OBJDIR)on2logo.$(OBJ) \
+ $(OBJDIR)tmlogo55.$(OBJ) \
+ $(OBJDIR)tmlogo56.$(OBJ) \
+ $(OBJDIR)tmlogo24.$(OBJ) \
+ $(OBJDIR)ducklogo.$(OBJ)
+
+!endif
+
+
+MAJREV = 6 # major library revision
+MINREV = 0 # minor library revision
+PVER = 1 # minor.minor library revision
+BVER = ".b1" # build version
+
+# libraries on which this should be built
+
+!if 0
+ $(LIBDIR)$(LIBPREFIX)tmrt$(LIBSUFFIX) \
+ $(LIBDIR)$(LIBPREFIX)tm1$(LIBSUFFIX) \
+ $(LIBDIR)$(LIBPREFIX)tm20$(LIBSUFFIX) \
+
+ $(LIBDIR)$(LIBPREFIX)vp3$(LIBSUFFIX) \
+ $(LIBDIR)$(LIBPREFIX)vp31d$(LIBSUFFIX)
+
+
+!endif
+
+!if 0
+USER_LIBS = \
+ $(LIBDIR)$(LIBPREFIX)torq$(LIBSUFFIX)
+!endif
+
+#
+# project sub options sent to compilers
+#
+!ifndef CENV
+CENV = -DTORQB -DTM1B -DTM2B -DTMRTB
+!endif
+
+# add sub-options to c options (allows for environment override)
+C_FLAGS = $(CENV)
+c_flags = $(C_FLAGS)
+
+# this proj library is to be built as a shared or master library
+SHAREDLIB = 0
+
+M_INCLUDES = -I$(INCDIR)/$(OS) -I$(INCDIR) -I. -I../generic $(M_INCLUDES)
+# win95 specific modules
+
+OBJS = $(OBJS) \
+ $(OBJDIR)dxv.$(OBJ) \
+ $(OBJDIR)dkprof.$(OBJ) \
+ $(OBJDIR)perf.$(OBJ) \
+ $(OBJDIR)pentium.$(OBJ) \
+ $(OBJDIR)$(PROJECT).res
+
+# modules specifically sent to linker,
+# others are provided in libraries
+
+DLLOBJS = $(OBJDIR)dxl_main.obj \
+ $(OBJDIR)dxv_mem.$(OBJ) \
+ $(OBJDIR)dxv_mems.$(OBJ) \
+ $(OBJDIR)$(PROJECT).res \
+ $(OBJDIR)$(PROJECT).$(OBJ)
+
+PROJLIBS = $(PROJLIBS)
+
+#$(OBJDIR)sc_$(PROJECT).lib
+
+THISFILE = makefile
+
+SRCDIR = .
+GENDIR = ..\generic
+OBJDIR = obj\
+LIBDIR = obj\
+INCDIR = $(CORELIBS_INC)
+
+ASMDIR = ..\X86\
+MMXDIR = ..\mmx\
+WASMDIR = ..\wX86\
+
+OBJRELDIR = $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Release\
+OBJDEBDIR = $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Debug\
+LIBRELDIR = $(ROOT)Private\CoreLibs\Lib\$(OS)\Release
+LIBDEBDIR = $(ROOT)Private\CoreLibs\Lib\$(OS)\Debug
+
+OBJ = obj
+
+
+MKDIR =
+
+# tools
+CC = cl
+ASM = tasm /ml /m8 /dWINP /zd /I$(ASMDIR) /dWIN95
+WASM = wasm /dWINP /dWIN95 /I=$(ASMDIR) /I=$(MMXDIR) -q
+RC = rc
+
+LIBNAME = $(OBJDIR)$(PROJECT).lib
+SLIBNAME = $(OBJDIR)s_$(PROJECT).lib
+EXENAME = $(OBJDIR)$(PROJECT).exe
+###DEFNAME = $(SRCDIR)$(PROJECT).DEF
+DEFNAME = $(PROJECT).DEF
+
+LIBPREFIX = \s_
+LIBSUFFIX = .lib
+
+# Include Path Variable Extensible
+include = $(SDK)/inc;$(include)
+
+
+############################################################
+# Global Flags (compiler / linker)
+C_FLAGS = $(C_FLAGS) -Gy -W3 -nologo -DWIN32 -D_MBCS -D_AFXDLL -D_WINDOWS -DHIRES\
+ -DDOS32 -c $(M_INCLUDES:/=\)
+#-WX
+
+L_FLAGS = $(L_FLAGS) /SUBSYSTEM:windows /INCREMENTAL:no /MACHINE:I386
+
+############################################################
+# Locally Overridable Flags
+
+!ifndef DLLNAME
+DLLNAME = $(OBJDIR)$(PROJECT).dll
+!endif
+
+!ifndef c_thread
+
+## SJL - NEED TO USE LIBCMT NOW
+## c_thread = -MD
+c_thread = -MT
+!endif
+
+!ifndef c_ndebugflags
+c_ndebugflags = -DNDEBUG -Ob2 -Ot -O2 -G5
+!endif
+
+!ifndef c_debugflags
+c_debugflags = -D_DEBUG -GX -Od -Z7
+!endif
+
+!ifndef l_ndebugflags
+l_ndebugflags =
+!endif
+
+!ifndef l_debugflags
+l_debugflags = -DEBUG -DEBUGTYPE:BOTH
+!endif
+
+# exe flags / objects
+!ifndef l_exe_flags
+l_exe_flags =
+!endif
+
+
+# DLL flags / objects
+!ifndef l_dll_flags
+l_dll_flags = -DLL -DEF:$(DEFNAME)
+!endif
+
+!ifndef DLLOBJS
+DLLOBJS = $(OBJS)
+!endif
+
+# targets we will build
+!ifndef targets
+targets = $(SLIBNAME)
+!ifdef SHAREDLIB
+!if $(SHAREDLIB) == 1
+targets = $(TARGETS) $(DLLNAME)
+!endif
+!endif
+!endif
+
+
+# versions we will build
+!ifndef VERSIONS
+VERSIONS = release.ver debug.ver
+!endif
+
+############################################################
+# inference rules
+# -mkdir $(OBJDIR:\= )
+# -mkdir $(MKOBJDIR:\= )
+
+# Target Directories
+#$(OBJDIR:\= ) :
+!if 0
+$(OBJDIR:\= ) :
+ mkdir $*
+ cd $*
+!endif
+
+$(LIBDIR) :
+# -mkdir $(LIBDIR)\..
+ -mkdir $(MKLIBDIR)
+
+# Object files
+
+{$(GENDIR)}.c{$(OBJDIR)}.obj:
+ $(CC) $(C_FLAGS) $< -Fo$@
+
+{$(GENDIR)}.cpp{$(OBJDIR)}.obj:
+ $(CC) $(C_FLAGS) $< -Fo$@
+
+{$(SRCDIR)}.c{$(OBJDIR)}.obj:
+ $(CC) $(C_FLAGS) $< -Fo$@
+
+{$(SRCDIR)}.cpp{$(OBJDIR)}.obj:
+ $(CC) $(C_FLAGS) $< -Fo$@
+
+{$(WASMDIR)}.asm{$(OBJDIR)}.obj:
+ $(WASM) $< -fo=$@
+ editbin /NOLOGO /RELEASE $@
+
+{$(ASMDIR)}.asm{$(OBJDIR)}.obj:
+ $(ASM) $< $@
+ editbin /NOLOGO /RELEASE $@
+
+{$(SRCDIR)}.rc{$(OBJDIR)}.res:
+ $(RC) /fo$@ /dALG="$(CENV:-D= )" /dPROJ=$(PROJECT) /dMAJ=$(MAJREV) /dMIN=$(MINREV) /dPVER=$(PVER) /dBVER=$(BVER) $<
+
+#
+# building of targets
+#
+
+###$(SLIBNAME): $(OBJDIR:\= ) $(LIBDIR) $(OBJS)
+$(SLIBNAME): $(OBJS)
+ LIB $(OBJS) /OUT:$@
+!ifdef USER_LIBS
+ for %i in ($(USER_LIBS)) do LIB $(SLIBNAME) %i /OUT:$@
+!endif
+ copy $(SLIBNAME) $(LIBDIR)
+
+##$(DLLNAME): $(SLIBNAME) $(OBJDIR:\= ) $(LIBDIR) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(SLIBNAME) $(DLLOBJS)
+$(DLLNAME): $(SLIBNAME) $(LIBDIR) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(SLIBNAME) $(DLLOBJS)
+ link $(L_FLAGS) $(l_dll_flags) /OUT:$(DLLNAME) $(DLLOBJS) $(SLIBNAME) /MAP:$(OBJDIR)$(PROJECT).map
+ copy $@ $(LIBDIR)
+ copy $(LIBNAME) $(LIBDIR)
+
+$(EXENAME): $(OBJDIR:\= ) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(OBJS)
+ link $(L_FLAGS) $(l_exe_flags) /OUT:$(EXENAME) $(OBJS) /MAP:$(OBJDIR)$(PROJECT).map
+
+#
+# Goals
+#
+build_me: $(targets) $(PROJLIBS)
+ @echo Build Completed
+
+#
+# Create the object output tree one level at a time.
+# Each mkdir carries the '-' prefix (as the $(LIBDIR) rule does) so that a
+# directory which already exists does not abort the rule, and the level
+# that used to be listed twice is created once.
+#
+build_dirs:
+ @echo BUILDING DIRECTORIES
+ -mkdir $(ROOT)Object
+ -mkdir $(ROOT)Object\CoreLibs
+ -mkdir $(ROOT)Object\CoreLibs\$(PROJECT)
+ -mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx
+ -mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)
+ -mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Release
+ -mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Debug
+
+release.ver:
+ set C_FLAGS=$(C_FLAGS) $(c_ndebugflags) $(c_thread) $(c_local)
+ set L_FLAGS=$(L_FLAGS) $(l_ndebugflags)
+ set OBJDIR=$(OBJRELDIR)
+ set LIBDIR=$(LIBRELDIR)
+ nmake /NOLOGO /C /S build_dirs
+ nmake /NOLOGO -f $(THISFILE) build_me
+
+debug.ver:
+ @echo NOW DOING DEBUG VERSION
+ set C_FLAGS=$(C_FLAGS) $(c_debugflags) $(c_thread) $(c_local)
+ set L_FLAGS=$(L_FLAGS) $(l_debugflags)
+ set OBJDIR=$(OBJDEBDIR)
+ set LIBDIR=$(LIBDEBDIR)
+ nmake /NOLOGO /C /S build_dirs
+ nmake /NOLOGO -f $(THISFILE) build_me
+
+all:: $(VERSIONS)
+
+clean:
+ deltree /Y obj
+ deltree /Y obj.*
+
+
+
+
+$(OBJDIR)sc_$(PROJECT).lib: $(LIBDIR)\sc_tm20.lib $(LIBDIR)\sc_torq.lib
+ LIB $(OBJS) /OUT:$@
+ LIB $@ $(LIBDIR)\s_tm1.lib /OUT:$@
+ LIB $@ $(LIBDIR)\sc_tm20.lib /OUT:$@
+ LIB $@ $(LIBDIR)\s_tmrt.lib /OUT:$@
+ LIB $@ $(LIBDIR)\sc_torq.lib /OUT:$@
+ copy $@ $(LIBDIR)
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h b/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h
new file mode 100644
index 00000000..6ed72fa3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h
@@ -0,0 +1,15 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by dxv.rc
+//
+
+// Next default values for new objects
+//
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE 101
+#define _APS_NEXT_COMMAND_VALUE 40001
+#define _APS_NEXT_CONTROL_VALUE 1000
+#define _APS_NEXT_SYMED_VALUE 101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk b/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk
new file mode 100644
index 00000000..6fc2687a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk
@@ -0,0 +1,22 @@
+# win95 specific modules
+
+OBJS = $(OBJS) \
+ $(OBJDIR)dxv.$(OBJ) \
+ $(OBJDIR)dkprof.$(OBJ) \
+ $(OBJDIR)perf.$(OBJ) \
+ $(OBJDIR)pentium.$(OBJ) \
+ $(OBJDIR)$(PROJECT).res
+
+# modules specifically sent to linker,
+# others are provided in libraries
+
+DLLOBJS = $(OBJDIR)dxl_main.obj \
+ $(OBJDIR)dxv_mem.$(OBJ) \
+ $(OBJDIR)dxv_mems.$(OBJ) \
+ $(OBJDIR)$(PROJECT).res \
+ $(OBJDIR)$(PROJECT).$(OBJ)
+
+PROJLIBS = $(PROJLIBS)
+
+#$(OBJDIR)sc_$(PROJECT).lib
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm
new file mode 100644
index 00000000..5f3b61ca
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm
@@ -0,0 +1,229 @@
+;/***********************************************\
+;??? cpuid.asm
+; checks for cpuid
+; if an id is not found, the program assumes a x86
+;\***********************************************/
+
+; parts taken from intel's AP-485
+
+
+
+;put checks for cmov and mmx support ????
+
+
+
+ .486
+ .MODEL flat, SYSCALL, os_dos
+ .CODE
+
+IDEAL
+NAME x86cpuid
+MASM
+
+PUBLIC getCPUID_
+PUBLIC _getCPUID
+
+INCLUDE proc.ash
+
+EXTRN c cpuFeatures:DWORD
+
+
+_486 EQU 4h
+PENT EQU 50h
+PENTMMX EQU 54h
+PENTPRO EQU 61h
+PENTII EQU 63h
+
+AMD_K63D EQU 58h
+AMD_K6 EQU 56h
+AMD_K5 EQU 50h ; K5 has models 0 - 6
+
+_6X86 EQU 52h
+_6X86MX EQU 60h
+
+.DATA
+
+_vendor_id db "------------"
+intel_id db "GenuineIntel"
+amd_id db "AuthenticAMD"
+cyrix_id db "CyrixInstead"
+
+getCPUID_:
+_getCPUID:
+ push esi ;safety sh*&
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+;------------------------------------------------
+; Intel486 processor check
+; Checking for ability to set/clear ID flag (Bit 21) in EFLAGS
+; which indicates the presence of a processor with the CPUID
+; instruction.
+;------------------------------------------------
+.486
+check_80486:
+ pushfd ; push original EFLAGS
+ pop eax ; get original EFLAGS
+ mov ebp,X86 ; rv
+ mov ecx, eax ; save original EFLAGS
+ xor eax, 200000h ; flip ID bit in EFLAGS
+ push eax ; save new EFLAGS value on stack
+ popfd ; replace current EFLAGS value
+ pushfd ; get new EFLAGS
+ pop eax ; store new EFLAGS in EAX
+ xor eax, ecx ; can not toggle ID bit,
+ je end_cpu_type486 ; processor=80486
+
+;------------------------------------------------
+; Execute CPUID instruction to determine vendor, family,
+; model, stepping and features. For the purpose of this
+; code, only the initial set of CPUID information is saved.
+;------------------------------------------------
+; push ebx ; save registers
+; push esi
+; push edi
+; push edx
+; push ecx
+
+; mov ebp,X86 ; rv
+
+ mov eax, 0 ; set up for CPUID instruction
+ CPU_ID ; get and save vendor ID
+
+ mov DWORD PTR _vendor_id, ebx
+ mov DWORD PTR _vendor_id[+4], edx
+ mov DWORD PTR _vendor_id[+8], ecx
+
+ cmp DWORD PTR intel_id, ebx
+ jne IsProc_AMD
+ cmp DWORD PTR intel_id[+4], edx
+ jne end_cpuid_type
+ cmp DWORD PTR intel_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not an Intel processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ shr eax, 4 ; isolate family and model
+ mov ebp,PII ; assume PII
+
+ and eax,0ffh ;mask out type and reserved
+nop
+
+ cmp eax,PENTII
+ jge end_cpuid_type
+
+ mov ebp,PPRO
+
+ cmp eax,PENTPRO
+ je end_cpuid_type
+
+ mov ebp,PMMX
+
+ cmp eax,PENTMMX
+ je end_cpuid_type
+
+ mov ebp,X86
+
+; cmp eax,PENT
+; jge end_cpuid_type
+
+end_cpuid_type:
+ mov eax,ebp ; return value: the processor class accumulated in ebp
+ mov [cpuFeatures],edx ; NOTE(review): on paths that jump here before the CPU_ID with eax=1, edx appears to still hold vendor-id bytes rather than feature flags - confirm
+
+ pop edx ;safety sh*& - restore the registers pushed on entry, in reverse order
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+end_cpu_type486:
+ mov eax,ebp
+ pop edx ;safety sh*&
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;------------------------------------------------
+IsProc_AMD:
+ cmp DWORD PTR amd_id, ebx
+ jne IsProc_CYRIX
+
+ cmp DWORD PTR amd_id[+4], edx
+ jne end_cpuid_type
+
+ cmp DWORD PTR amd_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not an AMD processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ shr eax, 4 ; isolate family and model
+ mov ebp,AMDK63D
+
+ and eax,0ffh ;mask out type and reserved
+ nop
+
+ cmp eax,AMD_K63D
+ jge end_cpuid_type
+
+ mov ebp,AMDK6
+ nop
+
+ cmp eax,AMD_K6
+ jge end_cpuid_type
+
+ mov ebp,X86
+ nop
+
+ cmp eax,AMD_K5
+ jge end_cpuid_type
+
+ mov ebp,X86
+ jmp end_cpuid_type
+
+;------------------------------------------------
+IsProc_CYRIX:
+ cmp DWORD PTR cyrix_id, ebx
+ jne end_cpuid_type
+
+ cmp DWORD PTR cyrix_id[+4], edx
+ jne end_cpuid_type
+
+ cmp DWORD PTR cyrix_id[+8], ecx
+ jne end_cpuid_type ; if not equal, not an CYRIX processor
+
+ cmp eax, 1 ; make sure 1 is valid input for CPUID
+ jl end_cpuid_type ; if not, jump to end
+
+ mov eax, 1
+ CPU_ID ; get family/model/stepping/features
+
+ shr eax, 4 ; isolate family and model
+ mov ebp,C6X86MX
+
+ and eax,0ffh ;mask out type and reserved
+ nop
+
+ cmp eax,_6X86MX
+ je end_cpuid_type
+
+ mov ebp,X86
+ jmp end_cpuid_type
+;************************************************
+ END
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm
new file mode 100644
index 00000000..31dfe1f4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm
@@ -0,0 +1,77 @@
+
+; Pentium utilities. Timothy S. Murphy 1/11/97.
+; This is a Borland i586 TASM source file.
+; Works (at least) with Watcom C++ and Visual C++ using "cdecl" linkage.
+
+ .586
+ .MODEL flat, c, os_dos
+ .CODE
+
+;------------------------------------------------
+PUBLIC c pentiumKiloCycles, pentiumTime
+
+pentiumKiloCycles:
+ push edx
+ ; rdtsc ; get 64-bit cycle count in edx:eax
+ db 0Fh, 31h ; (tasm 4.0 doesn't have rdtsc opcode)
+ shrd eax, edx, 10 ; divide by 1024
+ pop edx
+ ret ; value in eax
+
+pentiumTime:
+ push ebx
+ push edx
+
+ ; rdtsc ; get 64-bit cycle count in edx:eax
+ db 0Fh, 31h ; (tasm 4.0 doesn't have rdtsc opcode)
+ shrd eax, edx, 10 ; divide by 1024
+ mov ebx, eax
+
+ mov eax, 12[esp]
+ shr eax, 1
+
+lup: shr edx, 16
+ dec eax
+ nop
+ jns lup
+
+ ; rdtsc ; get 64-bit cycle count in edx:eax
+ db 0Fh, 31h ; (tasm 4.0 doesn't have rdtsc opcode)
+ shrd eax, edx, 10 ; divide by 1024
+
+ sub eax, ebx
+
+ pop edx
+ pop ebx
+ ret ; value in eax
+;------------------------------------------------
+; void Get_scc(&preciseU32,&lessPreciseU32);
+x86_Get_sccParams STRUC
+ dd 3 dup (?) ;3 pushed regs
+ dd ? ;return address
+ preciseU32 dd ?
+ lessPreciseU32 dd ?
+x86_Get_sccParams ENDS
+
+PUBLIC c Get_scc
+
+Get_scc:
+ push edx
+ push esi
+ push edi
+
+ mov esi,[esp].preciseU32
+ mov edi,[esp].lessPreciseU32
+
+ ; rdtsc ; get 64-bit cycle count in edx:eax
+ db 0Fh, 31h ; (tasm 4.0 doesn't have rdtsc opcode)
+
+ mov [edi],edx
+ mov [esi],eax
+
+ pop edi
+ pop esi
+ pop edx
+ ret ; value in eax
+
+END \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm
new file mode 100644
index 00000000..478aa101
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm
@@ -0,0 +1,183 @@
+;/***********************************************\
+;??? perf.asm
+;\***********************************************/
+ .586
+ .MODEL flat, SYSCALL, os_dos
+ .CODE
+
+IDEAL
+NAME tsc
+MASM
+
+PUBLIC DUCK_sti_
+PUBLIC _DUCK_sti
+
+PUBLIC DUCK_cli_
+PUBLIC _DUCK_cli
+
+PUBLIC rdtsc_Start_
+PUBLIC _rdtsc_Start
+
+PUBLIC rdtsc_End_
+PUBLIC _rdtsc_End
+
+PUBLIC addTSC_
+PUBLIC _addTSC
+
+; typedef struct tsc_cnt {
+; unsigned long low;
+; unsigned long high;
+; } *TSC_HANDLE, TSC;
+
+DUCK_sti_: ; same entry point exported under two spellings (trailing and leading underscore)
+_DUCK_sti: ; presumably to satisfy different compilers' name decoration - TODO confirm
+ sti ; set the interrupt flag (enable maskable interrupts)
+ ret
+
+DUCK_cli_: ; same entry point exported under two spellings (trailing and leading underscore)
+_DUCK_cli:
+ cli ; clear the interrupt flag (disable maskable interrupts)
+ ret
+
+;------------------------------------------------
+; void rdtsc_Start(unsigned long *low, unsigned long *high)
+; Reads the time-stamp counter and stores eax -> *low, edx -> *high.
+rdtsc_StartParams STRUC
+ dd 3 dup (?) ;3 pushed regs
+ dd ? ;return address
+ low dd ?
+ high dd ?
+rdtsc_StartParams ENDS
+;------------------------------------------------
+rdtsc_Start_:
+_rdtsc_Start:
+ push ebx ; the three pushes here must match the "3 pushed regs" slot in the STRUC
+ push ecx
+ push edx
+nop
+
+ mov ebx,[esp].low ;pointer to low
+ mov ecx,[esp].high ;pointer to high
+
+; RDTSC (hand-encoded opcode bytes; result arrives in edx:eax)
+ db 0fh, 31h
+
+ mov [ebx],eax ;return values
+ mov [ecx],edx
+
+nop
+ pop edx
+ pop ecx
+ pop ebx
+ ret ; eax is overwritten by the counter read and is not restored
+
+;------------------------------------------------
+; void rdtsc_End(unsigned long *elow, unsigned long *ehigh)
+; Replaces the start count held in *elow / *ehigh with (current counter - start count).
+rdtsc_EndParams STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ elow dd ?
+ ehigh dd ?
+rdtsc_EndParams ENDS
+;------------------------------------------------
+rdtsc_End_:
+_rdtsc_End:
+ push esi ; the six pushes here must match the "6 pushed regs" slot in the STRUC
+ push edi
+ push ebp ; saved and restored, but not otherwise used in this routine
+ push ebx
+ push ecx
+ push edx
+
+ mov edi,[esp].elow ;pointer to low var
+ mov esi,[esp].ehigh ;pointer to high var
+
+; RDTSC (hand-encoded opcode bytes; result arrives in edx:eax)
+ db 0fh, 31h
+
+ mov ebx,[edi] ;get start values
+ mov ecx,[esi]
+ sub eax,ebx ; low dword difference; borrow goes to CF
+ sbb edx,ecx ; high dword difference including the borrow
+
+ mov [edi],eax ;return values
+ mov [esi],edx
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+
+;------------------------------------------------
+; adds time stamped counts and passes back average
+;------------------------------------------------
+; void addTSC(unsigned long *dkTimes, unsigned long dkCount, unsigned long *rv);
+; Sums 32-bit entries starting at dkTimes, divides by dkCount, stores the quotient in *rv.
+addTSCParams STRUC
+ dd 6 dup (?) ;6 pushed regs
+ dd ? ;return address
+ dkTimes dd ? ;pointer to the input array (walked in 4-byte steps)
+ dkCount dd ? ;divisor; also controls the loop count (see notes below)
+ rv dd ? ;pointer that receives the average
+addTSCParams ENDS
+
+addTSC_:
+_addTSC:
+ push esi ; the six pushes here must match the "6 pushed regs" slot in the STRUC
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+ xor ebp,ebp ;used for adc
+ mov eax,[esp].dkTimes ;pointer to array of TSC's
+
+ mov edi,[esp].dkCount ;array count
+ mov esi,[esp].rv ;pointer to result
+
+ xor edx,edx ; edx:ebx is meant to be the 64-bit running sum
+ mov ebx,[eax] ;get first TSC
+
+ mov ecx,[eax+4] ;get next TSC
+ add eax,8 ; NOTE(review): add rewrites CF, so the adc below sees this add's carry
+
+ adc edx,ebp
+ add ebx,ecx
+
+add_loop:
+ dec edi ; dec leaves CF unchanged; NOTE(review): dkCount==0 wraps here and is not guarded
+ jz averageVal ; body runs dkCount-1 times, so dkCount+1 dwords are read in total - TODO confirm intent
+
+ mov ecx,[eax]
+ add eax,4 ; NOTE(review): rewrites CF, so the carry of the previous add ebx,ecx is lost before adc
+
+ adc edx,ebp ; adds only the carry of the pointer increment above - high dword looks like it stays 0; confirm
+ add ebx,ecx ; carry from the final pass is never folded into edx
+
+ jmp add_loop
+
+averageVal:
+ mov eax,ebx ; low dword of the sum becomes the dividend's low half
+ mov ebx,[esp].dkCount ;array count
+
+ div ebx ;div edx:eax by ebx (eax=quo, edx=rem)
+
+ mov [esi],eax ;get average of counts
+
+the_exit: ; label is not referenced anywhere in the code visible here
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret ; eax still holds the quotient that was stored in *rv
+
+;************************************************
+ END
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash b/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash
new file mode 100644
index 00000000..997d22a4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash
@@ -0,0 +1,22 @@
+;/***********************************************\
+;??? proc.ash
+;\***********************************************/
+
+
+CPU_ID MACRO ; expands to the raw opcode bytes 0F A2 at each point of use
+ db 0fh ; Hardcoded CPUID instruction
+ db 0a2h ; second opcode byte
+ENDM
+
+;see proc.h
+X86 EQU 0 ; /* 486, Pentium plain, or any other x86 compatible */
+PMMX EQU 1 ; /* Pentium with MMX */
+PPRO EQU 2 ; /* Pentium Pro */
+PII EQU 3 ; /* Pentium II */
+C6X86 EQU 4
+C6X86MX EQU 5
+AMDK63D EQU 6
+AMDK6 EQU 7
+AMDK5 EQU 8
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj
new file mode 100644
index 00000000..46938184
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj
@@ -0,0 +1,327 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9.00"
+ Name="dxv"
+ ProjectGUID="{ADAC45FD-B93F-40A3-85B2-DBECA1283614}"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ OmitFramePointers="true"
+ AdditionalIncludeDirectories="..\include,..\..\include,..\..\..\include"
+ PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ BufferSecurityCheck="false"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="0"
+ AssemblerListingLocation=""
+ ObjectFile="$(IntDir)/"
+ ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(SolutionDir)lib\win32\release\s_dxv.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\..\include,..\..\..\include"
+ PreprocessorDefinitions="WIN32;_DEBUG;_LIB"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="4"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\Lib\Win32\Debug\s_dxv.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release 64|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/GS-"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ OmitFramePointers="true"
+ AdditionalIncludeDirectories="..\include,..\..\include,..\..\..\include"
+ PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ EnableFunctionLevelLinking="true"
+ EnableEnhancedInstructionSet="0"
+ UsePrecompiledHeader="0"
+ AssemblerListingLocation=""
+ ObjectFile="$(IntDir)/"
+ ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ Detect64BitPortabilityProblems="true"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ AdditionalOptions="/machine:AMD64"
+ OutputFile="..\..\..\Lib\Win64\Release\s_dxv.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="generic"
+ >
+ <File
+ RelativePath="generic\vscreen.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release 64|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="generic\ximage.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release 64|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..02310491
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj
@@ -0,0 +1,205 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 0C442A480BB78AD600B3EE20 /* ximage.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C442A470BB78AD600B3EE20 /* ximage.c */; };
+ 0C442A760BB78BC700B3EE20 /* vscreen.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C442A750BB78BC700B3EE20 /* vscreen.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 0C442A470BB78AD600B3EE20 /* ximage.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = ximage.c; path = generic/ximage.c; sourceTree = "<group>"; };
+ 0C442A750BB78BC700B3EE20 /* vscreen.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vscreen.c; path = generic/vscreen.c; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libdxv.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdxv.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* dxv */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = dxv;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 0C442A470BB78AD600B3EE20 /* ximage.c */,
+ 0C442A750BB78BC700B3EE20 /* vscreen.c */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libdxv.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* dxv */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "dxv" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = dxv;
+ productName = dxv;
+ productReference = D2AAC046055464E500DB518D /* libdxv.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "dxv" */;
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* dxv */;
+ projectDirPath = "";
+ targets = (
+ D2AAC045055464E500DB518D /* dxv */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 0C442A480BB78AD600B3EE20 /* ximage.c in Sources */,
+ 0C442A760BB78BC700B3EE20 /* vscreen.c in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1DEB91EC08733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = dxv;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+ ZERO_LINK = YES;
+ };
+ name = Debug;
+ };
+ 1DEB91ED08733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ARCHS = (
+ ppc,
+ i386,
+ );
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = dxv;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+ };
+ name = Release;
+ };
+ 1DEB91F008733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = "";
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "../include ../../include ../../.. ../../../include ../../../../include";
+ };
+ name = Debug;
+ };
+ 1DEB91F108733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = "";
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "dxv" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91EC08733DB70010E9CD /* Debug */,
+ 1DEB91ED08733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "dxv" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91F008733DB70010E9CD /* Debug */,
+ 1DEB91F108733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj
new file mode 100644
index 00000000..f852c980
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{ADAC45FD-B93F-40A3-85B2-DBECA1283614}</ProjectGuid>
+ <RootNamespace>dxv</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet> <!-- x64 platform needs the x64 triplet, not the Win32 (x86) one -->
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet> <!-- x64 platform needs the x64 triplet, not the Win32 (x86) one -->
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\vscreen.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\ximage.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters
new file mode 100644
index 00000000..99856f05
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="generic">
+ <UniqueIdentifier>{af33cf3f-226e-4ed6-9952-d31a95439e2b}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\vscreen.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\ximage.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c b/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c
new file mode 100644
index 00000000..5cc1cafb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c
@@ -0,0 +1,179 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "../include/duck_dxl.h"
+#include "duck_mem.h"
+#include "../include/dxl_plugin.h"
+/*
+ * One-bit status flags carried by a vScreen. Within this file only inUse
+ * and allocated are read or written; the other bits are not referenced here.
+ */
+typedef struct tagflgs
+{
+ unsigned inUse : 1; /* set by DXL_CreateVScreen, cleared by DXL_DestroyVScreen, tested by validate() */
+ unsigned DXed : 1;
+ unsigned clutOwner: 1;
+ unsigned doCompleteBlit : 1;
+ unsigned keyFrame : 1;
+ unsigned nullFrame : 1;
+ unsigned interframe : 1;
+ unsigned logo : 1;
+ unsigned allocated : 1; /* set by vScreenCreate; DXL_DestroyVScreen frees the object only when set */
+} dkInfoFlags;
+
+/*
+ * Concrete layout behind DXL_VSCREEN_HANDLE: describes the destination
+ * surface (address, pixel format, pitch/height) plus a viewport and the
+ * status flags.
+ */
+typedef struct vScreen
+{
+ DXL_OBJECT_VERSION version; /* not assigned anywhere in this file */
+
+ unsigned char *_addr; /* destination address; set by DXL_AlterVScreen, cleared by DXL_DestroyVScreen */
+ unsigned char *laddr; /* address of destination and what it was the last time */
+
+ enum BITDEPTH bd; /* format of destination */
+ enum BLITQUALITY bq; /* blit translation mode */
+
+ short pitch, height; /* pitch and height of dest */
+
+ short viewX,viewY; /* offset/clipping viewport within destination */
+ short viewW,viewH;
+
+ dkInfoFlags dkFlags;
+
+} DXL_VSCREEN;
+
+#define validate(x) \
+{ \
+ if (!x) \
+ return (int) DXL_NULLSOURCE; \
+ if (!x->dkFlags.inUse) \
+ return (int) DXL_NOTINUSE; \
+}
+
+/***********************************************/
+
+/*
+ * Allocate one DXL_VSCREEN through duck_calloc and flag it as allocated so
+ * that DXL_DestroyVScreen will release it.
+ *
+ * Returns the new handle, or NULL when the allocation fails. The inUse flag
+ * is NOT set here; DXL_CreateVScreen does that.
+ */
+DXL_VSCREEN_HANDLE
+vScreenCreate(void)
+{
+    DXL_VSCREEN_HANDLE screen =
+        (DXL_VSCREEN_HANDLE)duck_calloc(1,sizeof(DXL_VSCREEN),DMEM_GENERAL);
+
+    if (screen == NULL)
+        return NULL;
+
+    screen->dkFlags.allocated = 1;
+    return screen;
+}
+
+/*
+ * Store a new blit quality on the vScreen.
+ *
+ * Returns the previous blit quality on success. When validate() rejects
+ * the handle the result is DXL_NULLSOURCE (NULL handle) or DXL_NOTINUSE
+ * (handle not in use).
+ */
+int
+DXL_SetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst, enum BLITQUALITY blitquality)
+{
+    int previous;
+
+    validate(dst);
+
+    previous = dst->bq;
+    dst->bq = blitquality;
+
+    return previous;
+}
+
+/*
+ * Retire a vScreen: clear its destination address and inUse flag, then
+ * release the object with duck_free if its allocated flag is set.
+ * A NULL handle is ignored.
+ */
+void
+DXL_DestroyVScreen(DXL_VSCREEN_HANDLE dst)
+{
+    if (dst == NULL)
+        return;
+
+    dst->_addr = NULL;
+    dst->dkFlags.inUse = 0;
+
+    if (dst->dkFlags.allocated)
+        duck_free(dst);
+}
+
+/*
+ * Update selected attributes of a vScreen. Each argument has a "leave
+ * unchanged" value:
+ *   _addr  NULL       keep the current destination address
+ *   bd     DXRGBNULL  keep the current pixel format
+ *   p      -1         keep the current pitch
+ *   h      -1         keep the current height
+ * Pitch and height are stored as short, so values outside the range of
+ * short are truncated by the cast.
+ *
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE from validate().
+ */
+int
+DXL_AlterVScreen(DXL_VSCREEN_HANDLE dst, unsigned char *_addr, enum BITDEPTH bd, int p, int h)
+{
+    validate(dst);
+
+    if (_addr != NULL)
+        dst->_addr = _addr;
+    if (bd != DXRGBNULL)
+        dst->bd = bd;
+    if (p != -1)
+        dst->pitch = (short) p;
+    if (h != -1)
+        dst->height = (short) h;
+
+    return DXL_OK;
+}
+
+/*
+ * Update the viewport rectangle of a vScreen. Any negative argument leaves
+ * the corresponding field untouched; non-negative values are stored as
+ * short (and truncated by the cast if they do not fit).
+ *
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE from validate().
+ */
+int
+DXL_AlterVScreenView(DXL_VSCREEN_HANDLE dst,int x,int y,int w,int h)
+{
+    validate(dst);
+
+    if (x >= 0)
+        dst->viewX = (short)x;
+    if (y >= 0)
+        dst->viewY = (short)y;
+    if (w >= 0)
+        dst->viewW = (short)w;
+    if (h >= 0)
+        dst->viewH = (short)h;
+
+    return DXL_OK;
+}
+
+/*
+ * Create a vScreen and initialise it with the given destination address,
+ * pixel format, pitch and height (forwarded to DXL_AlterVScreen, so its
+ * "leave unchanged" values NULL / DXRGBNULL / -1 apply here too).
+ *
+ * Returns the new handle, or NULL when vScreenCreate fails.
+ */
+DXL_VSCREEN_HANDLE
+DXL_CreateVScreen(unsigned char *_addr, enum BITDEPTH bd, short p,short h)
+{
+    DXL_VSCREEN_HANDLE vScreenCreate(void);
+    DXL_VSCREEN_HANDLE screen;
+
+    screen = vScreenCreate();
+    if (screen != NULL)
+    {
+        /* mark in use first: DXL_AlterVScreen's validate() rejects handles
+           whose inUse flag is clear */
+        screen->dkFlags.inUse = 1;
+        DXL_AlterVScreen(screen, _addr, bd, p, h);
+    }
+
+    return screen;
+}
+
+/*
+ * Read back the viewport rectangle of a vScreen. Every output pointer is
+ * optional; NULL pointers are skipped.
+ *
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE from validate().
+ */
+int DXL_GetVScreenView(DXL_VSCREEN_HANDLE dst,int *x,int *y,int *w,int *h)
+{
+    validate(dst);
+
+    if (x != NULL)
+        *x = dst->viewX;
+    if (y != NULL)
+        *y = dst->viewY;
+    if (w != NULL)
+        *w = dst->viewW;
+    if (h != NULL)
+        *h = dst->viewH;
+
+    return DXL_OK;
+}
+
+
+
+/*
+ * Read back the attributes of a vScreen: destination address, blit
+ * quality, pixel format, pitch and height. Every output pointer is
+ * optional; NULL pointers are skipped.
+ *
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE from validate().
+ */
+int DXL_GetVScreenAttributes(DXL_VSCREEN_HANDLE dst, void **_addr, dxvBlitQuality *bq, dxvBitDepth *bd, short *pitch, short *height)
+{
+    validate(dst);
+
+    if (_addr != NULL)
+        *_addr = dst->_addr;
+    if (bq != NULL)
+        *bq = dst->bq;
+    if (bd != NULL)
+        *bd = dst->bd;
+    if (pitch != NULL)
+        *pitch = dst->pitch;
+    if (height != NULL)
+        *height = dst->height;
+
+    return DXL_OK;
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c b/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c
new file mode 100644
index 00000000..e416ce93
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c
@@ -0,0 +1,490 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+#include "duck_mem.h"
+#include "../include/dxl_plugin.h"
+#include <ctype.h> /* toupper */
+
+/*
+ * Registration tables filled by DXL_RegisterXImage: creator[i] is the
+ * create callback registered for fourCC[i]. A fourCC of 0 marks a free slot.
+ */
+static CREATE_FUNC creator[NUM_ALG];
+static unsigned int fourCC[NUM_ALG];
+
+/* Value stamped into DXL_XIMAGE.version by DXL_CreateXImageOfType. */
+static DXL_OBJECT_VERSION thisVersion = 0x01000001;
+
+/*
+ * One-bit status flags carried by an xImage base. The comments below note
+ * the bits this file touches; the remaining bits are not referenced here.
+ */
+typedef struct tagflgs
+{
+ unsigned inUse : 1; /* set by DXL_CreateXImageOfType; tested by validateXImage and DXL_DestroyXImage */
+ unsigned DXed : 1; /* cleared by DXL_AlterXImageData */
+ unsigned clutOwner: 1;
+ unsigned doCompleteBlit : 1;
+ unsigned keyFrame : 1; /* returned by DXL_IsXImageKeyFrame */
+ unsigned nullFrame : 1;
+ unsigned interframe : 1;
+ unsigned logo : 1;
+ unsigned allocated : 1; /* set by DXL_AlterXImage after a successful recreate; required by DXL_dxImageToVScreen */
+} dkInfoFlags;
+
+
+/*
+ * Algorithm-independent part of an xImage: status flags, the current
+ * compressed-data pointer and size, and the callbacks supplied through
+ * DXL_RegisterXImage and the DXL_RegisterXImage* setters.
+ */
+typedef struct tXImageBase
+{
+ dkInfoFlags dkFlags;
+ //short x,y,w,h;
+ unsigned char *addr; /* compressed data; set by create/alter calls, read by DXL_GetXImageCDataAddr */
+ enum BITDEPTH *bdPrefs; /* not referenced in this file */
+ CREATE_FUNC create; /* creator that built this image; used by DXL_GetXImageFOURCC */
+ RECREATE_FUNC recreate; /* optional; invoked by DXL_AlterXImage */
+ DESTROY_FUNC destroy; /* invoked by DXL_DestroyXImage */
+ SEND_VMSG_FUNC sendVideoMessage; /* optional; invoked by DXL_SendVideoMessage */
+ DX_FUNC dx; /* invoked by DXL_dxImageToVScreen */
+ int fSize; /* value stored by DXL_SetXImageCSize */
+ SET_PARAMETER_FUNC setParameter; /* optional; invoked by DXL_SetParameter */
+ GET_PARAMETER_FUNC getParameter; /* optional; invoked by DXL_GetParameter */
+} DXL_XIMAGE_BASE;
+
+/*
+ * Concrete layout behind DXL_XIMAGE_HANDLE: a version stamp, the generic
+ * base record, and the opaque pointer returned by the algorithm's
+ * create/recreate callback.
+ */
+typedef struct tXImage
+{
+ DXL_OBJECT_VERSION version; /* set to thisVersion by DXL_CreateXImageOfType */
+
+ DXL_XIMAGE_BASE * xImageBasePtr; /* allocated by DXL_CreateXImageOfType, freed by DXL_DestroyXImage */
+
+ DXL_HANDLE algorithmBasePtr; /* result of the create/recreate callback; NULL means failure */
+} DXL_XIMAGE;
+
+#define validateXImage(x) \
+{ \
+ if (!x) \
+ return (int) DXL_NULLSOURCE; \
+ if (!x->xImageBasePtr->dkFlags.inUse) \
+ return (int) DXL_NOTINUSE; \
+}
+
+// if (!x->version != thisVersion)
+// return (int) DXL_INVALID_DATA;
+
+/*
+ * Upper-case each of the four bytes of a FOURCC independently and return
+ * the repacked code. Byte positions are preserved, so the result does not
+ * depend on which end of the word holds the first character.
+ *
+ * Each byte is passed to toupper() as an unsigned char value: toupper() is
+ * only defined for EOF and values representable as unsigned char, so
+ * routing the byte through plain char (which may be signed) was undefined
+ * for bytes >= 0x80. The result is widened to unsigned int before the
+ * shift so that << 24 cannot overflow a signed int.
+ */
+static //inline
+unsigned int toUpperFOURCC(unsigned int type)
+{
+    unsigned int upper = 0;
+    int shift;
+
+    for (shift = 0; shift <= 24; shift += 8)
+    {
+        unsigned char byte = (unsigned char)((type >> shift) & 0xff);
+
+        upper |= ((unsigned int)toupper(byte) & 0xffu) << shift;
+    }
+
+    return upper;
+}
+
+/*
+ * Record the size of the current compressed frame on the xImage.
+ *
+ * Returns DXL_OK, or DXL_NOTINUSE when src is NULL.
+ */
+int
+DXL_SetXImageCSize(DXL_XIMAGE_HANDLE src, int temp)
+{
+    if (src != NULL)
+    {
+        src->xImageBasePtr->fSize = temp;
+        return DXL_OK;
+    }
+
+    return DXL_NOTINUSE;
+}
+
+/*
+ * Return the compressed-frame size last stored with DXL_SetXImageCSize,
+ * or 0 when src is NULL.
+ */
+int
+DXL_GetXImageCSize(DXL_XIMAGE_HANDLE src)
+{
+    return (src != NULL) ? src->xImageBasePtr->fSize : 0;
+}
+
+/*
+ * Return the compressed-data pointer currently attached to the xImage,
+ * or NULL when src is NULL.
+ */
+unsigned char *
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src)
+{
+    return (src != NULL) ? src->xImageBasePtr->addr : NULL;
+}
+/*
+int
+DXL_MoveXImage(DXL_XIMAGE_HANDLE src, enum OFFSETXY mode, int x, int y)
+{
+ validateXImage(src);
+
+ if (mode != DXL_RELATIVE)
+ {
+ src->xImageBasePtr->x = 0;
+ src->xImageBasePtr->y = 0;
+ }
+ src->xImageBasePtr->x += (short) x;
+ src->xImageBasePtr->y += (short) y;
+
+ return DXL_OK;
+}
+*/
+/*
+ * Attach a new compressed-data pointer to the xImage and clear its DXed
+ * flag. A NULL data pointer is accepted and stored like any other value.
+ *
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE from validateXImage().
+ *
+ * The former `if (data == NULL) return DXL_OK;` was dropped: both paths
+ * returned DXL_OK, so the test had no effect.
+ */
+int
+DXL_AlterXImageData(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    validateXImage(src);
+
+    src->xImageBasePtr->addr = data;
+    src->xImageBasePtr->dkFlags.DXed = 0;
+
+    return DXL_OK;
+}
+
+/*
+ * Report the keyFrame flag of the xImage.
+ *
+ * Returns 1 or 0 (the flag bit) for a valid handle, or the negative codes
+ * DXL_NULLSOURCE / DXL_NOTINUSE from validateXImage().
+ */
+int
+DXL_IsXImageKeyFrame(DXL_XIMAGE_HANDLE src)
+{
+    int isKeyFrame;
+
+    validateXImage(src);
+
+    isKeyFrame = src->xImageBasePtr->dkFlags.keyFrame;
+    return isKeyFrame;
+}
+
+
+/*
+ * Destroy an xImage: give the algorithm a chance to clean up through its
+ * destroy callback (only when the image is in use), then release the base
+ * record and the handle itself. A NULL handle is ignored.
+ *
+ * The destroy callback is optional; it stays NULL when the algorithm never
+ * called DXL_RegisterXImageDestroy, so it is checked before the call. A
+ * missing base record is tolerated as well.
+ */
+void
+DXL_DestroyXImage(DXL_XIMAGE_HANDLE src)
+{
+    DXL_XIMAGE_BASE *base;
+
+    if (src == NULL)
+        return;
+
+    base = src->xImageBasePtr;
+    if (base != NULL)
+    {
+        if (base->dkFlags.inUse && base->destroy != NULL)
+            base->destroy(src);
+
+        /* re-read the pointer, as the original did, after the callback ran */
+        duck_free(src->xImageBasePtr);
+    }
+
+    duck_free(src);
+}
+
+/*
+ * Create an xImage for the algorithm registered under the given FOURCC.
+ *
+ *   data  compressed data handed to the algorithm's create callback and
+ *         stored as the image's current data pointer
+ *   type  FOURCC; upper-cased before lookup, so lower-case codes match
+ *         algorithms registered with upper-case codes
+ *
+ * Every registered slot whose fourCC matches is tried in order until a
+ * create callback returns a non-NULL algorithm pointer.
+ *
+ * Returns the new handle, or NULL when an allocation fails or no matching
+ * creator succeeds (everything allocated here is freed in that case).
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(unsigned char *data, unsigned int type)
+{
+    int i;
+    DXL_XIMAGE_HANDLE nImage;
+
+    /* alloc our ximage */
+    nImage = (DXL_XIMAGE_HANDLE) duck_calloc(1, sizeof(DXL_XIMAGE), DMEM_GENERAL);
+    if (nImage == NULL)
+        return NULL;
+
+    nImage->version = thisVersion;
+
+    /* alloc our generic ximage base */
+    nImage->xImageBasePtr = (DXL_XIMAGE_BASE *) duck_calloc(1, sizeof(DXL_XIMAGE_BASE), DMEM_GENERAL);
+    if (nImage->xImageBasePtr == NULL)
+    {
+        duck_free(nImage);
+        return NULL;
+    }
+
+    /* clear out just in case calloc does not really work */
+    nImage->algorithmBasePtr = NULL;
+
+    /* convert fourCC to uppercase, fixes problem with calls to DXV with
+       lowercase fourCC's */
+    type = toUpperFOURCC(type);
+
+    /* try to match the fourcc to a registered algorithm */
+    for (i = 0; i < NUM_ALG; i++)
+    {
+        /* A free slot holds fourCC 0 and a NULL creator. Without the NULL
+           test a request for type 0 would match it and call through a
+           NULL function pointer. */
+        if (fourCC[i] != type || creator[i] == NULL)
+            continue;
+
+        nImage->algorithmBasePtr = creator[i](nImage, data);
+        if (nImage->algorithmBasePtr != NULL)
+        {
+            nImage->xImageBasePtr->create = creator[i];
+            break;
+        }
+    }
+
+    /* was a valid registered algorithm found ? */
+    if (nImage->algorithmBasePtr == NULL)
+    {
+        /* nope, so we are going to bail */
+        duck_free(nImage->xImageBasePtr);
+        duck_free(nImage);
+
+        return NULL;
+    }
+
+    nImage->xImageBasePtr->dkFlags.inUse = 1;
+    nImage->xImageBasePtr->addr = data;
+
+    return nImage;
+}
+
+
+unsigned int *
+DXL_GetFourCCList(void)
+{
+ /*********
+ Return the internal table of registered fourccs. The table has
+ NUM_ALG entries and unused slots hold 0, so it is zero-terminated
+ only while fewer than NUM_ALG algorithms are registered. The
+ pointer refers to this file's static storage; callers receive the
+ live table, not a copy.
+ *********/
+ return fourCC;
+}
+
+
+/*
+ * Look up the registration slot of a fourcc.
+ *
+ * The comparison is exact (no case folding). Returns the index of the
+ * first slot holding the fourcc, or DXL_NOTINUSE when none matches. A
+ * fourcc of 0 matches the first free slot, since free slots hold 0.
+ */
+int
+DXL_GetAlgHandle(unsigned int fourcc)
+{
+    int slot = 0;
+
+    while (slot < NUM_ALG)
+    {
+        if (fourCC[slot] == fourcc)
+            return slot;
+        slot++;
+    }
+
+    return DXL_NOTINUSE;
+}
+
+
+/*
+ * Find the fourcc of an xImage by comparing the creator that built it
+ * against the registered creators.
+ *
+ * Returns the fourcc of the first matching slot, or 0 when src (or its
+ * base record) is NULL, when the image has no recorded creator, or when
+ * the creator is not in the table. The NULL-creator check stops an image
+ * without a creator from "matching" an empty slot.
+ */
+unsigned int
+DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src)
+{
+    int i;
+    CREATE_FUNC wanted;
+
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return 0;
+
+    wanted = (CREATE_FUNC) src->xImageBasePtr->create;
+    if (wanted == NULL)
+        return 0;
+
+    for (i = 0; i < NUM_ALG; i++)
+    {
+        if (creator[i] == wanted)
+            return fourCC[i];
+    }
+
+    return 0;
+}
+
+/*
+ * Decompress the xImage onto the vScreen by calling the algorithm's dx
+ * callback and returning its result unchanged.
+ *
+ * Returns DXL_NULLSOURCE / DXL_NOTINUSE from validateXImage(), and
+ * DXL_NOTINUSE when the image was never altered after creation (allocated
+ * flag clear; this catches callers who skip DXL_AlterXImage) or has no dx
+ * callback. A NULL compressed-data address is not checked here; it is
+ * passed through to the callback.
+ */
+int
+DXL_dxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+    validateXImage(src);
+
+    if (!src->xImageBasePtr->dkFlags.allocated || !src->xImageBasePtr->dx)
+        return DXL_NOTINUSE;
+
+    return src->xImageBasePtr->dx(src, dst);
+}
+
+/*-------------------------------------------------------------------
+
+-------------------------------------------------------------------*/
+/*
+ * Initialise the video services by clearing the registration tables:
+ * DXL_RegisterXImage treats a fourcc of 0 as "reset both arrays".
+ * Always returns DXL_OK.
+ */
+int
+DXL_InitVideo(void)
+{
+    (void) DXL_RegisterXImage(NULL, 0);
+
+    return DXL_OK;
+}
+
+
+/* Counterpart of DXL_InitVideo. The body is empty: nothing is released here. */
+void
+DXL_ExitVideo(void)
+{
+
+}
+
+
+/*
+ * Alter an xImage, creating it first when src is NULL.
+ *
+ *   src       existing handle, or NULL to create one from `type`
+ *   data      compressed data pointer
+ *   type      FOURCC (upper-cased here); 0 with a NULL src yields NULL
+ *   bitDepth, width, height   forwarded to the recreate callback
+ *
+ * When the image has no recreate callback it is returned as is, without
+ * touching its data pointer or allocated flag. Otherwise the data pointer
+ * is stored, recreate is called and, on success, the allocated flag is set.
+ *
+ * Returns the handle, or NULL on failure. NOTE: when recreate returns NULL
+ * the image -- including a handle passed in by the caller -- is freed here
+ * and must not be used again.
+ */
+DXL_XIMAGE_HANDLE
+DXL_AlterXImage(DXL_XIMAGE_HANDLE src, unsigned char *data, int type,
+    enum BITDEPTH bitDepth, int width, int height)
+{
+    type = toUpperFOURCC(type);
+
+    /* if no handle but a type was specified, try using it as the fourcc */
+    if (src == NULL && type)
+        src = DXL_CreateXImageOfType(data,type);
+
+    /* if still null, give up */
+    if (src == NULL)
+        return NULL;
+
+    /* no way to recreate, assume create is good enough */
+    if (src->xImageBasePtr->recreate == NULL)
+        return src;
+
+    src->xImageBasePtr->addr = data;
+    src->algorithmBasePtr = src->xImageBasePtr->recreate(src, data, type, bitDepth, width, height);
+
+    /* did the algorithm accept the new settings ? */
+    if (src->algorithmBasePtr != NULL)
+    {
+        src->xImageBasePtr->dkFlags.allocated = 1;
+        return src;
+    }
+
+    /* nope, so we are going to bail */
+    duck_free(src->xImageBasePtr);
+    duck_free(src);
+
+    return NULL;
+}
+
+
+/*
+ * Pass a command/parameter pair to the algorithm's setParameter callback.
+ *
+ * Returns DXL_NULLSOURCE when src, its base record or the callback is
+ * NULL; otherwise DXL_OK (the callback itself returns nothing).
+ */
+int
+DXL_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter )
+{
+    if (src == NULL
+        || src->xImageBasePtr == NULL
+        || src->xImageBasePtr->setParameter == NULL)
+    {
+        return DXL_NULLSOURCE;
+    }
+
+    src->xImageBasePtr->setParameter(src, Command, Parameter);
+
+    return DXL_OK;
+}
+
+/*
+ * Query the algorithm through its getParameter callback.
+ *
+ * Returns DXL_NULLSOURCE when src, its base record or the callback is
+ * NULL; otherwise whatever the callback returns.
+ */
+int
+DXL_GetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter )
+{
+    if (src == NULL
+        || src->xImageBasePtr == NULL
+        || src->xImageBasePtr->getParameter == NULL)
+    {
+        return DXL_NULLSOURCE;
+    }
+
+    return src->xImageBasePtr->getParameter(src, Command, Parameter);
+}
+
+/*
+ * Return the algorithm-private pointer stored on the xImage (the value
+ * produced by the create/recreate callback), or NULL when src is NULL.
+ */
+DXL_HANDLE
+DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src)
+{
+    if (src == NULL)
+        return NULL;
+
+    return src->algorithmBasePtr;
+}
+
+/*
+ * Deliver a message to the algorithm through its sendVideoMessage
+ * callback.
+ *
+ * Returns DXL_NULLSOURCE / DXL_NOTINUSE from validateXImage(), DXL_OK when
+ * no callback is registered, otherwise whatever the callback returns.
+ */
+int
+DXL_SendVideoMessage(DXL_XIMAGE_HANDLE src, void *msgHandle, unsigned int msgSize)
+{
+    SEND_VMSG_FUNC handler;
+
+    validateXImage(src);
+
+    handler = src->xImageBasePtr->sendVideoMessage;
+    if (handler == NULL)
+        return DXL_OK;
+
+    return handler(src, msgHandle, msgSize);
+}
+
+
+
+/*-------------------------------------------------------------------
+ CALLBACK REGISTRATION SECTION
+-------------------------------------------------------------------*/
+/*
+ * Register a create callback under a fourcc.
+ *
+ * Special case: a fourcc of 0 clears both registration tables and returns
+ * DXL_OK (myCreator is ignored).
+ *
+ * Otherwise the pair is stored in the first free slot (fourCC == 0) and
+ * the slot index is returned; note that index 0 has the same value as
+ * DXL_OK. Returns DXL_NOTINUSE when all NUM_ALG slots are taken. The
+ * fourcc is stored exactly as given and duplicates are not detected.
+ */
+int
+DXL_RegisterXImage(CREATE_FUNC myCreator, unsigned int fourcc)
+{
+    int slot;
+
+    if (fourcc == 0)
+    {
+        duck_memset(creator, 0, sizeof(creator));
+        duck_memset(fourCC, 0, sizeof(fourCC));
+        return DXL_OK;
+    }
+
+    for (slot = 0; slot < NUM_ALG; slot++)
+    {
+        if (fourCC[slot] != 0)
+            continue;
+
+        creator[slot] = myCreator;
+        fourCC[slot] = fourcc;
+        return slot;
+    }
+
+    return DXL_NOTINUSE;
+}
+
+/*
+ * Install the recreate callback used by DXL_AlterXImage.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->recreate = thisFunc;
+
+    return DXL_OK;
+}
+
+/*
+ * Install the destroy callback used by DXL_DestroyXImage.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->destroy = thisFunc;
+
+    return DXL_OK;
+}
+
+/*
+ * Install the dx (decompress) callback used by DXL_dxImageToVScreen.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->dx = thisFunc;
+
+    return DXL_OK;
+}
+
+/*
+ * Install the setParameter callback used by DXL_SetParameter.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->setParameter = thisFunc;
+
+    return DXL_OK;
+}
+
+/*
+ * Install the getParameter callback used by DXL_GetParameter.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->getParameter = thisFunc;
+
+    return DXL_OK;
+}
+
+/*
+ * Install the sendVideoMessage callback used by DXL_SendVideoMessage.
+ * Returns DXL_OK, or DXL_NULLSOURCE when src or its base record is NULL
+ * (previously a NULL handle was dereferenced).
+ */
+int
+DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc)
+{
+    if (src == NULL || src->xImageBasePtr == NULL)
+        return DXL_NULLSOURCE;
+
+    src->xImageBasePtr->sendVideoMessage = thisFunc;
+
+    return DXL_OK;
+}
+
+/*-------------------------------------------------------------------
+
+-------------------------------------------------------------------*/
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h b/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h
new file mode 100644
index 00000000..d152ae29
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h
@@ -0,0 +1,350 @@
+#ifndef _duck_dxl_h
+#define _duck_dxl_h
+
+
+/******************************************************************************\
+<table BGCOLOR=#FFC0C0 border=1 WIDTH=100% ><tr><td><b>
+ duck_dxl.h </b></td><td><b> TrueMotion include file for decompression libraries </b>
+
+</td></tr><tr><td>&nbsp</td><td> Version: 6.0.0
+</td></tr><tr><td>&nbsp</td><td> Created: 3/3/98
+</td></tr><tr><td>&nbsp</td><td> Copyright (c) 1994-98, The Duck Corp. All rights reserved.
+</td></tr><tr><td>Important Objects</td><td>The On2 decompression services try to abstract the various objects
+used to decompress and render both audio and video. This allows the overall API to flex and accommodate new
+decompression schemes and new destinations.
+</td></tr><tr><td>DXL_XIMAGE_HANDLE</td><td>Abstract container object used to organize and control compressed
+video.
+</td></tr><tr><td>DXL_VSCREEN_HANDLE</td><td>Abstract container object used to organize and control display of
+uncompressed video to a surface.
+</td></tr><tr><td>DXL_XAUDIOSRC_HANDLE</td><td>Abstract container object used to organize and control
+compressed audio.
+</td></tr><tr><td>DXL_AUDIODST_HANDLE</td><td>Abstract container object used to organize and control
+rendering / playing of uncompressed audio.
+</td></tr>
+</table>
+******************************************************************************/
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* enumerated data types */
+
+typedef enum BLITQUALITY {
+ DXBLIT_SAME = 0, /* Blit directly, w/o stretching */
+ DXBLIT_R1,
+ DXBLIT_R2,
+ DXBLIT_STRETCH, /* double horizontally, skip lines vertically */
+ DXBLIT_R3,
+ DXBLIT_STRETCH_BRIGHT, /* double horizontally, interpolate vertically */
+ DXBLIT_R4,
+ DXBLIT_R5,
+ DXBLIT_R6,
+ DXBLIT_NONE,
+ DXBLITMAX
+} dxvBlitQuality ;
+
+/*
+ * Pixel formats of a destination surface. DXRGBNULL (0) doubles as the
+ * "leave the format unchanged" value accepted by DXL_AlterVScreen.
+ * DXMAX is one past the last format (16 with the values below).
+ */
+typedef enum BITDEPTH {
+ DXRGBNULL = 0,
+ DXRGB8 = 1,
+
+ DXRGB16_555 = 2,
+ DXRGB24 = 3,
+ DXRGB_UNUSED = 4,
+ DXRGB16VESA = 5,
+ DXRGB8VESA = 6,
+ DXRGB16_565 = 7,
+
+ DXYUY2 = 8,
+ DXYVU9 = 9,
+ DXYV12 = 10,
+ DXUYVY = 11,
+
+ DXRGB32 = 12,
+ DXRGB16VESA_565 = 13,
+ DXHALFTONE8 =14,
+ DXI420 = 15,
+ DXMAX
+} dxvBitDepth ;
+
+#define DXRGB16 DXRGB16_555
+#define DXRGB24CHAR DXRGB24
+
+typedef enum OFFSETXY {
+ DXL_ABSOLUTE = 0,
+ DXL_RELATIVE
+} dxvOffsetMode;
+
+/*
+ * Status codes returned by the DXL_ functions. All error codes are
+ * negative, DXL_OK is 0 and DXL_HOLD_FRAME is the only positive code.
+ * Functions that return a count, index or flag on success share the same
+ * int, so callers tell errors apart by the sign.
+ */
+typedef enum DXL_ERR{
+ DXL_LOW_ERR = -32000,
+ DXL_HARDWARE_ERROR = -16002,
+ DXL_HARDWARE_NOT_INITED = -16001,
+ DXL_HARDWARE_BUFFER_FULL = -16000,
+ DXL_INVALID_REQUEST = -9,
+ DXL_VERSION_CONFLICT = -8,
+ DXL_INVALID_DATA = -7,
+ DXL_INVALID_BLIT = -6,
+ DXL_BAD_DATA = -5,
+ DXL_ALLOC_FAILED = -4,
+ DXL_NULL_FRAME = -3,
+ DXL_NULLSOURCE = -2, /* NULL handle (or missing callback) */
+ DXL_NOTINUSE = -1, /* handle not in use / nothing found */
+ DXL_OK = 0,
+ DXL_HOLD_FRAME = 1
+} dxvError ;
+
+
+enum IMAGETYPE { whgfw_X=0 }; /* MEW */
+enum BGMODE { kjhdkj_X=0 }; /* MEW */
+
+
+/*********************************************************/
+
+/* definition of data handles */
+
+typedef struct vScreen *DXL_VSCREEN_HANDLE;
+typedef struct tXImage *DXL_XIMAGE_HANDLE;
+
+
+/* main video decompression init, exit and query */
+
+
+/*@
+@Name DXL_InitVideo
+@Description Initialize Video decompression services
+@Return value DXL_OK on success.
+@*/
+int DXL_InitVideo(
+void
+);
+
+
+/*@
+@Name DXL_ExitVideo
+@Description shutdown video decompression services.
+@Return value none
+@*/
+void DXL_ExitVideo(void);
+
+
+/* get pointer to the table of registered fourCCs; unused entries are 0,
+ so the list is zero-terminated unless every slot is registered */
+unsigned int *DXL_GetFourCCList(void);
+
+
+/*@
+@Name DXL_SetXImageCSize
+@Description Set the size of the current compressed frame
+@Return value DXL_OK on success, DXL_NOTINUSE if xImage is NULL
+@*/
+int DXL_SetXImageCSize(
+DXL_XIMAGE_HANDLE xImage, /* compressed image handle */
+int compressedSize /* compressed image size */
+);
+
+
+
+
+/*@
+@Name DXL_CreateXImageOfType
+@Description Create an xImage (decompressor) object of a requested type based on a FOURCC.
+@Return value handle to xImage created by this call .
+@*/
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(
+unsigned char *data, /* pointer to compressed data */
+unsigned int fccType /* FOURCC style code indicating type of compressed data */
+);
+
+
+
+/*@
+@Name DXL_DestroyXImage
+@Description destroy the specified xImage
+@Return value void
+@*/
+void DXL_DestroyXImage(
+DXL_XIMAGE_HANDLE src /* handle to compressed image */
+);
+
+
+
+/*@
+@Name DXL_AlterXImageData
+@Description feed the xImage new data, get ready to decompress
+@Return value DXL_OK on success
+@*/
+int DXL_AlterXImageData(
+DXL_XIMAGE_HANDLE src, /* xImage, handle to compressed data */
+unsigned char *ptrData /* latest data to be associated with xImage */
+);
+
+
+
+/*@
+@Name DXL_AlterXImage
+@Description explicitly alter attributes of an xImage
+@Return value handle to compressed image
+@*/
+DXL_XIMAGE_HANDLE DXL_AlterXImage(
+DXL_XIMAGE_HANDLE src, /* handle to compressed image */
+unsigned char *ptrData, /* pointer to compressed data. */
+int xImType, /* code for compress data type. */
+dxvBitDepth bitDepth , /* bitdepth of decompressed data */
+int maxWidth, /* width of decompressed image */
+int maxHeight /* height of decompressed image */
+);
+
+
+unsigned char *
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src);
+
+
+
+/*@
+@Name DXL_GetXImageCSize
+@Description Get xImage compressed size
+@Return value returns the compressed size
+@*/
+int DXL_GetXImageCSize(
+DXL_XIMAGE_HANDLE src /* handle to compressed image */
+);
+
+
+
+/*@
+@Name DXL_GetXImageXYWH
+@Description get application specified x,y offset, and overall decompressed width and height.
+x and y offsets are legacy fields, ignore.
+@Return value DXL_OK on success
+@*/
+int DXL_GetXImageXYWH(
+ DXL_XIMAGE_HANDLE src, /* the xImage Handle. */
+ int *x,int *y,int *w, int *h /* x,y,w,h */
+ );
+
+
+/*@
+@Name DXL_IsXImageKeyFrame
+@Description return whether this xImage is a keyFrame.
+@Return value return whether this xImage is a keyFrame.
+@*/
+int DXL_IsXImageKeyFrame(
+ DXL_XIMAGE_HANDLE src /* handle to compressed image */
+);
+
+
+
+/*@
+@Name DXL_dxImageToVScreen
+@Description decompress and blit as a single process
+@Return value DXL_OK on success.
+@*/
+int DXL_dxImageToVScreen(
+ DXL_XIMAGE_HANDLE src, /* xImage handle. */
+ DXL_VSCREEN_HANDLE dst /* handle to destination surface */
+ );
+
+
+/* vscreen management functions */
+
+/*@
+@Name DXL_CreateVScreen
+@Description create a virtual screen for rendering, storing decompressed video.
+@Return value returns a DXL_VSCREEN_HANDLE
+@*/
+DXL_VSCREEN_HANDLE DXL_CreateVScreen(
+ unsigned char *addr, /* The address where pixel data should be written */
+ dxvBitDepth colorMode, /* Determines the colorspace and color depth of VScreen */
+ short bytePitch, /* offset from one raster to the next */
+ short height /* number of rasters in a VScreen */
+ );
+
+
+
+/*@
+@Name DXL_AlterVScreen
+@Description Alter address and attributes associated with a vscreen.
+@Return value DXL_OK on success, DXL_NULLSOURCE or DXL_NOTINUSE if the handle is invalid
+@*/
+int DXL_AlterVScreen(
+ DXL_VSCREEN_HANDLE dst, /* handle to a VScreen */
+ unsigned char *addr, /* The address where pixel data should be written */
+ dxvBitDepth colorMode, /* Determines the colorspace and color depth of VScreen */
+ int bytePitch, /* offset from one raster to the next */
+ int height /* number of rasters in a VScreen */
+ );
+
+
+/* alter clipping rectangle of vScreen */
+/* not supported by all decompressors */
+int DXL_AlterVScreenClip(
+ DXL_VSCREEN_HANDLE dst,
+ int x,int y,
+ int w,int h
+ );
+
+/* alter viewport rectangle of vScreen */
+/* width/height not supported by all decompressors */
+int DXL_AlterVScreenView(
+ DXL_VSCREEN_HANDLE dst,
+ int x,int y,
+ int w,int h
+ );
+
+/* destroy a vScreen object/struct */
+void DXL_DestroyVScreen(
+ DXL_VSCREEN_HANDLE dst
+ );
+
+/* set blit mode/quality of a vScreen
+ same (normal), stretch (black lined)
+ stretch bright (stretched w/interpolation) */
+int DXL_SetVScreenBlitQuality(
+ DXL_VSCREEN_HANDLE dest,
+ dxvBlitQuality bq
+ );
+
+
+/* get attributes of the vScreen */
+int DXL_GetVScreenAttributes(
+ DXL_VSCREEN_HANDLE vScreen,
+ void **addr,
+ dxvBlitQuality *bq,
+ dxvBitDepth *bd,
+ short *pitch,
+ short *height
+ );
+
+/* get vScreen's current viewport rectangle
+ a viewport represents an x,y, offset and
+ a clipping width and height */
+int DXL_GetVScreenView(
+ DXL_VSCREEN_HANDLE dst,
+ int *x,int *y,int *w,int *h
+ );
+
+/* pass a parameter to the decompressor */
+int DXL_SetParameter(
+ DXL_XIMAGE_HANDLE src,
+ int Command,
+ unsigned int Parameter
+ );
+
+unsigned int DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src);
+
+/* Temporary hack to dxv to allow calls to get info (jbb) */
+/*
+typedef struct tFrameInfo
+{
+ int KeyFrame;
+ int Version;
+ int Quality;
+ int vp30Flag;
+} FrameInfo;
+*/
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* include guards */
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h b/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h
new file mode 100644
index 00000000..0b49855a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h
@@ -0,0 +1,70 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#ifndef _dxl_plugin_h
+#define _dxl_plugin_h
+
+#include "duck_dxl.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* max number of algorithms to be supported at one time */
+#define NUM_ALG 16
+
+typedef void * DXL_HANDLE;
+
+typedef unsigned int DXL_OBJECT_VERSION;
+
+/* Callback types an algorithm plugs into an xImage. */
+
+/* create(xImage, compressedData): returns the algorithm's private pointer, NULL on failure */
+typedef DXL_HANDLE (*CREATE_FUNC)(DXL_XIMAGE_HANDLE, unsigned char *);
+
+/* recreate(xImage, data, type, bitDepth, width, height): as called by DXL_AlterXImage; NULL on failure */
+typedef DXL_HANDLE (*RECREATE_FUNC)(DXL_XIMAGE_HANDLE,void *,int,int,int,int);
+
+/* destroy(xImage): called by DXL_DestroyXImage before the image is freed */
+typedef int (*DESTROY_FUNC)(DXL_XIMAGE_HANDLE);
+
+typedef int (*SEED_DATA_FUNC)(DXL_XIMAGE_HANDLE);
+
+/* dx(xImage, vScreen): decompress; its result is returned by DXL_dxImageToVScreen */
+typedef int (*DX_FUNC)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE);
+
+/* setParameter(xImage, command, parameter): no return value */
+typedef void (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+//typedef int (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+
+/* getParameter(xImage, command, parameter): its result is returned by DXL_GetParameter */
+typedef int (*GET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+
+/* sendVideoMessage(xImage, message, messageSize): its result is returned by DXL_SendVideoMessage */
+typedef int (*SEND_VMSG_FUNC)(DXL_XIMAGE_HANDLE, void *, unsigned int);
+
+
+int DXL_GetAlgHandle(unsigned int fourcc);
+DXL_HANDLE DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src);
+
+int DXL_RegisterXImage(CREATE_FUNC creator, unsigned int fourcc);
+int DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc);
+int DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc);
+int DXL_RegisterXImageSeedData(DXL_XIMAGE_HANDLE src, SEED_DATA_FUNC thisFunc);
+int DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc);
+
+int DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc);
+int DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc);
+
+int DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc);
+
+
+/*
+ * Pack four characters into a FOURCC: ch0 lands in the least-significant
+ * byte and ch3 in the most-significant byte. Each argument is reduced to
+ * an unsigned char first, so only its low 8 bits are used.
+ */
+#define DXL_MKFOURCC( ch0, ch1, ch2, ch3 ) \
+ ( (unsigned int)(unsigned char)(ch0) | ( (unsigned int)(unsigned char)(ch1) << 8 ) | \
+ ( (unsigned int)(unsigned char)(ch2) << 16 ) | ( (unsigned int)(unsigned char)(ch3) << 24 ) )
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/codec_common.h b/Src/libvpShared/corelibs/cdxv/include/codec_common.h
new file mode 100644
index 00000000..a1cf862b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/codec_common.h
@@ -0,0 +1,101 @@
+/****************************************************************************
+*
+* Module Title : Codec_common.h
+*
+* Description : Common codec definitions header file.
+*
+****************************************************************************/
+#ifndef __INC_COMCODEC_H
+#define __INC_COMCODEC_H
+
+/****************************************************************************
+* Include Files
+****************************************************************************/
+#include <string.h>
+#include "type_aliases.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+
+// Platform Specific Inlines
+#ifdef _MSC_VER
+ #ifndef INLINE
+ #define INLINE __inline
+ #endif
+ #ifndef FORCEINLINE
+ #define FORCEINLINE __forceinline
+ #endif
+#else
+ #ifndef INLINE
+ #define INLINE inline
+ #endif
+ #ifndef FORCEINLINE
+ #define FORCEINLINE inline
+ #endif
+#endif
+
+
+// Platform specific math function defines
+// Clamp x to the range [0, 255]. The argument is evaluated up to three
+// times, so do not pass an expression with side effects.
+#define LIMIT(x) ( (x)<0 ? 0: (x)>255 ? 255: (x) )
+/****************************************************************************
+* Module constants.
+****************************************************************************/
+#define BASE_FRAME 0
+#define NORMAL_FRAME 1
+#define Q_TABLE_SIZE 64
+#define BLOCK_HEIGHT_WIDTH 8
+#define BLOCK_SIZE (BLOCK_HEIGHT_WIDTH * BLOCK_HEIGHT_WIDTH)
+
+
+/****************************************************************************
+* Types
+****************************************************************************/
+
+/* Type defining YUV data elements. */
+typedef UINT8 YUV_BUFFER_ENTRY;
+typedef UINT8 *YUV_BUFFER_ENTRY_PTR;
+
+// Frame geometry and scaling configuration shared by the codec modules.
+typedef struct CONFIG_TYPE
+{
+ // The size of the surface we want to draw to
+ UINT32 VideoFrameWidth;
+ UINT32 VideoFrameHeight;
+
+ // Strides of the luma (Y) and chroma (UV) planes
+ // NOTE(review): units (bytes vs. pixels) are not visible here -- confirm
+ INT32 YStride;
+ INT32 UVStride;
+
+ // The number of horizontal and vertical blocks encoded
+ // NOTE(review): the field names say "Pixels"; confirm whether these count blocks or pixels
+ UINT32 HFragPixels;
+ UINT32 VFragPixels;
+
+ // The Intended Horizontal Scale
+ UINT32 HScale;
+ UINT32 HRatio;
+
+ // The Intended Vertical Scale
+ UINT32 VScale;
+ UINT32 VRatio;
+
+ // The intended scaling mode
+ UINT32 ScalingMode;
+
+ // Interlaced flag: 0 means not interlaced, 1 means interlaced
+ UINT32 Interlaced;
+
+ UINT32 ExpandedFrameWidth;
+ UINT32 ExpandedFrameHeight;
+
+} CONFIG_TYPE;
+
+typedef struct
+{
+ INT16 x;
+ INT16 y;
+} MOTION_VECTOR;
+
+typedef MOTION_VECTOR COORDINATE;
+typedef INT16 Q_LIST_ENTRY;
+typedef Q_LIST_ENTRY Q_LIST[64];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h b/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h
new file mode 100644
index 00000000..072bd723
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h
@@ -0,0 +1,108 @@
+/****************************************************************************
+*
+* Module Title : codec_common_if.H
+*
+* Description : Interface to video codec demo decompressor DLL
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 PGW 21/07/99 Added FR_INVALID_MODE_TOKEN.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef CODEC_COMMON_INTERFACE_H
+#define CODEC_COMMON_INTERFACE_H
+
+#define __export
+#define _export
+#define DllExport __declspec( dllexport )
+#define DllImport __declspec( dllimport )
+
+// Playback ERROR Codes.
+#define NO_DECODER_ERROR 0
+#define REMOTE_DECODER_ERROR -1
+
+#define DFR_BAD_DCT_COEFF -100
+#define DFR_ZERO_LENGTH_FRAME -101
+#define DFR_FRAME_SIZE_INVALID -102
+#define DFR_OUTPUT_BUFFER_OVERFLOW -103
+#define DFR_INVALID_FRAME_HEADER -104
+#define FR_INVALID_MODE_TOKEN -110
+#define ETR_ALLOCATION_ERROR -200
+#define ETR_INVALID_ROOT_PTR -201
+#define SYNCH_ERROR -400
+#define BUFFER_UNDERFLOW_ERROR -500
+#define PB_IB_OVERFLOW_ERROR -501
+
+// External error triggers
+#define PB_HEADER_CHECKSUM_ERROR -601
+#define PB_DATA_CHECKSUM_ERROR -602
+
+// DCT Error Codes
+#define DDCT_EXPANSION_ERROR -700
+#define DDCT_INVALID_TOKEN_ERROR -701
+
+// ExceptionErrors
+#define GEN_EXCEPTIONS -800
+#define EX_UNQUAL_ERROR -801
+
+// Unrecoverable error codes
+#define FATAL_PLAYBACK_ERROR -1000
+#define GEN_ERROR_CREATING_CDC -1001
+#define GEN_THREAD_CREATION_ERROR -1002
+#define DFR_CREATE_BMP_FAILED -1003
+
+// YUV buffer configuration structure
+typedef struct
+{
+ int YWidth;
+ int YHeight;
+ int YStride;
+
+ int UVWidth;
+ int UVHeight;
+ int UVStride;
+
+ char * YBuffer;
+ char * UBuffer;
+ char * VBuffer;
+
+} YUV_BUFFER_CONFIG;
+// Identifiers of codec settings. Values are assigned sequentially from 0.
+// C_SET_EXPERIMENTAL_MIN .. C_SET_EXPERIMENTAL_MAX reserves a block of 256
+// consecutive values (3 .. 258), so C_SET_CHECKPROTECT is 259.
+typedef enum
+{
+ C_SET_KEY_FRAME,
+ C_SET_FIXED_Q,
+ C_SET_FIRSTPASS_FILE,
+ C_SET_EXPERIMENTAL_MIN,
+ C_SET_EXPERIMENTAL_MAX = C_SET_EXPERIMENTAL_MIN + 255,
+ C_SET_CHECKPROTECT,
+ C_SET_TESTMODE,
+ C_SET_INTERNAL_SIZE,
+ C_SET_RECOVERY_FRAME,
+ C_SET_REFERENCEFRAME,
+ C_SET_GOLDENFRAME
+
+#ifndef VP50_COMP_INTERFACE
+ // Specialist test facilities.
+// C_VCAP_PARAMS, // DO NOT USE FOR NOW WITH VFW CODEC
+#endif
+
+} C_SETTING;
+
+typedef enum
+{
+ MAINTAIN_ASPECT_RATIO = 0x0,
+ SCALE_TO_FIT = 0x1,
+ CENTER = 0x2,
+ OTHER = 0x3
+} SCALE_MODE;
+
+
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h b/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h
new file mode 100644
index 00000000..c961f5f2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h
@@ -0,0 +1,75 @@
+//==========================================================================
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+// PURPOSE.
+//
+// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#ifndef _dxl_plugin_h
+#define _dxl_plugin_h
+
+#include "duck_dxl.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* max number of algorithms to be supported at one time */
+#define NUM_ALG 16
+
+typedef void * DXL_HANDLE;
+
+typedef unsigned int DXL_OBJECT_VERSION;
+
+DXL_HANDLE
+DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src);
+
+unsigned char *
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src);
+
+/* plug-in callback signatures; the trailing notes name the DXL_Register* call in this header that takes each one */
+typedef DXL_HANDLE (*CREATE_FUNC)(DXL_XIMAGE_HANDLE, unsigned char *); /* DXL_RegisterXImage */
+
+typedef DXL_HANDLE (*RECREATE_FUNC)(DXL_XIMAGE_HANDLE,void *,int,int,int,int); /* DXL_RegisterXImageRecreate */
+
+typedef int (*DESTROY_FUNC)(DXL_XIMAGE_HANDLE); /* DXL_RegisterXImageDestroy */
+
+typedef int (*SEED_DATA_FUNC)(DXL_XIMAGE_HANDLE); /* NOTE(review): no register call for this type appears in this header */
+
+typedef int (*DX_FUNC)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE); /* DXL_RegisterXImageDx */
+
+typedef void (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int); /* returns nothing; DXL_RegisterXImageSetParameter */
+//typedef int (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+
+typedef int (*GET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int); /* DXL_RegisterXImageGetParameter */
+
+typedef int (*SEND_VMSG_FUNC)(DXL_XIMAGE_HANDLE, void *, unsigned int); /* DXL_RegisterXImageSendVideoMessage */
+
+
+int DXL_GetAlgHandle(unsigned int fourcc);
+
+int DXL_RegisterXImage(CREATE_FUNC creator, unsigned int fourcc);
+int DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc);
+int DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc);
+int DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc);
+
+int DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc);
+int DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc);
+
+int DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc);
+
+/* Packs four characters into one unsigned int: ch0 in bits 0-7, ch1 in 8-15, ch2 in 16-23, ch3 in 24-31. */
+#define DXL_MKFOURCC( ch0, ch1, ch2, ch3 ) \
+ ( (unsigned int)(unsigned char)(ch0) | ( (unsigned int)(unsigned char)(ch1) << 8 ) | \
+ ( (unsigned int)(unsigned char)(ch2) << 16 ) | ( (unsigned int)(unsigned char)(ch3) << 24 ) )
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/postproc_if.h b/Src/libvpShared/corelibs/cdxv/include/postproc_if.h
new file mode 100644
index 00000000..40c2a450
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/postproc_if.h
@@ -0,0 +1,151 @@
+/****************************************************************************
+*
+* Module Title : postproc_if.h
+*
+* Description : Post-processor interface header file.
+*
+****************************************************************************/
+#ifndef __INC_POSTPROC_IF_H
+#define __INC_POSTPROC_IF_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+typedef struct POSTPROC_INSTANCE * POSTPROC_INST;
+
+/****************************************************************************
+* Imported Functions.
+****************************************************************************/
+extern void InitPostProcessing
+(
+ UINT32 *DCQuantScaleV2p,
+ UINT32 *DCQuantScaleUVp,
+ UINT32 *DCQuantScaleV1p,
+ UINT32 Version
+);
+
+extern void DeInitPostProcessing ( void ); // explicit (void): a true prototype in C, so calls that pass arguments are diagnosed
+
+extern POSTPROC_INST CreatePostProcInstance
+(
+ CONFIG_TYPE *ConfigurationInit // configuration to setup
+);
+
+extern void DeletePostProcInstance
+(
+ POSTPROC_INST *pbi // postprocessor instance to delete
+);
+
+extern void SetPPInterlacedMode(POSTPROC_INST ppi, int Interlaced);
+extern void SetDeInterlaceMode(POSTPROC_INST ppi, int DeInterlaceMode);
+extern void SetAddNoiseMode(POSTPROC_INST ppi, int AddNoiseMode);
+
+extern void ChangePostProcConfiguration
+(
+ POSTPROC_INST pbi, // postprocessor instance to use
+ CONFIG_TYPE *Configuration // configuration to change to
+);
+
+extern void PostProcess
+(
+ POSTPROC_INST pbi, // postprocessor instance to use
+ INT32 Vp3VersionNo, // version of frame
+ INT32 FrameType, // key or non key
+ INT32 PostProcessingLevel, // level of post processing to perform
+ INT32 FrameQIndex, // q index value used on passed in frame
+ UINT8 *LastFrameRecon, // reconstruction buffer : passed in
+ UINT8 *PostProcessBuffer, // postprocessing buffer : passed in
+ UINT8 *FragInfo, // blocks coded : passed in
+ UINT32 FragInfoElementSize, // size of each element
+ UINT32 FragInfoCodedMask // mask to get at whether fragment is coded
+);
+
+extern void (*ClampLevels)
+(
+ POSTPROC_INST pbi,
+ INT32 BlackClamp, // number of values to clamp from 0
+ INT32 WhiteClamp, // number of values to clamp from 255
+ UINT8 *Src, // reconstruction buffer : passed in
+ UINT8 *Dst // postprocessing buffer : passed in
+);
+
+extern void LoopFilter
+(
+ POSTPROC_INST pbi, // postprocessor instance to use
+ INT32 FrameQIndex, // q index value used on passed in frame
+ UINT8 *LastFrameRecon, // reconstruction buffer : passed in
+ UINT8 *PostProcessBuffer, // postprocessing buffer : passed in
+ UINT8 *FragInfo, // blocks coded : passed in
+ UINT32 FragInfoElementSize, // size of each element
+ UINT32 FragInfoCodedMask // mask to get at whether fragment is coded
+);
+
+extern void ApplyReconLoopFilter
+(
+ POSTPROC_INST pbi, // postprocessor instance to use
+ INT32 FrameQIndex, // q index value used on passed in frame
+ UINT8 *LastFrameRecon, // reconstruction buffer : passed in
+ UINT8 *PostProcessBuffer, // postprocessing buffer : passed in
+ UINT8 *FragInfo, // blocks coded : passed in
+ UINT32 FragInfoElementSize, // size of each element
+ UINT32 FragInfoCodedMask // mask to get at whether fragment is coded
+);
+
+extern void ScaleOrCenter
+(
+ POSTPROC_INST pbi, // postprocessor instance to use
+ UINT8 *FrameBuffer, // buffer to use passed in
+ YUV_BUFFER_CONFIG * YuvConfig // size you want to output buffer to
+);
+
+/****************************************************************************
+* Exported Functions.
+****************************************************************************/
+extern void UpdateUMVBorder
+(
+ POSTPROC_INST pbi,
+ UINT8 * DestReconPtr
+);
+
+extern void (*FilteringVert_12)
+(
+ UINT32 QValue,
+ UINT8 * Src,
+ INT32 Pitch
+);
+
+extern void (*FilteringHoriz_12)
+(
+ UINT32 QValue,
+ UINT8 * Src,
+ INT32 Pitch
+);
+
+extern void (*FilteringVert_8)
+(
+ UINT32 QValue,
+ UINT8 * Src,
+ INT32 Pitch
+);
+
+extern void (*FilteringHoriz_8)
+(
+ UINT32 QValue,
+ UINT8 * Src,
+ INT32 Pitch
+);
+
+extern void CopyFrame( POSTPROC_INST pbi, YUV_BUFFER_CONFIG *b, UINT8 *DestReconPtr);
+
+/****************************************************************************
+* Exported Data.
+****************************************************************************/
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/preproc.h b/Src/libvpShared/corelibs/cdxv/include/preproc.h
new file mode 100644
index 00000000..98d748b4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/preproc.h
@@ -0,0 +1,40 @@
+/****************************************************************************
+*
+* Module Title : preproc.h
+*
+* Description : simple preprocessor
+*
+****************************************************************************/
+
+#ifndef __INC_PREPROC_H
+#define __INC_PREPROC_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "duck_mem.h"
+
+/****************************************************************************
+* Types
+****************************************************************************/
+
+typedef struct // state passed to InitPreProc, DeletePreProc and the filter routines declared in this header
+{
+ unsigned char* frameBuffer;
+ int frame; // NOTE(review): meaning is not visible in this header (a frame counter?) - confirm
+ unsigned int *fixedDivide;
+
+ unsigned char*frameBufferAlloc; // presumably the raw allocations behind frameBuffer / fixedDivide - TODO confirm
+ unsigned int *fixedDivideAlloc;
+} PreProcInstance;
+
+/****************************************************************************
+* Functions.
+****************************************************************************/
+
+void DeletePreProc( PreProcInstance *ppi);
+int InitPreProc( PreProcInstance *ppi, int FrameSize);
+extern void spatialFilter_c( PreProcInstance *ppi,unsigned char *s,unsigned char *d,int width,int height,int pitch,int strength);
+extern void (*tempFilter)( PreProcInstance *ppi,unsigned char *s,unsigned char *d,int bytes,int strength);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/preprocif.h b/Src/libvpShared/corelibs/cdxv/include/preprocif.h
new file mode 100644
index 00000000..e941be4f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/preprocif.h
@@ -0,0 +1,64 @@
+/****************************************************************************
+*
+* Module Title : preprocif.h
+*
+* Description : Pre-processor interface header file.
+*
+****************************************************************************/
+
+#ifndef __PREPROC_IF_H
+#define __PREPROC_IF_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+* Types
+****************************************************************************/
+
+typedef struct
+{
+ UINT8 * Yuv0ptr;
+ UINT8 * Yuv1ptr;
+
+ UINT8 *FragInfo; // blocks coded : passed in
+ UINT32 FragInfoElementSize; // size of each element
+ UINT32 FragInfoCodedMask; // mask to get at whether fragment is coded
+
+ UINT32 * RegionIndex; // Gives pixel index for top left of each block
+ UINT32 VideoFrameHeight;
+ UINT32 VideoFrameWidth;
+ UINT8 HFragPixels;
+ UINT8 VFragPixels;
+
+} SCAN_CONFIG_DATA;
+
+typedef enum // numbered implicitly 0..4; presumably the ParamId values for SetScanParam - confirm
+{
+    SCP_FILTER_ON_OFF,
+    SCP_SET_SRF_OFFSET,
+    SCP_SET_EBO_ON_OFF,
+    SCP_SET_VCAP_LEVEL_OFFSET,
+    SCP_SET_SHOW_LOCAL
+} SCP_SETTINGS;
+
+typedef struct PP_INSTANCE * xPP_INST;
+
+/****************************************************************************
+* Module statics
+****************************************************************************/
+/* Controls whether Early break out is on or off in default case */
+#define EARLY_BREAKOUT_DEFAULT TRUE
+
+/****************************************************************************
+* Functions
+****************************************************************************/
+extern void SetScanParam ( xPP_INST ppi, UINT32 ParamId, INT32 ParamValue );
+extern UINT32 YUVAnalyseFrame ( xPP_INST ppi, UINT32 * KFIndicator );
+extern xPP_INST CreatePPInstance ( void );
+extern void DeletePPInstance ( xPP_INST * );
+extern BOOL ScanYUVInit ( xPP_INST, SCAN_CONFIG_DATA *ScanConfigPtr );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/vputil_if.h b/Src/libvpShared/corelibs/cdxv/include/vputil_if.h
new file mode 100644
index 00000000..63fc0128
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/vputil_if.h
@@ -0,0 +1,149 @@
+/****************************************************************************
+*
+* Module Title : vputil_if.h
+*
+* Description : Codec utilities header file.
+*
+****************************************************************************/
+#ifndef __VPUTIL_IF_H
+#define __VPUTIL_IF_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common_interface.h"
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+extern void InitVPUtil ( void );
+
+extern void (*ReconIntra)
+(
+ INT16 *tmpBuffer,
+ UINT8 *ReconPtr,
+ UINT16 *ChangePtr,
+ UINT32 LineStep
+);
+
+extern void (*ReconInter)
+(
+ INT16 * tmpBuffer,
+ UINT8 * ReconPtr,
+ UINT8 * RefPtr,
+ INT16 * ChangePtr,
+ UINT32 LineStep
+);
+
+extern void (*ReconInterHalfPixel2)
+(
+ INT16 * tmpBuffer,
+ UINT8 * ReconPtr,
+ UINT8 * RefPtr1,
+ UINT8 * RefPtr2,
+ INT16 * ChangePtr,
+ UINT32 LineStep
+);
+
+extern void (*idct[65])
+(
+ INT16 *InputData,
+ INT16 *QuantMatrix,
+ INT16 *OutputData
+);
+
+extern void (*idctc[65])
+(
+ INT16 *InputData,
+ INT16 *QuantMatrix,
+ INT16 * OutputData
+);
+
+extern void (*ClearSysState) ( void );
+
+extern void (*ReconBlock)
+(
+ INT16 *SrcBlock,
+ INT16 *ReconRefPtr,
+ UINT8 *DestBlock,
+ UINT32 LineStep
+);
+
+extern void (*SubtractBlock)
+(
+ UINT8 *SrcBlock,
+ INT16 *DestPtr,
+ UINT32 LineStep
+);
+
+extern void (*UnpackBlock)
+(
+ UINT8 *ReconPtr,
+ INT16 *ReconRefPtr,
+ UINT32 ReconPixelsPerLine
+);
+
+extern void (*AverageBlock)
+(
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ UINT16 *ReconRefPtr,
+ UINT32 ReconPixelsPerLine
+);
+
+extern void (*CopyBlock)
+(
+ unsigned char *src,
+ unsigned char *dest,
+ unsigned int srcstride
+);
+
+extern void (*fdct_short)
+(
+ INT16 * InputData,
+ INT16 * OutputData
+);
+
+extern void (*Copy12x12)
+(
+ const unsigned char *src,
+ unsigned char *dest,
+ unsigned int srcstride,
+ unsigned int deststride
+);
+
+extern void (*FilterBlockBil_8)
+(
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ UINT8 *ReconRefPtr,
+ UINT32 ReconPixelsPerLine,
+ INT32 ModX,
+ INT32 ModY
+);
+
+extern void (*FilterBlock)
+(
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ UINT16 *ReconRefPtr,
+ UINT32 PixelsPerLine,
+ INT32 ModX,
+ INT32 ModY,
+ BOOL UseBicubic,
+ UINT8 BicubicAlpha
+);
+
+extern UINT32 (*FiltBlockBilGetSad)
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ INT32 PixelsPerLine,
+ INT32 ModX,
+ INT32 ModY,
+ UINT32 BestSoFar
+);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c b/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c
new file mode 100644
index 00000000..9bd9cba7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c
@@ -0,0 +1,257 @@
+/****************************************************************************
+*
+* Module Title : PreProcOptFunctions.c
+*
+* Description : MMX or otherwise processor specific
+* optimised versions of pre-processor functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.04 YWX 30-Nov-00 Added support for WMT cpu
+* 1.03 PGW 24 Jul 00 Added Column SAD function.
+* 1.02 YX 06/04/00 Optimized get row sad for xmm
+* 1.01 PGW 12/07/99 Changes to reduce unnecessary dependencies.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "preproc.h"
+#include "cpuidlib.h"
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+UINT32 MmxRowSAD( UINT8 * Src1, UINT8 * Src2 );
+extern UINT32 XmmRowSAD( UINT8 * Src1, UINT8 * Src2 );
+
+/****************************************************************************
+ *
+ * ROUTINE : MachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Checks for machine-specific features such as MMX support and
+ * sets the appropriate flags and function pointers.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void MachineSpecificConfig(PP_INSTANCE *ppi)
+{
+    // Picks the RowSAD/ColSAD routines for ppi from the processor type that
+    // findCPUId() reports; writes ppi->MmxEnabled, XmmEnabled, RowSAD, ColSAD.
+    // NOTE(review): PP_INSTANCE and the Scalar*SAD routines are not declared in
+    // the preproc.h visible to this review - confirm which header supplies them.
+    PROCTYPE CPUType = findCPUId();
+
+    switch(CPUType)
+    {
+    case PII    :
+    case AMDK63D:
+    case AMDK6  :
+    case PMMX   :
+        ppi->MmxEnabled = TRUE;
+        ppi->XmmEnabled = FALSE;
+        break;
+    case XMM    :
+    case WMT    :
+        ppi->MmxEnabled = TRUE;
+        ppi->XmmEnabled = TRUE;
+        break;
+    case X86    :
+    case PPRO   :
+    case C6X86  :
+    case C6X86MX:
+    case AMDK5  :
+    case MACG3  :
+    case MAC68K :
+    default     :
+        // Also taken for any type this switch does not list, so the flags are
+        // never left holding whatever ppi contained on entry.
+        ppi->MmxEnabled = FALSE;
+        ppi->XmmEnabled = FALSE;
+        break;
+    }
+
+    //To test We force the cpu type here
+    //ppi->MmxEnabled = FALSE;
+    //ppi->XmmEnabled = FALSE;
+
+    // Only the row SAD has processor specific versions; the column SAD is the
+    // original 'C' routine in every configuration.
+    ppi->ColSAD = ScalarColSAD;
+
+    if ( ppi->XmmEnabled )
+    {
+        ppi->RowSAD = XmmRowSAD;
+    }
+    else if ( ppi->MmxEnabled )
+    {
+        ppi->RowSAD = MmxRowSAD;
+    }
+    else
+    {
+        ppi->RowSAD = ScalarRowSAD;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxRowSAD
+ *
+ * INPUTS : UINT8 * NewDataPtr (New Data)
+ * UINT8 * RefDataPtr
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : Highest of two S.A.D. values.
+ *
+ *
+ * FUNCTION : Calculates the sum of the absolute differences for two groups of
+ * four pixels and returns the larger of the two.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 MmxRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr )
+{
+ UINT32 SadValue;
+ UINT32 SadValue1;
+ UINT32 AbsValues[2];
+
+ // MMX code: absolute differences of eight byte pairs, summed separately for bytes 0-3 and bytes 4-7
+__asm
+ {
+ pxor mm6, mm6 ; Blank mm6 (zero source for the unpacks below)
+ pxor mm7, mm7 ; Blank mm7 (zero source for the unpacks below)
+
+ mov eax,dword ptr [NewDataPtr] ; Load base addresses
+ mov ebx,dword ptr [RefDataPtr]
+
+ // Calculate eight ABS difference values.
+ movq mm0, [eax] ; Copy eight bytes to mm0
+ movq mm1, [ebx] ; Copy eight bytes to mm1
+ movq mm2, mm0 ; Take copy of MM0
+
+ psubusb mm0, mm1 ; A-B to MM0 (unsigned saturating, so 0 where B > A)
+ psubusb mm1, mm2 ; B-A to MM1 (unsigned saturating, so 0 where A > B)
+ por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
+
+ movq mm1, mm0 ; keep a copy
+
+ // Sum together the low four bytes and the high four bytes
+ punpcklbw mm0, mm6 ; unpack low four bytes to higher precision
+ punpckhbw mm1, mm7 ; unpack high four bytes to higher precision
+ movq mm2, mm0 ; take a copy
+ movq mm3, mm1 ; take a copy
+ punpcklwd mm0, mm6 ; unpack low two words to higher precision
+ punpcklwd mm1, mm7 ; unpack low two words to higher precision
+ punpckhwd mm2, mm6 ; unpack high two words to higher precision
+ punpckhwd mm3, mm7 ; unpack high two words to higher precision
+
+ paddd mm0, mm2 ; Accumulate intermediate results
+ paddd mm1, mm3 ; Accumulate intermediate results
+ movq mm2, mm0 ; take a copy
+ movq mm3, mm1 ; take a copy
+ punpckhdq mm0, mm6 ; Unpack and accumulate again
+ punpckhdq mm1, mm7 ; Unpack and accumulate again
+ punpckldq mm2, mm6 ; low dword of the copy, zero in the high dword
+ punpckldq mm3, mm7 ; low dword of the copy, zero in the high dword
+ paddd mm0, mm2 ; Accumulate final result
+ paddd mm1, mm3 ; Accumulate final result
+
+ // Interleave the two SAD results
+ punpckldq mm0, mm1
+
+ // Write back the two sums: bytes 0-3 total to AbsValues[0], bytes 4-7 total to AbsValues[1]
+ movq dword ptr [AbsValues], mm0 ; NOTE(review): movq stores 8 bytes although the operand is written as dword ptr
+ }
+
+ SadValue = AbsValues[0];
+ SadValue1 = AbsValues[1];
+ SadValue = (SadValue > SadValue1) ? SadValue : SadValue1; // return the larger of the two sums
+
+ return SadValue; // no emms is issued here; ClearMmxState() in this file does that
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ClearMmxState()
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Clears down the MMX state
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearMmxState(PP_INSTANCE *ppi)
+{
+    // MMX paths are disabled for this instance: nothing to clear.
+    if ( !ppi->MmxEnabled )
+    {
+        return;
+    }
+
+    __asm emms                  // Clear the MMX state.
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h b/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h
new file mode 100644
index 00000000..46597097
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h
@@ -0,0 +1,43 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by PreprocParams.rc
+//
+#define IDD_VCAP_PARAMS 101
+#define IDD_PREPROC_PARAMS 101
+#define IDC_VCAP_P_TRESH_SB 1000
+#define IDC_VCAP_NOISE_SUP_SB 1001
+#define IDC_VCAP_TRIG_SB 1002
+#define IDC_SRF_CHECK 1004
+#define IDC_SRF_TEMPORAL_CHECK 1005
+#define IDC_SC_TRADE_OFF_SB 1005
+#define IDC_RSAD_LOW_SB 1006
+#define IDC_VCAP_PUV_TRESH_SB 1007
+#define IDC_SGC_TRESH_SB 1008
+#define IDC_SGC_TRIGGER_SB 1009
+#define IDC_SGC_UV_TRESH_SB 1010
+#define IDC_VCAP_BAR_THRESH_SB 1011
+#define IDC_VCAP_P_TRESH_ED 1012
+#define IDC_VCAP_PUV_TRESH_ED 1013
+#define IDC_VCAP_NOISE_SUP_ED 1014
+#define IDC_VCAP_TRIG_ED 1015
+#define IDC_VCAP_BAR_THRESH_ED 1016
+#define IDC_SGC_TRESH_ED 1017
+#define IDC_SGC_UV_TRESH_ED 1018
+#define IDC_SGC_TRIGGER_ED 1019
+#define IDC_SRF_MEDIAN_CHECK 1020
+#define IDC_RSAD_HIGH_SB 1020
+#define IDC_PAK_ENABLED_CHECK 1023
+#define IDC_SC_TRADE_OFF_ED 1024
+#define IDC_RSAD_LOW_ED 1025
+#define IDC_RSAD_HIGH_ED 1026
+
+// Next default values for new objects
+// (_APS_NEXT_CONTROL_VALUE must stay above the highest control id in use, 1026)
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE 103
+#define _APS_NEXT_COMMAND_VALUE 40001
+#define _APS_NEXT_CONTROL_VALUE 1027
+#define _APS_NEXT_SYMED_VALUE 101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c b/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c
new file mode 100644
index 00000000..6ae77d7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c
@@ -0,0 +1,765 @@
+/****************************************************************************
+*
+* Module Title : RowDiffScan.c
+*
+* Description : Pre-processor row difference Scan
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 22 AUG 00 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "preproc.h"
+
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ * ROUTINE : RowDiffScan
+ *
+ * INPUTS : UINT8 * YuvPtr1, YuvPtr2
+ * Pointers into current and previous frame
+ * BOOL EdgeRow
+ * Is this row an edge row.
+ *
+ * OUTPUTS : INT16 * YUVDiffsPtr
+ * Difference map
+ * UINT8 * bits_map_ptr
+ * Pixels changed map
+ * INT8 * SgcPtr
+ * Level change score.
+ * INT8 * DispFragPtr
+ * Block update map.
+ * INT32 * RowDiffsPtr
+ * Total sig changes for row
+ * UINT8 * ChLocalsPtr
+ * Changed locals data structure
+ *
+ *
+ * RETURNS :
+ *
+ * FUNCTION : Initial pixel differences scan
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2,
+ INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr,
+ INT8 * SgcPtr, INT8 * DispFragPtr,
+ UINT8 * FDiffPixels, INT32 * RowDiffsPtr,
+ UINT8 * ChLocalsPtr, BOOL EdgeRow )
+{
+ INT32 i;
+ INT32 FragChangedPixels;
+
+ INT16 Diff[8];
+
+ UINT32 ZeroData[2] = { 0,0 };
+ UINT8 OneData[8] = { 1,1,1,1,1,1,1,1 };
+ UINT8 ChlocalsDummyData[8] = { 8,8,8,8,8,8,8,8 };
+
+ // Cannot use kernel if at edge or if PAK disabled
+ if ( (!ppi->PAKEnabled) || EdgeRow )
+ {
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if ((*DispFragPtr == CANDIDATE_BLOCK) )//|| !ppi->EarlyBreakAllowed)
+ {
+
+ __asm
+ {
+
+ movd esi, [YuvPtr1];
+ movd ebx, [YuvPtr2];
+ movd edx, FragChangedPixels
+ pxor mm7, mm7;
+
+ movq mm0, [esi] ;76543210
+ movq mm1, [ebx] ;76543210
+
+ movq mm2, mm0 ;make a copy
+ movq mm3, mm1 ;make a copy
+
+ punpcklbw mm0, mm7 ; 3 2 1 0
+ punpcklbw mm1, mm7 ; 3 2 1 0
+
+ punpckhbw mm2, mm7 ; 7 6 5 4
+ punpckhbw mm3, mm7 ; 7 6 5 4
+
+ psubw mm0 mm1 ; Diff[3,2,1,0]
+ psubw mm2, mm3 ; Diff[7,6,5,4]
+
+ movq QWORD PTR [YUVDiffsPtr], mm0
+ movq QWORD PTR [YUVDiffsPtr], mm2
+
+ ;------------------------------------------------------
+ ; mm0, mm1, mm3, mm4, m5, mm6, mm7, Free
+ ; mm2, keep the Diff[7 6 5 4]
+ ;------------------------------------------------------
+
+ movd eax, ppi->LevelThresh
+
+ movd mm1, eax ;
+ movd mm3, eax ;
+
+ packsdw mm1, mm3 ;
+ movq mm4, mm1 ;
+
+ psllw mm1, 16
+ por mm1, mm4 ;4 ppi->LevelThresh
+
+
+ ;-------------------------------------------------------
+ ; mm3, mm4, mm5, mm6, mm7 Free
+ ;
+ ;-------------------------------------------------------
+
+ movd eax, ppi->SrfThresh
+
+ movd mm3, eax ;
+ movd mm4, eax ;
+
+ packsdw mm3, mm4 ;
+ movq mm5, mm3 ;
+
+ psllw mm3, 16
+ por mm3, mm6 ;4 ppi->SrfThresh
+
+ ;--------------------------------------------------------
+ ; mm0 mm2 diff[0]-diff[7]
+ ; mm1 ppi->LevelThresh
+ ; mm3 ppi->SrfThresh
+ ; mm4-mm7 free
+ ; Note, ppi->NegLevelThresh = - ppi->LevelThresh
+ ; ppi->NegSrfThresh = - ppi->SrfThresh
+ ;--------------------------------------------------------
+
+ movq mm4, mm0 ; diff[0][1][2][3]
+ movq mm5, mm0 ;
+
+ psubsw mm4, mm1 ; if diff >= LevelThresh
+ psraw mm4, 15 ; 00s(True) and ffs (False)
+ pandn mm4, FFFFFFFFh ; ffs(True) and 00s (False)
+ psrlw mm4, 15 ; 01 (True) and 00 (False)
+
+ pcmpgtw mm5, mm3 ; if diff > SrfThresh
+ ; ffs(True) and 00s (False)
+ psrlw mm5, 15 ; 01 (True) and 00 (False)
+ pand mm5, mm4 ;
+
+
+ movq mm7, mm0 ; save a copy of diff[0][1][2][3]
+ pxor mm6, mm6 ; clear MM6
+
+ psubsw mm6, mm1 ; mm6 = NegLevelThresh
+ pcmpgtw mm0, mm6 ; if diff > NegLevelThresh
+ ; ffs(True) and 00s (False)
+ pandn mm0, FFFFFFFFh ; if diff <= NegLevelThresh
+ ; ffs(True) and 00 (False)
+ psrlw mm0, 15 ; 01 (True) and 00 (False)
+
+ paddsw mm7, mm3 ; if diff < -NegSrfThresh
+ psraw mm7, 15 ; ffs(True) and 00s (False)
+
+ psrlw mm7, 15 ; 01 (True) and 00s (False)
+ pand mm7, mm0 ;
+
+ ;----------------------------------------------------------------------------
+ ; mm0, mm1, mm2, mm3, mm4, mm5, mm7 in use
+ ; mm6 free
+ ;----------------------------------------------------------------------------
+
+ por mm5, mm7 ; mm7 is free now
+ pxor mm6, mm6 ;
+ movq mm7, mm5 ;
+ punpcklwd mm5, mm6 ;
+ punpckhwd mm7, mm6 ;
+
+ paddw mm5, mm7 ;
+ movq mm7, mm5 ;
+
+ psrlq mm7, 32 ;
+ paddd mm7, mm5 ;
+
+ movd eax, mm7 ;
+
+ add eax, ebx
+
+
+
+
+ // Calculate the diference values and copy to YUVDiffsPtr
+ Diff[0] = ((INT16)YuvPtr1[0]) - ((INT16)YuvPtr2[0]);
+ Diff[1] = ((INT16)YuvPtr1[1]) - ((INT16)YuvPtr2[1]);
+ Diff[2] = ((INT16)YuvPtr1[2]) - ((INT16)YuvPtr2[2]);
+ Diff[3] = ((INT16)YuvPtr1[3]) - ((INT16)YuvPtr2[3]);
+ ((INT32 *)YUVDiffsPtr)[0] = ((INT32 *)Diff)[0];
+ ((INT32 *)YUVDiffsPtr)[1] = ((INT32 *)Diff)[1];
+
+ // Test against the Level and ppi->SRF thresholds and record the results
+ // Pixel 0
+ if ( Diff[0] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[0] > ppi->SrfThresh )
+ {
+ bits_map_ptr[0] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[0] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[0] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[0] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 1
+ if ( Diff[1] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[1] > ppi->SrfThresh )
+ {
+ bits_map_ptr[1] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[1] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[1] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[1] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 2
+ if ( Diff[2] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[2] > ppi->SrfThresh )
+ {
+ bits_map_ptr[2] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[2] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[2] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[2] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 3
+ if ( Diff[3] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[3] > ppi->SrfThresh )
+ {
+ bits_map_ptr[3] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[3] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[3] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[3] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+
+ // Calculate the diference values and copy to YUVDiffsPtr
+ Diff[4] = ((INT16)YuvPtr1[4]) - ((INT16)YuvPtr2[4]);
+ Diff[5] = ((INT16)YuvPtr1[5]) - ((INT16)YuvPtr2[5]);
+ Diff[6] = ((INT16)YuvPtr1[6]) - ((INT16)YuvPtr2[6]);
+ Diff[7] = ((INT16)YuvPtr1[7]) - ((INT16)YuvPtr2[7]);
+ ((INT32 *)YUVDiffsPtr)[2] = ((INT32 *)Diff)[2];
+ ((INT32 *)YUVDiffsPtr)[3] = ((INT32 *)Diff)[3];
+
+ // Test against the Level and ppi->SRF thresholds and record the results
+ // Pixel 4
+ if ( Diff[4] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[4] > ppi->SrfThresh )
+ {
+ bits_map_ptr[4] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[4] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[4] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[4] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 5
+ if ( Diff[5] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[5] > ppi->SrfThresh )
+ {
+ bits_map_ptr[5] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[5] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[5] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[5] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 6
+ if ( Diff[6] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[6] > ppi->SrfThresh )
+ {
+ bits_map_ptr[6] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[6] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[6] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[6] = 1;
+ FragChangedPixels++;
+ }
+ }
+ // Pixel 7
+ if ( Diff[7] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+ if ( Diff[7] > ppi->SrfThresh )
+ {
+ bits_map_ptr[7] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[7] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+ if ( Diff[7] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[7] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ YuvPtr1 += ppi->HFragPixels;
+ YuvPtr2 += ppi->HFragPixels;
+ bits_map_ptr += ppi->HFragPixels;
+ ChLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ SgcPtr ++;
+ FDiffPixels ++;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+ // if ( ppi->EarlyBreakAllowed )
+ {
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED;
+ }
+ }
+ DispFragPtr++;
+ }
+ }
+ else
+ {
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if ((*DispFragPtr == CANDIDATE_BLOCK) )//|| !ppi->EarlyBreakAllowed)
+ {
+ // Calculate the diference values and copy to YUVDiffsPtr
+ Diff[0] = ((INT16)YuvPtr1[0]) - ((INT16)YuvPtr2[0]);
+ Diff[1] = ((INT16)YuvPtr1[1]) - ((INT16)YuvPtr2[1]);
+ Diff[2] = ((INT16)YuvPtr1[2]) - ((INT16)YuvPtr2[2]);
+ Diff[3] = ((INT16)YuvPtr1[3]) - ((INT16)YuvPtr2[3]);
+ ((INT32 *)YUVDiffsPtr)[0] = ((INT32 *)Diff)[0];
+ ((INT32 *)YUVDiffsPtr)[1] = ((INT32 *)Diff)[1];
+
+ // Test against the Level and ppi->SRF thresholds and record the results
+ // Pixel 0
+ if ( Diff[0] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[0] > ppi->SrfThresh) && (Diff[0] <= ppi->HighChange) )
+ Diff[0] = (int)ApplyPakLowPass( ppi, &YuvPtr1[0] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[0] );
+
+ if ( Diff[0] > ppi->SrfThresh )
+ {
+ bits_map_ptr[0] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[0] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[0] < ppi->NegSrfThresh) && (Diff[0] >= ppi->NegHighChange) )
+ Diff[0] = (int)ApplyPakLowPass( ppi, &YuvPtr1[0] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[0] );
+
+ if ( Diff[0] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[0] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 1
+ if ( Diff[1] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[1] > ppi->SrfThresh) && (Diff[1] <= ppi->HighChange) )
+ Diff[1] = (int)ApplyPakLowPass( ppi, &YuvPtr1[1] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[1] );
+
+ if ( Diff[1] > ppi->SrfThresh )
+ {
+ bits_map_ptr[1] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[1] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[1] < ppi->NegSrfThresh) && (Diff[1] >= ppi->NegHighChange) )
+ Diff[1] = (int)ApplyPakLowPass( ppi, &YuvPtr1[1] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[1] );
+
+ if ( Diff[1] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[1] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 2
+ if ( Diff[2] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[2] > ppi->SrfThresh) && (Diff[2] <= ppi->HighChange) )
+ Diff[2] = (int)ApplyPakLowPass( ppi, &YuvPtr1[2] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[2] );
+
+ if ( Diff[2] > ppi->SrfThresh )
+ {
+ bits_map_ptr[2] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[2] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[2] < ppi->NegSrfThresh) && (Diff[2] >= ppi->NegHighChange) )
+ Diff[2] = (int)ApplyPakLowPass( ppi, &YuvPtr1[2] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[2] );
+
+ if ( Diff[2] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[2] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 3
+ if ( Diff[3] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[3] > ppi->SrfThresh) && (Diff[3] <= ppi->HighChange) )
+ Diff[3] = (int)ApplyPakLowPass( ppi, &YuvPtr1[3] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[3] );
+
+ if ( Diff[3] > ppi->SrfThresh )
+ {
+ bits_map_ptr[3] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[3] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[3] < ppi->NegSrfThresh) && (Diff[3] >= ppi->NegHighChange) )
+ Diff[3] = (int)ApplyPakLowPass( ppi, &YuvPtr1[3] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[3] );
+
+ if ( Diff[3] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[3] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+
+ // Calculate the diference values and copy to YUVDiffsPtr
+ Diff[4] = ((INT16)YuvPtr1[4]) - ((INT16)YuvPtr2[4]);
+ Diff[5] = ((INT16)YuvPtr1[5]) - ((INT16)YuvPtr2[5]);
+ Diff[6] = ((INT16)YuvPtr1[6]) - ((INT16)YuvPtr2[6]);
+ Diff[7] = ((INT16)YuvPtr1[7]) - ((INT16)YuvPtr2[7]);
+ ((INT32 *)YUVDiffsPtr)[2] = ((INT32 *)Diff)[2];
+ ((INT32 *)YUVDiffsPtr)[3] = ((INT32 *)Diff)[3];
+
+ // Test against the Level and ppi->SRF thresholds and record the results
+ // Pixel 4
+ if ( Diff[4] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[4] > ppi->SrfThresh) && (Diff[4] <= ppi->HighChange) )
+ Diff[4] = (int)ApplyPakLowPass( ppi, &YuvPtr1[4] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[4] );
+
+ if ( Diff[4] > ppi->SrfThresh )
+ {
+ bits_map_ptr[4] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[4] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[4] < ppi->NegSrfThresh) && (Diff[4] >= ppi->NegHighChange) )
+ Diff[4] = (int)ApplyPakLowPass( ppi, &YuvPtr1[4] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[4] );
+
+ if ( Diff[4] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[4] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 5
+ if ( Diff[5] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[5] > ppi->SrfThresh) && (Diff[5] <= ppi->HighChange) )
+ Diff[5] = (int)ApplyPakLowPass( ppi, &YuvPtr1[5] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[5] );
+
+ if ( Diff[5] > ppi->SrfThresh )
+ {
+ bits_map_ptr[5] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[5] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[5] < ppi->NegSrfThresh) && (Diff[5] >= ppi->NegHighChange) )
+ Diff[5] = (int)ApplyPakLowPass( ppi, &YuvPtr1[5] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[5] );
+
+ if ( Diff[5] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[5] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 6
+ if ( Diff[6] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[6] > ppi->SrfThresh) && (Diff[6] <= ppi->HighChange) )
+ Diff[6] = (int)ApplyPakLowPass( ppi, &YuvPtr1[6] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[6] );
+
+ if ( Diff[6] > ppi->SrfThresh )
+ {
+ bits_map_ptr[6] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[6] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[6] < ppi->NegSrfThresh) && (Diff[6] >= ppi->NegHighChange) )
+ Diff[6] = (int)ApplyPakLowPass( ppi, &YuvPtr1[6] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[6] );
+
+ if ( Diff[6] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[6] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Pixel 7
+ if ( Diff[7] >= ppi->LevelThresh )
+ {
+ SgcPtr[0]++;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[7] > ppi->SrfThresh) && (Diff[7] <= ppi->HighChange) )
+ Diff[7] = (int)ApplyPakLowPass( ppi, &YuvPtr1[7] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[7] );
+
+ if ( Diff[7] > ppi->SrfThresh )
+ {
+ bits_map_ptr[7] = 1;
+ FragChangedPixels++;
+ }
+ }
+ else if ( Diff[7] <= ppi->NegLevelThresh )
+ {
+ SgcPtr[0]--;
+
+ // If the level change is still suspect then apply PAK kernel.
+ if ( (Diff[7] < ppi->NegSrfThresh) && (Diff[7] >= ppi->NegHighChange) )
+ Diff[7] = (int)ApplyPakLowPass( ppi, &YuvPtr1[7] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[7] );
+
+ if ( Diff[7] < ppi->NegSrfThresh )
+ {
+ bits_map_ptr[7] = 1;
+ FragChangedPixels++;
+ }
+ }
+
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ YuvPtr1 += ppi->HFragPixels;
+ YuvPtr2 += ppi->HFragPixels;
+ bits_map_ptr += ppi->HFragPixels;
+ ChLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ SgcPtr ++;
+ FDiffPixels ++;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+// if ( ppi->EarlyBreakAllowed )
+ {
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED;
+ }
+ }
+ DispFragPtr++;
+ }
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm b/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm
new file mode 100644
index 00000000..b162ad9d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm
@@ -0,0 +1,88 @@
+;------------------------------------------------
+; Overlay describing the stack as seen through [esp] immediately after the
+; single "push ebx" at the top of XmmRowSAD. Each field is one dword.
+XmmRowSADParams STRUC
+ dd ? ;1 pushed regs (the saved ebx)
+ dd ? ;return address
+ NewDataPtr dd ? ; first argument
+ RefDataPtr dd ? ; second argument
+XmmRowSADParams ENDS
+;------------------------------------------------
+
+INCLUDE iaxmm.inc
+
+ .586
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .MMX
+
+; macros
+
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+
+ ALIGN 32
+
+
+ .CODE
+
+NAME XmmRowSAD
+
+PUBLIC XmmRowSAD_
+PUBLIC _XmmRowSAD
+
+
+
+;------------------------------------------------
+; local vars
+; No stack space is reserved for locals; LOCAL_SPACE is not referenced by the
+; routine below.
+LOCAL_SPACE EQU 0
+
+
+;------------------------------------------------
+;UINT32 XmmRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr)
+;
+; Loads eight bytes from each pointer, computes the sum of absolute
+; differences separately for the lower four and the upper four bytes, and
+; returns the larger of the two sums in EAX.
+;
+; EBX is saved and restored. MM0-MM4 are overwritten and no EMMS is issued
+; here, so the caller presumably clears the MMX state - TODO confirm.
+; Both labels below mark the same entry point (presumably to satisfy two
+; different symbol naming conventions).
+;
+XmmRowSAD_:
+_XmmRowSAD:
+
+ push ebx
+ mov eax,(XmmRowSADParams PTR [esp]).NewDataPtr ; Load base addresses
+ mov ebx,(XmmRowSADParams PTR [esp]).RefDataPtr
+
+;
+; Register usage in this routine:
+; ESP = Stack Pointer MM0 = New data, low half / result
+; ESI = Not used MM1 = Zero (for unpacking)
+; EDI = Not used MM2 = New data, high half
+; EBP = Not used MM3 = Ref data, low half
+; EBX = RefDataPtr MM4 = Ref data, high half
+; ECX = Not used MM5 = Not used
+; EDX = Not used MM6 = Not used
+; EAX = NewDataPtr / return value MM7 = Not used
+;
+
+
+ movq mm0, QWORD PTR [eax] ; copy eight bytes from NewDataPtr to mm0
+ movq mm3, QWORD PTR [ebx] ; copy eight bytes from RefDataPtr to mm3
+
+ pxor mm1, mm1 ; clear mm1 for unpacking
+
+ movq mm2, mm0 ; make a copy
+ movq mm4, mm3 ; make a copy
+
+ punpcklbw mm0, mm1 ; unpack the lower four bytes (interleaved with zero bytes)
+ punpcklbw mm3, mm1 ; unpack the lower four bytes (interleaved with zero bytes)
+
+ psadbw mm0, mm3 ; sum of absolute difference of four bytes (the zero bytes add nothing)
+ punpckhbw mm2, mm1 ; unpack the higher four bytes
+ punpckhbw mm4, mm1 ; unpack the higher four bytes
+
+ psadbw mm2, mm4 ; sum of absolute difference of another four
+
+ pop ebx ; restore the caller's ebx
+ pmaxsw mm0, mm2 ; get the max of the two four-byte sums
+ movd eax, mm0 ; return value (low 32 bits of mm0)
+
+ ret
+
+;************************************************
+ END
+
+END \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c b/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c
new file mode 100644
index 00000000..d543dbbe
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c
@@ -0,0 +1,391 @@
+/****************************************************************************
+*
+* Module Title : BlockMap.c
+*
+* Description : Contains functions used to create the block map
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.08 PGW 28 Feb 01 Removal of history buffer mechanism.
+* 1.07 PGW 04 Oct 00 Changes to RowBarEnhBlockMap()
+* 1.06 JBB 03 Aug 00 Fixed Problem in which rownumber was compared to
+* PlaneHFragments instead of PlaneVFragments, added
+* statistic output functions
+* 1.05 PGW 27/07/00 Experiments with motion score.
+* 1.04 JBB 30/05/00 Removed hard coded size limits
+* 1.03 PGW 18/02/00 Changed weighting for History blocks.
+* Redundant functions deleted.
+* Deglobalization.
+* 1.02 PGW 12/07/99 Changes to reduce uneccessary dependancies.
+* 1.01 PGW 21/06/99 Alter function of RowBarEnhBlockMap() for VFW codec.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include <string.h>
+
+#include "preproc.h"
+
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : BarRowHasCodedBlock
+ *
+ * INPUTS : const INT8 * RowPtr First entry of the block map row to test
+ * INT32 Col Column of the block being considered
+ * INT32 RowLen Number of fragments in a row
+ * INT32 TestCentre Non zero to test RowPtr[Col] as well
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : 1 if any tested entry is > BLOCK_NOT_CODED else 0.
+ *
+ * FUNCTION : Tests the entries at Col-1, Col+1 (and optionally Col) of
+ * one block map row without reading outside that row.
+ *
+ * SPECIAL NOTES : Private helper for RowBarEnhBlockMap().
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+static INT32 BarRowHasCodedBlock( const INT8 * RowPtr, INT32 Col, INT32 RowLen, INT32 TestCentre )
+{
+    // Left hand neighbour (absent for the first block in a row)
+    if ( (Col > 0) && (RowPtr[Col-1] > BLOCK_NOT_CODED) )
+        return 1;
+
+    // The entry directly above / below (not wanted for the current row)
+    if ( TestCentre && (RowPtr[Col] > BLOCK_NOT_CODED) )
+        return 1;
+
+    // Right hand neighbour (absent for the last block in a row)
+    if ( (Col < (RowLen - 1)) && (RowPtr[Col+1] > BLOCK_NOT_CODED) )
+        return 1;
+
+    return 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : RowBarEnhBlockMap
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Supplies PlaneHFragments and PlaneVFragments.
+ * UINT32 * FragScorePtr (not used)
+ * INT8 * FragSgcPtr (not used)
+ * INT8 * UpdatedBlockMapPtr
+ * Entry for the first block of row RowNumber in the
+ * main block map. The rows above and below are read
+ * through this pointer when they exist.
+ * UINT32 RowNumber
+ *
+ * OUTPUTS : INT8 * BarBlockMapPtr
+ * One row of the bar block map.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : BAR Enhances block map on a row by row basis. The output
+ * row is first set to BLOCK_NOT_CODED. Every block whose main
+ * map entry is <= CANDIDATE_BLOCK and which has at least one
+ * immediate neighbour (up to 8) with an entry > BLOCK_NOT_CODED
+ * is then marked BLOCK_CODED_BAR in the output row.
+ *
+ * SPECIAL NOTES : Neighbours that would fall outside the plane (first / last
+ * row, first / last block in a row) are skipped, including
+ * when the plane is only one fragment high or wide.
+ * A RowNumber beyond the last row is treated as having no
+ * row below it.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void RowBarEnhBlockMap( PP_INSTANCE *ppi,
+                        UINT32 * FragScorePtr,
+                        INT8 * FragSgcPtr,
+                        INT8 * UpdatedBlockMapPtr,
+                        INT8 * BarBlockMapPtr,
+                        UINT32 RowNumber )
+{
+    // Row length as a signed value so that stepping back one row is a
+    // plain negative pointer offset whatever the type of the ppi field.
+    INT32 RowLen = (INT32)ppi->PlaneHFragments;
+
+    // Which vertical neighbours exist for this row.
+    INT32 HasRowAbove = ( RowNumber != 0 );
+    INT32 HasRowBelow = ( (ppi->PlaneVFragments > 0) &&
+                          (RowNumber < (UINT32)(ppi->PlaneVFragments - 1)) );
+
+    INT32 i;
+
+    // Kept in the interface for existing callers but not needed here.
+    (void)FragScorePtr;
+    (void)FragSgcPtr;
+
+    // Start by blanking the row in the bar block map structure.
+    memset( BarBlockMapPtr, BLOCK_NOT_CODED, ppi->PlaneHFragments );
+
+    // For each fragment in the row.
+    for ( i = 0; i < RowLen; i ++ )
+    {
+        // Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW
+        // Uncoded or coded blocks will be ignored.
+        if ( UpdatedBlockMapPtr[i] > CANDIDATE_BLOCK )
+            continue;
+
+        // Is one of the immediate neighbours updated in the main map.
+        if ( BarRowHasCodedBlock( UpdatedBlockMapPtr, i, RowLen, 0 ) ||
+             ( HasRowAbove &&
+               BarRowHasCodedBlock( UpdatedBlockMapPtr - RowLen, i, RowLen, 1 ) ) ||
+             ( HasRowBelow &&
+               BarRowHasCodedBlock( UpdatedBlockMapPtr + RowLen, i, RowLen, 1 ) ) )
+        {
+            BarBlockMapPtr[i] = BLOCK_CODED_BAR;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : BarCopyBack
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Supplies PlaneHFragments (entries per row).
+ * INT8 * BarBlockMapPtr
+ * One row of the bar block map.
+ *
+ * OUTPUTS : INT8 * UpdatedBlockMapPtr
+ * The matching row of the main block map.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Merges one row of the bar block map into the main block
+ * map. Only entries greater than BLOCK_NOT_CODED are copied,
+ * all other main map entries are left as they are.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void BarCopyBack( PP_INSTANCE *ppi,
+                  INT8 * UpdatedBlockMapPtr,
+                  INT8 * BarBlockMapPtr )
+{
+    INT32 FragIndex = 0;
+
+    // Walk along the row one fragment at a time.
+    while ( FragIndex < ppi->PlaneHFragments )
+    {
+        INT8 BarState = BarBlockMapPtr[FragIndex];
+
+        // Only blocks flagged in the bar map overwrite the main map.
+        if ( BarState > BLOCK_NOT_CODED )
+            UpdatedBlockMapPtr[FragIndex] = BarState;
+
+        FragIndex ++;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : CreateOutputDisplayMap
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Supplies ScanFrameFragments, ScanYPlaneFragments and
+ * receives OutputBlocksUpdated and KFIndicator.
+ * INT8 * InternalFragmentsPtr
+ * Fragment list using internal format.
+ *
+ * OUTPUTS : ppi->OutputBlocksUpdated
+ * Number of fragments with a state > BLOCK_NOT_CODED.
+ * ppi->KFIndicator
+ * Key frame candidate score (see below).
+ * Each fragment is also passed to setBlockCoded() or
+ * setBlockUncoded() (defined outside this module).
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Creates a block update map in the format expected by the caller
+ * and calculates a key frame candidate indicator.
+ *
+ * SPECIAL NOTES : The indicator is left at 0 if the Y plane is too small for
+ * the scaling divisor ((ScanYPlaneFragments*3)/4) to be non zero.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CreateOutputDisplayMap
+(
+    PP_INSTANCE *ppi,
+    INT8 *InternalFragmentsPtr
+)
+{
+    UINT32 i;
+    UINT32 YBand = (ppi->ScanYPlaneFragments/8); // 1/8th of Y image.
+
+//#define DISPLAY_STATS
+#ifdef DISPLAY_STATS
+#include <stdio.h>
+    {
+
+        FILE * StatsFilePtr;
+        StatsFilePtr = fopen( "c:\\display_stats.stt", "a" );
+        if ( StatsFilePtr )
+        {
+            int i;
+            for(i=0;i<ppi->ScanYPlaneFragments;i++)
+            {
+                if(i%ppi->ScanHFragments == 0 )
+                    fprintf( StatsFilePtr , "\n");
+
+                fprintf( StatsFilePtr, "%2d",
+                    InternalFragmentsPtr[i]);
+            }
+            fprintf( StatsFilePtr , "\n");
+            fclose( StatsFilePtr );
+
+        }
+    }
+#endif
+
+    // Count the updated fragments and flag each fragment for the caller.
+    ppi->OutputBlocksUpdated = 0;
+    for ( i = 0; i < ppi->ScanFrameFragments; i++ )
+    {
+        if ( InternalFragmentsPtr[i] > BLOCK_NOT_CODED )
+        {
+            ppi->OutputBlocksUpdated ++;
+            // No trailing ';' here, as in the original: the macro presumably
+            // expands to a complete statement.
+            setBlockCoded(i)
+        }
+        else
+        {
+            setBlockUncoded(i);
+        }
+    }
+
+    // Now calculate a key frame candidate indicator.
+    // This is based upon Y data only and ignores the top and bottom 1/8 of the image.
+    // Also ignore history blocks and BAR blocks.
+    ppi->KFIndicator = 0;
+    for ( i = YBand; i < (ppi->ScanYPlaneFragments - YBand); i++ )
+    {
+        if ( InternalFragmentsPtr[i] > BLOCK_CODED_BAR )
+        {
+            ppi->KFIndicator ++;
+        }
+    }
+
+    // Convert the KF score to a range 0-100.
+    // The divisor is 3/4 of the Y plane (the part scanned above); it is 0 for
+    // a plane of fewer than 2 fragments so guard against dividing by zero.
+    if ( ((ppi->ScanYPlaneFragments*3)/4) != 0 )
+    {
+        ppi->KFIndicator = ((ppi->KFIndicator*100)/((ppi->ScanYPlaneFragments*3)/4));
+    }
+    else
+    {
+        ppi->KFIndicator = 0;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c b/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c
new file mode 100644
index 00000000..29efc90e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c
@@ -0,0 +1,96 @@
+/****************************************************************************
+ *
+ * Module Title : clamp.c
+ *
+ * Description : Clamps Y plane pixel levels to a black / white range
+ *
+ * AUTHOR : Jim Bankoski
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.09 YWX 26-Sep-01 Changed the default bandHeight from 5 to 4
+ * 1.08 YWX 23-Jul-00 Changed horizontal scaling function names
+ * 1.07 JBB 04 Dec 00 Added new Center vs Scale Bits
+ * 1.06 YWX 01-Dec-00 Removed bi-cubic scale functions
+ * 1.05 YWX 18-Oct-00 Added 1-2 scale functions
+ * 1.04 YWX 11-Oct-00 Added ratio check to determine scaling or centering
+ * 1.03 YWX 09-Oct-00 Added functions that do differen scaling in horizontal
+ * and vertical directions
+ * 1.02 YWX 04-Oct-00 Added 3-5 scaling functions
+ * 1.01 YWX 03-Oct-00 Added a set of 4-5 scaling functions
+ * 1.00 JBB 15 Sep 00 New Configuration baseline.
+ *
+ *****************************************************************************
+ */
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#include "postp.h"
+#include <stdio.h>
+
+/****************************************************************************
+ * Imported
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *
+ * ROUTINE : ClampLevels_C
+ *
+ * INPUTS : POSTPROC_INSTANCE * pbi
+ * Supplies HFragments, VFragments, YStride and
+ * ReconYDataOffset.
+ * INT32 BlackClamp Levels below this are raised to it.
+ * INT32 WhiteClamp Levels above this are lowered to it.
+ * UINT8 * Src, UINT8 * Dst Frame buffers (see notes).
+ *
+ * OUTPUTS : Clamped Y plane pixels, written through Src.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Builds a 256 entry lookup table mapping every 8 bit level
+ * to its clamped value and applies it to the Y plane.
+ *
+ * SPECIAL NOTES : NOTE(review): pixels are read from Dst and written to Src,
+ * which looks reversed given the parameter names - confirm
+ * against the callers before changing.
+ * NOTE(review): WhiteClamp is compared as an absolute level
+ * although its parameter comment describes a count from 255.
+ * NOTE(review): the loop visits VFragments*8 rows while
+ * stepping two strides per row - confirm the intended height.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClampLevels_C(
+    POSTPROC_INSTANCE *pbi,
+    INT32 BlackClamp, // number of values to clamp from 0
+    INT32 WhiteClamp, // number of values to clamp from 255
+    UINT8 *Src, // reconstruction buffer : passed in
+    UINT8 *Dst // postprocessing buffer : passed in
+    )
+{
+
+    // One entry for every possible 8 bit pixel value (0..255 inclusive).
+    unsigned char clamped[256];
+    int width = pbi->HFragments*8;
+    int height = pbi->VFragments*8; // Y plane will be done in two passes
+    UINT8 *SrcPtr = Src + pbi->ReconYDataOffset;
+    UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
+    UINT32 LineLength = pbi->YStride * 2; // pitch is doubled for interlacing
+
+    int i;
+    int row;
+    int col;
+
+    // set up clamping table so we can avoid ifs while clamping
+    for(i=0;i<256;i++)
+    {
+        // Levels inside the permitted range pass through unchanged.
+        clamped[i] = (unsigned char)i;
+
+        if(i<BlackClamp)
+            clamped[i] = (unsigned char)BlackClamp;
+
+        if(i>WhiteClamp)
+            clamped[i] = (unsigned char)WhiteClamp;
+    }
+
+    // NOTE(review): Block is not declared in this function; presumably it is
+    // declared elsewhere - kept as in the original.
+    Block = 0;
+
+    // clamping is for y only!
+    for ( row = 0 ; row < height ; row ++)
+    {
+        for (col = 0; col < width ; col ++)
+        {
+            SrcPtr[col]=clamped[DestPtr[col]];
+        }
+        SrcPtr += LineLength;
+        DestPtr += LineLength;
+    }
+
+
+}
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c b/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c
new file mode 100644
index 00000000..d0c33145
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c
@@ -0,0 +1,2750 @@
+/****************************************************************************
+*
+* Module Title : SCAN_YUV
+*
+* Description : Content analysis and scoring functions for YUV 411. .
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.12 PGW 27 Apr 01 Changes to use last frame coded list passed in from codec.
+* 1.11 PGW 28 Feb 01 Removal of requirement for a seperate pre-processor output buffer.
+* 1.10 PGW 04 Oct 00 Bug fixes to SadPass2() and changes to how it is called.
+* Changes to ConsolidateDiffScanResults()
+* 1.09 PGW 29 Aug 00 Correction to defaults in SetVcapLevelOffset()
+* 1.08 JBB 03 Aug 00 Cleaned up a bit (memset full buffer)
+* Fixed Problem with Pak Filter wrapping over edges
+* 1.07 PGW 24 Jul 00 Added column scan funtion. Experiment with PAK off.
+* Tweaks to filter thresholds.
+* 1.06 PGW 10 Jul 00 Changes to RowDiffScan() to reduce number of conditionals.
+* 1.05 PGW 22/06/00 Filtering threshold tweaks.
+* 1.04 JBB 30/05/00 Removed hard coded size limits
+* 1.03 YX 13/04/00 Comment out some if() testings
+* 1.02 PGW 16/03/00 Changes to SetVcapLevelOffset() to provide
+* various pre-set filter levels.
+* 1.01 PGW 12/07/99 Changes to reduce uneccessary dependancies.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "preproc.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+// Lower bound applied to the step threshold that InitScanMapArrays() derives
+// from SRFGreyThresh when building NoiseScoreBoostTable[].
+#define MIN_STEP_THRESH 6
+
+
+// Score multiplier limits. InitScanMapArrays() clamps every entry of
+// AbsDiff_ScoreMultiplierTable[] to the range [SCORE_MULT_LOW, SCORE_MULT_HIGH].
+#define SCORE_MULT_LOW 0.5
+#define SCORE_MULT_MEDIUM 2.0
+#define SCORE_MULT_HIGH 4
+
+/****************************************************************************
+* Explicit Imports
+*****************************************************************************
+*/
+
+
+// Called by RowSadScan() and ColSadScan() after their SAD loops whenever
+// ppi->MmxEnabled is set.
+extern void ClearMmxState(PP_INSTANCE *ppi);
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+// Score table consulted by InitScanMapArrays() (indexed there with
+// MaxLineSearchLen-1) to shorten the maximum line search length when the
+// primary block threshold is low.
+UINT32 LineLengthScores[ MAX_SEARCH_LINE_LEN + 1 ] = { 0, 0, 0, 0, 2, 4, 12, 24 };
+UINT32 BodyNeighbourScore = 8;
+// Scale factor applied to an absolute pixel difference to obtain its score
+// multiplier (see AbsDiff_ScoreMultiplierTable[] in InitScanMapArrays()).
+double DiffDevisor = 0.0625; // 1/16
+// Reset to 16 and then capped at PrimaryBlockThreshold+1 on every call to
+// InitScanMapArrays(). NOTE(review): this is a shared global that is adjusted
+// from per-instance settings.
+UINT8 LineSearchTripTresh = 16;
+double LowVarianceThresh = 200.0;
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+// Fast SAD based pre-scans of one row of fragments. Both return TRUE when the
+// row contains at least one candidate or coded fragment.
+BOOL RowSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 * DispFragPtr );
+BOOL ColSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 * DispFragPtr );
+
+// Per pixel-row difference scan (called from AnalysePlane() for each pixel row
+// of the current fragment row).
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2,
+ INT16 * YUVDiffPtr, UINT8 * bits_map_ptr,
+ INT8 * SgcPtr, INT8 * DispFragPtr,
+ UINT8 * FDiffPixels, INT32 * RowDiffsPtr,
+ UINT8 * ChLocalsPtr, BOOL EdgeRow );
+
+// Promotes candidate fragments that lie next to a coded fragment.
+void SadPass2( PP_INSTANCE *ppi, INT32 RowNumber, INT8 * DispFragPtr );
+
+void ConsolidateDiffScanResults( PP_INSTANCE *ppi, UINT8 * FDiffPixels, INT8 * SgcScores, INT8 * DispFragPtr1 );
+
+void RowChangedLocalsScan( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr, INT8 * DispFragPtr,
+ UINT8 RowType );
+
+
+void NoiseScoreRow( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr,
+ INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ INT32 * RowDiffsPtr );
+
+void PrimaryEdgeScoreRow( PP_INSTANCE *ppi,
+ UINT8 * ChangedLocalsPtr, INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ UINT8 RowType );
+
+void LineSearchScoreRow( PP_INSTANCE *ppi,
+ UINT8 * ChangedLocalsPtr, INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ INT32 RowNumber );
+
+UINT8 LineSearchScorePixel( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber );
+void PixelLineSearch( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber, UINT8 direction, UINT32 * line_length );
+double GetLocalVarianceMultiplier( PP_INSTANCE *ppi, INT16 * YUVDiffPtr, UINT32 PlaneLineLength );
+
+//void RowCopy( PP_INSTANCE *ppi, UINT32 BlockMapIndex );
+UINT8 ApplyPakLowPass( PP_INSTANCE *ppi, UINT8 * SrcPtr );
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : InitScanMapArrays
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Initialise the display and score maps
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void InitScanMapArrays(PP_INSTANCE *ppi)
+{
+    INT32 Level;
+    UINT8 BoostStep;
+
+    // Zero the per-fragment accumulators and the per-row changed pixel counts
+    // ready for a new frame.
+    memset( ppi->FragScores, 0, ppi->ScanFrameFragments * sizeof(UINT32) );
+    memset( ppi->SameGreyDirPixels, 0, ppi->ScanFrameFragments );
+    memset( ppi->FragDiffPixels, 0, ppi->ScanFrameFragments );
+    memset( (void *)ppi->RowChangedPixels, 0, 3* ppi->ScanConfig.VideoFrameHeight * sizeof(INT32) );
+
+    // Every fragment starts the frame marked as not coded.
+    memset( ppi->ScanDisplayFragments, BLOCK_NOT_CODED, ppi->ScanFrameFragments );
+
+    // Width of the bands used for ppi->NoiseScoreBoostTable[]: half the grey
+    // SRF threshold, but never less than MIN_STEP_THRESH.
+    BoostStep = (UINT8)(ppi->SRFGreyThresh >> 1);
+    BoostStep = (UINT8)( (BoostStep < MIN_STEP_THRESH) ? MIN_STEP_THRESH : BoostStep );
+    ppi->SrfThresh = (int)ppi->SRFGreyThresh;
+
+    // Build the two tables that are indexed by the absolute value of a pixel
+    // change.
+    for ( Level = 0; Level < 256; Level++ )
+    {
+        // Score multiplier: scaled difference limited to
+        // [SCORE_MULT_LOW, SCORE_MULT_HIGH].
+        ppi->AbsDiff_ScoreMultiplierTable[Level] = (double)Level * DiffDevisor;
+        if ( ppi->AbsDiff_ScoreMultiplierTable[Level] < SCORE_MULT_LOW )
+        {
+            ppi->AbsDiff_ScoreMultiplierTable[Level] = SCORE_MULT_LOW;
+        }
+        else if ( ppi->AbsDiff_ScoreMultiplierTable[Level] > SCORE_MULT_HIGH )
+        {
+            ppi->AbsDiff_ScoreMultiplierTable[Level] = SCORE_MULT_HIGH;
+        }
+
+        // Noise score boost: 0..3 depending on how far the change lies above
+        // the SRF threshold (band edges at 1, 4 and 6 steps). Used to relax
+        // the changed locals rules in NoiseScoreRow() for large changes.
+        if ( Level >= (ppi->SrfThresh + (BoostStep * 6)) )
+        {
+            ppi->NoiseScoreBoostTable[Level] = 3;
+        }
+        else if ( Level >= (ppi->SrfThresh + (BoostStep * 4)) )
+        {
+            ppi->NoiseScoreBoostTable[Level] = 2;
+        }
+        else if ( Level >= (ppi->SrfThresh + BoostStep) )
+        {
+            ppi->NoiseScoreBoostTable[Level] = 1;
+        }
+        else
+        {
+            ppi->NoiseScoreBoostTable[Level] = 0;
+        }
+    }
+
+    // Line search trip threshold: 16, or one above the primary block
+    // threshold when that threshold is smaller than 16.
+    LineSearchTripTresh = 16;
+    if ( LineSearchTripTresh > ppi->PrimaryBlockThreshold )
+    {
+        LineSearchTripTresh = (UINT8)(ppi->PrimaryBlockThreshold + 1);
+    }
+
+    // Shorten the maximum line search length while its score exceeds the
+    // primary block threshold.
+    ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
+    while ( (ppi->MaxLineSearchLen > 0) &&
+            (LineLengthScores[ppi->MaxLineSearchLen-1] > ppi->PrimaryBlockThreshold) )
+    {
+        ppi->MaxLineSearchLen -= 1;
+    }
+
+    // Point the table pointers at element 255 of each threshold table so that
+    // they can be indexed directly with signed differences.
+    ppi->SrfThreshTablePtr = ppi->SrfThreshTable + 255;
+    ppi->SgcThreshTablePtr = ppi->SgcThreshTable + 255;
+    ppi->SrfPakThreshTablePtr = ppi->SrfPakThreshTable + 255;
+
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : AnalysePlane
+ *
+ * INPUTS : PlanePtr0/1 Pointers to the first pixel in the plane
+ * for source and reference images
+ * FragArrayOffset Start offset in fragment arrays.
+ * PWidth Width of an image plane in pixels.
+ * PHeight Height of image plane in pixels
+ * PStride Plane stride (the number to be added to
+ * a pixel index to get to the corresponding
+ * pixel in the next line (can be different
+ * from PWidth))
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Analyses and filters the image plane defined by the inputs.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void AnalysePlane( PP_INSTANCE *ppi, UINT8 * PlanePtr0, UINT8 * PlanePtr1, UINT32 FragArrayOffset, UINT32 PWidth, UINT32 PHeight, UINT32 PStride )
+{
+ // Overview: the plane is processed one row of fragments at a time by a
+ // staggered pipeline. On iteration i of the main loop:
+ //   row i   : RowSadScan / ColSadScan, then RowDiffScan per pixel row
+ //   row i-1 : SadPass2, ConsolidateDiffScanResults, then
+ //             RowChangedLocalsScan / NoiseScoreRow per pixel row
+ //   row i-2 : PrimaryEdgeScoreRow / LineSearchScoreRow per pixel row
+ //   row i-3 : RowBarEnhBlockMap
+ //   row i-4 : BarCopyBack
+ // The loop therefore runs for PlaneVFragments + 4 iterations so that the
+ // trailing stages can drain.
+ UINT8 * RawPlanePtr0;
+ UINT8 * RawPlanePtr1;
+
+ INT16 * YUVDiffsPtr;
+ INT16 * YUVDiffsPtr1;
+ INT16 * YUVDiffsPtr2;
+
+ UINT32 FragIndex;
+ UINT32 ScoreFragIndex1;
+ UINT32 ScoreFragIndex2;
+ UINT32 ScoreFragIndex3;
+ UINT32 ScoreFragIndex4;
+
+ BOOL UpdatedOrCandidateBlocks = FALSE;
+
+ UINT8 * ChLocalsPtr0;
+ UINT8 * ChLocalsPtr1;
+ UINT8 * ChLocalsPtr2;
+
+ UINT8 * PixelsChangedPtr0;
+ UINT8 * PixelsChangedPtr1;
+
+ UINT8 * PixelScoresPtr1;
+ UINT8 * PixelScoresPtr2;
+// UINT8 * PixelScoresPtr4;
+
+ INT8 * DispFragPtr0;
+ INT8 * DispFragPtr1;
+ INT8 * DispFragPtr2;
+
+ UINT32 * FragScoresPtr1;
+ UINT32 * FragScoresPtr2;
+
+ INT32 * RowDiffsPtr;
+ INT32 * RowDiffsPtr1;
+ INT32 * RowDiffsPtr2;
+
+ INT32 i,j;
+
+ INT32 RowNumber1;
+ INT32 RowNumber2;
+ INT32 RowNumber3;
+ INT32 RowNumber4;
+
+ BOOL EdgeRow;
+ INT32 LineSearchRowNumber = 0;
+
+ // Variables used as temporary stores for frequently used values.
+ INT32 Row0Mod3;
+ INT32 Row1Mod3;
+ INT32 Row2Mod3;
+ INT32 BlockRowPixels;
+
+
+ /* Set pixel difference threshold */
+ // A fragment array offset of 0 is treated as the luminance plane; any
+ // other offset is treated as chrominance.
+ if ( FragArrayOffset == 0 )
+ {
+ /* Luminance */
+ ppi->LevelThresh = (int)ppi->SgcLevelThresh;
+ ppi->NegLevelThresh = -ppi->LevelThresh;
+
+ ppi->SrfThresh = (int)ppi->SRFGreyThresh;
+ ppi->NegSrfThresh = -ppi->SrfThresh;
+
+ // Scores correction for Y pixels.
+ ppi->YUVPlaneCorrectionFactor = 1.0;
+
+ ppi->BlockThreshold = ppi->PrimaryBlockThreshold;
+ ppi->BlockSgcThresh = ppi->SgcThresh;
+ }
+ else
+ {
+ /* Chrominance */
+ ppi->LevelThresh = (int)ppi->SuvcLevelThresh;
+ ppi->NegLevelThresh = -ppi->LevelThresh;
+
+ ppi->SrfThresh = (int)ppi->SRFColThresh;
+ ppi->NegSrfThresh = -ppi->SrfThresh;
+
+ // Scores correction for UV pixels.
+ ppi->YUVPlaneCorrectionFactor = 1.5;
+
+ // Block thresholds different for subsampled U and V blocks
+ ppi->BlockThreshold = (UINT32)(ppi->PrimaryBlockThreshold / ppi->UVBlockThreshCorrection);
+ ppi->BlockSgcThresh = (UINT32)(ppi->SgcThresh / ppi->UVSgcCorrection);
+ }
+
+ // Initialise the SRF thresh table and pointer.
+ // Entries are 0 for differences within [-SrfThresh, SrfThresh] and 1
+ // otherwise. The table pointers are indexed with signed differences.
+ memset( ppi->SrfThreshTable, 1, 512 );
+ for ( i = ppi->NegSrfThresh; i <= ppi->SrfThresh; i++ )
+ {
+ ppi->SrfThreshTablePtr[i] = 0;
+ }
+
+ // Initialise the PAK thresh table.
+ // NOTE(review): ppi->HighChange and ppi->NegHighChange are read here but are
+ // only assigned for this plane further down ("Set high change thresh"), so
+ // this table is built from whatever values they held on entry - confirm
+ // whether that ordering is intended.
+ for ( i = -255; i <= 255; i++ )
+ {
+ if ( ppi->SrfThreshTablePtr[i] && (i <= ppi->HighChange) && (i >= ppi->NegHighChange) )
+ ppi->SrfPakThreshTablePtr[i] = 1;
+ else
+ ppi->SrfPakThreshTablePtr[i] = 0;
+ }
+
+ // Initialise the SGc lookup table
+ // -1 at or below NegLevelThresh, +1 at or above LevelThresh, else 0.
+ for ( i = -255; i <= 255; i++ )
+ {
+ if ( i <= ppi->NegLevelThresh )
+ ppi->SgcThreshTablePtr[i] = -1;
+ else if ( i >= ppi->LevelThresh )
+ ppi->SgcThreshTablePtr[i] = 1;
+ else
+ ppi->SgcThreshTablePtr[i] = 0;
+ }
+
+ // Set up plane dimension variables
+ ppi->PlaneHFragments = PWidth / ppi->HFragPixels;
+ ppi->PlaneVFragments = PHeight / ppi->VFragPixels;
+ ppi->PlaneWidth = PWidth;
+ ppi->PlaneHeight = PHeight;
+ ppi->PlaneStride = PStride;
+
+ // Set up local pointers into the raw image data.
+ RawPlanePtr0 = (UINT8 *)PlanePtr0;
+ RawPlanePtr1 = (UINT8 *)PlanePtr1;
+
+ // Note size and end points for circular buffers.
+ ppi->YuvDiffsCircularBufferSize = YDIFF_CB_ROWS * ppi->PlaneWidth;
+ ppi->ChLocalsCircularBufferSize = CHLOCALS_CB_ROWS * ppi->PlaneWidth;
+ ppi->PixelMapCircularBufferSize = PMAP_CB_ROWS * ppi->PlaneWidth;
+
+ // Set high change thresh where PAK not needed;
+ ppi->HighChange = ppi->SrfThresh * 4;
+ ppi->NegHighChange = -ppi->HighChange;
+
+ // Set up row difference pointers.
+ // All three stages start at the first entry; each advances by one entry
+ // per pixel row it processes.
+ RowDiffsPtr = ppi->RowChangedPixels;
+ RowDiffsPtr1 = ppi->RowChangedPixels;
+ RowDiffsPtr2 = ppi->RowChangedPixels;
+
+ // Number of entries in one fragment row of the internal work buffers.
+ BlockRowPixels = ppi->PlaneWidth * ppi->VFragPixels;
+
+ for ( i = 0; i < (ppi->PlaneVFragments + 4); i++ )
+ {
+ // Fragment rows handled by the trailing pipeline stages this iteration.
+ RowNumber1 = (i - 1);
+ RowNumber2 = (i - 2);
+ RowNumber3 = (i - 3);
+ RowNumber4 = (i - 4);
+
+ // Pre calculate some frequently used values
+ // (slot numbers within the three fragment-row deep work buffers).
+ Row0Mod3 = i % 3;
+ Row1Mod3 = RowNumber1 % 3;
+ Row2Mod3 = RowNumber2 % 3;
+
+ // The row diff scan only applies while i is a real fragment row; the
+ // extra trailing iterations exist to drain the later stages.
+ if ( i < ppi->PlaneVFragments )
+ {
+ FragIndex = (i * ppi->PlaneHFragments) + FragArrayOffset;
+ YUVDiffsPtr = &ppi->yuv_differences[Row0Mod3 * BlockRowPixels];
+
+ PixelsChangedPtr0 = (UINT8 *)(&ppi->PixelChangedMap[Row0Mod3 * BlockRowPixels]);
+ DispFragPtr0 = &ppi->ScanDisplayFragments[FragIndex];
+
+ ChLocalsPtr0 = (UINT8 *)(&ppi->ChLocals[Row0Mod3 * BlockRowPixels]);
+
+ }
+
+ // Set up the changed locals pointer to trail behind by one row of fragments.
+ if ( i > 0 )
+ {
+ // For last iteration the ch locals and noise scans are invalid
+ if ( RowNumber1 < ppi->PlaneVFragments )
+ {
+ ScoreFragIndex1 = (RowNumber1 * ppi->PlaneHFragments) + FragArrayOffset;
+
+ ChLocalsPtr1 = (UINT8 *)(&ppi->ChLocals[Row1Mod3 * BlockRowPixels]);
+ PixelsChangedPtr1 = (UINT8 *)(&ppi->PixelChangedMap[(Row1Mod3) * BlockRowPixels]);
+
+ // The pixel scores buffer is four fragment rows deep.
+ PixelScoresPtr1 = &ppi->PixelScores[(RowNumber1 % 4) * BlockRowPixels];
+
+ YUVDiffsPtr1 = &ppi->yuv_differences[Row1Mod3 * BlockRowPixels];
+ FragScoresPtr1 = &ppi->FragScores[ScoreFragIndex1];
+ DispFragPtr1 = &ppi->ScanDisplayFragments[ScoreFragIndex1];
+
+ }
+
+ if ( RowNumber2 >= 0 )
+ {
+ ScoreFragIndex2 = (RowNumber2 * ppi->PlaneHFragments) + FragArrayOffset;
+ ChLocalsPtr2 = (UINT8 *)(&ppi->ChLocals[Row2Mod3 * BlockRowPixels]);
+ YUVDiffsPtr2 = &ppi->yuv_differences[Row2Mod3 * BlockRowPixels];
+
+ PixelScoresPtr2 = &ppi->PixelScores[(RowNumber2 % 4) * BlockRowPixels];
+
+ FragScoresPtr2 = &ppi->FragScores[ScoreFragIndex2];
+ DispFragPtr2 = &ppi->ScanDisplayFragments[ScoreFragIndex2];
+ }
+ else
+ {
+ // A NULL pointer disables the corresponding stage in the pixel row loop.
+ ChLocalsPtr2 = NULL;
+ }
+ }
+ else
+ {
+ ChLocalsPtr1 = NULL;
+ ChLocalsPtr2 = NULL;
+ }
+
+ // Fast break out test for obvious yes and no cases in this row of blocks
+ if ( i < ppi->PlaneVFragments )
+ {
+ UpdatedOrCandidateBlocks = RowSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
+ if( ColSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 ) )
+ UpdatedOrCandidateBlocks = TRUE;
+
+// SadPass2( ppi, i, DispFragPtr0 );
+ }
+ else // ????? Not needed now as we always do RowSadScan etc.
+ {
+ // Make sure we still call other functions if RowSadScan() etc. disabled
+ UpdatedOrCandidateBlocks = TRUE;
+ }
+
+ // Consolidation and fast break out tests at Row 1 level
+ if ( (i > 0) && (RowNumber1 < ppi->PlaneVFragments) )
+ {
+ // Mark as coded any candidate block that lies adjacent to a coded block.
+ SadPass2( ppi, RowNumber1, DispFragPtr1 );
+
+ // Check results of diff scan in last set of blocks.
+ // Eliminate NO cases and add in +SGC cases
+ ConsolidateDiffScanResults( ppi, &ppi->FragDiffPixels[ScoreFragIndex1], &ppi->SameGreyDirPixels[ScoreFragIndex1], DispFragPtr1 );
+ }
+
+ // Walk the pixel rows of a fragment row, advancing each active stage by
+ // one pixel row per pass.
+ for ( j = 0; j < ppi->VFragPixels; j++ )
+ {
+ // Only applies while i is a real fragment row
+ if ( i < ppi->PlaneVFragments )
+ {
+ /* Is the current fragment at an edge. */
+ // True only for the very first and very last pixel rows of the plane.
+ EdgeRow = ( ( (i == 0) && (j == 0) ) ||
+ ( (i == (ppi->PlaneVFragments - 1)) && (j == (ppi->VFragPixels - 1)) ) );
+
+ // Clear the arrays that will be used for the changed pixels maps
+ memset( PixelsChangedPtr0, 0, ppi->PlaneWidth );
+
+ // Difference scan and map each row
+ if ( UpdatedOrCandidateBlocks )
+ {
+ // Scan the row for interesting differences
+ // Also clear the array that will be used for changed locals map
+ RowDiffScan( ppi, RawPlanePtr0, RawPlanePtr1,
+ YUVDiffsPtr, PixelsChangedPtr0,
+ &ppi->SameGreyDirPixels[FragIndex],
+ DispFragPtr0, &ppi->FragDiffPixels[FragIndex],
+ RowDiffsPtr, ChLocalsPtr0, EdgeRow);
+ }
+ else
+ {
+ // Clear the array that will be used for changed locals map
+ memset( ChLocalsPtr0, 0, ppi->PlaneWidth );
+ }
+
+ // The actual image plane pointers must be incremented by stride as this may be
+ // different (more) than the plane width. Our own internal buffers use ppi->PlaneWidth.
+ RawPlanePtr0 += ppi->PlaneStride;
+ RawPlanePtr1 += ppi->PlaneStride;
+ PixelsChangedPtr0 += ppi->PlaneWidth;
+ ChLocalsPtr0 += ppi->PlaneWidth;
+ YUVDiffsPtr += ppi->PlaneWidth;
+ RowDiffsPtr++;
+ }
+
+ // Run behind calculating the changed locals data and noise scores.
+ if ( ChLocalsPtr1 != NULL )
+ {
+ // Last few iterations do not apply
+ if ( RowNumber1 < ppi->PlaneVFragments )
+ {
+ // Blank the next row in the pixel scores data structure.
+ memset( PixelScoresPtr1, 0, ppi->PlaneWidth );
+
+ // Don't bother doing anything if there are no changed pixels in this row
+ if ( *RowDiffsPtr1 )
+ {
+ // Last valid row is a special case
+ // (when i has run past the plane, row i-1 is the final fragment row,
+ // so its last pixel row is flagged LAST_ROW).
+ if ( i < ppi->PlaneVFragments )
+ RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, DispFragPtr1, (UINT8)( (((i-1)==0) && (j==0)) ? FIRST_ROW : NOT_EDGE_ROW) );
+ else
+ RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, DispFragPtr1, (UINT8)((j==(ppi->VFragPixels-1)) ? LAST_ROW : NOT_EDGE_ROW) );
+
+ NoiseScoreRow( ppi, PixelsChangedPtr1, ChLocalsPtr1, YUVDiffsPtr1,
+ PixelScoresPtr1, FragScoresPtr1, DispFragPtr1, RowDiffsPtr1 );
+ }
+
+ ChLocalsPtr1 += ppi->PlaneWidth;
+ PixelsChangedPtr1 += ppi->PlaneWidth;
+ YUVDiffsPtr1 += ppi->PlaneWidth;
+ PixelScoresPtr1 += ppi->PlaneWidth;
+ RowDiffsPtr1 ++;
+ }
+
+ // Run edge enhancement algorithms
+ if ( RowNumber2 < ppi->PlaneVFragments )
+ {
+ if ( ChLocalsPtr2 != NULL )
+ {
+ // Don't bother doing anything if there are no changed pixels in this row
+ if ( *RowDiffsPtr2 )
+ {
+ // Same first / last row flagging as above, one fragment row later.
+ if ( RowNumber1 < ppi->PlaneVFragments )
+ {
+ PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+ PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+ (UINT8)( (((i-2)==0) && (j==0)) ? FIRST_ROW : NOT_EDGE_ROW) );
+ }
+ else
+ {
+ // Edge enhancement
+ PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+ PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+ (UINT8)((j==(ppi->VFragPixels-1)) ? LAST_ROW : NOT_EDGE_ROW) );
+ }
+
+ // Recursive line search
+ LineSearchScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+ PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+ LineSearchRowNumber );
+ }
+
+ ChLocalsPtr2 += ppi->PlaneWidth;
+ YUVDiffsPtr2 += ppi->PlaneWidth;
+ PixelScoresPtr2 += ppi->PlaneWidth;
+ // Running pixel row number within the plane for the line search stage.
+ LineSearchRowNumber += 1;
+ RowDiffsPtr2 ++;
+ }
+ }
+ }
+ }
+
+ // BAR algorithm
+ // (ppi->BarBlockMap holds three fragment rows, indexed by row mod 3).
+ if ( (RowNumber3 >= 0) && (RowNumber3 < ppi->PlaneVFragments) )
+ {
+ ScoreFragIndex3 = (RowNumber3 * ppi->PlaneHFragments) + FragArrayOffset;
+ RowBarEnhBlockMap(ppi, &ppi->FragScores[ScoreFragIndex3],
+ &ppi->SameGreyDirPixels[ScoreFragIndex3],
+ &ppi->ScanDisplayFragments[ScoreFragIndex3],
+ &ppi->BarBlockMap[(RowNumber3 % 3) * ppi->PlaneHFragments],
+ RowNumber3 );
+ }
+
+ // BAR copy back and "ppi->SRF filtering" or "pixel copy back"
+ if ( (RowNumber4 >= 0) && (RowNumber4 < ppi->PlaneVFragments) )
+ {
+ // BAR copy back stage must lag by one more row to avoid BAR blocks
+ // being used in BAR decisions.
+ ScoreFragIndex4 = (RowNumber4 * ppi->PlaneHFragments) + FragArrayOffset;
+
+ BarCopyBack(ppi, &ppi->ScanDisplayFragments[ScoreFragIndex4],
+ &ppi->BarBlockMap[(RowNumber4 % 3) * ppi->PlaneHFragments]);
+
+/*
+ // "Apply ppi->SRF filtering to" or "copy back" pixels.
+ PixelScoresPtr4 = &ppi->PixelScores[(RowNumber4 % 4) * BlockRowPixels];
+*/
+ // Copy over the data from any blocks marked for update into the output buffer.
+ //RowCopy(ppi, ScoreFragIndex4);
+ }
+ }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : RowSadScan
+ *
+ * INPUTS : UINT8 * YuvPtr1, YuvPtr2
+ * Pointers into current and previous frame
+ *
+ * OUTPUTS : INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ * RETURNS : TRUE if row contains candidate or coded blocks else FALSE
+ *
+ * FUNCTION : Preliminary fast scan based upon local SAD scores of 4 pixel groups
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+BOOL RowSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 * DispFragPtr )
+{
+    // Scans every pixel row of one row of fragments. For each fragment that is
+    // not yet marked for update, ppi->RowSAD() is called on that fragment's
+    // pixels in the current pixel row and the result is compared against the
+    // low / high group SAD thresholds:
+    //   SAD >  high thresh : fragment becomes BLOCK_CODED
+    //   SAD >  low thresh  : fragment becomes CANDIDATE_BLOCK
+    //   otherwise          : fragment state is left unchanged
+    // Returns TRUE if any fragment in the row is (or becomes) a candidate or
+    // coded fragment, else FALSE.
+    INT32 i, j;
+    UINT32 GrpSad;
+    UINT32 LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh;
+    UINT32 LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh;
+    INT8 * LocalDispFragPtr;
+
+    // Byte pointers are used (rather than casting the pixel pointers to
+    // UINT32 *) so that no alignment is assumed for the image rows.
+    UINT8 * LocalYuvPtr1;
+    UINT8 * LocalYuvPtr2;
+
+    BOOL InterestingBlocksInRow = FALSE;
+
+    // For each row of pixels in the row of blocks
+    for ( j = 0; j < ppi->VFragPixels; j++ )
+    {
+        // Restart at the first fragment of the row.
+        LocalDispFragPtr = DispFragPtr;
+        LocalYuvPtr1 = YuvPtr1;
+        LocalYuvPtr2 = YuvPtr2;
+
+        // Scan along the row of pixels, one fragment width at a time.
+        for ( i = 0; i < ppi->PlaneHFragments; i++ )
+        {
+            if ( *LocalDispFragPtr <= BLOCK_NOT_CODED )
+            {
+                // Calculate the SAD score for this fragment's part of the row
+                GrpSad = ppi->RowSAD( LocalYuvPtr1, LocalYuvPtr2 );
+
+                // Now test the group SAD score
+                if ( GrpSad > LocalGrpLowSadThresh )
+                {
+                    // If SAD very high we must update else we have candidate block
+                    if ( GrpSad > LocalGrpHighSadThresh )
+                    {
+                        // Force update
+                        *LocalDispFragPtr = BLOCK_CODED;
+                    }
+                    else
+                    {
+                        // Possible Update required
+                        *LocalDispFragPtr = CANDIDATE_BLOCK;
+                    }
+                    InterestingBlocksInRow = TRUE;
+                }
+            }
+            /********** PGW 27/APR/2001 ***********/
+            else
+            {
+                // Fragment was already a candidate / coded on entry.
+                InterestingBlocksInRow = TRUE;
+            }
+
+            LocalDispFragPtr++;
+
+            // Step on to the next fragment. This was previously a fixed step
+            // of two UINT32 words (8 bytes); it now follows the fragment
+            // width in the same way as ColSadScan().
+            // NOTE(review): identical only while ppi->HFragPixels is 8.
+            LocalYuvPtr1 += ppi->HFragPixels;
+            LocalYuvPtr2 += ppi->HFragPixels;
+        }
+
+        // Increment the base data pointers to the start of the next line.
+        YuvPtr1 += ppi->PlaneStride;
+        YuvPtr2 += ppi->PlaneStride;
+    }
+
+    // This code is PC specific
+    if ( ppi->MmxEnabled )
+    {
+        ClearMmxState(ppi);
+    }
+
+    return InterestingBlocksInRow;
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ColSadScan
+ *
+ * INPUTS : UINT8 * YuvPtr1, YuvPtr2
+ * Pointers into current and previous frame
+ *
+ * OUTPUTS : INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ * RETURNS : TRUE if row contains candidate or coded blocks else FALSE
+ *
+ * FUNCTION : Preliminary fast scan based upon local SAD scores of 4 pixel groups
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+BOOL ColSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 * DispFragPtr )
+{
+    // Makes a single pass along one row of fragments. Every fragment not yet
+    // marked for update is scored with ppi->ColSAD() and classified against
+    // the low / high group SAD thresholds. Returns TRUE if the row holds any
+    // candidate or coded fragment when the scan finishes.
+    const UINT32 LowSadLimit = ppi->ModifiedGrpLowSadThresh;
+    const UINT32 HighSadLimit = ppi->ModifiedGrpHighSadThresh;
+    BOOL FoundInteresting = FALSE;
+    INT32 Frag;
+
+    for ( Frag = 0; Frag < ppi->PlaneHFragments; Frag++ )
+    {
+        if ( *DispFragPtr > BLOCK_NOT_CODED )
+        {
+            // Already marked for update: nothing to measure, but the row
+            // still counts as interesting.
+            /********** PGW 27/APR/2001 ***********/
+            FoundInteresting = TRUE;
+        }
+        else
+        {
+            // SAD score for this fragment's columns.
+            UINT32 ColumnSad = ppi->ColSAD( ppi, YuvPtr1, YuvPtr2 );
+
+            if ( ColumnSad > LowSadLimit )
+            {
+                // Very high SAD forces an update, otherwise the fragment is
+                // only a candidate.
+                if ( ColumnSad > HighSadLimit )
+                    *DispFragPtr = BLOCK_CODED;
+                else
+                    *DispFragPtr = CANDIDATE_BLOCK;
+
+                FoundInteresting = TRUE;
+            }
+        }
+
+        // Move on to the next fragment and its pixel data.
+        DispFragPtr++;
+        YuvPtr1 += ppi->HFragPixels;
+        YuvPtr2 += ppi->HFragPixels;
+    }
+
+    // This code is PC specific
+    if ( ppi->MmxEnabled )
+        ClearMmxState(ppi);
+
+    return FoundInteresting;
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SadPass2
+ *
+ * INPUTS : UINT32 RowNumber
+ * Fragment row number
+ * INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ * OUTPUTS : INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ * RETURNS :
+ *
+ * FUNCTION : This second pass should only be used when speed is critical.
+ * The function revisits the classification of CANDIDATE_BLOCKS
+ * if they are adjacent to one or more CODED_BLOCKS.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void SadPass2( PP_INSTANCE *ppi, INT32 RowNumber, INT8 * DispFragPtr )
+{
+    // Re-classifies the CANDIDATE_BLOCK entries of one row of the fragment
+    // update map: a candidate becomes BLOCK_CODED_LOW if any of its (up to
+    // eight) neighbouring fragments is BLOCK_CODED. All other entries are left
+    // unchanged.
+    //
+    // RowNumber    index of the fragment row within the current plane; used
+    //              to decide whether rows above / below exist.
+    // DispFragPtr  first fragment of that row in the update map. The rows
+    //              immediately before and after it are read when they exist.
+    //
+    // Results are built in ppi->TmpCodedMap and only copied back at the end,
+    // so every decision is based on the row's state on entry.
+    //
+    // Neighbours are bounds checked per fragment, so planes that are only one
+    // fragment wide or one fragment high no longer read outside the plane.
+    const INT32 RowLen = (INT32)ppi->PlaneHFragments;
+    const BOOL HasRowAbove = ( RowNumber > 0 );
+    const BOOL HasRowBelow = ( RowNumber < ((INT32)ppi->PlaneVFragments - 1) );
+    INT32 i;
+
+    // Nothing to do for an empty row.
+    if ( RowLen <= 0 )
+        return;
+
+    for ( i = 0; i < RowLen; i++ )
+    {
+        INT8 NewState = DispFragPtr[i];
+
+        if ( NewState == CANDIDATE_BLOCK )
+        {
+            // Horizontal extent of the neighbourhood, clipped to the row.
+            const INT32 FirstCol = ( i > 0 ) ? (i - 1) : i;
+            const INT32 LastCol = ( i < (RowLen - 1) ) ? (i + 1) : i;
+            BOOL CodedNeighbour = FALSE;
+            INT32 Col;
+
+            for ( Col = FirstCol; (Col <= LastCol) && !CodedNeighbour; Col++ )
+            {
+                // Same row (left / right neighbours only).
+                if ( (Col != i) && (DispFragPtr[Col] == BLOCK_CODED) )
+                    CodedNeighbour = TRUE;
+                // Row above.
+                else if ( HasRowAbove && (DispFragPtr[Col - RowLen] == BLOCK_CODED) )
+                    CodedNeighbour = TRUE;
+                // Row below.
+                else if ( HasRowBelow && (DispFragPtr[Col + RowLen] == BLOCK_CODED) )
+                    CodedNeighbour = TRUE;
+            }
+
+            if ( CodedNeighbour )
+                NewState = BLOCK_CODED_LOW;
+        }
+
+        ppi->TmpCodedMap[i] = NewState;
+    }
+
+    // Now copy back the modified Fragment data
+    memcpy( &DispFragPtr[0], &ppi->TmpCodedMap[0], (ppi->PlaneHFragments) );
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : RowDiffScan
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data. This routine reads PAKEnabled, PlaneWidth,
+ * HFragPixels, SgcThreshTablePtr, SrfThreshTablePtr and
+ * SrfPakThreshTablePtr.
+ * UINT8 * YuvPtr1, YuvPtr2
+ * Pointers into current and previous frame
+ * BOOL EdgeRow
+ * Is this row an edge row.
+ *
+ * OUTPUTS : INT16 * YUVDiffsPtr
+ * Difference map (YuvPtr1[j] - YuvPtr2[j] per pixel)
+ * UINT8 * bits_map_ptr
+ * Pixels changed map
+ * INT8 * SgcPtr
+ * Level change score (one entry per fragment, accumulated).
+ * INT8 * DispFragPtr
+ * Block update map.
+ * UINT8 * FDiffPixels
+ * Changed pixel count per fragment (accumulated).
+ * INT32 * RowDiffsPtr
+ * Total sig changes for row
+ * UINT8 * ChLocalsPtr
+ * Changed locals data structure
+ *
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Initial pixel differences scan.
+ * For every fragment in the row that is a CANDIDATE_BLOCK the
+ * raw pixel differences are stored, the level change score is
+ * accumulated and the pixels changed map is filled from the
+ * SRF threshold table. If PAK is enabled and the row is not an
+ * edge row, differences flagged by SrfPakThreshTablePtr are
+ * replaced by the difference of the ApplyPakLowPass() results
+ * before the SRF test.
+ * Fragments whose state is above BLOCK_NOT_CODED have all their
+ * pixels marked as changed. A fragment with 7 or more changed
+ * pixels on this row is marked BLOCK_CODED_LOW.
+ *
+ * SPECIAL NOTES : NOTE(review): the two UINT32 block writes per fragment and the
+ * constant 7 used for the last pixel / trip level look like they
+ * assume HFragPixels == 8 - confirm.
+ * NOTE(review): the PAK path handles a first, a run of middle and
+ * a last fragment unconditionally, so it looks like it expects
+ * at least two fragments per row - confirm against callers.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2,
+ INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr,
+ INT8 * SgcPtr, INT8 * DispFragPtr,
+ UINT8 * FDiffPixels, INT32 * RowDiffsPtr,
+ UINT8 * ChLocalsPtr, BOOL EdgeRow )
+{
+ INT32 i,j;
+ INT32 FragChangedPixels;
+
+ UINT32 ZeroData[2] = { 0,0 }; // Source data for clearing changed locals entries
+ UINT8 OneData[8] = { 1,1,1,1,1,1,1,1 }; // Source data for marking all pixels as changed
+ UINT8 ChlocalsDummyData[8] = { 8,8,8,8,8,8,8,8 }; // Changed locals value written for coded blocks
+
+ INT16 Diff; // Temp local workspace.
+
+ // Cannot use kernel if at edge or if PAK disabled
+ if ( (!ppi->PAKEnabled) || EdgeRow )
+ {
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if (*DispFragPtr == CANDIDATE_BLOCK)
+ {
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // Take a local copy of the measured difference.
+ Diff = ((INT16)YuvPtr1[j]) - ((INT16)YuvPtr2[j]);
+
+ // Store the actual difference value
+ YUVDiffsPtr[j] = Diff;
+
+ // Test against the Level thresholds and record the results
+ SgcPtr[0] += ppi->SgcThreshTablePtr[Diff];
+
+ // Test against the SRF thresholds
+ bits_map_ptr[j] = ppi->SrfThreshTablePtr[Diff];
+ FragChangedPixels += ppi->SrfThreshTablePtr[Diff];
+ }
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ YuvPtr1 += ppi->HFragPixels;
+ YuvPtr2 += ppi->HFragPixels;
+ bits_map_ptr += ppi->HFragPixels;
+ ChLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ SgcPtr ++;
+ FDiffPixels ++;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+ DispFragPtr++;
+ }
+ }
+ else
+ {
+
+ //*************************************************************
+ // First fragment of row !!
+
+ i = 0;
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if (*DispFragPtr == CANDIDATE_BLOCK)
+ {
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // Take a local copy of the measured difference.
+ Diff = ((INT16)YuvPtr1[j]) - ((INT16)YuvPtr2[j]);
+
+ // Store the actual difference value
+ YUVDiffsPtr[j] = Diff;
+
+ // Test against the Level thresholds and record the results
+ SgcPtr[0] += ppi->SgcThreshTablePtr[Diff];
+
+ // jbb added j > 0 check: the first pixel of the row never goes through ApplyPakLowPass
+ if (j>0 && ppi->SrfPakThreshTablePtr[Diff] )
+ Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
+
+
+ // Test against the SRF thresholds
+ bits_map_ptr[j] = ppi->SrfThreshTablePtr[Diff];
+ FragChangedPixels += ppi->SrfThreshTablePtr[Diff];
+ }
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ YuvPtr1 += ppi->HFragPixels;
+ YuvPtr2 += ppi->HFragPixels;
+ bits_map_ptr += ppi->HFragPixels;
+ ChLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ SgcPtr ++;
+ FDiffPixels ++;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+ DispFragPtr++;
+ //*************************************************************
+ // Fragment in between!!
+
+ for ( i = ppi->HFragPixels ; i < ppi->PlaneWidth-ppi->HFragPixels; i += ppi->HFragPixels )
+ {
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if (*DispFragPtr == CANDIDATE_BLOCK)
+ {
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // Take a local copy of the measured difference.
+ Diff = ((INT16)YuvPtr1[j]) - ((INT16)YuvPtr2[j]);
+
+ // Store the actual difference value
+ YUVDiffsPtr[j] = Diff;
+
+ // Test against the Level thresholds and record the results
+ SgcPtr[0] += ppi->SgcThreshTablePtr[Diff];
+
+ // Interior fragment: no first / last pixel guard on the low pass test
+ if (ppi->SrfPakThreshTablePtr[Diff] )
+ Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
+
+
+ // Test against the SRF thresholds
+ bits_map_ptr[j] = ppi->SrfThreshTablePtr[Diff];
+ FragChangedPixels += ppi->SrfThreshTablePtr[Diff];
+ }
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ YuvPtr1 += ppi->HFragPixels;
+ YuvPtr2 += ppi->HFragPixels;
+ bits_map_ptr += ppi->HFragPixels;
+ ChLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ SgcPtr ++;
+ FDiffPixels ++;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+ DispFragPtr++;
+ }
+ //*************************************************************
+
+ //*************************************************************
+ // Last fragment of row !!
+
+ // Reset count of pixels changed for the current fragment.
+ FragChangedPixels = 0;
+
+ // Test for break out conditions to save time.
+ if (*DispFragPtr == CANDIDATE_BLOCK)
+ {
+ // Clear down entries in changed locals array
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // Take a local copy of the measured difference.
+ Diff = ((INT16)YuvPtr1[j]) - ((INT16)YuvPtr2[j]);
+
+ // Store the actual difference value
+ YUVDiffsPtr[j] = Diff;
+
+ // Test against the Level thresholds and record the results
+ SgcPtr[0] += ppi->SgcThreshTablePtr[Diff];
+
+ // jbb added j < 7 check: pixel index 7 of the last fragment never goes through ApplyPakLowPass
+ if (j<7 && ppi->SrfPakThreshTablePtr[Diff] )
+ Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
+ (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
+
+
+ // Test against the SRF thresholds
+ bits_map_ptr[j] = ppi->SrfThreshTablePtr[Diff];
+ FragChangedPixels += ppi->SrfThreshTablePtr[Diff];
+ }
+ }
+ else
+ {
+ // For EBO coded blocks mark all pixels as changed.
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+
+ ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+ else
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+ }
+ }
+ // Add this fragment's changed pixel count to the row total and to the
+ // per fragment record. The data pointers are not stepped here because
+ // this is the final fragment handled for the row.
+ *RowDiffsPtr += FragChangedPixels;
+ *FDiffPixels += (UINT8)FragChangedPixels;
+
+ // If we have a lot of changed pixels for this fragment on this row then
+ // the fragment is almost sure to be picked (e.g. through the line search) so we
+ // can mark it as selected and then ignore it.
+ if (FragChangedPixels >= 7)
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+ DispFragPtr++;
+ //*************************************************************
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ConsolidateDiffScanResults
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data (PlaneHFragments and BlockSgcThresh are read)
+ * UINT8 * FDiffPixels
+ * Fragment changed pixels records
+ * INT8 * SgcScoresPtr
+ * Fragment SGC records
+ * INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, 1 = Yes)
+ *
+ * OUTPUTS : INT8 * DispFragPtr
+ * Fragment update map (-1 = ???, 0 = No, 1 = Yes)
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Considers new information from the difference scan and, if
+ * necessary, updates the output map. Only fragments that are
+ * still CANDIDATE_BLOCK are touched.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ConsolidateDiffScanResults( PP_INSTANCE *ppi, UINT8 * FDiffPixels, INT8 * SgcScoresPtr, INT8 * DispFragPtr )
+{
+    INT32 Frag = 0;
+
+    while ( Frag < ppi->PlaneHFragments )
+    {
+        INT8 * const StatePtr = &DispFragPtr[Frag];
+
+        // Definite YES and NO cases from the difference scan are left alone.
+        if ( *StatePtr == CANDIDATE_BLOCK )
+        {
+            if ( (UINT32)abs(SgcScoresPtr[Frag]) > ppi->BlockSgcThresh )
+            {
+                // The Sgc change on its own is enough to mark the block for update.
+                *StatePtr = BLOCK_CODED_SGC;
+            }
+            else if ( FDiffPixels[Frag] == 0 )
+            {
+                // No interesting pixels: drop out of the main tests. The block is
+                // not marked BLOCK_NOT_CODED because it will still be considered
+                // a candidate in RowBarEnhBlockMap().
+                *StatePtr = CANDIDATE_BLOCK_LOW;
+            }
+        }
+
+        Frag++;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : RowChangedLocalsScan
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data. PlaneWidth, HFragPixels, PixelChangedMap and
+ * PixelMapCircularBufferSize are read.
+ * UINT8 * PixelMapPtr.
+ * Pixels changed map entry for the first pixel of this row.
+ * UINT8 * ChLocalsPtr.
+ * Changed locals entry for the first pixel of this row.
+ * INT8 * DispFragPtr
+ * Fragment update map entry for the first fragment of this row.
+ * UINT8 RowType
+ * NOT_EDGE_ROW, FIRST_ROW or anything else (treated as last row).
+ *
+ * OUTPUTS : UINT8 * ChLocalsPtr.
+ * Changed locals counts are OR-ed into the entries of changed
+ * pixels; entries of fragments above BLOCK_NOT_CODED are set to 8.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Calculates changed locals for changed pixels.
+ * For each changed pixel in a CANDIDATE_BLOCK fragment the pixels
+ * changed flags of its neighbours (row above, same row, row below)
+ * are summed. Neighbours to the left of the first pixel, to the
+ * right of the last pixel, above the first row and below the last
+ * row are left out of the sum.
+ *
+ * SPECIAL NOTES : The rows above and below are located PlaneWidth entries either
+ * side of PixelMapPtr and wrapped by PixelMapCircularBufferSize
+ * when they fall outside the PixelChangedMap buffer.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void RowChangedLocalsScan( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr,
+ INT8 * DispFragPtr, UINT8 RowType )
+{
+ UINT8 ChlocalsDummyData[8] = { 8,8,8,8,8,8,8,8 };
+ UINT8 changed_locals = 0;
+ UINT8 Score = 0; // NOTE(review): not referenced anywhere else in this routine
+ UINT8 * PixelsChangedPtr0;
+ UINT8 * PixelsChangedPtr1;
+ UINT8 * PixelsChangedPtr2;
+ INT32 i, j;
+ INT32 LastRowIndex = ppi->PlaneWidth - 1; // Index of the last pixel in the row
+
+ // Set up the line based pointers into the bits changed map.
+ // Each pointer starts one entry to the left of the current pixel so that
+ // [0] is the left neighbour, [1] the same column and [2] the right neighbour.
+ PixelsChangedPtr0 = PixelMapPtr - ppi->PlaneWidth;
+ if ( PixelsChangedPtr0 < ppi->PixelChangedMap )
+ PixelsChangedPtr0 += ppi->PixelMapCircularBufferSize;
+ PixelsChangedPtr0 -= 1;
+
+ PixelsChangedPtr1 = PixelMapPtr - 1;
+
+ PixelsChangedPtr2 = PixelMapPtr + ppi->PlaneWidth;
+ if ( PixelsChangedPtr2 >= (ppi->PixelChangedMap + ppi->PixelMapCircularBufferSize) )
+ PixelsChangedPtr2 -= ppi->PixelMapCircularBufferSize;
+ PixelsChangedPtr2 -= 1;
+
+ if ( RowType == NOT_EDGE_ROW )
+ {
+ // Scan through the row of pixels and calculate changed locals.
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Skip a group of 8 pixels if the associated fragment has no pixels of interest or
+ // if EBO is enabled and a breakout condition is met.
+ if ( *DispFragPtr == CANDIDATE_BLOCK )
+ {
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ changed_locals = 0;
+
+ // If the pixel itself has changed
+ if ( PixelsChangedPtr1[1] )
+ {
+ if ( (i > 0) || (j > 0) ) // Not the first pixel of the row: left hand column exists
+ {
+ changed_locals += PixelsChangedPtr0[0];
+ changed_locals += PixelsChangedPtr1[0];
+ changed_locals += PixelsChangedPtr2[0];
+ }
+
+ changed_locals += PixelsChangedPtr0[1];
+ changed_locals += PixelsChangedPtr2[1];
+
+ if ( (i + j) < LastRowIndex ) // Not the last pixel of the row: right hand column exists
+ {
+ changed_locals += PixelsChangedPtr0[2];
+ changed_locals += PixelsChangedPtr1[2];
+ changed_locals += PixelsChangedPtr2[2];
+ }
+
+ // Store the number of changed locals (OR-ed into the existing entry)
+ *ChLocalsPtr |= changed_locals;
+ }
+
+ // Increment to next pixel in the row
+ ChLocalsPtr++;
+ PixelsChangedPtr0++;
+ PixelsChangedPtr1++;
+ PixelsChangedPtr2++;
+ }
+ }
+ else
+ {
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+
+ // Step pointers
+ ChLocalsPtr += ppi->HFragPixels;
+ PixelsChangedPtr0 += ppi->HFragPixels;
+ PixelsChangedPtr1 += ppi->HFragPixels;
+ PixelsChangedPtr2 += ppi->HFragPixels;
+ }
+
+ // Move on to next fragment.
+ DispFragPtr++;
+
+ }
+ }
+ else
+ {
+ // Scan through the row of pixels and calculate changed locals.
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Skip a group of 8 pixels if the associated fragment has no pixels of interest or
+ // if EBO is enabled and a breakout condition is met.
+ if ( *DispFragPtr == CANDIDATE_BLOCK )
+ {
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ changed_locals = 0;
+
+ // If the pixel itself has changed
+ if ( PixelsChangedPtr1[1] )
+ {
+ if ( RowType == FIRST_ROW ) // No row above: only the current and next rows are counted
+ {
+ if ( (i > 0) || (j > 0) )
+ {
+ changed_locals += PixelsChangedPtr1[0];
+ changed_locals += PixelsChangedPtr2[0];
+ }
+
+ changed_locals += PixelsChangedPtr2[1];
+
+ if ( (i + j) < LastRowIndex )
+ {
+ changed_locals += PixelsChangedPtr1[2];
+ changed_locals += PixelsChangedPtr2[2];
+ }
+ }
+ else // Last row: only the previous and current rows are counted
+ {
+ if ( (i > 0) || (j > 0 ) )
+ {
+ changed_locals += PixelsChangedPtr0[0];
+ changed_locals += PixelsChangedPtr1[0];
+ }
+
+ changed_locals += PixelsChangedPtr0[1];
+
+ if ( (i + j) < LastRowIndex )
+ {
+ changed_locals += PixelsChangedPtr0[2];
+ changed_locals += PixelsChangedPtr1[2];
+ }
+ }
+
+ // Store the number of changed locals (OR-ed into the existing entry)
+ *ChLocalsPtr |= changed_locals;
+ }
+
+ // Increment to next pixel in the row
+ ChLocalsPtr++;
+ PixelsChangedPtr0++;
+ PixelsChangedPtr1++;
+ PixelsChangedPtr2++;
+ }
+ }
+ else
+ {
+ if ( *DispFragPtr > BLOCK_NOT_CODED )
+ {
+ ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+ ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+ }
+
+ // Step pointers
+ ChLocalsPtr += ppi->HFragPixels;
+ PixelsChangedPtr0 += ppi->HFragPixels;
+ PixelsChangedPtr1 += ppi->HFragPixels;
+ PixelsChangedPtr2 += ppi->HFragPixels;
+ }
+
+ // Move on to next fragment.
+ DispFragPtr++;
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : NoiseScoreRow
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data (PlaneWidth, HFragPixels, NoiseScoreBoostTable,
+ * NoiseSupLevel, AbsDiff_ScoreMultiplierTable,
+ * YUVPlaneCorrectionFactor and BlockThreshold are read)
+ * UINT8 * PixelMapPtr.
+ * UINT8 * ChLocalsPtr.
+ * INT16 * YUVDiffsPtr,
+ * UINT8 * PixelNoiseScorePtr
+ * UINT32 * FragScorePtr
+ * INT8 * DispFragPtr
+ * INT32 * RowDiffsPtr
+ *
+ * OUTPUTS : PixelNoiseScorePtr entries of changed pixels, FragScorePtr
+ * totals, DispFragPtr states, and cleared PixelMapPtr flags with
+ * the matching reduction of *RowDiffsPtr.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Calculates the noise scores for a row of pixels.
+ * Only fragments that are CANDIDATE_BLOCK are scored.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void NoiseScoreRow( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr,
+ INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ INT32 * RowDiffsPtr )
+{
+    INT32 PixelX;
+    INT32 k;
+
+    // Walk the row one fragment at a time.
+    for ( PixelX = 0; PixelX < ppi->PlaneWidth; PixelX += ppi->HFragPixels )
+    {
+        // Fragments that are not candidates are stepped over untouched.
+        if ( *DispFragPtr == CANDIDATE_BLOCK )
+        {
+            UINT32 FragTotal = 0;
+
+            for ( k = 0; k < ppi->HFragPixels; k++ )
+            {
+                UINT8 Locals;
+                INT32 Magnitude;
+                INT32 PixScore;
+
+                // Unchanged pixels contribute nothing.
+                if ( !PixelMapPtr[k] )
+                    continue;
+
+                Magnitude = (INT32)( abs(YUVDiffsPtr[k]) );
+                Locals = ChLocalsPtr[k];
+
+                // Base score from the changed locals count and the size of the pixel's own change.
+                PixScore = (1 + ((INT32)(Locals + ppi->NoiseScoreBoostTable[Magnitude]) - ppi->NoiseSupLevel));
+
+                if ( PixScore <= 0 )
+                {
+                    // Negative values are clamped to zero.
+                    PixScore = 0;
+
+                    // With no changed locals the pixel is dropped from the changed map and
+                    // from the row count, to speed later stages.
+                    if ( Locals == 0 )
+                    {
+                        PixelMapPtr[k] = 0;
+                        *RowDiffsPtr -= 1;
+                    }
+                }
+                else
+                {
+                    // Positive scores are scaled by the level based multiplier, never below 1.
+                    PixScore = (INT32)( (double)PixScore * ppi->AbsDiff_ScoreMultiplierTable[Magnitude] );
+                    if ( PixScore < 1 )
+                        PixScore = 1;
+                }
+
+                PixelNoiseScorePtr[k] = (UINT8)PixScore;
+                FragTotal += (UINT32)PixScore;
+            }
+
+            // Fold the fragment total (with plane correction factor) into the main record.
+            *FragScorePtr += (INT32)(FragTotal * ppi->YUVPlaneCorrectionFactor);
+
+            // Past the trip threshold the block is marked for update.
+            if ( *FragScorePtr > ppi->BlockThreshold )
+                *DispFragPtr = BLOCK_CODED_LOW;
+        }
+
+        // Step every per fragment and per pixel pointer on to the next fragment.
+        YUVDiffsPtr += ppi->HFragPixels;
+        ChLocalsPtr += ppi->HFragPixels;
+        PixelMapPtr += ppi->HFragPixels;
+        PixelNoiseScorePtr += ppi->HFragPixels;
+        DispFragPtr++;
+        FragScorePtr++;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PrimaryEdgeScoreRow
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data. PlaneWidth, HFragPixels, ChLocals,
+ * ChLocalsCircularBufferSize, AbsDiff_ScoreMultiplierTable,
+ * YUVPlaneCorrectionFactor and BlockThreshold are read.
+ * UINT8 * ChangedLocalsPtr.
+ * Changed locals entry for the first pixel of this row.
+ * INT16 * YUVDiffsPtr,
+ * UINT8 * PixelNoiseScorePtr
+ * UINT32 * FragScorePtr
+ * INT8 * DispFragPtr,
+ * UINT8 RowType
+ * NOT_EDGE_ROW, LAST_ROW or anything else (treated as first row).
+ *
+ * OUTPUTS : PixelNoiseScorePtr and FragScorePtr entries are increased and
+ * DispFragPtr entries may be set to BLOCK_CODED_LOW.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Calculates the primary edge scores for a row of pixels.
+ * A pixel with 3-5 changed locals scores for each neighbour that
+ * has 7 or more changed locals itself. Neighbours that would fall
+ * off the left / right end of the row, or off the top / bottom of
+ * the plane on edge rows, are not examined.
+ *
+ * SPECIAL NOTES : The rows above and below are located PlaneWidth entries either
+ * side of ChangedLocalsPtr and wrapped by ChLocalsCircularBufferSize
+ * when they fall outside the ChLocals buffer.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PrimaryEdgeScoreRow( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ UINT8 RowType )
+{
+ UINT32 BodyNeighbours;
+ UINT32 AbsDiff;
+ UINT8 changed_locals = 0;
+ INT32 Score;
+ UINT32 FragScore;
+ UINT8 * CHLocalsPtr0;
+ UINT8 * CHLocalsPtr1;
+ UINT8 * CHLocalsPtr2;
+ INT32 i,j;
+ INT32 LastRowIndex = ppi->PlaneWidth - 1; // Index of the last pixel in the row
+
+ // Set up pointers into the current previous and next row of the changed locals data structure.
+ // Each pointer starts one entry to the left of the current pixel so that
+ // [0] is the left neighbour, [1] the same column and [2] the right neighbour.
+ CHLocalsPtr0 = ChangedLocalsPtr - ppi->PlaneWidth;
+ if ( CHLocalsPtr0 < ppi->ChLocals )
+ CHLocalsPtr0 += ppi->ChLocalsCircularBufferSize;
+ CHLocalsPtr0 -= 1;
+
+ CHLocalsPtr1 = ChangedLocalsPtr - 1;
+
+ CHLocalsPtr2 = ChangedLocalsPtr + ppi->PlaneWidth;
+ if ( CHLocalsPtr2 >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+ CHLocalsPtr2 -= ppi->ChLocalsCircularBufferSize;
+ CHLocalsPtr2 -= 1;
+
+
+ /* The defining rule used here is as follows. */
+ /* An edge pixels has 3-5 changed locals. */
+ /* And one or more of these changed locals has itself got 7-8 changed locals. */
+
+ if ( RowType == NOT_EDGE_ROW )
+ {
+ /* Loop for all pixels in the row. */
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Does the fragment contain anything interesting to work with.
+ if ( *DispFragPtr == CANDIDATE_BLOCK )
+ {
+ // Reset the cumulative fragment score.
+ FragScore = 0;
+
+ // Pixels grouped along the row into fragments
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // How many changed locals has the current pixel got.
+ changed_locals = ChangedLocalsPtr[j];
+
+ // Is the pixel a suitable candidate
+ if ( (changed_locals > 2) && (changed_locals < 6) )
+ {
+ // The pixel may qualify... have a closer look.
+ BodyNeighbours = 0;
+
+ // Count the number of "BodyNeighbours" .. Pixels
+ // that have 7 or more changed neighbours.
+ if ( (i > 0) || (j > 0 ) ) // Not the first pixel of the row: left hand column exists
+ {
+ if ( CHLocalsPtr0[0] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr1[0] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr2[0] >= 7 )
+ BodyNeighbours++;
+ }
+
+ if ( CHLocalsPtr0[1] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr2[1] >= 7 )
+ BodyNeighbours++;
+
+ if ( (i + j) < LastRowIndex ) // Not the last pixel of the row: right hand column exists
+ {
+ if ( CHLocalsPtr0[2] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr1[2] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr2[2] >= 7 )
+ BodyNeighbours++;
+ }
+
+ if ( BodyNeighbours > 0 )
+ {
+ AbsDiff = abs( YUVDiffsPtr[j] );
+ Score = (INT32)( (double)(BodyNeighbours * BodyNeighbourScore) *
+ ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
+ if ( Score < 1 )
+ Score = 1;
+
+ /* Increment the score by a value determined by the number of body neighbours. */
+ PixelNoiseScorePtr[j] += (UINT8)Score;
+ FragScore += (UINT32)Score;
+ }
+ }
+
+ // Increment pointers into changed locals buffer
+ CHLocalsPtr0 ++;
+ CHLocalsPtr1 ++;
+ CHLocalsPtr2 ++;
+ }
+
+ // Add fragment score (with plane correction factor) into main data structure
+ *FragScorePtr += (INT32)(FragScore * ppi->YUVPlaneCorrectionFactor);
+
+ // If score is greater than trip threshold then mark block for update.
+ if ( *FragScorePtr > ppi->BlockThreshold )
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+
+ }
+ else // Nothing to do for this fragment group
+ {
+ // Advance pointers into changed locals buffer
+ CHLocalsPtr0 += ppi->HFragPixels;
+ CHLocalsPtr1 += ppi->HFragPixels;
+ CHLocalsPtr2 += ppi->HFragPixels;
+ }
+
+ // Increment the various pointers
+ FragScorePtr++;
+ DispFragPtr++;
+ PixelNoiseScorePtr += ppi->HFragPixels;
+ ChangedLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ }
+ }
+ else // This is either the top or bottom row of pixels in a plane.
+ {
+ /* Loop for all pixels in the row. */
+ for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+ {
+ // Does the fragment contain anything interesting to work with.
+ if ( *DispFragPtr == CANDIDATE_BLOCK )
+ {
+ // Reset the cumulative fragment score.
+ FragScore = 0;
+
+ // Pixels grouped along the row into fragments
+ for ( j = 0; j < ppi->HFragPixels; j++ )
+ {
+ // How many changed locals has the current pixel got.
+ changed_locals = ChangedLocalsPtr[j];
+
+ // Is the pixel a suitable candidate
+ if ( (changed_locals > 2) && (changed_locals < 6) )
+ {
+ /* The pixel may qualify... have a closer look. */
+ BodyNeighbours = 0;
+
+ /* Count the number of "BodyNeighbours" .. Pixels
+ * that have 7 or more changed neighbours. */
+ if ( RowType == LAST_ROW ) // No row below: only the previous and current rows are examined
+ {
+ // Test for cases where it could be the first pixel on the line
+ if ( (i > 0) || (j > 0) )
+ {
+ if ( CHLocalsPtr0[0] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr1[0] >= 7 )
+ BodyNeighbours++;
+ }
+
+ if ( CHLocalsPtr0[1] >= 7 )
+ BodyNeighbours++;
+
+ // Test for the end of line case
+ if ( (i + j) < LastRowIndex )
+ {
+ if ( CHLocalsPtr0[2] >= 7 )
+ BodyNeighbours++;
+
+ if ( CHLocalsPtr1[2] >= 7 )
+ BodyNeighbours++;
+ }
+ }
+ else // FIRST ROW
+ {
+ // Test for cases where it could be the first pixel on the line
+ if ( (i > 0) || (j > 0) )
+ {
+ if ( CHLocalsPtr1[0] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr2[0] >= 7 )
+ BodyNeighbours++;
+ }
+
+ // Pixel directly below the current one
+ if ( CHLocalsPtr2[1] >= 7 )
+ BodyNeighbours++;
+
+ if ( (i + j) < LastRowIndex ) // Test for the end of line case
+ {
+ if ( CHLocalsPtr1[2] >= 7 )
+ BodyNeighbours++;
+ if ( CHLocalsPtr2[2] >= 7 )
+ BodyNeighbours++;
+ }
+ }
+
+ // Allocate a score according to the number of Body neighbours.
+ if ( BodyNeighbours > 0 )
+ {
+ AbsDiff = abs( YUVDiffsPtr[j] );
+ Score = (INT32)( (double)(BodyNeighbours * BodyNeighbourScore) *
+ ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
+ if ( Score < 1 )
+ Score = 1;
+
+ PixelNoiseScorePtr[j] += (UINT8)Score;
+ FragScore += (UINT32)Score;
+ }
+ }
+
+ // Increment pointers into changed locals buffer
+ CHLocalsPtr0 ++;
+ CHLocalsPtr1 ++;
+ CHLocalsPtr2 ++;
+ }
+
+ // Add fragment score (with plane correction factor) into main data structure
+ *FragScorePtr += (INT32)(FragScore * ppi->YUVPlaneCorrectionFactor);
+
+ // If score is greater than trip threshold then mark block for update.
+ if ( *FragScorePtr > ppi->BlockThreshold )
+ {
+ *DispFragPtr = BLOCK_CODED_LOW;
+ }
+
+ }
+ else // Nothing to do for this fragment group
+ {
+ // Advance pointers into changed locals buffer
+ CHLocalsPtr0 += ppi->HFragPixels;
+ CHLocalsPtr1 += ppi->HFragPixels;
+ CHLocalsPtr2 += ppi->HFragPixels;
+ }
+
+ // Increment the various pointers
+ FragScorePtr++;
+ DispFragPtr++;
+ PixelNoiseScorePtr += ppi->HFragPixels;
+ ChangedLocalsPtr += ppi->HFragPixels;
+ YUVDiffsPtr += ppi->HFragPixels;
+ }
+ }
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : LineSearchScoreRow
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data (PlaneWidth, HFragPixels,
+ * AbsDiff_ScoreMultiplierTable, YUVPlaneCorrectionFactor and
+ * BlockThreshold are read)
+ * UINT8 * ChangedLocalsPtr.
+ * INT16 * YUVDiffsPtr,
+ * UINT8 * PixelNoiseScorePtr
+ * UINT32 * FragScorePtr
+ * INT8 * DispFragPtr
+ * INT32 RowNumber
+ *
+ * OUTPUTS : PixelNoiseScorePtr and FragScorePtr entries are increased and
+ * DispFragPtr entries may be set to BLOCK_CODED_LOW.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Calculates the line match scores for a row of pixels.
+ * Only fragments that are CANDIDATE_BLOCK are scored.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void LineSearchScoreRow( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT16 * YUVDiffsPtr,
+ UINT8 * PixelNoiseScorePtr,
+ UINT32 * FragScorePtr,
+ INT8 * DispFragPtr,
+ INT32 RowNumber )
+{
+    INT32 FragX;
+    INT32 k;
+
+    /* A pixel is searched when it has 2-5 changed locals and its noise */
+    /* score is still below LineSearchTripTresh.                        */
+
+    /* Walk the row one fragment at a time. */
+    for ( FragX = 0; FragX < ppi->PlaneWidth; FragX += ppi->HFragPixels )
+    {
+        // Fragments that are not candidates are stepped over untouched.
+        if ( *DispFragPtr == CANDIDATE_BLOCK )
+        {
+            UINT32 FragTotal = 0;
+
+            for ( k = 0; k < ppi->HFragPixels; k++ )
+            {
+                UINT8 Locals = ChangedLocalsPtr[k];
+                UINT32 Magnitude;
+                INT32 LineScore;
+
+                // Wrong number of changed locals: not a line search candidate.
+                if ( (Locals <= 1) || (Locals >= 6) )
+                    continue;
+
+                // Pixel has already scored enough elsewhere.
+                if ( !(PixelNoiseScorePtr[k] < LineSearchTripTresh) )
+                    continue;
+
+                LineScore = (INT32)LineSearchScorePixel( ppi, &ChangedLocalsPtr[k], RowNumber, FragX+k );
+                if ( !LineScore )
+                    continue;
+
+                // Scale by the level based multiplier, never below 1.
+                Magnitude = abs( YUVDiffsPtr[k] );
+                LineScore = (INT32)( (double)LineScore * ppi->AbsDiff_ScoreMultiplierTable[Magnitude] );
+                if ( LineScore < 1 )
+                    LineScore = 1;
+
+                PixelNoiseScorePtr[k] += (UINT8)LineScore;
+                FragTotal += (UINT32)LineScore;
+            }
+
+            // Fold the fragment total (with plane correction factor) into the main record.
+            *FragScorePtr += (INT32)(FragTotal * ppi->YUVPlaneCorrectionFactor);
+
+            // Past the trip threshold the block is marked for update.
+            if ( *FragScorePtr > ppi->BlockThreshold )
+                *DispFragPtr = BLOCK_CODED_LOW;
+        }
+
+        // Step every per fragment and per pixel pointer on to the next fragment.
+        YUVDiffsPtr += ppi->HFragPixels;
+        ChangedLocalsPtr += ppi->HFragPixels;
+        PixelNoiseScorePtr += ppi->HFragPixels;
+        DispFragPtr++;
+        FragScorePtr++;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : LineSearchTwoWayLength
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * UINT8 * ChangedLocalsPtr (Map entry for this pixel)
+ * INT32 RowNumber (Row number)
+ * INT32 ColNumber (Column number within a row)
+ * UINT8 FirstDir, SecondDir (The two search directions)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Combined line length, limited to ppi->MaxLineSearchLen once the
+ * second direction has been searched.
+ *
+ * FUNCTION : Searches in FirstDir and, only if that did not already reach
+ * the maximum length, in SecondDir as well. Both searches start
+ * from the same pixel, hence the -1 when the lengths are combined.
+ *
+ * SPECIAL NOTES : Local helper for LineSearchScorePixel().
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+static UINT32 LineSearchTwoWayLength( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber, UINT8 FirstDir, UINT8 SecondDir )
+{
+    UINT32 FirstLen = 0;
+    UINT32 SecondLen = 0;
+    UINT32 Combined;
+
+    PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, ColNumber, FirstDir, &FirstLen );
+
+    if ( FirstLen < ppi->MaxLineSearchLen )
+    {
+        PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, ColNumber, SecondDir, &SecondLen );
+        Combined = FirstLen + SecondLen - 1;
+
+        if ( Combined > ppi->MaxLineSearchLen )
+            Combined = ppi->MaxLineSearchLen;
+    }
+    else
+    {
+        Combined = FirstLen;
+    }
+
+    return Combined;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LineSearchScorePixel
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * UINT8 * ChangedLocalsPtr (Map entry for this pixel)
+ * INT32 RowNumber (Row number)
+ * INT32 ColNumber (Column number within a row)
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : A pixel line search score
+ *
+ * FUNCTION : Returns a Line Search score for a pixel. The longer of the
+ * UP/DOWN and LEFT/RIGHT line lengths is looked up in
+ * LineLengthScores. LEFT/RIGHT is only searched when UP/DOWN
+ * did not reach the maximum length.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT8 LineSearchScorePixel( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber )
+{
+    UINT32 VerticalLen;
+    UINT32 HorizontalLen = 0;
+    UINT32 BestLen;
+    UINT32 ScoreForLength;
+
+    // Look UP and DOWN first.
+    VerticalLen = LineSearchTwoWayLength( ppi, ChangedLocalsPtr, RowNumber, ColNumber, UP, DOWN );
+
+    // If no max length line found then look LEFT and RIGHT as well.
+    if ( VerticalLen < ppi->MaxLineSearchLen )
+        HorizontalLen = LineSearchTwoWayLength( ppi, ChangedLocalsPtr, RowNumber, ColNumber, LEFT, RIGHT );
+
+    /* Take the largest line length */
+    BestLen = ( HorizontalLen > VerticalLen ) ? HorizontalLen : VerticalLen;
+
+    /* Create line length score */
+    ScoreForLength = LineLengthScores[BestLen];
+
+    return (UINT8)ScoreForLength;
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PixelLineSearch
+ *
+ * INPUTS : PP_INSTANCE * ppi
+ * Instance data (PlaneWidth, PlaneHeight, MaxLineSearchLen,
+ * ChLocals and ChLocalsCircularBufferSize are read)
+ * UINT8 * ChangedLocalsPtr (Map entry for this pixel)
+ * INT32 RowNumber (Row number)
+ * INT32 ColNumber (Column number within a row)
+ * UINT8 direction
+ * UP, DOWN, LEFT or RIGHT
+ *
+ * OUTPUTS : UINT32 * line_length
+ * On entry the length of the line so far, on exit the best
+ * length found through this pixel.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Recursive function for tracking along a line of pixels
+ * obeying a specific set of rules. A pixel is part of a line when
+ * it lies inside the plane and has 2-5 changed locals. From each
+ * such pixel the search carries on to the three neighbours lying
+ * in the search direction (straight on and both diagonals), and
+ * stops as soon as ppi->MaxLineSearchLen is reached.
+ *
+ * SPECIAL NOTES : The position is validated BEFORE the map entry is read. Callers
+ * (including the recursion below) step the pointer by +/-1 past
+ * the ends of a row, so for an off plane position the pointer may
+ * lie outside the changed locals buffer and must not be read.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PixelLineSearch( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber, UINT8 direction, UINT32 * line_length )
+{
+    // Exit if we have fallen off the edge of either the image plane or the row.
+    // This has to come first: only an in-plane position has a map entry that
+    // is safe to read.
+    if ( (RowNumber < 0) ||
+         (RowNumber >= ppi->PlaneHeight) ||
+         (ColNumber < 0) ||
+         (ColNumber >= ppi->PlaneWidth) )
+    {
+        return;
+    }
+
+    // Exit if the pixel does not qualify; it isn't part of any line.
+    if ( ((*ChangedLocalsPtr) <= 1) ||
+         ((*ChangedLocalsPtr) >= 6) )
+    {
+        return;
+    }
+
+    if (*line_length < ppi->MaxLineSearchLen)
+    {
+        UINT32 TmpLineLength;
+        UINT32 BestLineLength;
+        UINT8 * search_ptr;
+
+        // Increment the line length to include this pixel.
+        *line_length += 1;
+        BestLineLength = *line_length;
+
+        // Continue search. Every branch restarts from *line_length so that the
+        // branches compete with each other rather than adding up.
+        // up
+        if ( direction == UP )
+        {
+            TmpLineLength = *line_length;
+
+            // Row above, wrapped back into the circular buffer if needed.
+            search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+            if ( search_ptr < ppi->ChLocals )
+                search_ptr += ppi->ChLocalsCircularBufferSize;
+
+            PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber, direction, &TmpLineLength );
+
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+
+        // up and left
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == UP) || (direction == LEFT)) )
+        {
+            TmpLineLength = *line_length;
+
+            search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+            if ( search_ptr < ppi->ChLocals )
+                search_ptr += ppi->ChLocalsCircularBufferSize;
+            search_ptr -= 1;
+
+            PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber - 1, direction, &TmpLineLength );
+
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+
+        // up and right
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == UP) || (direction == RIGHT)) )
+        {
+            TmpLineLength = *line_length;
+
+            search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+            if ( search_ptr < ppi->ChLocals )
+                search_ptr += ppi->ChLocalsCircularBufferSize;
+            search_ptr += 1;
+
+            PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber + 1, direction, &TmpLineLength );
+
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+
+        // left
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == LEFT ) )
+        {
+            TmpLineLength = *line_length;
+            PixelLineSearch( ppi, ChangedLocalsPtr - 1, RowNumber, ColNumber - 1, direction, &TmpLineLength );
+
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+
+        // right
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == RIGHT ) )
+        {
+            TmpLineLength = *line_length;
+            PixelLineSearch( ppi, ChangedLocalsPtr + 1, RowNumber, ColNumber + 1, direction, &TmpLineLength );
+
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+
+        // Down...
+        if ( BestLineLength < ppi->MaxLineSearchLen )
+        {
+            TmpLineLength = *line_length;
+            // down
+            if ( direction == DOWN )
+            {
+                // Row below, wrapped back into the circular buffer if needed.
+                search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+                if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+                    search_ptr -= ppi->ChLocalsCircularBufferSize;
+
+                PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber, direction, &TmpLineLength );
+
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            }
+
+
+            // down and left
+            if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == DOWN) || (direction == LEFT)) )
+            {
+                TmpLineLength = *line_length;
+
+                search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+                if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+                    search_ptr -= ppi->ChLocalsCircularBufferSize;
+                search_ptr -= 1;
+
+                PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber - 1, direction, &TmpLineLength );
+
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            }
+
+            // down and right
+            if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == DOWN) || (direction == RIGHT)) )
+            {
+                TmpLineLength = *line_length;
+
+                search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+                if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+                    search_ptr -= ppi->ChLocalsCircularBufferSize;
+                search_ptr += 1;
+
+                PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber + 1, direction, &TmpLineLength );
+
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            }
+        }
+
+        // Note the search value for this pixel.
+        *line_length = BestLineLength;
+    }
+
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : ScanCalcPixelIndexTable
+ *
+ * INPUTS : PP_INSTANCE *ppi
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Initialises the pixel index table used in the scan module.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ScanCalcPixelIndexTable(PP_INSTANCE *ppi)
+{
+    UINT32 Frag;
+    UINT32 RowOffset;
+    UINT32 ColOffset;
+    UINT32 HalfHFragments;
+    UINT32 * YTable = ppi->ScanPixelIndexTable;
+    UINT32 * UVTable = &ppi->ScanPixelIndexTable[ppi->ScanYPlaneFragments];
+
+    /* Y plane: entry = (fragment row * fragment height * frame width)
+       + (fragment column * fragment width). */
+    for ( Frag = 0; Frag < (ppi->ScanYPlaneFragments); Frag++ )
+    {
+        RowOffset = ((Frag / ppi->ScanHFragments) * ppi->VFragPixels * ppi->ScanConfig.VideoFrameWidth);
+        ColOffset = ((Frag % ppi->ScanHFragments) * ppi->HFragPixels);
+
+        YTable[ Frag ] = RowOffset;
+        YTable[ Frag ] += ColOffset;
+    }
+
+    /* U and V planes: the entries follow the Y entries in the table, use half
+       the Y fragment count per row and half the frame width, and are offset
+       by the number of Y pixels. */
+    if ( (ppi->ScanUVPlaneFragments * 2) > 0 )
+    {
+        HalfHFragments = (ppi->ScanHFragments >> 1);
+
+        for ( Frag = 0; Frag < (ppi->ScanUVPlaneFragments * 2); Frag++ )
+        {
+            RowOffset = ((Frag / HalfHFragments) *
+                         (ppi->VFragPixels * (ppi->ScanConfig.VideoFrameWidth >> 1)) );
+            ColOffset = ((Frag % HalfHFragments) * ppi->HFragPixels) + ppi->YFramePixels;
+
+            UVTable[ Frag ] = RowOffset;
+            UVTable[ Frag ] += ColOffset;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetVcapLevelOffset
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Configures VCAP parameters to one of a set of pre-defined
+ * alternatives.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void SetVcapLevelOffset( PP_INSTANCE *ppi, INT32 Level )
+{
+    /* One row of pre-defined VCAP settings. */
+    typedef struct
+    {
+        int SrfGrey;
+        int SrfCol;
+        int NoiseSup;
+        int SgcLevel;
+        int SuvcLevel;
+        int GrpLowSad;
+        int GrpHighSad;
+        int PrimaryBlock;
+        int Sgc;
+        int Pak;
+    } VCAP_PRESET;
+
+    /* Indexed by Level (0..6). */
+    static const VCAP_PRESET Presets[7] =
+    {
+        { 1, 1, 2, 1, 1,  6, 24,  2, 10, FALSE },
+        { 2, 2, 2, 2, 2,  8, 32,  5, 12, TRUE  },
+        { 3, 3, 2, 2, 2,  8, 32,  5, 16, TRUE  },   // Default VP3 settings
+        { 4, 4, 3, 3, 3, 10, 48,  5, 18, TRUE  },
+        { 5, 5, 3, 4, 4, 12, 48,  5, 20, TRUE  },
+        { 6, 6, 3, 4, 4, 12, 64, 10, 24, TRUE  },   // Default live narrow band settings
+        { 6, 7, 3, 4, 4, 12, 64, 10, 24, TRUE  }    // Default live narrow band settings
+    };
+
+    /* Used for any Level outside 0..6. */
+    static const VCAP_PRESET Fallback = { 3, 3, 2, 2, 2, 10, 32, 5, 16, TRUE };
+
+    const VCAP_PRESET * Preset = &Fallback;
+
+    if ( (Level >= 0) && (Level <= 6) )
+        Preset = &Presets[Level];
+
+    ppi->SRFGreyThresh = Preset->SrfGrey;
+    ppi->SRFColThresh = Preset->SrfCol;
+    ppi->NoiseSupLevel = Preset->NoiseSup;
+    ppi->SgcLevelThresh = Preset->SgcLevel;
+    ppi->SuvcLevelThresh = Preset->SuvcLevel;
+    ppi->GrpLowSadThresh = Preset->GrpLowSad;
+    ppi->GrpHighSadThresh = Preset->GrpHighSad;
+    ppi->PrimaryBlockThreshold = Preset->PrimaryBlock;
+    ppi->SgcThresh = Preset->Sgc;
+
+    ppi->PAKEnabled = Preset->Pak;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : GetLocalVarianceMultiplier
+ *
+ * INPUTS : INT16 * MasterYUVDiffPtr.
+ * UINT32 PlaneLineLength
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Pixel variance
+ *
+ * FUNCTION : Calculates a score correction based on local variance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+double GetLocalVarianceMultiplier( PP_INSTANCE *ppi, INT16 * MasterYUVDiffPtr, UINT32 PlaneLineLength )
+{
+    /* Sums the 3x3 neighbourhood of difference values centred on
+       MasterYUVDiffPtr (rows above and below are found by stepping
+       PlaneLineLength entries through the circular yuv_differences buffer),
+       derives a variance figure from them and returns 1.5, 1.0 or 0.5
+       depending on how that figure compares with LowVarianceThresh. */
+    INT32 XSum=0;
+    INT32 XXSum=0;
+    INT32 DiffVal;
+    INT32 Row;
+    INT32 Col;
+    double LocalVariance;
+    double VarMultiplier;
+    INT16 * RowPtr[3];
+
+    // Previous row (wrap to the far end of the buffer if necessary)
+    RowPtr[0] = MasterYUVDiffPtr - PlaneLineLength;
+    if ( RowPtr[0] < ppi->yuv_differences )
+        RowPtr[0] += ppi->YuvDiffsCircularBufferSize;
+
+    // Current row
+    RowPtr[1] = MasterYUVDiffPtr;
+
+    // Next row (wrap back to the start of the buffer if necessary).
+    // The element at index YuvDiffsCircularBufferSize is already one past the
+    // end, so the test must be >= (it was >, which left a pointer sitting
+    // exactly on the end of the buffer unwrapped).
+    RowPtr[2] = MasterYUVDiffPtr + PlaneLineLength;
+    if ( RowPtr[2] >= &ppi->yuv_differences[ppi->YuvDiffsCircularBufferSize] )
+        RowPtr[2] -= ppi->YuvDiffsCircularBufferSize;
+
+    // Accumulate sum and sum of squares over the nine samples.
+    for ( Row = 0; Row < 3; Row++ )
+    {
+        for ( Col = -1; Col <= 1; Col++ )
+        {
+            DiffVal = RowPtr[Row][Col];
+            XSum += DiffVal;
+            XXSum += DiffVal * DiffVal;
+        }
+    }
+
+    // Population variance: E[x^2] - (E[x])^2 with 0.1111 ~ 1/9 and 0.012346 ~ 1/81.
+    LocalVariance = ((double)XXSum * 0.1111) - ((double)XSum * (double)XSum * 0.012346);
+
+    if ( LocalVariance > 2 * LowVarianceThresh )
+    {
+        VarMultiplier = 1.5;
+    }
+    else if ( LocalVariance > LowVarianceThresh )
+    {
+        VarMultiplier = 1.0;
+    }
+    else
+    {
+        VarMultiplier = 0.5;
+    }
+
+    return VarMultiplier;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ScalarRowSAD
+ *
+ * INPUTS : UINT8 * Src1
+ * UINT8 * Src2
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : A Sum of the absolute difference value for a row of 4 pixels
+ *
+ * FUNCTION : Calculates a sum of the absolute difference for each of two
+ * groups of 4 pixels and returns the larger of the two values.
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 ScalarRowSAD( UINT8 * Src1, UINT8 * Src2 )
+{
+    /* GroupSad[0] covers pixels 0..3, GroupSad[1] covers pixels 4..7. */
+    UINT32 GroupSad[2] = { 0, 0 };
+    UINT32 Pixel;
+
+    for ( Pixel = 0; Pixel < 8; Pixel++ )
+        GroupSad[Pixel >> 2] += abs( Src1[Pixel] - Src2[Pixel] );
+
+    /* Report the larger of the two 4-pixel sums. */
+    if ( GroupSad[1] > GroupSad[0] )
+        return GroupSad[1];
+
+    return GroupSad[0];
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : ScalarColSAD
+ *
+ * INPUTS : PP_INSTANCE *ppi
+ * UINT8 * Src1
+ * UINT8 * Src2
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : The maximum 4 pixel column SAD for an 8x8 block.
+ *
+ * FUNCTION : Calculates a SAD for each 4 pixel column in a block and
+ * returns the MAX value.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 ScalarColSAD( PP_INSTANCE *ppi, UINT8 * Src1, UINT8 * Src2 )
+{
+    /* ColSad[0][c] sums column c over rows 0..3, ColSad[1][c] over rows 4..7. */
+    UINT32 ColSad[2][8] = { {0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0} };
+    UINT32 Largest = 0;
+    UINT32 Half;
+    UINT32 Row;
+    UINT32 Col;
+
+    for ( Half = 0; Half < 2; Half++ )
+    {
+        for ( Row = 0; Row < 4; Row++ )
+        {
+            for ( Col = 0; Col < 8; Col++ )
+                ColSad[Half][Col] += abs(Src1[Col] - Src2[Col]);
+
+            /* Step both sources down one row. */
+            Src1 += ppi->PlaneStride;
+            Src2 += ppi->PlaneStride;
+        }
+    }
+
+    /* Pick the largest of the sixteen 4-pixel column sums. */
+    for ( Col = 0; Col < 8; Col++ )
+    {
+        for ( Half = 0; Half < 2; Half++ )
+        {
+            if ( ColSad[Half][Col] > Largest )
+                Largest = ColSad[Half][Col];
+        }
+    }
+
+    return Largest;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ApplyPakLowPass
+ *
+ * INPUTS : UINT8 * SrcPtr
+ * central point in kernel.
+ * OUTPUTS : None.
+ *
+ * RETURNS : Filtered value.
+ *
+ * FUNCTION : Applies a moderate low pass filter at the given location.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT8 ApplyPakLowPass( PP_INSTANCE *ppi, UINT8 * SrcPtr )
+{
+    /* Row pointers centred on the pixel column; rows are PlaneStride apart
+       (stride, not width). */
+    UINT8 * Above = SrcPtr - ppi->PlaneStride;
+    UINT8 * Below = SrcPtr + ppi->PlaneStride;
+    UINT32 Sum;
+
+    /* Sum the eight neighbours; the centre pixel itself is not included. */
+    Sum  = (UINT32)Above[-1] + (UINT32)Above[0] + (UINT32)Above[1];
+    Sum += (UINT32)SrcPtr[-1] + (UINT32)SrcPtr[1];
+    Sum += (UINT32)Below[-1] + (UINT32)Below[0] + (UINT32)Below[1];
+
+    /* Divide by 8 to get the neighbourhood average. */
+    return (UINT8)( Sum >> 3 );
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c
new file mode 100644
index 00000000..7c9070c5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c
@@ -0,0 +1,110 @@
+/****************************************************************************
+*
+* Module Title : PreProcFunctions.c
+*
+* Description :
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 22 Aug 00 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "preproc.h"
+#ifdef _MSC_VER
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+#endif
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+/****************************************************************************
+ *
+ * ROUTINE : MachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Checks for machine specific features such as MMX support and
+ * sets appropriate flags and function pointers.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void MachineSpecificConfig(PP_INSTANCE *ppi)
+{
+    /* No CPU feature detection is performed here: the SAD function pointers
+       are always bound to the scalar C implementations.  (The previously
+       declared, never-used FeatureFlags local has been removed.) */
+    ppi->RowSAD = ScalarRowSAD;
+    ppi->ColSAD = ScalarColSAD;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : ClearMmxState()
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Clears down the MMX state
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearMmxState(PP_INSTANCE *ppi)
+{
+    /* Nothing to clear in this implementation; the instance is not touched. */
+    (void)ppi;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c
new file mode 100644
index 00000000..9df699bd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c
@@ -0,0 +1,501 @@
+/****************************************************************************
+*
+* Module Title : PreProcGlobals
+*
+* Description : Pre-processor module globals.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.07 PGW 20 Feb 01 Disable history buffer mechanism.
+* 1.06 JBB 20 Sep 00 duck_ memory allocation calls
+* 1.05 JBB 02 Aug 00 Checked duck_malloc return codes
+* 1.04 PGW 24 Jul 00 Deleted BeThreshold & ShowVcapPramsDlg.
+* 1.03 PGW 10 Jul 00 Added KFIndicator.
+* 1.02 JBB 30/05/00 Removed hard coded size limits
+* 1.01 PGW 12/07/99 Changes to reduce uneccessary dependancies.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Frames
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#include "preprocconf.h"
+#include "preproc.h"
+#include <stdlib.h>
+#include "duck_mem.h"
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+
+//PP_INSTANCE *ppi;
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PDeleteFragmentInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Placeholder for freeing per-fragment allocations; currently frees nothing.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PDeleteFragmentInfo(PP_INSTANCE * ppi)
+{
+    /* Intended to duck_free prior fragment allocations if present;
+       there is currently nothing to release. */
+    (void)ppi;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PAllocateFragmentInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Clears any existing fragment info; no fragment allocations are currently performed.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PAllocateFragmentInfo(PP_INSTANCE * ppi)
+{
+    /* Drop whatever fragment info already exists ... */
+    PDeleteFragmentInfo(ppi);
+
+    /* ... fragment allocations would follow here; none are performed at present. */
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PDeleteFrameInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Frees the frame level buffers of the pre-processor instance and clears their pointers.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PDeleteFrameInfo(PP_INSTANCE * ppi)
+{
+ if( ppi->ScanPixelIndexTableAlloc )
+ duck_free(ppi->ScanPixelIndexTableAlloc);
+ ppi->ScanPixelIndexTableAlloc= 0;
+ ppi->ScanPixelIndexTable= 0;
+
+ if( ppi->ScanDisplayFragmentsAlloc )
+ duck_free(ppi->ScanDisplayFragmentsAlloc);
+ ppi->ScanDisplayFragmentsAlloc= 0;
+ ppi->ScanDisplayFragments= 0;
+
+ if( ppi->FragScoresAlloc )
+ duck_free(ppi->FragScoresAlloc);
+ ppi->FragScoresAlloc= 0;
+ ppi->FragScores= 0;
+
+ if( ppi->SameGreyDirPixelsAlloc )
+ duck_free(ppi->SameGreyDirPixelsAlloc);
+ ppi->SameGreyDirPixelsAlloc= 0;
+ ppi->SameGreyDirPixels= 0;
+
+ if( ppi->FragDiffPixelsAlloc )
+ duck_free(ppi->FragDiffPixelsAlloc);
+ ppi->FragDiffPixelsAlloc= 0;
+ ppi->FragDiffPixels= 0;
+
+ if( ppi->BarBlockMapAlloc )
+ duck_free(ppi->BarBlockMapAlloc);
+ ppi->BarBlockMapAlloc= 0;
+ ppi->BarBlockMap= 0;
+
+ if( ppi->TmpCodedMapAlloc )
+ duck_free(ppi->TmpCodedMapAlloc);
+ ppi->TmpCodedMapAlloc= 0;
+ ppi->TmpCodedMap= 0;
+
+ if( ppi->RowChangedPixelsAlloc )
+ duck_free(ppi->RowChangedPixelsAlloc);
+ ppi->RowChangedPixelsAlloc= 0;
+ ppi->RowChangedPixels= 0;
+
+ if( ppi->PixelScoresAlloc )
+ duck_free(ppi->PixelScoresAlloc);
+ ppi->PixelScoresAlloc= 0;
+ ppi->PixelScores= 0;
+
+ if( ppi->PixelChangedMapAlloc )
+ duck_free(ppi->PixelChangedMapAlloc);
+ ppi->PixelChangedMapAlloc= 0;
+ ppi->PixelChangedMap= 0;
+
+ if( ppi->ChLocalsAlloc )
+ duck_free(ppi->ChLocalsAlloc);
+ ppi->ChLocalsAlloc= 0;
+ ppi->ChLocals= 0;
+
+ if( ppi->yuv_differencesAlloc )
+ duck_free(ppi->yuv_differencesAlloc);
+ ppi->yuv_differencesAlloc= 0;
+ ppi->yuv_differences= 0;
+
+}
+
+
+/* Rounds an address up to the next multiple of 32.  The arithmetic is done in
+   size_t rather than unsigned long with a 32-bit mask, so that the upper bits
+   of a pointer are not discarded on targets where pointers are wider than
+   32 bits. */
+#define ROUNDUP32(X) ( ( ( (size_t)(X) ) + 31 ) & ~( (size_t)31 ) )
+/****************************************************************************
+ *
+ * ROUTINE : PAllocateFrameInfo
+ *
+ *
+ * INPUTS : PP_INSTANCE * ppi : instance whose frame buffers are (re)allocated
+ *
+ * OUTPUTS : The ...Alloc members hold the raw allocations; the matching
+ *           working pointers hold the same addresses rounded up to 32 bytes.
+ *
+ * RETURNS : TRUE on success. FALSE if any allocation fails, in which case
+ *           everything allocated so far has been released again.
+ *
+ *
+ * FUNCTION : Frees any existing frame buffers and allocates a fresh set sized
+ *            from the current fragment counts and frame dimensions.
+ *
+ * SPECIAL NOTES : Each request is padded by 32 bytes to leave room for the
+ *                 alignment adjustment.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+BOOL PAllocateFrameInfo(PP_INSTANCE * ppi)
+{
+    // Release anything left over from a previous configuration.
+    PDeleteFrameInfo(ppi);
+
+    // Per-fragment tables (one entry per fragment in the frame).
+    ppi->ScanPixelIndexTableAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT32), DMEM_GENERAL);
+    if(!ppi->ScanPixelIndexTableAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->ScanPixelIndexTable = (UINT32 *) ROUNDUP32(ppi->ScanPixelIndexTableAlloc);
+
+    ppi->ScanDisplayFragmentsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->ScanDisplayFragmentsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->ScanDisplayFragments = (INT8 *) ROUNDUP32(ppi->ScanDisplayFragmentsAlloc);
+
+    ppi->FragScoresAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT32), DMEM_GENERAL);
+    if(!ppi->FragScoresAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->FragScores = (UINT32 *) ROUNDUP32(ppi->FragScoresAlloc);
+
+    ppi->SameGreyDirPixelsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->SameGreyDirPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->SameGreyDirPixels = (INT8 *) ROUNDUP32(ppi->SameGreyDirPixelsAlloc);
+
+    ppi->FragDiffPixelsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT8), DMEM_GENERAL);
+    if(!ppi->FragDiffPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->FragDiffPixels = (UINT8 *) ROUNDUP32(ppi->FragDiffPixelsAlloc);
+
+    // Tables sized from the number of fragments in one row.
+    ppi->BarBlockMapAlloc = duck_malloc(32 + 3 * ppi->ScanHFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->BarBlockMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->BarBlockMap = (INT8 *) ROUNDUP32(ppi->BarBlockMapAlloc);
+
+    ppi->TmpCodedMapAlloc = duck_malloc(32 + ppi->ScanHFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->TmpCodedMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->TmpCodedMap = (INT8 *) ROUNDUP32(ppi->TmpCodedMapAlloc);
+
+    // Table sized from the frame height.
+    ppi->RowChangedPixelsAlloc = duck_malloc(32 + 3 * ppi->ScanConfig.VideoFrameHeight *sizeof(INT32), DMEM_GENERAL);
+    if(!ppi->RowChangedPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->RowChangedPixels = (INT32 *) ROUNDUP32(ppi->RowChangedPixelsAlloc);
+
+    // Buffers holding a fixed number (*_CB_ROWS) of frame-width rows.
+    ppi->PixelScoresAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth* sizeof(UINT8) * PSCORE_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->PixelScoresAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->PixelScores = (UINT8 *) ROUNDUP32(ppi->PixelScoresAlloc);
+
+    ppi->PixelChangedMapAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(UINT8) * PMAP_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->PixelChangedMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->PixelChangedMap = ( UINT8 *) ROUNDUP32(ppi->PixelChangedMapAlloc);
+
+    ppi->ChLocalsAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(UINT8) * CHLOCALS_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->ChLocalsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->ChLocals = (UINT8 *) ROUNDUP32(ppi->ChLocalsAlloc);
+
+    ppi->yuv_differencesAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(INT16) * YDIFF_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->yuv_differencesAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+    ppi->yuv_differences = (INT16 *) ROUNDUP32(ppi->yuv_differencesAlloc);
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeletePPInstance
+ *
+ *
+ * INPUTS : Instance of PB to be deleted
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : frees the Playback instance passed in
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void DeletePPInstance(PP_INSTANCE **ppi)
+{
+    // Nothing to do for a missing handle or an already-deleted instance;
+    // PDeleteFrameInfo dereferences its argument unconditionally.
+    if ( !ppi || !*ppi )
+        return;
+
+    // Release the frame buffers first, then the instance structure itself.
+    PDeleteFrameInfo(*ppi);
+    duck_free(*ppi);
+
+    // Clear the caller's pointer so it cannot be reused.
+    *ppi=0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CreatePPInstance
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : Pointer to the newly created pre-processor instance
+ *
+ *
+ * FUNCTION : Creates and initializes a pre-processor instance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+PP_INSTANCE * CreatePPInstance(void)
+{
+    PP_INSTANCE *ppi;
+
+    // Allocate the instance structure; report failure to the caller instead
+    // of writing through a null pointer.
+    ppi = duck_calloc(1, sizeof(PP_INSTANCE), DMEM_GENERAL);
+    if ( !ppi )
+        return 0;
+
+    // NOTE: ppi->ScanConfig is not populated here (the former ScanConfigInit
+    // local was declared but never copied into the instance).
+
+    ppi->OutputBlocksUpdated = 0;
+    ppi->KFIndicator = 0;
+
+    // Plane dimensions and strides
+    ppi->VideoYPlaneWidth = 0;
+    ppi->VideoYPlaneHeight = 0;
+    ppi->VideoUVPlaneWidth = 0;
+    ppi->VideoUVPlaneHeight = 0;
+
+    ppi->VideoYPlaneStride = 0;
+    ppi->VideoUPlaneStride = 0;
+    ppi->VideoVPlaneStride = 0;
+
+    /* Scan control variables. */
+    ppi->HFragPixels = 8;
+    ppi->VFragPixels = 8;
+
+    ppi->ScanFrameFragments = 0;
+    ppi->ScanYPlaneFragments = 0;
+    ppi->ScanUVPlaneFragments = 0;
+    ppi->ScanHFragments = 0;
+    ppi->ScanVFragments = 0;
+
+    ppi->YFramePixels = 0;
+    ppi->UVFramePixels = 0;
+    ppi->TotFramePixels = 0;
+
+    // Initial threshold levels
+    ppi->SRFGreyThresh = 4;
+    ppi->SRFColThresh = 5;
+    ppi->NoiseSupLevel = 3;
+    ppi->SgcLevelThresh = 3;
+    ppi->SuvcLevelThresh = 4;
+
+    // Variables controlling S.A.D. break outs.
+    ppi->GrpLowSadThresh = 10;
+    ppi->GrpHighSadThresh = 64;
+    ppi->PrimaryBlockThreshold = 5;
+    ppi->SgcThresh = 16;                 // (Default values for 8x8 blocks).
+
+    ppi->PAKEnabled = FALSE;             //TRUE;
+
+    ppi->LevelThresh = 0;                // no initialization in Paul's
+    ppi->NegLevelThresh = 0;             // no initialization in Paul's
+    ppi->SrfThresh = 0;                  // no initialization in Paul's
+    ppi->NegSrfThresh = 0;               // no initialization in Paul's
+    ppi->HighChange = 0;                 // no initialization in Paul's
+    ppi->NegHighChange = 0;              // no initialization in Paul's
+
+    ppi->ModifiedGrpLowSadThresh = 0;
+    ppi->ModifiedGrpHighSadThresh = 0;   // no initialization in Paul's
+
+    ppi->PlaneHFragments = 0;
+    ppi->PlaneVFragments = 0;
+    ppi->PlaneHeight = 0;
+    ppi->PlaneWidth = 0;
+    ppi->PlaneStride = 0;
+
+    ppi->BlockThreshold = 0;             // no initialization in Paul's
+    ppi->BlockSgcThresh = 0;
+    ppi->UVBlockThreshCorrection = 1.25;
+    ppi->UVSgcCorrection = 1.5;
+
+    ppi->SpeedCritical = 3;
+
+    // PC specific variables
+    ppi->MmxEnabled = FALSE;
+
+    ppi->YUVPlaneCorrectionFactor = 0;   // no initialization in Paul's
+    ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
+
+    ppi->YuvDiffsCircularBufferSize = 0; // no initialization in Paul's
+    ppi->ChLocalsCircularBufferSize = 0;
+    ppi->PixelMapCircularBufferSize = 0;
+
+    // Function pointers for mmx switches
+    ppi->RowSAD = 0;
+    ppi->ColSAD = 0;
+
+    // Frame buffers: nothing allocated yet (see PAllocateFrameInfo).
+    ppi->ScanPixelIndexTableAlloc = 0;
+    ppi->ScanPixelIndexTable = 0;
+
+    ppi->ScanDisplayFragmentsAlloc = 0;
+    ppi->ScanDisplayFragments = 0;
+
+    ppi->FragScoresAlloc = 0;            // was a second "FragScores = 0"
+    ppi->FragScores = 0;
+
+    ppi->SameGreyDirPixelsAlloc = 0;
+    ppi->SameGreyDirPixels = 0;
+
+    ppi->FragDiffPixelsAlloc = 0;
+    ppi->FragDiffPixels = 0;
+
+    ppi->BarBlockMapAlloc = 0;
+    ppi->BarBlockMap = 0;
+
+    ppi->TmpCodedMapAlloc = 0;
+    ppi->TmpCodedMap = 0;
+
+    ppi->RowChangedPixelsAlloc = 0;
+    ppi->RowChangedPixels = 0;
+
+    ppi->PixelScoresAlloc = 0;
+    ppi->PixelScores = 0;
+
+    ppi->PixelChangedMapAlloc = 0;
+    ppi->PixelChangedMap = 0;
+
+    ppi->ChLocalsAlloc = 0;
+    ppi->ChLocals = 0;
+
+    ppi->yuv_differencesAlloc = 0;
+    ppi->yuv_differences = 0;
+
+    return ppi;
+}
+/****************************************************************************
+ *
+ * ROUTINE : VPPInitLibrary
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Library initialization hook; currently does nothing
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VPPInitLibrary(void)
+{
+    // Intentionally empty: this function performs no library-level set-up.
+
+}
+
+/*********************************************************/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VPPDeInitLibrary
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Library de-initialization hook; currently does nothing
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VPPDeInitLibrary(void)
+{
+    // Intentionally empty: this function performs no library-level tear-down.
+
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c
new file mode 100644
index 00000000..2f02f60c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c
@@ -0,0 +1,252 @@
+/****************************************************************************
+*
+* Module Title : PreProcIf.c
+*
+* Description : Pre-processor dll interface module.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.09 PGW 27 Apr 01 Changes to use last frame coded list passed in from codec.
+* Removed code to set Y from UV.
+* 1.08 PGW 28 Feb 01 Removal of history buffer functionality.
+* 1.07 PGW 28 Feb 01 Removal of pre-processor output buffer.
+* 1.06 JBB 03 Aug 00 Added Malloc Checks
+* 1.05 PGW 27 Jul 00 Removed SetVcapParams() plus other housekeeping.
+* 1.04 PGW 10 Jul 00 Removed unused functions GetBlockStats(), BlockChangeVariance()
+* and GetBlockCategories().
+* Change interface to YUVAnalyseFrame() to include KF indicator.
+* 1.03 PGW 22/06/00 Removed speed specific code.
+* 1.02 JBB 30/05/00 Removed hard coded size limits
+* 1.01 PGW 12/07/99 Changes to reduce uneccessary dependancies.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include <string.h>
+#include "type_aliases.h"
+#include "preproc.h"
+
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+#define MIN_STEP_THRESH 6
+
+#define VARIANCE_THRESH 200
+#define LOW_VARIANCE_THRESH 100
+#define HIGH_SCORE 400
+
+
+/****************************************************************************
+* Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+/****************************************************************************
+ *
+ * ROUTINE : ScanYUVInit
+ *
+ * INPUTS : PP_INSTANCE * ppi : Pre-processor instance to set up.
+ *          SCAN_CONFIG_DATA * ScanConfigPtr : Configuration data.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : FALSE if PAllocateFrameInfo() fails, otherwise TRUE.
+ *
+ * FUNCTION : Initialises the scan process.
+ *
+ * SPECIAL NOTES : Reads ppi->HFragPixels and ppi->VFragPixels, which are
+ *                 not assigned by this routine.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+extern BOOL PAllocateFrameInfo(PP_INSTANCE * ppi);
+BOOL ScanYUVInit( PP_INSTANCE * ppi, SCAN_CONFIG_DATA * ScanConfigPtr)
+{
+    SCAN_CONFIG_DATA *Cfg = &ppi->ScanConfig;   /* the instance's own copy */
+    UINT32 FragmentPixels;                      /* pixels in one fragment */
+
+    /* Machine specific set up (e.g. MMX support) comes first. */
+    MachineSpecificConfig(ppi);
+
+    /* Copy the imported data structure pointers and the frame geometry. */
+    Cfg->Yuv0ptr             = ScanConfigPtr->Yuv0ptr;
+    Cfg->Yuv1ptr             = ScanConfigPtr->Yuv1ptr;
+    Cfg->FragInfo            = ScanConfigPtr->FragInfo;
+    Cfg->FragInfoElementSize = ScanConfigPtr->FragInfoElementSize;
+    Cfg->FragInfoCodedMask   = ScanConfigPtr->FragInfoCodedMask;
+    Cfg->RegionIndex         = ScanConfigPtr->RegionIndex;
+    Cfg->HFragPixels         = ScanConfigPtr->HFragPixels;
+    Cfg->VFragPixels         = ScanConfigPtr->VFragPixels;
+    Cfg->VideoFrameWidth     = ScanConfigPtr->VideoFrameWidth;
+    Cfg->VideoFrameHeight    = ScanConfigPtr->VideoFrameHeight;
+
+    /* Each UV plane is half the frame size in both directions. */
+    ppi->VideoUVPlaneWidth  = ScanConfigPtr->VideoFrameWidth / 2;
+    ppi->VideoUVPlaneHeight = ScanConfigPtr->VideoFrameHeight / 2;
+
+    /* Pixel counts for the Y plane, one UV plane and the whole frame. */
+    ppi->YFramePixels   = Cfg->VideoFrameWidth * Cfg->VideoFrameHeight;
+    ppi->UVFramePixels  = ppi->VideoUVPlaneWidth * ppi->VideoUVPlaneHeight;
+    ppi->TotFramePixels = ppi->YFramePixels + (2 * ppi->UVFramePixels);
+
+    /* Fragment counts per plane, per row / column and per frame. */
+    FragmentPixels = ppi->HFragPixels * ppi->VFragPixels;
+    ppi->ScanYPlaneFragments  = ppi->YFramePixels / FragmentPixels;
+    ppi->ScanUVPlaneFragments = ppi->UVFramePixels / FragmentPixels;
+    ppi->ScanHFragments       = Cfg->VideoFrameWidth / ppi->HFragPixels;
+    ppi->ScanVFragments       = Cfg->VideoFrameHeight / ppi->VFragPixels;
+    ppi->ScanFrameFragments   = ppi->ScanYPlaneFragments + (2 * ppi->ScanUVPlaneFragments);
+
+    /* The tables below are only built once PAllocateFrameInfo() succeeds. */
+    if ( PAllocateFrameInfo(ppi) )
+    {
+        ScanCalcPixelIndexTable(ppi);   /* scan pixel index table */
+        InitScanMapArrays(ppi);         /* scan map arrays */
+        return TRUE;
+    }
+    return FALSE;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : YUVAnalyseFrame
+ *
+ * INPUTS : PP_INSTANCE *ppi : Pre-processor instance.
+ *
+ * OUTPUTS : UINT32 * KFIndicator : Receives ppi->KFIndicator, the candidate
+ *                                  key frame indicator (0-100).
+ *
+ * RETURNS : ppi->OutputBlocksUpdated, a normalised motion level
+ *           weighting rather than a true count of updated blocks.
+ *
+ * FUNCTION : Scores the fragments for the YUV planes
+ *
+ * SPECIAL NOTES : Fragments already flagged as coded in the caller's
+ *                 FragInfo are marked up front; the U and V planes are
+ *                 analysed before the Y plane.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 YUVAnalyseFrame( PP_INSTANCE *ppi, UINT32 * KFIndicator )
+{
+    UINT32 i;
+
+    /* Initialise the map arrays. */
+    InitScanMapArrays(ppi);
+
+    /********** PGW 27/APR/2001 ***********/
+    // If the block is already marked as coded in the input block map then
+    // mark it as coded here to avoid unnecessary pre-processor work.
+    for ( i = 0; i < ppi->ScanFrameFragments; i++ )
+    {
+        if ( blockCoded(i) )
+            ppi->ScanDisplayFragments[i] = BLOCK_ALREADY_MARKED_FOR_CODING;
+    }
+
+    // No motion dependent adjustment is applied at present: the SAD break
+    // out thresholds are used unmodified.
+    ppi->ModifiedGrpLowSadThresh = ppi->GrpLowSadThresh;
+    ppi->ModifiedGrpHighSadThresh = ppi->GrpHighSadThresh;
+    // testing force every block with any change to get coded
+    //ppi->ModifiedGrpHighSadThresh = 0;
+
+    // Set up the internal plane height and width variables.
+    ppi->VideoYPlaneWidth = ppi->ScanConfig.VideoFrameWidth;
+    ppi->VideoYPlaneHeight = ppi->ScanConfig.VideoFrameHeight;
+    ppi->VideoUVPlaneWidth = ppi->ScanConfig.VideoFrameWidth / 2;
+    ppi->VideoUVPlaneHeight = ppi->ScanConfig.VideoFrameHeight / 2;
+
+    // To start with *** TBD **** the strides will be set from the widths
+    ppi->VideoYPlaneStride = ppi->VideoYPlaneWidth;
+    ppi->VideoUPlaneStride = ppi->VideoUVPlaneWidth;
+    ppi->VideoVPlaneStride = ppi->VideoUVPlaneWidth;
+
+    // Set up the plane pointers: Y first, then U, then V, back to back.
+    ppi->YPlanePtr0 = ppi->ScanConfig.Yuv0ptr;
+    ppi->YPlanePtr1 = ppi->ScanConfig.Yuv1ptr;
+    ppi->UPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels);
+    ppi->UPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels);
+    ppi->VPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels + ppi->UVFramePixels);
+    ppi->VPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels + ppi->UVFramePixels);
+
+    // Analyse the U and V planes.
+    AnalysePlane( ppi, ppi->UPlanePtr0, ppi->UPlanePtr1, ppi->ScanYPlaneFragments, ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight, ppi->VideoUPlaneStride );
+    AnalysePlane( ppi, ppi->VPlanePtr0, ppi->VPlanePtr1, (ppi->ScanYPlaneFragments + ppi->ScanUVPlaneFragments), ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight, ppi->VideoVPlaneStride );
+
+    // Now analyse the Y plane.
+    AnalysePlane( ppi, ppi->YPlanePtr0, ppi->YPlanePtr1, 0, ppi->VideoYPlaneWidth, ppi->VideoYPlaneHeight, ppi->VideoYPlaneStride );
+
+    // Create an output block map for the calling process.
+    CreateOutputDisplayMap( ppi, ppi->ScanDisplayFragments);
+
+    // Set the candidate key frame indicator (0-100)
+    *KFIndicator = ppi->KFIndicator;
+
+    // Return the normalised block count (this is actually a motion level
+    // weighting not a true block count).
+    return ppi->OutputBlocksUpdated;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetScanParam
+ *
+ * INPUTS : PP_INSTANCE *ppi : Pre-processor instance.
+ *          UINT32 ParamId : Identifies the parameter to set.
+ *          INT32 ParamValue : New value for the parameter.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Sets a scan parameter.
+ *
+ * SPECIAL NOTES : Unrecognised ParamId values are ignored.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void SetScanParam( PP_INSTANCE *ppi, UINT32 ParamId, INT32 ParamValue )
+{
+    switch (ParamId)
+    {
+    case SCP_SET_VCAP_LEVEL_OFFSET:
+        SetVcapLevelOffset(ppi, ParamValue);
+        break;
+
+    default:
+        break;      /* no other parameter id is handled here */
+    }
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h b/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h
new file mode 100644
index 00000000..59a3b1c8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h
@@ -0,0 +1,343 @@
+/****************************************************************************
+*
+* Module Title : preproc.h
+*
+* Description : Content analysis module header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.08 PGW 27 Apr 01 Removed code to set Y from UV
+* 1.07 PGW 28 Feb 01 Removal of history buffer functionality.
+* 1.06 PGW 04 Oct 00 Added CANDIDATE_BLOCK_LOW.
+* 1.05 PGW 24 Jul 00 Added Column SAD functions. Deleted BeThreshold.
+* 1.04 PGW 13 Jul 00 Added BLOCK_CODED_LOW. Deleted BLOCK_CODED_EXTRA.
+* 1.03 PGW 10 Jul 00 Added lookup tables to reduce number of conditionals
+* in RowDiffScan(). Removed old "ifdef 0"ed code.
+* Added KFIndicator.
+* 1.02 JBB 30/05/00 Removed hard coded size limits
+* 1.01 YX 06/04/00 Added XMMEnabled for optimizations
+* 1.00 PGW 16/06/96 Configuration baseline.
+*
+*****************************************************************************
+*/
+
+#include "preprocconf.h"
+#include "type_aliases.h"
+#include "preprocif.h"
+
+/* Constants. */
+#define OUTPUT_BLOCK_HEIGHT 8
+#define OUTPUT_BLOCK_WIDTH 8
+
+#define INTERNAL_BLOCK_HEIGHT 8
+#define INTERNAL_BLOCK_WIDTH 8
+
+#define FILTER_BLOCK_SIZE (INTERNAL_BLOCK_WIDTH * INTERNAL_BLOCK_HEIGHT)
+
+/* NEW Line search values. */
+#define UP 0
+#define DOWN 1
+#define LEFT 2
+#define RIGHT 3
+
+/* Low Pass Filter levels. */
+#define NO_LOW_PASS 0
+#define VERY_LOW_LOW_PASS 1
+#define LOW_LOW_PASS 2
+#define MODERATE_LOW_PASS 5
+#define HIGH_LOW_PASS 7
+#define VERY_HIGH_LOW_PASS 9
+
+#define FIRST_ROW 0
+#define NOT_EDGE_ROW 1
+#define LAST_ROW 2
+
+#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
+#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
+#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
+#define FRAG_PIXEL_DIFF_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
+#define PSCORE_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 4)
+// Parenthesised so the product stays intact when used inside a larger expression.
+#define PIXEL_SCORES_BUFFER_SIZE (SCAN_MAX_LINE_LENGTH * PSCORE_CB_ROWS)
+
+#define YUV_DIFFS_CIRC_BUFFER_SIZE (SCAN_MAX_LINE_LENGTH * YDIFF_CB_ROWS)
+#define CH_LOCALS_CIRC_BUFFER_SIZE (SCAN_MAX_LINE_LENGTH * CHLOCALS_CB_ROWS)
+#define PIXEL_MAP_CIRC_BUFFER_SIZE (SCAN_MAX_LINE_LENGTH * PMAP_CB_ROWS)
+
+// Status values in block coding map
+#define CANDIDATE_BLOCK_LOW -2
+#define CANDIDATE_BLOCK -1
+#define BLOCK_NOT_CODED 0
+#define BLOCK_CODED_BAR 3
+#define BLOCK_ALREADY_MARKED_FOR_CODING 4
+#define BLOCK_CODED_SGC 4
+#define BLOCK_CODED_LOW 4
+#define BLOCK_CODED 5
+
+#define MAX_PREV_FRAMES 16
+#define MAX_SEARCH_LINE_LEN 7
+
+/******************************************************************/
+/* Type definitions. */
+/******************************************************************/
+#define blockCoded(i) (ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]&ppi->ScanConfig.FragInfoCodedMask) // non-zero when the coded-mask bits are set for fragment i; relies on a variable named ppi being in scope
+#define setBlockCoded(i) ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]|=ppi->ScanConfig.FragInfoCodedMask; // sets the coded-mask bits for fragment i. NOTE(review): the expansion ends in its own ';', so it is unsafe as an unbraced if/else body
+#define setBlockUncoded(i) ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]&=(~ppi->ScanConfig.FragInfoCodedMask); // clears the coded-mask bits for fragment i. NOTE(review): same trailing ';' caveat as setBlockCoded
+
+typedef struct PP_INSTANCE // State of one pre-processor (content analysis) instance.
+{
+ UINT32 *ScanPixelIndexTableAlloc;
+ INT8 *ScanDisplayFragmentsAlloc;
+
+ UINT32 *FragScoresAlloc; // The individual frame difference ratings.
+ INT8 *SameGreyDirPixelsAlloc;
+ INT8 *BarBlockMapAlloc;
+
+ // Number of pixels changed by diff threshold in row of a fragment.
+ UINT8 *FragDiffPixelsAlloc;
+
+ UINT8 *PixelScoresAlloc;
+ UINT8 *PixelChangedMapAlloc;
+ UINT8 *ChLocalsAlloc;
+ INT16 *yuv_differencesAlloc;
+ INT32 *RowChangedPixelsAlloc;
+ INT8 *TmpCodedMapAlloc;
+ // Each pointer below has a same-named *Alloc member above; presumably the working view of that allocation - TODO confirm.
+ UINT32 *ScanPixelIndexTable;
+ INT8 *ScanDisplayFragments;
+
+ UINT32 *FragScores; // The individual frame difference ratings.
+ INT8 *SameGreyDirPixels;
+ INT8 *BarBlockMap;
+
+ // Number of pixels changed by diff threshold in row of a fragment.
+ UINT8 *FragDiffPixels;
+
+ UINT8 *PixelScores;
+ UINT8 *PixelChangedMap;
+ UINT8 *ChLocals;
+ INT16 *yuv_differences;
+ INT32 *RowChangedPixels;
+ INT8 *TmpCodedMap;
+
+ // Plane pointers and dimension variables (refreshed by YUVAnalyseFrame() on every call)
+ UINT8 * YPlanePtr0;
+ UINT8 * YPlanePtr1;
+ UINT8 * UPlanePtr0;
+ UINT8 * UPlanePtr1;
+ UINT8 * VPlanePtr0;
+ UINT8 * VPlanePtr1;
+
+ UINT32 VideoYPlaneWidth;
+ UINT32 VideoYPlaneHeight;
+ UINT32 VideoUVPlaneWidth;
+ UINT32 VideoUVPlaneHeight;
+
+ UINT32 VideoYPlaneStride;
+ UINT32 VideoUPlaneStride;
+ UINT32 VideoVPlaneStride;
+
+/* Scan control variables. */
+ UINT8 HFragPixels; // fragment width in pixels; divisor in ScanYUVInit(), which does not set it
+ UINT8 VFragPixels; // fragment height in pixels; divisor in ScanYUVInit(), which does not set it
+
+ UINT32 ScanFrameFragments; // Y plane fragments plus twice the UV plane fragments
+ UINT32 ScanYPlaneFragments;
+ UINT32 ScanUVPlaneFragments;
+ UINT32 ScanHFragments;
+ UINT32 ScanVFragments;
+
+ UINT32 YFramePixels;
+ UINT32 UVFramePixels;
+ UINT32 TotFramePixels; // YFramePixels plus twice UVFramePixels
+
+ BOOL SgcOnOff;
+
+ UINT32 SgcThresh;
+
+ UINT32 OutputBlocksUpdated; // value returned by YUVAnalyseFrame()
+ UINT32 KFIndicator; // candidate key frame indicator (0-100), handed back by YUVAnalyseFrame()
+
+ BOOL ScanSRF_Enabled;
+
+/* The VCAP scan configuration. */
+ SCAN_CONFIG_DATA ScanConfig;
+
+ BOOL VcapOn;
+
+ INT32 SRFGreyThresh;
+ INT32 SRFColThresh;
+ INT32 SgcLevelThresh;
+ INT32 SuvcLevelThresh;
+
+ INT32 SRFGreyThreshOffset;
+ INT32 SRFColThreshOffset;
+ INT32 SgcLevelThreshOffset;
+ INT32 SuvcLevelThreshOffset;
+
+ UINT32 NoiseSupLevel;
+
+ /* Block Thresholds. */
+ UINT32 PrimaryBlockThreshold;
+
+ INT32 SRFLevel;
+ INT32 SRFLevelOffset;
+
+ BOOL PAKEnabled;
+
+ BOOL EBO_Enabled;
+ BOOL CategorisationEnabled;
+
+ int LevelThresh;
+ int NegLevelThresh;
+ int SrfThresh;
+ int NegSrfThresh;
+ int HighChange;
+ int NegHighChange;
+
+ // Threshold lookup tables
+ UINT8 SrfPakThreshTable[512];
+ UINT8 * SrfPakThreshTablePtr;
+ UINT8 SrfThreshTable[512];
+ UINT8 * SrfThreshTablePtr;
+ UINT8 SgcThreshTable[512];
+ UINT8 * SgcThreshTablePtr;
+
+ // Variables controlling S.A.D. break outs.
+ UINT32 GrpLowSadThresh;
+ UINT32 GrpHighSadThresh;
+ UINT32 ModifiedGrpLowSadThresh; // reloaded from GrpLowSadThresh by each YUVAnalyseFrame() call
+ UINT32 ModifiedGrpHighSadThresh; // reloaded from GrpHighSadThresh by each YUVAnalyseFrame() call
+
+ INT32 PlaneHFragments;
+ INT32 PlaneVFragments;
+ INT32 PlaneHeight;
+ INT32 PlaneWidth;
+ INT32 PlaneStride;
+
+ UINT32 BlockThreshold;
+ UINT32 BlockSgcThresh;
+ double UVBlockThreshCorrection;
+ double UVSgcCorrection;
+
+ UINT32 SpeedCritical;
+
+// Live test harness specific.
+
+// PC specific variables
+ BOOL MmxEnabled;
+ BOOL XmmEnabled;
+
+ double YUVPlaneCorrectionFactor;
+ double AbsDiff_ScoreMultiplierTable[256];
+ UINT8 NoiseScoreBoostTable[256];
+ UINT8 MaxLineSearchLen;
+
+ INT32 YuvDiffsCircularBufferSize;
+ INT32 ChLocalsCircularBufferSize;
+ INT32 PixelMapCircularBufferSize;
+
+ // Temp stats variable
+ UINT32 TotBlocksUpdated;
+
+ // Function pointers for mmx switches
+ UINT32 (*RowSAD)(UINT8 *, UINT8 * );
+ UINT32 (*ColSAD)(xPP_INST ppi, UINT8 *, UINT8 * ); // NOTE(review): takes xPP_INST, whereas ScalarColSAD() is declared with PP_INSTANCE * - confirm these are the same type
+
+} PP_INSTANCE;
+
+/******************************************************************/
+/* Function prototypes. */
+/******************************************************************/
+
+INLINE UINT32 ScanGetFragIndex( PP_INSTANCE *ppi, UINT32 FragmentNo )
+{
+    /* Plain table lookup: FragmentNo is not range checked here. */
+    const UINT32 *IndexTable = ppi->ScanPixelIndexTable;
+    return IndexTable[ FragmentNo ];
+}
+
+extern void InitScanMapArrays
+(
+ PP_INSTANCE *ppi
+);
+
+extern void AnalysePlane
+(
+ PP_INSTANCE *ppi, UINT8 * PlanePtr0, UINT8 * PlanePtr1, UINT32 FragArrayOffset, UINT32 PWidth, UINT32 PHeight, UINT32 PStride
+);
+
+extern void ScanCalcPixelIndexTable
+(
+ PP_INSTANCE *ppi
+);
+
+extern void CreateOutputDisplayMap
+(
+ PP_INSTANCE *ppi,
+ INT8 *InternalFragmentsPtr
+);
+
+extern void SetVcapLevelOffset
+(
+ PP_INSTANCE *ppi, INT32 LevelOffset
+);
+
+// Analysis functions
+extern void RowBarEnhBlockMap
+(
+ PP_INSTANCE *ppi,
+ UINT32 * FragScorePtr,
+ INT8 * FragSgcPtr,
+ INT8 * UpdatedBlockMapPtr,
+ INT8 * BarBlockMapPtr,
+ UINT32 RowNumber
+);
+
+extern void BarCopyBack
+(
+ PP_INSTANCE *ppi,
+ INT8 * UpdatedBlockMapPtr,
+ INT8 * BarBlockMapPtr
+);
+
+// Secondary filter functions
+extern UINT8 ApplyLowPass
+(
+ PP_INSTANCE *ppi, UINT8 * SrcPtr, UINT32 PlaneLineLength, INT32 Level
+);
+
+// PC specific functions (MachineSpecificConfig is called with the instance by ScanYUVInit)
+extern void MachineSpecificConfig
+(
+ PP_INSTANCE *ppi
+);
+extern void ClearMmx
+(
+ PP_INSTANCE *ppi
+);
+
+extern UINT32 ScalarRowSAD
+(
+ UINT8 * Src1, UINT8 * Src2
+);
+extern UINT32 ScalarColSAD
+(
+ PP_INSTANCE *ppi, UINT8 * Src1, UINT8 * Src2
+);
+
+extern PP_INSTANCE * CreatePPInstance
+(
+ void
+);
+extern void DeletePPInstance
+(
+ PP_INSTANCE **ppi
+);
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h b/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h
new file mode 100644
index 00000000..d75980c7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h
@@ -0,0 +1,17 @@
+/****************************************************************************
+*
+* Module Title : PreProcConf.H
+*
+* Description : Content analysis module configuration header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 PGW 11/10/98 Header to control different configurations
+*
+*****************************************************************************
+*/
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/Makefile b/Src/libvpShared/corelibs/cdxv/preproc/Makefile
new file mode 100644
index 00000000..88ff6918
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/Makefile
@@ -0,0 +1,54 @@
+## Target to build
+
+TARGET =libpreproc
+
+## TOOLS
+CC = ecc
+LD = ecc
+AR = ar
+OBJDUMP = objdump
+RM = rm -f
+
+## Directories
+TOPDIR =C:\DuckSoft
+PRIVATEINCLUDE =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
+
+
+CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\preproc
+LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
+
+## Compile Flags
+ALLINCLUDES =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2}
+VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES =-DMAPCA
+ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
+DEBUG =-O2
+CFLAGS =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+ -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+ -magen_interroutine_padding
+ALLFLAGS = $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS = preproc.o \
+
+SRCS = $(OBJS:.o=.c)
+
+ARTARGET = ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+ ${AR} -cr ${ARTARGET} ${OBJS}
+ mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+ $(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+ ${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.c b/Src/libvpShared/corelibs/cdxv/preproc/preproc.c
new file mode 100644
index 00000000..46e8e3bd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.c
@@ -0,0 +1,693 @@
+/****************************************************************************
+*
+* Module Title : preproc.c
+*
+* Description : Simple pre-processor.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+
+#include "memory.h"
+#include "preproc.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define FRAMECOUNT 7
+#define ROUNDUP32(X) ( (unsigned char *)(X) + ( ( 32 - ( (unsigned long)(X) & 31 ) ) & 31 ) ) // X advanced to the next 32 byte boundary, as a byte pointer; only the low address bits are inspected so wide pointers are not truncated (X is evaluated twice)
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void GetProcessorFlags (int *MmxEnabled, int *XmmEnabled, int *WmtEnabled );
+
+/****************************************************************************
+* Exported Global Variables
+****************************************************************************/
+void (*tempFilter)( PreProcInstance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength );
+
+#ifndef MAPCA
+/****************************************************************************
+ *
+ * ROUTINE : spatialFilter_wmt
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * unsigned char *s : Pointer to source frame.
+ * unsigned char *d : Pointer to destination frame.
+ * int width : Width of images.
+ * int height : Height of images.
+ * int pitch : Stride of images.
+ * int strength : Strength of filter to apply.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs a closeness adjusted spatial blur (3x3 neighbourhood)
+ *
+ * SPECIAL NOTES : Destination frame can be same as source frame.
+ * NOTE(review): when d == s the row above is read back already filtered - confirm that is intended.
+ ****************************************************************************/
+void spatialFilter_wmt
+(
+ PreProcInstance *ppi,
+ unsigned char *s,
+ unsigned char *d,
+ int width,
+ int height,
+ int pitch,
+ int strength
+)
+{
+ int i;
+ int row = 1;
+ int PixelOffsets[] = // byte offsets of the 3x3 neighbourhood, centre included
+ {
+ -pitch-1, -pitch, -pitch+1,
+ -1, 0, +1,
+ pitch-1, pitch, pitch+1
+ };
+ unsigned char *frameptr = ppi->frameBuffer; // only ever overwritten by the asm block below, never read
+
+ __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
+ __declspec(align(16)) unsigned short sixteens[]= {16,16,16,16,16,16,16,16};
+
+ memcpy ( d, s, width ); // first row is copied unfiltered
+
+ d += pitch;
+ s += pitch;
+
+ do
+ {
+ // NOTE: By doing it this way I am ensuring that pixels will always be unaligned!!!
+ int col = 1;
+ d[0] = s[0];
+ d[width - 1] = s[width - 1];
+ do
+ {
+ __declspec(align(16)) unsigned short counts[8]; // per pixel sum of weights
+ __declspec(align(16)) unsigned short sums[8]; // per pixel weighted sum, rounding term included
+ _asm
+ {
+ mov esi, s // get the source line
+ add esi, col // add the column offset
+ pxor xmm1,xmm1 // accumulator
+ pxor xmm2,xmm2 // count
+ pxor xmm7,xmm7 // 0s for use with unpack
+
+ movq xmm3, QWORD PTR [esi] // get 8 pixels
+ punpcklbw xmm3, xmm7 // unpack to shorts
+ xor eax, eax // neighbor iterator
+
+NextNeighbor:
+ mov ecx, [PixelOffsets+eax*4] // get eax index pixel neighbor offset
+ movq xmm4, QWORD PTR [esi + ecx] // get ecx index neighbor values
+ punpcklbw xmm4, xmm7 // xmm4 unpacked neighbor values
+ movdqa xmm6, xmm4 // save the pixel values
+ psubsw xmm4, xmm3 // subtracted pixel values
+ pmullw xmm4, xmm4 // square xmm4
+ movd xmm5, strength // shift count for the squared differences
+ psrlw xmm4, xmm5 // squared difference >> strength
+ pmullw xmm4, threes // 3 * modifier
+ movdqa xmm5, sixteens // 16s
+ psubusw xmm5, xmm4 // 16 - modifiers (unsigned saturating, so never below 0)
+ movdqa xmm4, xmm5 // save the modifiers
+ pmullw xmm4, xmm6 // multiplier values
+ paddusw xmm1, xmm4 // accumulator
+ paddusw xmm2, xmm5 // count
+ inc eax // next neighbor
+ cmp eax,9 // there are nine neighbors
+ jne NextNeighbor
+
+ movdqa counts, xmm2
+ psrlw xmm2,1 // divide count by 2 for rounding
+ paddusw xmm1,xmm2 // rounding added in
+
+ mov frameptr,esi // stored but not used afterwards in this function
+
+ movdqa sums, xmm1
+ }
+
+ for ( i=0; i<8; i++ )
+ {
+ int blurvalue = sums[i] * ppi->fixedDivide[counts[i]]; // fixedDivide[n] is 0x10000/n (see InitPreProc)
+ blurvalue >>= 16;
+ d[col+i] = blurvalue;
+ }
+ col += 8; // a full group of 8 is always written, even when fewer columns remain before width-1
+
+ } while ( col<width-1 );
+
+ d += pitch;
+ s += pitch;
+ ++row;
+ } while ( row<height-1 );
+
+ memcpy ( d, s, width ); // last row is copied unfiltered
+ __asm emms
+}
+#endif
+/****************************************************************************
+ *
+ * ROUTINE : tempFilter_c
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * unsigned char *s : Pointer to source frame.
+ * unsigned char *d : Pointer to destination frame.
+ * int bytes : Number of bytes to filter.
+ * int strength : Strength of filter to apply.
+ *
+ * OUTPUTS : The filtered bytes are written to d.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs a closeness adjusted temporal blur
+ *
+ * SPECIAL NOTES : Destination frame can be same as source frame.
+ *                 ppi->frameBuffer holds FRAMECOUNT history bytes per
+ *                 pixel, stored consecutively pixel by pixel.
+ *                 A call with bytes <= 0 does nothing at all.
+ *                 A smaller strength gives less blurring, because the
+ *                 squared difference is shifted right by strength.
+ *
+ ****************************************************************************/
+void tempFilter_c
+(
+    PreProcInstance *ppi,
+    unsigned char *s,
+    unsigned char *d,
+    int bytes,
+    int strength
+)
+{
+    int byte;
+    int frame;
+    unsigned char *frameptr = ppi->frameBuffer;
+
+    // Nothing to filter: leave the history and the frame counter untouched.
+    if ( bytes <= 0 )
+        return;
+
+    if ( ppi->frame == 0 )
+    {
+        // First frame: prime every history slot of every pixel with the
+        // source value and pass the source through unfiltered.
+        for ( byte = 0; byte < bytes; byte++ )
+        {
+            for ( frame = 0; frame < FRAMECOUNT; frame++ )
+                *frameptr++ = s[byte];
+
+            d[byte] = s[byte];
+        }
+    }
+    else
+    {
+        // Slot within each pixel's history that this frame overwrites.
+        int offset = (ppi->frame % FRAMECOUNT);
+
+        for ( byte = 0; byte < bytes; byte++ )
+        {
+            int accumulator = 0;
+            int count = 0;
+
+            frameptr[offset] = s[byte];
+
+            for ( frame = 0; frame < FRAMECOUNT; frame++ )
+            {
+                int pixelValue = *frameptr++;
+                int modifier = s[byte] - pixelValue;
+
+                // Weight is 16 for an exact match and drops to 0 as the
+                // squared difference (shifted by strength) grows.
+                modifier *= modifier;
+                modifier >>= strength;
+                modifier *= 3;
+
+                if ( modifier > 16 )
+                    modifier = 16;
+
+                modifier = 16 - modifier;
+
+                accumulator += modifier * pixelValue;
+                count += modifier;
+            }
+
+            // Rounded weighted mean via the 0x10000/n table; count >= 16
+            // because the slot just written always matches s[byte].
+            accumulator += (count >> 1);
+            accumulator *= ppi->fixedDivide[count];
+            accumulator >>= 16;
+
+            d[byte] = accumulator;
+        }
+    }
+    ++ppi->frame;
+}
+#ifndef MAPCA
+/****************************************************************************
+ *
+ * ROUTINE : tempFilter_wmt
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * unsigned char *s : Pointer to source frame.
+ * unsigned char *d : Pointer to destination frame.
+ * int bytes : Number of bytes to filter.
+ * int strength : Strength of filter to apply.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs a closeness adjusted temporal blur
+ *
+ * SPECIAL NOTES : Destination frame can be same as source frame.
+ * Pixels are handled in whole groups of 8, so a partial final group still touches 8 bytes of s, d and history.
+ ****************************************************************************/
+void tempFilter_wmt
+(
+ PreProcInstance *ppi,
+ unsigned char *s,
+ unsigned char *d,
+ int bytes,
+ int strength
+)
+{
+ int byte = 0;
+ unsigned char * frameptr = ppi->frameBuffer;
+ // History layout used here: for each group of 8 pixels, FRAMECOUNT consecutive 8 byte slots.
+ __declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3, 3, 3, 3, 3};
+ __declspec(align(16)) unsigned short sixteens[]={16,16,16,16,16,16,16,16};
+
+ if ( ppi->frame == 0 )
+ {
+ do
+ {
+ int i;
+ int frame = 0;
+ // First frame: fill every history slot of this group with the source pixels.
+ do
+ {
+ for ( i=0; i<8; i++ )
+ {
+ *frameptr = s[byte+i];
+ ++frameptr;
+ }
+ ++frame;
+ } while ( frame < FRAMECOUNT );
+
+ for ( i=0; i<8; i++ )
+ d[byte+i] = s[byte+i];
+
+ byte += 8;
+
+ } while ( byte < bytes );
+ }
+ else
+ {
+ int i;
+ int offset2 = (ppi->frame % FRAMECOUNT); // history slot overwritten by this frame
+
+ do
+ {
+ __declspec(align(16)) unsigned short counts[8]; // per pixel sum of weights
+ __declspec(align(16)) unsigned short sums[8]; // per pixel weighted sum, rounding term included
+ int accumulator = 0; // unused in this function
+ int count = 0; // unused in this function
+ int frame = 0; // unused in this function
+ _asm
+ {
+ mov eax,offset2
+ mov edi,s // source pixels
+ pxor xmm1,xmm1 // accumulator
+
+ pxor xmm7,xmm7 // 0s for use with unpack
+
+ mov esi,frameptr // history slots for this group of 8 pixels
+ pxor xmm2,xmm2 // count
+
+ movq xmm3, QWORD PTR [edi] // get 8 source pixels
+
+ movq QWORD PTR [esi+8*eax],xmm3 // store them in history slot offset2
+
+ punpcklbw xmm3, xmm2 // xmm3 source pixels
+ mov ecx, FRAMECOUNT
+
+NextFrame:
+ movq xmm4, QWORD PTR [esi] // get frame buffer values
+ punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
+ movdqa xmm6, xmm4 // save the pixel values
+ psubsw xmm4, xmm3 // subtracted pixel values
+ pmullw xmm4, xmm4 // square xmm4
+ movd xmm5, strength // shift count for the squared differences
+ psrlw xmm4, xmm5 // squared difference >> strength
+ pmullw xmm4, threes // 3 * modifier
+ movdqa xmm5, sixteens // 16s
+ psubusw xmm5, xmm4 // 16 - modifiers (unsigned saturating, so never below 0)
+ movdqa xmm4, xmm5 // save the modifiers
+ pmullw xmm4, xmm6 // multiplier values
+ paddusw xmm1, xmm4 // accumulator
+ paddusw xmm2, xmm5 // count
+ add esi, 8 // next frame
+ dec ecx // one fewer history slot to go
+ jnz NextFrame
+
+ movdqa counts, xmm2
+ psrlw xmm2,1 // divide count by 2 for rounding
+ paddusw xmm1,xmm2 // rounding added in
+
+ mov frameptr,esi // now points at the next group's history
+
+ movdqa sums, xmm1
+ }
+
+ for ( i=0; i<8; i++ )
+ {
+ int blurvalue = sums[i] * ppi->fixedDivide[counts[i]]; // fixedDivide[n] is 0x10000/n (see InitPreProc)
+ blurvalue >>= 16;
+ d[i] = blurvalue;
+ }
+ s += 8;
+ d += 8;
+ byte += 8;
+ } while ( byte < bytes );
+ }
+ ++ppi->frame;
+ __asm emms
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : tempFilter_mmx
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * unsigned char *s : Pointer to source frame.
+ * unsigned char *d : Pointer to destination frame.
+ * int bytes : Number of bytes to filter.
+ * int strength : Strength of filter to apply.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs a closeness adjusted temporal blur
+ *
+ * SPECIAL NOTES : Destination frame can be same as source frame.
+ * Pixels are handled in whole groups of 4, so a partial final group still touches 4 bytes of s, d and history.
+ ****************************************************************************/
+void tempFilter_mmx
+(
+ PreProcInstance *ppi,
+ unsigned char *s,
+ unsigned char *d,
+ int bytes,
+ int strength
+)
+{
+ int byte = 0;
+ unsigned char *frameptr = ppi->frameBuffer;
+ // History layout used here: for each group of 4 pixels, FRAMECOUNT consecutive 4 byte slots.
+ __declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3};
+ __declspec(align(16)) unsigned short sixteens[]={16,16,16,16};
+
+ if ( ppi->frame == 0 )
+ {
+ do
+ {
+ int i;
+ int frame = 0;
+ // First frame: fill every history slot of this group with the source pixels.
+ do
+ {
+ for ( i=0; i<4; i++ )
+ {
+ *frameptr = s[byte+i];
+ ++frameptr;
+ }
+ ++frame;
+ } while ( frame < FRAMECOUNT );
+
+ for ( i=0; i<4; i++ )
+ d[byte+i] = s[byte+i];
+
+ byte += 4;
+
+ } while ( byte < bytes );
+ }
+ else
+ {
+ int i;
+ int offset2 = (ppi->frame % FRAMECOUNT); // history slot overwritten by this frame
+ do
+ {
+ __declspec(align(16)) unsigned short counts[8]; // only the first 4 entries are written (one movq)
+ __declspec(align(16)) unsigned short sums[8]; // only the first 4 entries are written (one movq)
+ int accumulator = 0; // unused in this function
+ int count = 0; // unused in this function
+ int frame = 0; // unused in this function
+ _asm
+ {
+
+ mov eax,offset2
+ mov edi,s // source pixels
+ pxor mm1,mm1 // accumulator
+ pxor mm7,mm7 // 0s for use with unpack
+
+ mov esi,frameptr // history slots for this group of 4 pixels
+ pxor mm2,mm2 // count
+
+ movd mm3, DWORD PTR [edi] // get 4 source pixels
+ movd DWORD PTR [esi+4*eax],mm3 // store them in history slot offset2
+
+ punpcklbw mm3, mm2 // mm3 source pixels
+ mov ecx, FRAMECOUNT
+
+NextFrame:
+ movd mm4, DWORD PTR [esi] // get frame buffer values
+ punpcklbw mm4, mm7 // mm4 frame buffer pixels
+ movq mm6, mm4 // save the pixel values
+ psubsw mm4, mm3 // subtracted pixel values
+ pmullw mm4, mm4 // square mm4
+ movd mm5, strength // shift count for the squared differences
+ psrlw mm4, mm5 // squared difference >> strength
+ pmullw mm4, threes // 3 * modifier
+ movq mm5, sixteens // 16s
+ psubusw mm5, mm4 // 16 - modifiers (unsigned saturating, so never below 0)
+ movq mm4, mm5 // save the modifiers
+ pmullw mm4, mm6 // multiplier values
+ paddusw mm1, mm4 // accumulator
+ paddusw mm2, mm5 // count
+ add esi, 4 // next frame
+ dec ecx // one fewer history slot to go
+ jnz NextFrame
+
+ movq counts, mm2
+ psrlw mm2,1 // divide count by 2 for rounding
+ paddusw mm1,mm2 // rounding added in
+
+ mov frameptr,esi // now points at the next group's history
+
+ movq sums, mm1
+
+ }
+
+ for ( i=0; i<4; i++ )
+ {
+ int blurvalue = sums[i] * ppi->fixedDivide[counts[i]]; // fixedDivide[n] is 0x10000/n (see InitPreProc)
+ blurvalue >>= 16;
+ d[i] = blurvalue;
+ }
+ s += 4;
+ d += 4;
+ byte += 4;
+ } while ( byte < bytes );
+ }
+ ++ppi->frame;
+ __asm emms
+}
+#endif
+/****************************************************************************
+ *
+ * ROUTINE : DeletePreProc
+ *
+ * INPUTS : PreProcInstance *ppi : Instance whose buffers are released.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Frees the buffers held by an instance and zeroes its pointers.
+ *
+ * SPECIAL NOTES : The PreProcInstance structure itself is not freed.
+ *
+ ****************************************************************************/
+void DeletePreProc ( PreProcInstance *ppi )
+{
+    /* Frame history: free the raw block, then forget both pointers. */
+    if ( ppi->frameBufferAlloc ) { duck_free ( ppi->frameBufferAlloc ); }
+    ppi->frameBuffer = 0;
+    ppi->frameBufferAlloc = 0;
+
+    /* Fixed point divide table: same treatment. */
+    if ( ppi->fixedDivideAlloc ) { duck_free ( ppi->fixedDivideAlloc ); }
+    ppi->fixedDivide = 0;
+    ppi->fixedDivideAlloc = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitPreProc
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * int FrameSize : Number of bytes in one frame.
+ *
+ * OUTPUTS : ppi's buffers, ppi->frame and the global tempFilter are set.
+ *
+ * RETURNS : int: 1 if successful, 0 if failed.
+ *
+ * FUNCTION : Initializes preprocessor instance.
+ *
+ * SPECIAL NOTES : Frees any buffers ppi already owns first, so its buffer
+ *                 pointers must be 0 or valid on entry.
+ ****************************************************************************/
+int InitPreProc ( PreProcInstance *ppi, int FrameSize )
+{
+    int i;
+    int PaddedSize = (FrameSize + 7) & ~7;  // the SIMD filters touch whole groups of 4 or 8 pixels
+#ifndef MAPCA
+    int MmxEnabled = 0;
+    int XmmEnabled = 0;
+    int WmtEnabled = 0;
+    GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+    if ( WmtEnabled )
+        tempFilter = tempFilter_wmt;
+    else if ( MmxEnabled )
+        tempFilter = tempFilter_mmx;
+    else
+#endif
+        tempFilter = tempFilter_c;
+
+    DeletePreProc ( ppi );
+    ppi->frame = 0;     // a fresh history buffer has to be primed again by the filter
+
+    ppi->frameBufferAlloc = duck_malloc ( 32+PaddedSize*FRAMECOUNT*sizeof(unsigned char), DMEM_GENERAL );
+    if ( !ppi->frameBufferAlloc ) { DeletePreProc( ppi ); return 0; }
+    ppi->frameBuffer = (unsigned char *) ROUNDUP32( ppi->frameBufferAlloc );
+    ppi->fixedDivideAlloc = duck_malloc ( 32+255*sizeof(unsigned int), DMEM_GENERAL );
+    if ( !ppi->fixedDivideAlloc ) { DeletePreProc( ppi ); return 0; }
+    ppi->fixedDivide = (unsigned int *) ROUNDUP32( ppi->fixedDivideAlloc );
+    ppi->fixedDivide[0] = 0;    // never a valid divisor, but no longer left uninitialised
+    for ( i=1; i<255; i++ )
+        ppi->fixedDivide[i] = 0x10000 / i;
+    return 1;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : spatialFilter_c
+ *
+ * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
+ * unsigned char *s : Pointer to source frame.
+ * unsigned char *d : Pointer to destination frame.
+ * int width : Width of images.
+ * int height : Height of images.
+ * int pitch : Stride of images.
+ * int strength : Strength of filter to apply.
+ *
+ * OUTPUTS : The filtered image is written to d.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs a closeness adjusted spatial (3x3) blur.
+ *
+ * SPECIAL NOTES : The outermost rows and columns are copied unfiltered.
+ *                 Nothing is done when width or height is <= 0.
+ *
+ ****************************************************************************/
+void spatialFilter_c
+(
+    PreProcInstance *ppi,
+    unsigned char *s,
+    unsigned char *d,
+    int width,
+    int height,
+    int pitch,
+    int strength
+)
+{
+    int row;
+    int col;
+    int neighbor;
+    int PixelOffsets[9];
+
+    // An empty image has nothing to copy or filter.
+    if ( width <= 0 || height <= 0 )
+        return;
+
+    // Offsets of the 3x3 neighbourhood (centre included) in raster order.
+    PixelOffsets[0] = -pitch - 1;
+    PixelOffsets[1] = -pitch;
+    PixelOffsets[2] = -pitch + 1;
+    PixelOffsets[3] = - 1;
+    PixelOffsets[4] = 0;
+    PixelOffsets[5] = + 1;
+    PixelOffsets[6] = pitch - 1;
+    PixelOffsets[7] = pitch ;
+    PixelOffsets[8] = pitch + 1;
+
+    // The first row has no row above it, so it is copied unfiltered.
+    memcpy ( d, s, width );
+
+    for ( row = 1; row < height-1; row++ )
+    {
+        d += pitch;
+        s += pitch;
+
+        // The first and last pixel of a row lack a full neighbourhood too.
+        d[0] = s[0];
+        d[width - 1] = s[width - 1];
+
+        for ( col = 1; col < width-1; col++ )
+        {
+            int accumulator = 0;
+            int count = 0;
+
+            for ( neighbor = 0; neighbor < 9; neighbor++ )
+            {
+                int pixelValue = s[ col + PixelOffsets[neighbor] ];
+                int modifier = s[col] - pixelValue;
+
+                // Weight is 16 for an identical pixel and drops to 0 as
+                // the squared difference (shifted by strength) grows.
+                modifier *= modifier;
+                modifier >>= strength;
+                modifier *= 3;
+
+                if ( modifier > 16 )
+                    modifier = 16;
+
+                modifier = 16 - modifier;
+
+                accumulator += modifier * pixelValue;
+                count += modifier;
+            }
+
+            // Rounded weighted mean; count >= 16 thanks to the centre pixel.
+            accumulator += (count >> 1);
+            accumulator *= ppi->fixedDivide[count];
+            accumulator >>= 16;
+
+            d[col] = accumulator;
+        }
+    }
+
+    // Copy the last row unfiltered, unless it is also the first row.
+    if ( height > 1 )
+    {
+        d += pitch;
+        s += pitch;
+        memcpy ( d, s, width );
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln b/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln
new file mode 100644
index 00000000..cac9c7e0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln
@@ -0,0 +1,23 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "preproc", "preproc.vcproj", "{0FDF0DE2-6841-4C51-A008-A08C42E50948}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfiguration) = preSolution
+ Debug = Debug
+ Release = Release
+ EndGlobalSection
+ GlobalSection(ProjectDependencies) = postSolution
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+ {0FDF0DE2-6841-4C51-A008-A08C42E50948}.Debug.ActiveCfg = Debug|Win32
+ {0FDF0DE2-6841-4C51-A008-A08C42E50948}.Debug.Build.0 = Debug|Win32
+ {0FDF0DE2-6841-4C51-A008-A08C42E50948}.Release.ActiveCfg = Release|Win32
+ {0FDF0DE2-6841-4C51-A008-A08C42E50948}.Release.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj b/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj
new file mode 100644
index 00000000..52b65f22
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj
@@ -0,0 +1,302 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9.00"
+ Name="preproc"
+ ProjectGUID="{0FDF0DE2-6841-4C51-A008-A08C42E50948}"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ OmitFramePointers="true"
+ AdditionalIncludeDirectories="..\vp60\include,..\include,..\..\include,.\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ BufferSecurityCheck="false"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="0"
+ PrecompiledHeaderFile=".\Release/preproc.pch"
+ AssemblerListingLocation=""
+ ObjectFile="$(IntDir)/"
+ ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="$(SolutionDir)lib\win32\release\s_preproc.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory=".\Debug"
+ IntermediateDirectory=".\Debug"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\vp60\include,..\..\include,.\include,..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+ PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ PrecompiledHeaderFile=".\Debug/preproc.pch"
+ AssemblerListingLocation=".\Debug/"
+ ObjectFile=".\Debug/"
+ ProgramDataBaseFileName=".\Debug/"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="4"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\Lib\Win32\Debug\s_preproc.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release 64|Win32"
+ OutputDirectory="$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/GS-"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ OmitFramePointers="true"
+ AdditionalIncludeDirectories="..\vp60\include,..\include,..\..\include,.\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+ StringPooling="true"
+ RuntimeLibrary="2"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="0"
+ PrecompiledHeaderFile=".\Release/preproc.pch"
+ AssemblerListingLocation=""
+ ObjectFile="$(IntDir)/"
+ ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ CompileAs="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ AdditionalOptions="/machine:AMD64"
+ OutputFile="..\..\..\Lib\Win64\Release\s_preproc.lib"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ >
+ <File
+ RelativePath="preproc.c"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ BasicRuntimeChecks="3"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release 64|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ <File
+ RelativePath="..\include\preproc.h"
+ >
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c
new file mode 100644
index 00000000..0ac90ff1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c
@@ -0,0 +1,315 @@
+/****************************************************************************
+*
+* Module Title : OptFunctions.c
+*
+* Description : MMX or otherwise processor specific
+* optimised versions of functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+ * 1.08 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.07 JBB 26/01/01 Removed unused function
+* 1.06 YWX 23/05/00 Remove the clamping in MmxReconPostProcess()
+* 1.05 YWX 15/05/00 Added MmxReconPostProcess()
+* 1.04 SJL 03/14/00 Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2.
+* 1.03 PGW 12/10/99 Changes to reduce unnecessary dependencies.
+* 1.02 PGW 30/08/99 Minor changes to MmxReconInterHalfPixel2().
+* 1.01 PGW 13/07/99 Changes to keep reconstruction data to 16 bit
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/*
+ Use Tim's optimized version.
+*/
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT // Strict type checking.
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports.
+*****************************************************************************
+*/
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+INT16 Ones[4] = {1,1,1,1};
+INT16 OneTwoEight[4] = {128,128,128,128};
+UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ * ROUTINE : ClearSysStateC()
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ *
+ * FUNCTION : Does nothing; the body is empty. Compare ClearMmx() in this
+ * file, which executes emms.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearSysStateC(void)
+{
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ClearMmx()
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Clears down the MMX state
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+ // emms marks the MMX/x87 register file as empty again. The MMX routines in
+ // this file do not execute emms themselves, so this has to be called after
+ // them and before any floating-point code runs.
+ __asm
+ {
+ emms ; Clear the MMX state.
+ }
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : MMXReconIntra
+ *
+ * INPUTS : PB_INSTANCE * pbi
+ * Decoder instance; not used by this routine
+ *
+ * INT16 * idct
+ * Pointer to the output from the idct for this block
+ * (128 bytes are read: 8 rows of 8 values)
+ *
+ * INT32 stride
+ * Distance in bytes between successive rows of dest
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer (8 rows of 8 bytes are written)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - MMX version.
+ * Each value is saturated to a signed byte and then
+ * biased by 128 to give an unsigned pixel.
+ *
+ * SPECIAL NOTES : Tim Murphy's optimized version.
+ * Does not execute emms; see ClearMmx().
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MMXReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    // pbi is not needed here; mark it as referenced, as MmxReconInter does.
+    (void) pbi;
+
+    __asm
+    {
+        mov         eax,[idct]              ; Signed 16 bit inputs
+        mov         edx,[dest]              ; Unsigned 8 bit outputs
+        movq        mm0,[Eight128s]         ; Set mm0 to 0x8080808080808080
+        ;
+        mov         ebx,[stride]            ; Line stride in output buffer
+        lea         ecx,[eax+128]           ; Endpoint in input buffer
+loop_label:                                 ;
+        movq        mm2,[eax]               ; First four input values
+        ;
+        packsswb    mm2,[eax+8]             ; pack with next(high) four values
+        por         mm0,mm0                 ; stall
+        pxor        mm2,mm0                 ; Convert result to unsigned (same as add 128)
+        lea         eax,[eax + 16]          ; Step source buffer
+        cmp         eax,ecx                 ; are we done
+        ;
+        movq        [edx],mm2               ; store results
+        ;
+        lea         edx,[edx+ebx]           ; Step output buffer
+        jc          loop_label              ; Loop back if we are not done (carry from the cmp above;
+                                            ; the movq and lea in between leave the flags alone)
+    }
+    // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxReconInter
+ *
+ * INPUTS : UINT8 * RefPtr
+ * The last frame reference
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change
+ *
+ * SPECIAL NOTES :
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MmxReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ // pbi is not used; the cast only marks the parameter as referenced.
+ (void) pbi;
+
+ // Reconstructs 8 rows (128 bytes of ChangePtr, 16 bytes per row).
+ // Per row: 8 reference bytes are widened to 16 bits, the row's 8 INT16
+ // changes are added with signed saturation (paddsw), and the sums are packed
+ // back to 8 bytes with unsigned saturation (packuswb) and stored.
+ // RefPtr and ReconPtr both advance by LineStep bytes per row.
+ // The "jc L" at the bottom tests the carry left by "cmp ecx, edi"; the movq
+ // and lea between them do not modify the flags.
+ // No emms is executed here; see ClearMmx().
+ _asm {
+ push edi
+;; mov ebx, [ref]
+;; mov ecx, [diff]
+;; mov eax, [dest]
+;; mov edx, [stride]
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+ pxor mm0, mm0
+ lea edi, [ecx + 128]
+ ;
+ L:
+ movq mm2, [ebx] ; (+3 misaligned) 8 reference pixels
+ ;
+ movq mm4, [ecx] ; first 4 changes
+ movq mm3, mm2
+ movq mm5, [ecx + 8] ; last 4 changes
+ punpcklbw mm2, mm0 ; turn first 4 refs into positive 16-bit #s
+ paddsw mm2, mm4 ; add in first 4 changes
+ punpckhbw mm3, mm0 ; turn last 4 refs into positive 16-bit #s
+ paddsw mm3, mm5 ; add in last 4 changes
+ add ebx, edx ; next row of reference pixels
+ packuswb mm2, mm3 ; pack result to unsigned 8-bit values
+ lea ecx, [ecx + 16] ; next row of changes
+ cmp ecx, edi ; are we done?
+ ;
+ movq [eax], mm2 ; store result
+ ;
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+ pop edi
+ }
+}
+
+
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : CopyBlockMMX
+ *
+ * INPUTS : unsigned char *src
+ * Top-left byte of the block to copy
+ *
+ * unsigned int srcstride
+ * Distance in bytes between rows; the same value is
+ * used to step through both src and dest
+ *
+ * OUTPUTS : unsigned char *dest
+ * Receives 8 rows of 8 bytes
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies an 8x8 byte block from source to destination
+ *
+ * SPECIAL NOTES : Does not execute emms; see ClearMmx().
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+ unsigned char *s = src;
+ unsigned char *d = dest;
+ unsigned int stride = srcstride;
+ // recon copy
+ // ecx = stride, edx = 3 * stride. Rows 0-3 are loaded into mm0-mm3 and
+ // stored, both pointers then advance by 4 * stride, and rows 4-7 follow
+ // in the same way.
+ _asm
+ {
+ mov ecx, [stride]
+ mov eax, [s]
+ mov ebx, [d]
+ lea edx, [ecx + ecx * 2]
+
+ movq mm0, [eax]
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx*2]
+ movq mm3, [eax + edx]
+
+ lea eax, [eax + ecx*4]
+
+ movq [ebx], mm0
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx*2], mm2
+ movq [ebx + edx], mm3
+
+ lea ebx, [ebx + ecx * 4]
+
+ movq mm0, [eax]
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx*2]
+ movq mm3, [eax + edx]
+
+ movq [ebx], mm0
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx*2], mm2
+ movq [ebx + edx], mm3
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c
new file mode 100644
index 00000000..d1106ec7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c
@@ -0,0 +1,204 @@
+ /****************************************************************************
+ *
+ * Module Title : WmtOptFunctions.c
+ *
+ * Description : willamette processor specific
+ * optimised versions of functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ * Special Note:
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ *
+ * 1.04 JBB 13 Jun 01 VP4 Code Clean Out
+ * 1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * Added push and pop ebx in WmtReconIntra
+ * 1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ * 1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ * 1.00 YWX 14/06/00 Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+
+/*
+ Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+#define STRICT // Strict type checking.
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Imports.
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+
+
+
+_declspec(align(16)) static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtReconIntra
+ *
+ * INPUTS : PB_INSTANCE * pbi
+ * Decoder instance; not used by this routine
+ *
+ * INT16 * idct
+ * Pointer to the output from the idct for this block
+ * (128 bytes are read: 8 rows of 8 values). Read with
+ * movdqa, so it must be 16-byte aligned.
+ *
+ * INT32 stride
+ * Distance in bytes between successive rows of dest
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer (8 rows of 8 bytes are written)
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - wmt version.
+ * Each value is saturated to a signed byte and then
+ * biased by 128 to give an unsigned pixel.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    // pbi is not needed here; mark it as referenced, as WmtReconInter does.
+    (void) pbi;
+
+    __asm
+    {
+
+        push        ebx
+
+        mov         eax,[idct]                      ; Signed 16 bit inputs
+        mov         edx,[dest]                      ; Unsigned 8 bit outputs
+
+        movq        xmm0,QWORD PTR [Eight128s]      ; Set xmm0 to 0x00000000000000008080808080808080
+        pxor        xmm3, xmm3                      ; set xmm3 to 0
+        ;
+        mov         ebx,[stride]                    ; Line stride in output buffer
+        lea         ecx,[eax+128]                   ; Endpoint in input buffer
+
+loop_label:
+
+        movdqa      xmm2,XMMWORD PTR [eax]          ; Read the eight inputs
+        packsswb    xmm2,xmm3                       ; Saturate to signed bytes in the low 8 bytes
+
+        pxor        xmm2,xmm0                       ; Convert result to unsigned (same as add 128)
+        lea         eax,[eax + 16]                  ; Step source buffer
+
+        cmp         eax,ecx                         ; are we done
+        movq        QWORD PTR [edx],xmm2            ; store results
+
+        lea         edx,[edx+ebx]                   ; Step output buffer
+        jc          loop_label                      ; Loop back if we are not done
+
+        pop         ebx
+    }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtReconInter
+ *
+ * INPUTS : UINT8 * RefPtr
+ * The last frame reference
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change
+ *
+ * SPECIAL NOTES :
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ // pbi is not used; the cast only marks the parameter as referenced.
+ (void) pbi;
+
+ // Reconstructs 8 rows (128 bytes of ChangePtr, 16 bytes per row).
+ // Per row: 8 reference bytes are widened to 16 bits, all 8 INT16 changes of
+ // the row are added in one paddsw (signed saturation), and the sums are
+ // packed back to 8 bytes with unsigned saturation and stored.
+ // ChangePtr is read with movdqa, so it must be 16-byte aligned.
+ // RefPtr and ReconPtr both advance by LineStep bytes per row.
+ // NOTE(review): the trailing remarks "add in first 4 changes" and the cycle
+ // count on "jc L" look carried over from an MMX version - confirm or update.
+ _asm {
+ push edi
+
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+
+ pxor xmm0, xmm0
+ lea edi, [ecx + 128]
+ L:
+ movq xmm2, QWORD ptr [ebx] ; (+3 misaligned) 8 reference pixels
+ movdqa xmm4, XMMWORD ptr [ecx] ; 8 changes
+
+ punpcklbw xmm2, xmm0 ;
+
+ add ebx, edx ; next row of reference pixels
+ paddsw xmm2, xmm4 ; add in first 4 changes
+
+ lea ecx, [ecx + 16] ; next row of changes
+ packuswb xmm2, xmm0 ; pack result to unsigned 8-bit values
+
+ cmp ecx, edi ; are we done?
+ movq QWORD PTR [eax], xmm2 ; store result
+
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+ pop edi
+ }
+
+}
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c
new file mode 100644
index 00000000..b5486f05
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c
@@ -0,0 +1,369 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.19 YWX 15-Jun-01 added function pointer setups for new deblocking filter
+* 1.18 YWX 26-Apr-01 Fixed the cpu frequency detection bug caused by Sleep()
+* 1.17 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
+* 1.16 JBB 26-Jan-01 Cleaned out unused function
+* 1.15 YWX 08-dec-00 Added WMT PostProcessor and
+* moved function declarations into _head files
+* 1.14 JBB 30 NOV 00 Version number changes
+* 1.13 YWX 03-Nov-00 Optimized postprocessor filters
+* 1.12 YWX 02-Nov-00 Added new loopfilter function pointers
+* 1.11 YWX 19-Oct-00 Added 1-2 Scaling functions pointers
+* 1.10 jbb 16 oct 00 added ifdefs to ensure version code
+* 1.09 YWX 04-Oct-00 Added function pointers for scaling
+* 1.08 YWX 06 Sep 00 Added function pointers for new deringing filter
+* using frag based Q Value.
+* 1.07 JBB 21 Aug 00 New More Blurry in high variance area deringer
+* 1.06 YWX 2 Aug 00 Added function pointers for postprocess
+* 1.05 YWX 15/05/00 Added functions to check processor frequency
+* and more function pointers for postprocessor
+* 1.04 YWX 08/05/00 Added function pointers setup for postprocess
+* 1.03 SJL 20/04/00 Added ability to enable the new dequant code.
+* 1.02 SJL 22/03/00 Function pointers for the loop filter.
+* 1.01 JBB 21/03/00 More Function Pointers for optimized playback
+* 1.00 PGW 12/10/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "pbdll.h"
+#pragma warning(disable:4115)
+#include <windows.h>
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+//extern void ReadTokens_c(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+extern void UnPackVideo_C(PB_INSTANCE *pbi);
+extern void UnPackVideo2(PB_INSTANCE *pbi);
+
+extern void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+extern void VP5_BuildQuantIndex_ForMMX(QUANTIZER *pbi);
+extern void VP5_BuildQuantIndex_ForWMT(QUANTIZER *pbi);
+
+
+//extern void ReadTokens_mmx(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void UnPackVideoMMX_LL (PB_INSTANCE *pbi);
+extern void ClearMmx(void);
+extern void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+//extern void ReadTokensPredict_c( PB_INSTANCE *pbi, UINT32 BlockSize, UINT32 Hpos );
+
+/****************************************************************************
+* Explicit imports
+*****************************************************************************
+*/
+extern unsigned int CPUFrequency;
+
+//extern MmxEnabled; // Is MMX enabled flag
+
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_readTSC
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : unsigned long *tsc
+ * Receives the low 32 bits of the time stamp counter.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : read the cpu time stamp counter
+ *
+ * SPECIAL NOTES : Since this function uses RDTSC instruction, which is
+ * introduced in Pentium processor, so this routine is
+ * expected to work on Pentium and above.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void VP5_readTSC(unsigned long *tsc)
+{
+    int tscLow;     /* EAX after rdtsc: the low 32 bits of the counter */
+
+    __asm
+    {
+        pushad                  ; save the general registers (cpuid and rdtsc overwrite several)
+        cpuid                   ; serialising instruction, executed before the counter is sampled
+        rdtsc                   ; EDX:EAX = time stamp counter
+        mov     tscLow, eax     ; keep the low half only
+        popad                   ; restore the general registers
+    }
+
+    *tsc = tscLow;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_GetProcessorFrequency()
+ *
+ * INPUTS : None
+ *
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : Estimated CPU frequency in MHz (0 if no performance counter)
+ *
+ * FUNCTION : Checks the processor's working frequency
+ *
+ * SPECIAL NOTES : This function should only be used here. Limited tests
+ * have verified it works till 166MHz Pentium with MMX.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+unsigned long VP5_GetProcessorFrequency()
+{
+    LARGE_INTEGER pf;                       // Performance counter frequency (counts per second)
+    LARGE_INTEGER startcount, endcount;     // Performance counter at both ends of the interval
+    unsigned long tsc1, tsc2;               // Low 32 bits of the TSC at both ends of the interval
+    unsigned long time1;                    // timeGetTime() when the delay started
+    unsigned long elapsedCounts;            // Performance counter ticks spent in the interval
+    unsigned long cpufreq = 0;              // Stays 0 when no measurement can be made
+    unsigned long Nearest66Mhz, Nearest50Mhz;
+    unsigned long Delta66, Delta50;
+
+    // If the cpu does not support the high resolution counter, the result is 0
+    if( QueryPerformanceFrequency(&pf) )
+    {
+        // read the counter and TSC at start
+        QueryPerformanceCounter(&startcount);
+        VP5_readTSC(&tsc1);
+
+        // Busy-wait for 5 ms. The elapsed time is taken as a difference of two
+        // timeGetTime() values so the wait stays correct when the millisecond
+        // timer wraps around; comparing against "time1 + 5" would end the
+        // wait immediately in that case.
+        time1 = timeGetTime();
+        while( (unsigned long)(timeGetTime() - time1) < 5 )
+        {
+            // spin
+        }
+
+        // read the counter and TSC at end
+        QueryPerformanceCounter(&endcount);
+        VP5_readTSC(&tsc2);
+
+        // Only the low 32 bits are used; unsigned subtraction keeps the
+        // differences correct across a wrap of either counter.
+        elapsedCounts = endcount.LowPart - startcount.LowPart;
+
+        // calculate the frequency in MHz; skip it if the counter did not advance
+        // so that no division by zero feeds the integer conversion
+        if( elapsedCounts != 0 )
+        {
+            cpufreq = (unsigned long )((double)( tsc2 - tsc1 )
+                * (double)pf.LowPart
+                / (double) elapsedCounts
+                / 1000000);
+        }
+    }
+
+    // Snap to the nearer of the closest multiple of 66 2/3 MHz and the
+    // closest multiple of 50 MHz (ties go to the 66 2/3 MHz value).
+    Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+    Delta66 = (Nearest66Mhz > cpufreq) ? (Nearest66Mhz - cpufreq) : (cpufreq - Nearest66Mhz);
+    Nearest50Mhz = ((cpufreq + 25)/50 *50);
+    Delta50 = (Nearest50Mhz > cpufreq) ? (Nearest50Mhz - cpufreq) : (cpufreq - Nearest50Mhz);
+
+    if(Delta50 < Delta66)
+    {
+        cpufreq = Nearest50Mhz;
+    }
+    else
+    {
+        cpufreq = Nearest66Mhz;
+
+        // integer arithmetic yields 666 for 666 2/3; report it as 667
+        if(cpufreq == 666)
+            cpufreq = 667;
+    }
+
+    return cpufreq;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_DMachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Checks for machine specific features such as MMX support and
+ * sets appropriate flags and function pointers.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void VP5_DMachineSpecificConfig(void)
+{
+    INT32 MmxEnabled;
+    INT32 XmmEnabled;   /* filled in by GetProcessorFlags() but not consulted here */
+    INT32 WmtEnabled;
+
+    GetProcessorFlags( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+
+    /* Install the quantiser index builder for the best supported instruction
+       set: Willamette first, then MMX, otherwise the generic version. */
+    VP5_BuildQuantIndex = WmtEnabled ? VP5_BuildQuantIndex_ForWMT
+                        : MmxEnabled ? VP5_BuildQuantIndex_ForMMX
+                        :              VP5_BuildQuantIndex_Generic;
+}
+
+// Issues a warning message.
+// Shows WarningMessage in a message box created with the exclamation icon and
+// task-modal flags, with no owner window (NULL) and no caption (NULL).
+void VP5_IssueWarning( char * WarningMessage )
+{
+ // Issue the warning message
+ MessageBox(NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+// Pause/Sleep for X milliseconds.
+// SleepMs is passed straight to Sleep().
+void VP5_PauseProcess( unsigned int SleepMs )
+{
+ Sleep( SleepMs );
+}
+
+// Allocates Size bytes with GlobalAlloc( GPTR, ... ) and returns the result as
+// a char pointer. GPTR requests fixed, zero-initialised memory; the return
+// value is passed through unchecked, so a failed allocation yields NULL.
+// Release the block with VP5_SystemGlobalFree().
+// NOTE: "Sytem" is a misspelling, but it is the name this function is declared
+// under, so it is left unchanged.
+char * VP5_SytemGlobalAlloc( unsigned int Size )
+{
+ return GlobalAlloc( GPTR, Size );
+}
+
+// Releases MemPtr by handing it to GlobalFree(); the result of GlobalFree()
+// is ignored.
+void VP5_SystemGlobalFree( char * MemPtr )
+{
+ GlobalFree( (HGLOBAL) MemPtr );
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_SetPbParam
+ *
+ * INPUTS : PB_COMMAND_TYPE Command
+ * char * Parameter
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Generalised command interface to decoder.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CCONV VP5_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 Parameter )
+{
+    /* Every command is compiled in only when POSTPROCESS is defined;
+       otherwise this function does nothing. */
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+        {
+            /* Derive the post-processing level from the frame size and the
+               share of the processor that may be used.
+               NOTE(review): Parameter is presumably a percentage - confirm
+               against the callers. */
+            double frameArea    = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+            double availableMhz = pbi->ProcessorFrequency * Parameter / 100;
+            double load         = 100 * sqrt(1.0*frameArea) / availableMhz;
+
+            pbi->CPUFree = Parameter;
+
+            /* Thresholds are tested from the highest load downwards. */
+            pbi->PostProcessingLevel = ( load > 150 ) ? 0
+                                     : ( load > 100 ) ? 8
+                                     : ( load >  90 ) ? 4
+                                     : ( load >  80 ) ? 5
+                                     :                  6;
+        }
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+        /* Parameter carries a YUV_BUFFER_CONFIG pointer; the frame is copied
+           into both the last-frame and the golden-frame buffers. */
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+        break;
+
+    case PBC_SET_POSTPROC:
+        if( Parameter != 9 )
+        {
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        else
+        {
+            /* 9 is not stored as a level: it re-enters this function as
+               PBC_SET_CPUFREE with a value of 70. */
+            VP5_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        break;
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c
new file mode 100644
index 00000000..e76e7d49
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c
@@ -0,0 +1,377 @@
+/****************************************************************************
+*
+* Module Title : quantindexmmx.c
+*
+* Description :
+*
+* AUTHOR :
+*
+*****************************************************************************
+* Revision History
+*
+* 1.03 JBB 15Nov00 Removed unnecessary ifdefs
+* 1.02 JBB 26Jul00 Removed unnecessary macro
+* 1.01 YWX 26 JUL 00 Bug Fixing, used WMT TI(x) for MMX processors
+* 1.00 SJL 14/04/00
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#define STRICT /* Strict type checking. */
+#include "codec_common.h"
+#include "quantize.h"
+#define MIN16 ((1<<16)-1)
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+// Zig-zag scan table: dequant_index[i] is the position inside the 64-entry
+// coefficient block of the i-th value in zig-zag order. The quantizers in
+// this file use it to fill quantized_list[] from DCT_block[].
+static UINT32 dequant_index[64] =
+{ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+// Looks like the inverse permutation of dequant_index (for example
+// dequant_index[2] == 8 and dequant_indexMMX[8] == 2) -- TODO confirm for
+// all 64 entries. Read by the VP5_BuildQuantIndex_For* routines below.
+static UINT32 dequant_indexMMX[64] =
+{
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+/*
+ used to unravel the coeffs in the proper order required by MMX_idct
+ see mmxidct.cxx
+
+ VP5_BuildQuantIndex_ForMMX stores this table's address in
+ QUANTIZER::transIndex and uses it when building quant_index[].
+*/
+static UINT32 transIndexMMX[64] =
+{
+ 0, 8, 1, 2, 9, 16, 24, 17,
+ 10, 3, 32, 11, 18, 25, 4, 12,
+ 5, 26, 19, 40, 33, 34, 41, 48,
+ 27, 6, 13, 20, 28, 21, 14, 7,
+
+ 56, 49, 42, 35, 43, 50, 57, 36,
+ 15, 22, 29, 30, 23, 44, 37, 58,
+ 51, 59, 38, 45, 52, 31, 60, 53,
+ 46, 39, 47, 54, 61, 62, 55, 63
+};
+
+// Coefficient ordering installed by VP5_BuildQuantIndex_ForWMT: its address
+// is stored in QUANTIZER::transIndex and it is used to build quant_index[].
+// Presumably the order expected by the WMT (SSE2) inverse DCT -- confirm
+// against that implementation.
+static UINT32 transIndexWMT[64] =
+{
+ 0, 8, 1, 2, 9, 16, 24, 17,
+ 10, 3, 4, 11, 18, 25, 32, 40,
+ 33, 26, 19, 12, 5, 6, 13, 20,
+ 27, 34, 41, 48, 56, 49, 42, 35,
+ 28, 21, 14, 7, 15, 22, 29, 36,
+ 43, 50, 57, 58, 51, 44, 37, 30,
+ 23, 31, 38, 45, 52, 59, 60, 53,
+ 46, 39, 47, 54, 61, 62, 55, 63
+};
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_BuildQuantIndex_ForMMX
+ *
+ * INPUTS : QUANTIZER *pbi : quantizer instance to update.
+ *
+ * OUTPUTS : pbi->transIndex and pbi->quant_index[] are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Installs transIndexMMX as the instance's transIndex table and
+ *            fills quant_index[] so that, for every i below BLOCK_SIZE,
+ *            quant_index[ transIndexMMX[ dequant_indexMMX[i] ] ] == i.
+ *
+ * SPECIAL NOTES : None.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_BuildQuantIndex_ForMMX(QUANTIZER *pbi)
+{
+    INT32 Coeff;
+    INT32 Slot;
+
+    pbi->transIndex = transIndexMMX;
+
+    // Walk the coefficients once, recording each one at its transposed slot.
+    Coeff = 0;
+    while ( Coeff < BLOCK_SIZE )
+    {
+        Slot = transIndexMMX[ dequant_indexMMX[Coeff] ];
+        pbi->quant_index[Slot] = Coeff;
+        Coeff++;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_BuildQuantIndex_ForWMT
+ *
+ * INPUTS : QUANTIZER *pbi : quantizer instance to update.
+ *
+ * OUTPUTS : pbi->transIndex and pbi->quant_index[] are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Installs transIndexWMT as the instance's transIndex table and
+ *            fills quant_index[] so that, for every i below BLOCK_SIZE,
+ *            quant_index[ transIndexWMT[ dequant_indexMMX[i] ] ] == i.
+ *
+ * SPECIAL NOTES : Uses dequant_indexMMX, the same source table as the MMX
+ *                 variant; only the trans-index table differs.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void VP5_BuildQuantIndex_ForWMT(QUANTIZER *pbi)
+{
+    INT32 Coeff;
+    INT32 Slot;
+
+    pbi->transIndex = transIndexWMT;
+
+    // Record every coefficient number at the slot the WMT ordering gives it.
+    for ( Coeff = 0; Coeff < BLOCK_SIZE; ++Coeff )
+    {
+        Slot = transIndexWMT[ dequant_indexMMX[Coeff] ];
+        pbi->quant_index[Slot] = Coeff;
+    }
+}
+
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_quantize_wmt
+ *
+ * INPUTS : QUANTIZER *pbi : supplies QuantRound / QuantCoeffs (DC path)
+ *                           and round / mult / zbin (vector path).
+ *          INT16 *DCT_block : coefficients to quantize; overwritten in place.
+ *          UINT8 bp : index into QTableSelect[] choosing the DC tables.
+ *
+ * OUTPUTS : Q_LIST_ENTRY *quantized_list : entries 0..63 are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Quantizes one block. Entry 0 is computed in C from the
+ *            QuantRound / QuantCoeffs tables. The inline assembly then
+ *            quantizes all 128 bytes of DCT_block in place, 8 words per
+ *            iteration, and entries 1..63 are copied to quantized_list in
+ *            dequant_index (zig-zag) order.
+ *
+ * SPECIAL NOTES : DCT_block is accessed with movdqa, which requires a
+ *                 16-byte aligned address. round / mult / zbin are loaded
+ *                 once (first 16 bytes of each) and reused for every group.
+ *                 ZBinPtr and NonZeroACs are declared but never used.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_quantize_wmt( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+ UINT32 i, j;
+
+ INT32 * QuantRoundPtr = pbi->QuantRound[QTableSelect[bp]];
+ INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+ INT32 * ZBinPtr = pbi->ZeroBinSize[QTableSelect[bp]]; // not used below
+
+ INT16 * DCT_blockPtr = DCT_block;
+ INT32 temp;
+ INT32 NonZeroACs = 0; // not used below
+ INT16 *round = &pbi->round[0];
+ INT16 *mult = &pbi->mult[0];
+ INT16 *zbin = &pbi->zbin[0];
+
+ // DC quantization
+ // Must run before the assembly: the loop below also overwrites DCT_block[0].
+ // Values strictly between -QuantRound[0] and +QuantRound[0] quantize to 0.
+ temp = 0;
+ if ( DCT_blockPtr[0] >= QuantRoundPtr[0] )
+ temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] + QuantRoundPtr[0] ) ;
+ else if ( DCT_blockPtr[0] <= -QuantRoundPtr[0] )
+ temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] - QuantRoundPtr[0] ) + MIN16;
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+ // this quantizer stores its results back in the source!!
+ __asm
+ {
+
+ // setup and collect registers
+ mov esi, DCT_block
+ xor ecx, ecx // index ptr
+ mov edi, round
+ movdqu xmm2, [edi] // get the round values
+ mov edi, mult
+ movdqu xmm3, [edi] // get the quantizer values
+ mov edi, zbin
+ movdqu xmm4, [edi] // get the zerobin values
+
+ // 8 coefficients at a time loop
+next8:
+ movdqa xmm0, [esi+ecx] // get source values
+ movdqa xmm1, xmm0 // copy, to be turned into a per-word sign mask
+ psraw xmm1, 15 // negative all 1's postive all 0's
+
+ // get the absolute value of the input values
+ pxor xmm0, xmm1 // one's complement of negatives
+ psubw xmm0, xmm1 // xmm0 = abs coeffs
+
+ // zero bin coefficients
+ movdqa xmm5, xmm0
+ pcmpgtw xmm5, xmm4 // mask = abs coeff > ZBin (signed compare)
+ pand xmm0, xmm5 // coeffs not above ZBin become zero
+
+ // calculate & round quantizer
+ paddw xmm0, xmm2 // Coeff + Quant Round
+ pmulhuw xmm0, xmm3 // *QuantCoeffs >> 16
+
+
+ // get back the sign bit
+ pxor xmm0, xmm1 // ones complement of negatives
+ psubw xmm0, xmm1 // negatives are back as negative
+
+ // output the results
+ movdqa [esi+ecx], xmm0
+
+ // loop back to the next set
+ add ecx, 16
+ cmp ecx, 128
+ jl next8
+ }
+
+ // zigzagify
+ // entry 0 was already written by the DC path above
+ for( i = 1; i < 64; i++)
+ {
+ // Zig Zag order
+ j = dequant_index[i];
+ quantized_list[i] = DCT_block[j];
+ }
+
+}
+/****************************************************************************
+ *
+ * ROUTINE : VP5_quantize_mmx
+ *
+ * INPUTS : QUANTIZER *pbi : supplies QuantRound / QuantCoeffs (DC path)
+ *                           and round / mult / zbin (vector path).
+ *          INT16 *DCT_block : coefficients to quantize; overwritten in place.
+ *          UINT8 bp : index into QTableSelect[] choosing the DC tables.
+ *
+ * OUTPUTS : Q_LIST_ENTRY *quantized_list : entries 0..63 are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Quantizes one block. Entry 0 is computed in C from the
+ *            QuantRound / QuantCoeffs tables. The inline assembly then
+ *            quantizes all 128 bytes of DCT_block in place, 4 words per
+ *            iteration, and entries 1..63 are copied to quantized_list in
+ *            dequant_index (zig-zag) order.
+ *
+ * SPECIAL NOTES : round / mult / zbin are loaded once (first 8 bytes of
+ *                 each) and reused for every group.
+ *                 ZBinPtr and NonZeroACs are declared but never used.
+ *                 NOTE(review): the MMX registers are used without an emms
+ *                 in this routine -- confirm that callers clear the MMX
+ *                 state before any floating-point code runs.
+ *                 NOTE(review): pmulhuw is not part of the original MMX
+ *                 instruction set (it arrived with SSE) -- confirm the CPU
+ *                 check that selects this routine.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_quantize_mmx( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+ UINT32 i, j;
+
+ INT32 * QuantRoundPtr = pbi->QuantRound[QTableSelect[bp]];
+ INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+ INT32 * ZBinPtr = pbi->ZeroBinSize[QTableSelect[bp]]; // not used below
+
+ INT16 * DCT_blockPtr = DCT_block;
+ INT32 temp;
+ INT32 NonZeroACs = 0; // not used below
+ INT16 *round = &pbi->round[0];
+ INT16 *mult = &pbi->mult[0];
+ INT16 *zbin = &pbi->zbin[0];
+
+ // DC quantization
+ // Must run before the assembly: the loop below also overwrites DCT_block[0].
+ // Values strictly between -QuantRound[0] and +QuantRound[0] quantize to 0.
+ temp = 0;
+ if ( DCT_blockPtr[0] >= QuantRoundPtr[0] )
+ temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] + QuantRoundPtr[0] ) ;
+ else if ( DCT_blockPtr[0] <= -QuantRoundPtr[0] )
+ temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] - QuantRoundPtr[0] ) + MIN16;
+ quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+ // this quantizer stores its results back in the source!!
+ __asm
+ {
+
+ // setup and collect registers
+ mov esi, DCT_block
+ xor ecx, ecx // index ptr
+ mov edi, round
+ movq mm2, [edi] // get the round values
+ mov edi, mult
+ movq mm3, [edi] // get the quantizer values
+ mov edi, zbin
+ movq mm4, [edi] // get the zerobin values
+
+ // 4 coefficients at a time loop (8 bytes per movq)
+next4:
+ movq mm0, [esi+ecx] // get source values
+ movq mm1, mm0 // copy, to be turned into a per-word sign mask
+ psraw mm1, 15 // negative all 1's postive all 0's
+
+ // get the absolute value of the input values
+ pxor mm0, mm1 // one's complement of negatives
+ psubw mm0, mm1 // mm0 = abs coeffs
+
+ // zero bin coefficients
+ movq mm5, mm0
+ pcmpgtw mm5, mm4 // mask = abs coeff > ZBin (signed compare)
+ pand mm0, mm5 // coeffs not above ZBin become zero
+
+ // calculate & round quantizer
+ paddw mm0, mm2 // Coeff + Quant Round
+ pmulhuw mm0, mm3 // *QuantCoeffs >> 16
+
+
+ // get back the sign bit
+ pxor mm0, mm1 // ones complement of negatives
+ psubw mm0, mm1 // negatives are back as negative
+
+ // output the results
+ movq [esi+ecx], mm0
+
+ // loop back to the next set
+ add ecx, 8
+ cmp ecx, 128
+ jl next4
+ }
+
+ // zigzagify
+ // entry 0 was already written by the DC path above
+ for( i = 1; i < 64; i++)
+ {
+ // Zig Zag order
+ j = dequant_index[i];
+ quantized_list[i] = DCT_block[j];
+ }
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c
new file mode 100644
index 00000000..8df2b37f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c
@@ -0,0 +1,147 @@
+/****************************************************************************
+*
+* Module Title : Timer.C
+*
+* Description : Video CODEC timer module
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 PGW 09/07/99 Added code to support profile timing
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#define INC_WIN_HEADER 1
+#include <windows.h>
+
+#include "type_aliases.h"
+#include <mmsystem.h>
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+
+// Used for calculation of elapsed time: holds the reduced counter value taken
+// by the previous MyGetElapsedCpuTime() call, which rewrites it on every call.
+// Not declared static, so it is visible to other translation units.
+UINT32 LastCPUTime;
+
+/****************************************************************************
+ *
+ * ROUTINE : MyInitTimer
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Initialises the timer mechanism.
+ *
+ * SPECIAL NOTES : The body is empty in this implementation; the call has
+ *                 no effect.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MyInitTimer( void )
+{
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : MyGetTime
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : The value returned by timeGetTime() (milliseconds).
+ *
+ * FUNCTION : Provides a model independent interface for getting times.
+ *
+ * SPECIAL NOTES : Thin wrapper; the resolution is whatever timeGetTime()
+ *                 provides on the host.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 MyGetTime( void )
+{
+/* Historical note: different timing mechanisms were used for win32 and win16.
+* The win16 method is accurate to 1ms whilst the Win32 is not guaranteed to better than 16ms.
+* Only the timeGetTime() path is present in this file.
+*/
+ return timeGetTime();
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MyGetElapsedCpuTime
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : LastCPUTime is updated to the current reduced counter value.
+ *
+ * RETURNS : Time-stamp-counter ticks elapsed since the previous call,
+ *           in units of 256 ticks (the low 8 counter bits are dropped).
+ *
+ * FUNCTION : Reads the 64 bit time-stamp counter with rdtsc, keeps bits
+ *            8..39 as a 32 bit value and returns its distance from the
+ *            value seen on the previous call.
+ *
+ * SPECIAL NOTES : The 32 bit value wraps; the difference is taken with
+ *                 unsigned arithmetic so a single wrap between two calls
+ *                 still yields the correct count. The first call measures
+ *                 from whatever LastCPUTime holds at that point.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT32 MyGetElapsedCpuTime( void )
+{
+    UINT32 CurrCPUTime[2];  // Full 64 bit CPU time: [0] low dword, [1] high dword
+    UINT32 CurrentCpuTime;  // bits 8..39 of the counter
+    UINT32 ElapsedTime;
+
+__asm
+    {
+        rdtsc                                   ; Get CPU time into EDX:EAX
+
+        mov dword ptr [CurrCPUTime], eax        ; low dword into the local buffer
+        mov dword ptr [CurrCPUTime+4], edx      ; high dword into the local buffer
+    }
+
+    // Drop the low 8 bits and pull in the low 8 bits of the high dword.
+    CurrentCpuTime = (CurrCPUTime[0] >> 8) | ((CurrCPUTime[1] & 0x000000FF) << 24);
+
+    // Unsigned subtraction is modulo 2^32, so it is already correct when the
+    // counter has wrapped since the last call; no separate branch is needed.
+    ElapsedTime = CurrentCpuTime - LastCPUTime;
+    LastCPUTime = CurrentCpuTime;
+
+    return ElapsedTime;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c
new file mode 100644
index 00000000..0ebfeedf
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c
@@ -0,0 +1,380 @@
+/****************************************************************************
+*
+* Module Title : DFrameR.C
+*
+* Description : Functions to read the frame header and get/set the frame type
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.18 YWX 17/dec/02 Added DeInterlacedMode setup
+* 1.17 YWX 05/08/02 Added initialization of postprocessor 's interlaced flag
+* 1.16 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.15 AWG 08-Jun-01 Added support for DCT16
+* 1.14 JBB 04 May 01 Added set of ReadTokens Function for VP5
+* 1.13 JBB 04 Dec 00 Added new Center vs Scale Bits
+* 1.12 JBB 30 NOV 00 Version number changes
+* 1.11 JBB 14 Oct 00 Added ifdefs around version specific code
+* 1.10 PGW 06 Oct 00 QThreshTable[] made instance specific.
+* Changes to LoadFrameHeader() to call InitQTables().
+* 1.09 YWX 25 Aug 00 Added version number check
+* 1.08 JBB 22 Aug 00 Ansi C conversion
+* 1.07 JBB 27 Jul 00 Malloc checks
+* 1.06 PGW 20/03/00 Removed InterIntra mode flag.
+* 1.05 JBB 27/01/99 Globals Removed, use of PB_INSTANCE, Bit Management Functions
+* 1.04 PGW 17/12/99 Changes to Synch code to reflect the fact that 0 length
+* frames are no longer legal (simply not transmittedd)
+* Note that this change is only relevant to the live version
+* of the codec
+* 1.03 PGW 15/11/99 Added support for VP3 version ID.
+* 1.02 PGW 30/08/99 Use bit functions to read header data.
+* Changes to way bytes are read.
+* 1.01 PGW 16/08/99 Header changes for VFW version and key frames.
+* 1.00 PGW 22/06/99 pbi->Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#include "pbdll.h"
+#include "duck_mem.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+#define START_SIZE 0
+#define END_SIZE 1
+
+#define READ_BUFFER_EMPTY_WAIT 20
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+#ifndef MAPCA
+// loMaskTbl_VP31[n] has the n least significant bits set (n = 0..32).
+// Not referenced by any code in this file.
+static const UINT32 loMaskTbl_VP31[] = { 0,
+ 1, 3, 7, 15,
+ 31, 63, 127, 255,
+ 0x1ff, 0x3ff, 0x7ff, 0xfff,
+ 0x1fff, 0x3fff, 0x7fff, 0xffff,
+ 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xfFFFF,
+ 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+ 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF,
+ 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, 0xffffFFFF
+};
+
+// hiMaskTbl_VP31[n] has the n most significant bits of a 32 bit word set
+// (n = 0..32). Not referenced by any code in this file.
+static const UINT32 hiMaskTbl_VP31[] = { 0,
+ 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000,
+ 0xF8000000, 0xFC000000, 0xFE000000, 0xFF000000,
+ 0xFF800000, 0xFFC00000, 0xFFE00000, 0xFFF00000,
+ 0xFFF80000, 0xFFFC0000, 0xFFFE0000, 0xFFFF0000,
+ 0xFFFF8000, 0xFFFFC000, 0xFFFFE000, 0xFFFFF000,
+ 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00, 0xFFFFFF00,
+ 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0, 0xFFFFFFF0,
+ 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE, 0xFFFFFFFF
+};
+
+#endif
+/****************************************************************************
+* Forward References.
+*****************************************************************************
+*/
+static BOOL LoadFrameHeader(PB_INSTANCE *pbi);
+
+
+/****************************************************************************
+* Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_LoadFrame
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance whose bit reader is
+ *                             positioned at the start of a frame.
+ *
+ * OUTPUTS : None beyond what LoadFrameHeader() writes into *pbi.
+ *
+ * RETURNS : TRUE when LoadFrameHeader() succeeds, FALSE otherwise.
+ *
+ * FUNCTION : Loads the frame header for the current frame.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+BOOL VP5_LoadFrame(PB_INSTANCE *pbi)
+{
+    // Header parsing is the only work done here; its verdict is ours.
+    if ( LoadFrameHeader(pbi) )
+        return TRUE;
+
+    return FALSE;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : LoadFrameHeader
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance; the header is read
+ *                             through its bit reader pbi->br.
+ *
+ * OUTPUTS : FrameType, and on key frames the version, key frame type,
+ *           interlace flag, scaling configuration and frame dimensions
+ *           in *pbi; the quantizer's FrameQIndex and
+ *           ThisFrameQuantizerValue.
+ *
+ * RETURNS : FALSE if the stream version is newer than
+ *           CURRENT_DECODE_VERSION, else TRUE.
+ *
+ * FUNCTION : Loads and interprets the frame header.
+ *
+ * SPECIAL NOTES : Fields are consumed in bitstream order, so the order of
+ *                 the reads below must not be changed.
+ *                 NOTE(review): the fragment counts read from the stream
+ *                 are not range checked here (zero is accepted) -- confirm
+ *                 that VP5_InitFrameDetails() copes with that.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+// VFW codec version
+// ROUNDUP32 is not used by any code in this file.
+#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) )
+static BOOL LoadFrameHeader(PB_INSTANCE *pbi)
+{
+ UINT8 VersionByte0; // Must be 0 for VP30b and later
+ UINT8 DctQMask;
+ UINT8 SpareBits; // Spare cfg bits
+ UINT8 Unused;
+
+ BOOL RetVal = TRUE;
+
+ // Is the frame and inter frame or a key frame
+ pbi->FrameType = DecodeBool(&pbi->br, 128);
+
+ // unused bit
+ // (read only to advance the bit reader; the value is never examined)
+ Unused = DecodeBool(&pbi->br, 128);
+
+ // Quality (Q) index
+ // 6 bits, so the value is in the range 0..63
+ DctQMask = (UINT8)VP5_bitread( &pbi->br, 6 );
+
+
+ // If the frame was a base frame then read the frame dimensions and build a bitmap structure.
+ if ( (pbi->FrameType == BASE_FRAME) )
+ {
+ // Read the frame dimensions bytes (0,0 indicates vp31 or later)
+ // VersionByte0 is read but not checked afterwards.
+ VersionByte0 = (UINT8)VP5_bitread( &pbi->br, 8 );
+ pbi->Vp3VersionNo = (UINT8)VP5_bitread( &pbi->br, 5 );
+
+ // Streams newer than this decoder understands are rejected.
+ if(pbi->Vp3VersionNo > CURRENT_DECODE_VERSION)
+ {
+ RetVal = FALSE;
+ return RetVal;
+ }
+ // Initialise version specific quantiser values
+ VP5_InitQTables( pbi->quantizer, pbi->Vp3VersionNo );
+
+ // Read the type / coding method for the key frame.
+ pbi->KeyFrameType = (UINT8)DecodeBool(&pbi->br, 128);
+
+ // SpareBits is read but not used afterwards.
+ SpareBits = (UINT8)DecodeBool(&pbi->br, 128);
+
+ // is this keyframe section of the file interlaced
+ pbi->Configuration.Interlaced = (UINT32)DecodeBool(&pbi->br, 128);
+#ifndef MAPCA
+ SetPPInterlacedMode(pbi->postproc, pbi->Configuration.Interlaced);
+ if(pbi->Configuration.Interlaced)
+ {
+ SetDeInterlaceMode(pbi->postproc, pbi->DeInterlaceMode);
+ }
+#endif
+ // Spare config bits
+ {
+ UINT32 HFragments;
+ UINT32 VFragments;
+ UINT32 HOldScaled;
+ UINT32 VOldScaled;
+ UINT32 HNewScaled;
+ UINT32 VNewScaled;
+ UINT32 OutputHFragments;
+ UINT32 OutputVFragments;
+
+ // Each dimension is stored as one byte and doubled; the counts are
+ // multiplied by 8 further down to obtain sizes.
+ VFragments = 2 * ((UINT8)VP5_bitread( &pbi->br, 8 ));
+ HFragments = 2 * ((UINT8)VP5_bitread( &pbi->br, 8 ));
+
+ OutputVFragments = 2 * ((UINT8)VP5_bitread( &pbi->br, 8 ));
+ OutputHFragments = 2 * ((UINT8)VP5_bitread( &pbi->br, 8 ));
+
+ // Guard the divisions below against a ratio that was never set.
+ if(pbi->Configuration.HRatio == 0)
+ pbi->Configuration.HRatio = 1;
+
+ if(pbi->Configuration.VRatio == 0)
+ pbi->Configuration.VRatio = 1;
+
+ // Scaled size under the configuration in force before this header,
+ // kept to detect a change of intermediate buffer size below.
+ HOldScaled = pbi->Configuration.HScale * pbi->HFragments * 8 / pbi->Configuration.HRatio;
+ VOldScaled = pbi->Configuration.VScale * pbi->VFragments * 8 / pbi->Configuration.VRatio;
+
+ pbi->Configuration.ExpandedFrameWidth = OutputHFragments * 8;
+ pbi->Configuration.ExpandedFrameHeight = OutputVFragments * 8;
+
+ // Pick the vertical Scale/Ratio pair from 1/1, 5/4, 5/3 or 2/1
+ // according to how much smaller the coded size is than the output.
+ if(VFragments >= OutputVFragments)
+ {
+ pbi->Configuration.VScale = 1;
+ pbi->Configuration.VRatio = 1;
+ }
+ else if (5*VFragments >= 4*OutputVFragments)
+ {
+ pbi->Configuration.VScale = 5;
+ pbi->Configuration.VRatio = 4;
+ }
+ else if (5*VFragments >= 3*OutputVFragments)
+ {
+ pbi->Configuration.VScale = 5;
+ pbi->Configuration.VRatio = 3;
+ }
+ else
+ {
+ pbi->Configuration.VScale = 2;
+ pbi->Configuration.VRatio = 1;
+ }
+
+ // Same selection for the horizontal direction.
+ if(HFragments >= OutputHFragments)
+ {
+ pbi->Configuration.HScale = 1;
+ pbi->Configuration.HRatio = 1;
+ }
+ else if (5*HFragments >= 4*OutputHFragments)
+ {
+ pbi->Configuration.HScale = 5;
+ pbi->Configuration.HRatio = 4;
+ }
+ else if (5*HFragments >= 3*OutputHFragments)
+ {
+ pbi->Configuration.HScale = 5;
+ pbi->Configuration.HRatio = 3;
+ }
+ else
+ {
+ pbi->Configuration.HScale = 2;
+ pbi->Configuration.HRatio = 1;
+ }
+
+ HNewScaled = pbi->Configuration.HScale * HFragments * 8 / pbi->Configuration.HRatio;
+ VNewScaled = pbi->Configuration.VScale * VFragments * 8 / pbi->Configuration.VRatio;
+
+ pbi->ScaleWidth = HNewScaled;
+ pbi->ScaleHeight = VNewScaled;
+
+ pbi->Configuration.ScalingMode = ((UINT32)VP5_bitread( &pbi->br, 2 ));
+
+ // we have a new input size
+ // (pbi->HFragments / pbi->VFragments are not assigned in this function;
+ // presumably VP5_InitFrameDetails() updates them -- TODO confirm)
+ if( VFragments != pbi->VFragments ||
+ HFragments != pbi->HFragments)
+ {
+ // Validate the combination of height and width.
+ pbi->Configuration.VideoFrameWidth = HFragments*8;
+ pbi->Configuration.VideoFrameHeight = VFragments*8;
+ VP5_InitFrameDetails(pbi);
+ }
+
+
+ // we have a new intermediate buffer clean the screen
+ if( pbi->ScaleBuffer != 0 &&
+ (HOldScaled != HNewScaled ||
+ VOldScaled != VNewScaled ) )
+ {
+ // turn the screen black!!
+ // First (W+32)*(H+32) bytes are set to 0x00 and the following half
+ // that many bytes to 0x80 -- presumably the luma plane followed by
+ // the chroma planes; confirm against the ScaleBuffer allocation.
+ memset(pbi->ScaleBuffer, 0x0, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) );
+ memset(pbi->ScaleBuffer + (pbi->OutputWidth+32) * (pbi->OutputHeight+32),
+ 0x80, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) / 2 );
+ }
+ }
+ }
+
+ // Set this frame quality value from Q Index
+ // (done for every frame type; on non-base frames pbi->Vp3VersionNo is
+ // whatever an earlier base frame stored)
+ pbi->quantizer->FrameQIndex = DctQMask;
+#ifdef MAPCA
+ SetFLimit(DctQMask);
+ SetSimpleDeblockFlimit(DctQMask);
+#endif
+ pbi->quantizer->ThisFrameQuantizerValue = pbi->quantizer->QThreshTable[DctQMask];
+ VP5_UpdateQ( pbi->quantizer, pbi->Vp3VersionNo );
+
+ return RetVal;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_SetFrameType
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance to update.
+ *          UINT8 FrType : frame type to record.
+ *
+ * OUTPUTS : pbi->FrameType.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Sets the current frame type.
+ *
+ * SPECIAL NOTES : BASE_FRAME and every other value are handled alike:
+ *                 the value is stored unchanged.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_SetFrameType( PB_INSTANCE *pbi,UINT8 FrType )
+{
+    /* No frame type needs special treatment; record the request as given. */
+    pbi->FrameType = FrType;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_GetFrameType
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance to query.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : The current frame type (pbi->FrameType).
+ *
+ * FUNCTION : Gets the current frame type.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+UINT8 VP5_GetFrameType(PB_INSTANCE *pbi)
+{
+ return pbi->FrameType;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c
new file mode 100644
index 00000000..fad7e93e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c
@@ -0,0 +1,198 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.20 YWX 06-Nov-01 Configuration Baseline for C only version
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "pbdll.h"
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+//extern void ReadTokens_c(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+extern void UnPackVideo_C(PB_INSTANCE *pbi);
+extern void UnPackVideo2(PB_INSTANCE *pbi);
+
+extern void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+
+/****************************************************************************
+* Explicit imports
+*****************************************************************************
+*/
+
+extern unsigned int CPUFrequency;
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ * ROUTINE : VP5_GetProcessorFrequency()
+ *
+ * INPUTS : None
+ *
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : Always 0 in this implementation.
+ *
+ * FUNCTION : Placeholder for the processor frequency check; no
+ *            measurement is performed here.
+ *
+ * SPECIAL NOTES : An earlier note on this routine read "This function
+ *                 should only be used here. Limited tests has verified it
+ *                 works till 166MHz Pentium with MMX." It does not
+ *                 describe the body below, which only returns 0.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+unsigned long VP5_GetProcessorFrequency()
+{
+
+ return 0;
+
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_DMachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : The VP5_BuildQuantIndex function pointer.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Selects the function implementations for this build. No
+ *            processor feature detection is done here: the only action
+ *            is to point VP5_BuildQuantIndex at
+ *            VP5_BuildQuantIndex_Generic.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_DMachineSpecificConfig(void)
+{
+ VP5_BuildQuantIndex = VP5_BuildQuantIndex_Generic;
+}
+
+// Issues a warning message.
+// In this implementation the message is discarded: the argument is only
+// cast to void so that it does not count as unused.
+void VP5_IssueWarning( char * WarningMessage )
+{
+ (void) WarningMessage;
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_SetPbParam
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance to update.
+ *          PB_COMMAND_TYPE Command : which setting to change.
+ *          UINT32 Parameter : value for that setting.
+ *
+ * OUTPUTS : Fields of *pbi are written as described below.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Generalised command interface to decoder.
+ *
+ *   PBC_SET_CPUFREE         stores Parameter in CPUFree and sets
+ *                           PostProcessingLevel to 0.
+ *   PBC_SET_REFERENCEFRAME  accepted, no action.
+ *   PBC_SET_POSTPROC        9 re-enters with PBC_SET_CPUFREE / 70; any other
+ *                           value clears CPUFree and is stored directly as
+ *                           PostProcessingLevel.
+ *   PBC_SET_DEINTERLACEMODE, PBC_SET_BLACKCLAMP, PBC_SET_WHITECLAMP
+ *                           store Parameter in the matching field.
+ *
+ * SPECIAL NOTES : The whole body is compiled only when POSTPROCESS is
+ *                 defined; otherwise the call does nothing.
+ *
+ * ERRORS : None. Unknown commands are ignored.
+ *
+ ****************************************************************************/
+void CCONV VP5_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 Parameter )
+{
+
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+        // This build has no processor frequency to work from, so the load
+        // estimate is a fixed 100*10 = 1000. That is above the highest
+        // threshold (150), which always selects level 0; the lower
+        // thresholds could never be reached and are not spelled out.
+        pbi->CPUFree = Parameter;
+        pbi->PostProcessingLevel = 0;
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+        // Nothing to do in this build.
+        break;
+
+    case PBC_SET_POSTPROC:
+        if( Parameter == 9 )
+        {
+            // 9 selects the level automatically, as for 70% free CPU.
+            VP5_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        }
+        else
+        {
+            // Explicit level: automatic selection is switched off.
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        break;
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c
new file mode 100644
index 00000000..ff639f75
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c
@@ -0,0 +1,484 @@
+/****************************************************************************
+*
+* Module Title : FrameIni.c
+*
+* Description : Video CODEC playback module
+*
+* AUTHOR : JimBankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.21 YWX 06-Nov-01 Changed to align the MB coeffs buffer memory
+* 1.20 JBB 13-Jun-01 VP4 Code Clean Out
+* 1.19 AWG 11-Jun-01 Added support for DCT16
+* 1.18 JBB 24-May-01 Fixed Memory Allocation problem and frame recon prob
+* 1.17 JBB 09-Apr-01 CPUFree persistence
+* 1.16 SJL 05-Apr-01 Fixed MAC compile errors.
+* 1.15 JBB 23-Mar-01 New DC prediction
+* 1.14 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+* 1.13 JBB 30 NOV 00 Version number changes
+* 1.12 JBB 15-NOV-00 cleaned out ifdefs
+* 1.11 JBB 17-oct-00 Ifdefs around version information
+* 1.10 YWX 17-Oct-00 Added Initialization of block coordinates for
+* new loop filtering strategy
+* 1.09 YWX 11-Oct-00 Added LastFrameNoMvRecon and LastFrameNoMvReconAlloc
+* 1.08 SJL 25 Aug 00 Fixed Mac compile error
+* 1.08 JBB 24 Aug 00 Removed extraneous definition of load and decode
+* 1.07 SJL 16 Aug 00 Fixed Mac compile error
+* 1.06 JBB 28 jul 00 Added fragment variance array for post processor
+* 1.05 JBB 27Jul00 Added checks on Mallocs
+* 1.04 SJL 24Jul00 Changed Frees to DUCK_FREE for Mac utilization
+* 1.03 YWX 08/05/00 Added #if defined(POSTPROCESS) for postprocess
+* 1.02 JBB 05/05/00 Added Post Processing Buffer & Block Quality Buffers
+* 1.01 YWX 06/04/00 Alligned more buffers for speed
+* 1.00 JBB 27/01/99 Globals Removed, use of PB_INSTANCE, common between
+* compressor and decompressor
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#include "pbdll.h"
+#include "stdlib.h"
+#include <stdint.h>
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+
+// Twelve (row, col) macro-block displacements. VP5_InitFrameDetails passes
+// each pair to MBOffset and stores the result in pbi->mvNearOffset[], in the
+// order listed here. Rows are 0, -1 or -2; the two row-0 entries have a
+// negative col.
+static const struct
+{
+ INT32 row;
+ INT32 col;
+} NearMacroBlocks[12] =
+{
+ { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -1, 1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -1, -2 },
+ { -2, -1 },
+ { -2, 1 },
+ { -1, 2 },
+ { -2, -2 },
+ { -2, 2 }
+};
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+void InitializeFragCoordinates(PB_INSTANCE *pbi);
+/****************************************************************************
+* Explicit Imports
+*****************************************************************************
+*/
+
+
+#include "duck_mem.h"
+
+/****************************************************************************
+ *
+ * ROUTINE : DeleteFragmentInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Frees the per-fragment buffers held by the Playback instance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_DeleteFragmentInfo(PB_INSTANCE * pbi)
+{
+    // Each buffer is tracked by two pointers: the raw "...Alloc" pointer that
+    // is handed to duck_free, and the pointer derived from it that the codec
+    // uses. Both are cleared so the function can safely be called again.
+
+#ifndef MAPCA
+    // macro-block coefficient storage
+    if( pbi->mbi.CoeffsAlloc != NULL )
+    {
+        duck_free(pbi->mbi.CoeffsAlloc);
+    }
+    pbi->mbi.CoeffsAlloc = NULL;
+    pbi->mbi.Coeffs = NULL;
+#endif
+
+    // per-fragment info
+    if( pbi->FragInfoAlloc != NULL )
+    {
+        duck_free(pbi->FragInfoAlloc);
+    }
+    pbi->FragInfoAlloc = NULL;
+    pbi->FragInfo = NULL;
+
+    // "above" contexts for the Y, U and V planes
+    if( pbi->fc.AboveYAlloc != NULL )
+    {
+        duck_free(pbi->fc.AboveYAlloc);
+    }
+    pbi->fc.AboveYAlloc = NULL;
+    pbi->fc.AboveY = NULL;
+
+    if( pbi->fc.AboveUAlloc != NULL )
+    {
+        duck_free(pbi->fc.AboveUAlloc);
+    }
+    pbi->fc.AboveUAlloc = NULL;
+    pbi->fc.AboveU = NULL;
+
+    if( pbi->fc.AboveVAlloc != NULL )
+    {
+        duck_free(pbi->fc.AboveVAlloc);
+    }
+    pbi->fc.AboveVAlloc = NULL;
+    pbi->fc.AboveV = NULL;
+
+    // per-macro-block arrays
+    if( pbi->MBInterlacedAlloc != NULL )
+    {
+        duck_free(pbi->MBInterlacedAlloc);
+    }
+    pbi->MBInterlacedAlloc = NULL;
+    pbi->MBInterlaced = NULL;
+
+    if( pbi->MBMotionVectorAlloc != NULL )
+    {
+        duck_free(pbi->MBMotionVectorAlloc);
+    }
+    pbi->MBMotionVectorAlloc = NULL;
+    pbi->MBMotionVector = NULL;
+
+    if( pbi->predictionModeAlloc != NULL )
+    {
+        duck_free(pbi->predictionModeAlloc);
+    }
+    pbi->predictionModeAlloc = NULL;
+    pbi->predictionMode = NULL;
+
+#ifdef MAPCA
+    // MAPCA-only scratch buffers
+    if( pbi->ReferenceBlocksAlloc != NULL )
+    {
+        duck_free(pbi->ReferenceBlocksAlloc);
+    }
+    pbi->ReferenceBlocksAlloc = NULL;
+    pbi->ReferenceBlocks = NULL;
+
+    if( pbi->ReconstructedMBsAlloc != NULL )
+    {
+        duck_free(pbi->ReconstructedMBsAlloc);
+    }
+    pbi->ReconstructedMBsAlloc = NULL;
+    pbi->ReconstructedMBs = NULL;
+#endif
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : AllocateFragmentInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Allocates the per-fragment buffers of the Playback instance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Rounds the address X up to the next multiple of 32. The arithmetic is done
+// in uintptr_t so that no address bits are lost when pointers are wider than
+// 32 bits; a 32-bit mask such as 0xFFFFFFE0 would clear the upper half of a
+// 64-bit address. Callers cast the result back to the pointer type they need.
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 ) & ~( (uintptr_t) 31 ) )
+
+// Allocates the per-fragment / per-macro-block buffers of the playback
+// instance. Any buffers from an earlier call are released first.
+//
+// Every buffer is over-allocated by 32 bytes; the raw pointer is kept in the
+// "...Alloc" member (for duck_free) and the 32-byte aligned pointer in the
+// member the codec uses.
+//
+// Returns TRUE on success. If any allocation fails, everything allocated so
+// far is released through VP5_DeleteFragmentInfo and FALSE is returned.
+BOOL VP5_AllocateFragmentInfo(PB_INSTANCE * pbi)
+{
+    // clear any existing info
+    VP5_DeleteFragmentInfo(pbi);
+
+#ifndef MAPCA
+    // coefficient storage: 6 rows of 72 Q_LIST_ENTRY
+    pbi->mbi.CoeffsAlloc = (Q_LIST_ENTRY(*)[72]) duck_malloc(32 + sizeof(Q_LIST_ENTRY)*72*6, DMEM_GENERAL);
+    if(!pbi->mbi.CoeffsAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->mbi.Coeffs = (Q_LIST_ENTRY(*)[72]) ROUNDUP32(pbi->mbi.CoeffsAlloc);
+#endif
+
+    // context allocations: HFragments entries for Y, HFragments / 2 for U and
+    // V, each with 8 extra entries.
+    // NOTE(review): sizes use sizeof(BLOCK_CONTEXT) while the pointers are
+    // BLOCK_CONTEXTA * - confirm the two types have the same size.
+    pbi->fc.AboveYAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveYAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->fc.AboveY = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveYAlloc);
+
+    pbi->fc.AboveUAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveUAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->fc.AboveU = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveUAlloc);
+
+    pbi->fc.AboveVAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveVAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->fc.AboveV = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveVAlloc);
+
+    // the encoder is the only thing using this move it to compdll
+    pbi->MBInterlacedAlloc = (char *) duck_malloc(32+pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if(!pbi->MBInterlacedAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->MBInterlaced = (char *) ROUNDUP32(pbi->MBInterlacedAlloc);
+
+    // one prediction mode and one motion vector per macro-block
+    pbi->predictionModeAlloc = (char *) duck_malloc(32+pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if(!pbi->predictionModeAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->predictionMode = (char *) ROUNDUP32(pbi->predictionModeAlloc);
+
+    pbi->MBMotionVectorAlloc = (MOTION_VECTORA *) duck_malloc(32+pbi->MacroBlocks * sizeof(MOTION_VECTORA ), DMEM_GENERAL);
+    if(!pbi->MBMotionVectorAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->MBMotionVector = (MOTION_VECTORA *) ROUNDUP32(pbi->MBMotionVectorAlloc);
+
+    // the encoder is the only thing using this move it to compdll
+    pbi->FragInfoAlloc = (FRAG_INFO *) duck_malloc(32+pbi->UnitFragments * sizeof(FRAG_INFO), DMEM_GENERAL);
+    if(!pbi->FragInfoAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->FragInfo = (FRAG_INFO *) ROUNDUP32(pbi->FragInfoAlloc);
+
+#ifdef MAPCA
+    pbi->ReferenceBlocksAlloc = (UINT8(*)[192]) duck_malloc(32 + 6*192, DMEM_GENERAL);
+    if(!pbi->ReferenceBlocksAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->ReferenceBlocks = (UINT8(*)[192]) ROUNDUP32(pbi->ReferenceBlocksAlloc);
+
+    pbi->ReconstructedMBsAlloc = (UINT8*) duck_malloc(32 + 768, DMEM_GENERAL);
+    if(!pbi->ReconstructedMBsAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE; }
+    pbi->ReconstructedMBs = (UINT8*) ROUNDUP32(pbi->ReconstructedMBsAlloc);
+#endif
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeleteFrameInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Frees the frame buffers held by the Playback instance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Releases the four frame buffers (this-frame reconstruction, golden frame,
+// last-frame reconstruction, post-process buffer) and clears both the raw
+// "...Alloc" pointers and the aligned pointers derived from them, so that no
+// stale pointer survives and a repeated call is harmless.
+void VP5_DeleteFrameInfo(PB_INSTANCE * pbi)
+{
+    if(pbi->ThisFrameReconAlloc )
+        duck_free(pbi->ThisFrameReconAlloc );
+    if(pbi->GoldenFrameAlloc)
+        duck_free(pbi->GoldenFrameAlloc);
+    if(pbi->LastFrameReconAlloc)
+        duck_free(pbi->LastFrameReconAlloc);
+    if(pbi->PostProcessBufferAlloc)
+        duck_free(pbi->PostProcessBufferAlloc);
+
+    // raw allocations
+    pbi->ThisFrameReconAlloc = 0;
+    pbi->GoldenFrameAlloc = 0;
+    pbi->LastFrameReconAlloc = 0;
+    pbi->PostProcessBufferAlloc = 0;
+
+    // aligned pointers into the (now freed) allocations; PostProcessBuffer
+    // must be cleared here too, otherwise it is left dangling
+    pbi->ThisFrameRecon = 0;
+    pbi->GoldenFrame = 0;
+    pbi->LastFrameRecon = 0;
+    pbi->PostProcessBuffer = 0;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : AllocateFrameInfo
+ *
+ *
+ * INPUTS : Instance of PB to be initialized
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ *
+ * FUNCTION : Allocates the frame buffers of the Playback instance
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+BOOL VP5_AllocateFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize)
+{
+    // All four buffers have the same size: the frame itself, plus one extra
+    // YStride (JBB+YX: slack so that copy12x12 does not fail when there is a
+    // large motion vector in V on the last v block; those pixels are never
+    // used), plus 32 bytes so the start can be moved to a 32 byte boundary.
+    size_t BufferBytes;
+
+    // clear any existing info
+    VP5_DeleteFrameInfo(pbi);
+
+    BufferBytes = 32+pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY);
+
+    pbi->ThisFrameReconAlloc = (UINT8 *)duck_malloc(BufferBytes, DMEM_GENERAL);
+    if( pbi->ThisFrameReconAlloc == 0 )
+        goto AllocFailed;
+
+    pbi->GoldenFrameAlloc = (UINT8 *)duck_malloc(BufferBytes, DMEM_GENERAL);
+    if( pbi->GoldenFrameAlloc == 0 )
+        goto AllocFailed;
+
+    pbi->LastFrameReconAlloc = (UINT8 *)duck_malloc(BufferBytes, DMEM_GENERAL);
+    if( pbi->LastFrameReconAlloc == 0 )
+        goto AllocFailed;
+
+    pbi->PostProcessBufferAlloc = (UINT8 *)duck_malloc(BufferBytes, DMEM_GENERAL);
+    if( pbi->PostProcessBufferAlloc == 0 )
+        goto AllocFailed;
+
+    // adjust up to the next 32 byte boundary
+    pbi->ThisFrameRecon = (unsigned char *) ROUNDUP32(pbi->ThisFrameReconAlloc);
+    pbi->GoldenFrame = (unsigned char *) ROUNDUP32(pbi->GoldenFrameAlloc);
+    pbi->LastFrameRecon = (unsigned char *) ROUNDUP32(pbi->LastFrameReconAlloc);
+    pbi->PostProcessBuffer = (unsigned char *) ROUNDUP32(pbi->PostProcessBufferAlloc);
+
+    return TRUE;
+
+AllocFailed:
+    // release whatever was obtained before the failing allocation
+    VP5_DeleteFrameInfo(pbi);
+    return FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_InitFrameDetails
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Initialises the frame details.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Derives all frame geometry (plane sizes, fragment and macro-block counts,
+// strides, plane offsets) from pbi->Configuration and (re)allocates the
+// fragment, frame and - when the output size differs from the coded size -
+// scale buffers.
+//
+// Returns TRUE on success. Returns FALSE if any allocation fails; in that
+// case the fragment and frame buffers allocated by this call are released.
+BOOL VP5_InitFrameDetails(PB_INSTANCE *pbi)
+{
+    int FrameSize;
+    UINT32 i;
+
+    // re-apply a persistent CPUFree setting
+    if(pbi->CPUFree > 0 )
+        VP5_SetPbParam( pbi, PBC_SET_CPUFREE, pbi->CPUFree );
+
+    /* Set the frame size etc. */
+    pbi->YPlaneSize = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+    pbi->UVPlaneSize = pbi->YPlaneSize / 4;
+    pbi->HFragments = pbi->Configuration.VideoFrameWidth / pbi->Configuration.HFragPixels;
+    pbi->VFragments = pbi->Configuration.VideoFrameHeight / pbi->Configuration.VFragPixels;
+    pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2;
+    pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments;
+    pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
+
+    // reconstruction buffers carry STRIDE_EXTRA additional columns and rows
+    pbi->Configuration.YStride = (pbi->Configuration.VideoFrameWidth + STRIDE_EXTRA);
+    pbi->Configuration.UVStride = pbi->Configuration.YStride / 2;
+    pbi->ReconYPlaneSize = pbi->Configuration.YStride * (pbi->Configuration.VideoFrameHeight + STRIDE_EXTRA);
+    pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
+    FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
+
+    pbi->YDataOffset = 0;
+    pbi->UDataOffset = pbi->YPlaneSize;
+    pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
+    pbi->ReconYDataOffset = 0;//(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER;
+    pbi->ReconUDataOffset = pbi->ReconYPlaneSize;// + (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
+    pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize;// + (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
+
+    // Image dimensions in Macro-Blocks: size / 16 rounded up, plus 4
+    pbi->MBRows = 4+(pbi->Configuration.VideoFrameHeight/16) + ( pbi->Configuration.VideoFrameHeight%16 ? 1 : 0 );
+    pbi->MBCols = 4+(pbi->Configuration.VideoFrameWidth/16) + ( pbi->Configuration.VideoFrameWidth%16 ? 1 : 0 );
+    pbi->MacroBlocks = pbi->MBRows * pbi->MBCols;
+
+    // convert the neighbour table into offsets (done after MBRows / MBCols
+    // are set, as in the original statement order)
+    for(i=0;i<12;i++)
+    {
+        pbi->mvNearOffset[i] = MBOffset(NearMacroBlocks[i].row, NearMacroBlocks[i].col);
+    }
+#ifndef MAPCA
+    ChangePostProcConfiguration(pbi->postproc, &pbi->Configuration);
+#endif
+    if(!VP5_AllocateFragmentInfo(pbi))
+        return FALSE;
+
+    if(!VP5_AllocateFrameInfo(pbi, FrameSize))
+    {
+        VP5_DeleteFragmentInfo(pbi);
+        return FALSE;
+    }
+
+    // The requested output size differs from the coded frame size and no
+    // scale buffer exists yet
+    if( pbi->ScaleBuffer == 0 && pbi->OutputWidth &&
+        (pbi->Configuration.VideoFrameWidth != pbi->OutputWidth ||
+         pbi->Configuration.VideoFrameHeight != pbi->OutputHeight ) )
+    {
+        // we add 32 to outputwidth to insure that we have enough to overscale (ie scale to a size that's bigger
+        // than our output size) we do this now even though we don't use it so that we don't have to check border conditions
+        pbi->ScaleBufferAlloc = (UINT8 *)
+            duck_malloc(32 + 3 *
+                        (pbi->OutputWidth + 32) *
+                        (pbi->OutputHeight + 32)*
+                        sizeof(YUV_BUFFER_ENTRY) / 2, DMEM_GENERAL);
+
+        // Treat a failed scale-buffer allocation like every other allocation
+        // failure here: undo this call's allocations and report FALSE rather
+        // than returning TRUE with a null ScaleBuffer.
+        if(!pbi->ScaleBufferAlloc)
+        {
+            VP5_DeleteFrameInfo(pbi);
+            VP5_DeleteFragmentInfo(pbi);
+            return FALSE;
+        }
+
+        pbi->ScaleBuffer = (UINT8 *) ROUNDUP32(pbi->ScaleBufferAlloc );
+    }
+
+    // this is just so the post processor will work !!
+    for(i=0;i<pbi->UnitFragments;i++)
+        pbi->FragInfo[i].DisplayFragment = 1;
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitialiseConfiguration
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Sets up the default starting pbi->Configuration.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_InitialiseConfiguration(PB_INSTANCE *pbi)
+{
+    // Default fragment size is 8 pixels in both directions.
+    // (The IDCT table set-up, InitDctTables(), is not performed here.)
+    pbi->Configuration.VFragPixels = 8;
+    pbi->Configuration.HFragPixels = 8;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c
new file mode 100644
index 00000000..3e42b9b3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c
@@ -0,0 +1,285 @@
+/****************************************************************************
+*
+* Module Title : Huffman.c
+*
+* Description : Video CODEC
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.13 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+* 1.12 JBB 13-Jun-01 VP4 Code Clean Out
+* 1.11 SJL 22-Mar-01 Fixed MAC compile errors
+* 1.10 JBX 22-Mar-01 Changed size of SORT_NODE array to 1024;
+* 1.09 JBB 26 Jan 00 Reworked Huffman to remove dynamic allocation and
+* to condense tree storage.
+* 1.08 PGW 11 Oct 00 Changes to support different entropy tables for
+* different encoder versions.
+* 1.07 PGW 17/03/00 Further Entropy changes.
+* 1.06 PGW 15/03/00 Updated entropy tables.
+* 1.05 JBB 27/01/99 Globals Removed, use of PB_INSTANCE, Bit Management
+* 1.04 PGW 05/11/99 Changes to support AC range entropy tables.
+* 1.03 PGW 12/10/99 Changes to reduce uneccessary dependancies.
+* 1.02 PGW 19/07/99 Deleted the funtion DecodeHuffToken().
+* 1.01 PGW 15/07/99 Added inline bit extraction to DecodeHuffToken().
+* 1.00 PGW 07/07/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "systemdependant.h"
+#include "huffman.h"
+#include "pbdll.h"
+#include "boolhuff.h"
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Forward references.
+*****************************************************************************
+*/
+
+void VP5_BuildHuffTree(
+ HUFF_NODE *hn,
+ unsigned int *counts,
+ int values );
+
+void VP5_CreateCodeArray( HUFF_NODE *hn,
+ int node,
+ unsigned int *codearray,
+ unsigned char *lengtharray,
+ int codevalue,
+ int codelength );
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+// Sort-list node whose payload is a plain byte value.
+// NOTE(review): nothing in the visible part of this file refers to SORT_NODE
+// (the tree builder uses the `sortnode` type instead) - confirm whether it is
+// still needed elsewhere.
+typedef struct _SORT_NODE
+{
+ int next; // presumably the index of the next node, as in sortnode - confirm
+ int freq;
+ unsigned char value;
+} SORT_NODE;
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+
+//***********************************************************
+// Jim's version of Eric's condensed huffman trees!
+
+
+// Element of the frequency-sorted, index-linked list used while building a
+// Huffman tree. Nodes live in an array and are chained through `next`.
+typedef struct _sortnode
+{
+ int next; // array index of the next node in the list, -1 at the end
+ int freq; // frequency the list is ordered by (ascending)
+ tokenorptr value; // selector 1: a token value; selector 0: index of a huffman node
+} sortnode;
+
+
+// Links sn[node] into the list that starts at *startnode. The list is kept in
+// ascending freq order; the new node goes in front of the first entry whose
+// freq is not smaller than its own. *startnode is updated when the new node
+// becomes the head (which includes insertion into an empty list, -1).
+static void InsertSorted(
+    sortnode *sn,
+    int node,
+    int *startnode )
+{
+    const int head = *startnode;
+    int prev = head;
+    int cur = head;
+
+    // walk past every entry with a smaller freq
+    for( ; cur != -1 && sn[node].freq > sn[cur].freq; cur = sn[cur].next )
+        prev = cur;
+
+    if( cur == head )
+        *startnode = node;        // new head of the list
+    else
+        sn[prev].next = node;     // splice in after the last smaller entry
+
+    sn[node].next = cur;
+}
+
+// Builds the condensed huffman tree for `values` symbols into hn[].
+//
+//   hn     - receives the values - 1 internal nodes. The tree is filled from
+//            the back, so the root always ends up in hn[0].
+//   counts - frequency of each symbol. Zero counts are replaced by 1 in the
+//            caller's array.
+//   values - number of symbols. Must be between 1 and 128: the scratch list
+//            needs values leaves plus values - 1 merged nodes. Calls outside
+//            that range return without touching hn[] or counts[].
+//
+// Nothing is returned. With values == 1 no merge takes place and hn[] is
+// left unmodified.
+void VP5_BuildHuffTree(
+    HUFF_NODE *hn,
+    unsigned int *counts,
+    int values )
+{
+    // capacity of the scratch sort list
+    enum { VP5_MAX_SORT_NODES = 256 };
+
+    int i;
+    sortnode sn[VP5_MAX_SORT_NODES];
+    int sncount;
+    int startnode=0;
+    int huffptr;
+
+    // 2 * values - 1 sort nodes are needed in total; refuse sizes that would
+    // run past sn[], and empty input, which would read sn[0] uninitialised.
+    if( values < 1 || values > (VP5_MAX_SORT_NODES + 1) / 2 )
+        return;
+
+    // note we are creating the huffman tree in
+    // reverse order so that the root will always be 0
+    huffptr = values-1;
+
+    // set up our sorted linked list of values
+    // or pointers into the huffman tree
+    for(i=0;i<values;i++)
+    {
+        sn[i].value.selector = 1;
+        sn[i].value.value = i;
+        if(counts[i] == 0)
+            counts[i] = 1;
+        sn[i].freq = counts[i];
+        sn[i].next = -1;
+    }
+    sncount=values;
+
+    // connect the above entries into a sorted linked list (entry 0 is the
+    // initial one-element list)
+    for(i=1;i<values;i++)
+    {
+        InsertSorted(sn,i,&startnode);
+    }
+
+    // while there is more than one node in our linked list
+    while(sn[startnode].next!=-1)
+    {
+        // the two lowest-frequency entries are at the front of the list
+        int first = startnode;
+        int second = sn[startnode].next;
+        int sumfreq = sn[first].freq + sn[second].freq;
+
+        // setup new merged huffman node; freq is the share of the left
+        // branch scaled to 256
+        --huffptr;
+        hn[huffptr].leftunion.left = sn[first].value;
+        hn[huffptr].rightunion.right = sn[second].value;
+        hn[huffptr].freq = 256 * sn[first].freq / sumfreq;
+
+        // set up new merged sort node pointing to our huffnode
+        sn[sncount].value.selector = 0;
+        sn[sncount].value.value = huffptr;
+        sn[sncount].freq = sumfreq;
+        sn[sncount].next = -1;
+
+        // remove the two nodes we just merged from the linked list
+        startnode = sn[second].next;
+
+        // insert the new sort node into the proper location
+        InsertSorted(sn, sncount, &startnode);
+
+        // account for new nodes
+        sncount++;
+    }
+}
+
+// Walks the subtree rooted at hn[node] and records, for every leaf token
+// reached, its code in codearray[token] and its code length in
+// lengtharray[token]. codevalue / codelength describe the path taken so far;
+// going left appends a 0 bit, going right appends a 1 bit.
+void VP5_CreateCodeArray( HUFF_NODE *hn,
+    int node,
+    unsigned int *codearray,
+    unsigned char *lengtharray,
+    int codevalue,
+    int codelength )
+{
+    const int childlength = codelength + 1;
+    const int leftcode = (codevalue << 1) + 0;
+    const int rightcode = (codevalue << 1) + 1;
+    HUFF_NODE *cur = &hn[node];
+
+    // left child: a leaf gets its table entry, anything else is descended into
+    if( !cur->leftunion.left.selector )
+    {
+        VP5_CreateCodeArray( hn, cur->leftunion.left.value,
+                             codearray, lengtharray, leftcode, childlength );
+    }
+    else
+    {
+        codearray[cur->leftunion.left.value] = leftcode;
+        lengtharray[cur->leftunion.left.value] = childlength;
+    }
+
+    // right child, same treatment
+    if( !cur->rightunion.right.selector )
+    {
+        VP5_CreateCodeArray( hn, cur->rightunion.right.value,
+                             codearray, lengtharray, rightcode, childlength );
+    }
+    else
+    {
+        codearray[cur->rightunion.right.value] = rightcode;
+        lengtharray[cur->rightunion.right.value] = childlength;
+    }
+}
+
+// Decodes one token: starting at the root hn[0], reads one bool per tree
+// level (using the node's freq as the probability passed to DecodeBool) and
+// follows the right branch on a set bit, the left branch otherwise, until a
+// leaf (selector != 0) is reached. Returns the leaf's value.
+int VP5_DecodeValue(
+    BOOL_CODER *bc,
+    HUFF_NODE *hn
+    )
+{
+    tokenorptr torp;
+
+    // start as a "pointer" to node 0, the root
+    torp.selector=0;
+    torp.value=0;
+
+    while( !torp.selector )
+    {
+        HUFF_NODE *cur = &hn[torp.value];
+
+        if(DecodeBool(bc, cur->freq))
+        {
+            torp = cur->rightunion.right;
+        }
+        else
+        {
+            torp = cur->leftunion.left;
+        }
+    }
+
+    return torp.value;
+}
+
+// Writes the `length` low bits of `value`, most significant bit first, while
+// walking the tree from the root hn[0]: each bit is coded with the freq of
+// the current node and then selects the right (1) or left (0) child.
+// EncodeBool2 is used instead of EncodeBool when bc->MeasureCost is set.
+void VP5_EncodeValue(
+    BOOL_CODER *bc,
+    HUFF_NODE *hn,
+    int value,
+    int length)
+{
+    int node = 0;
+    int bit = length;
+
+    while( bit-- > 0 )
+    {
+        const int v = (value>>bit) & 1;
+
+        if ( bc->MeasureCost )
+        {
+            EncodeBool2(bc,(BOOL) v , hn[node].freq);
+        }
+        else
+        {
+            EncodeBool(bc,(BOOL) v , hn[node].freq);
+        }
+
+        // step down to the child selected by this bit
+        node = v ? hn[node].rightunion.right.value
+                 : hn[node].leftunion.left.value;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c
new file mode 100644
index 00000000..e0025016
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c
@@ -0,0 +1,710 @@
+/****************************************************************************
+*
+* Module Title : MvEntropy.c
+*
+* Description : Video CODEC: Motion vector entropy module.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.03 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+* 1.02 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.01 PGW 23 Jan 01 Module created.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Header Frames
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "systemdependant.h"
+#include "codec_common.h"
+#include "codec_common_interface.h"
+#include "huffman.h"
+#include "pbdll.h"
+
+
+/****************************************************************************
+* Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+/* VP5 MV coding tables.
+ *
+ * VP5_MvTableIndex holds one entry per MV entropy token (63 initialisers
+ * are listed, so MV_ENTROPY_TOKENS is expected to be 63). The entries are
+ * in the range 0..15 and are mirrored about the single 0 in the middle:
+ * the value grows from 0 at the centre to 15 at both ends of the table.
+ * NOTE(review): presumably each entry selects one of the MV_ENTROPY_TABLES
+ * tables below - confirm against the code that reads this array.
+ */
+UINT8 VP5_MvTableIndex[MV_ENTROPY_TOKENS] =
+{ 15, 15, 14, 14, 13, 13, 13, 13,
+ 12, 12, 12, 12, 11, 11, 11, 11,
+ 10, 10, 9, 9, 8, 8, 7, 7,
+ 6, 6, 5, 4, 3, 2, 1,
+ 0,
+ 1, 2, 3, 4, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10,
+ 11, 11, 11, 11, 12, 12, 12, 12,
+ 13, 13, 13, 13, 14, 14, 15, 15
+};
+
+HUFF_NODE XMvHuffTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS-1]; /* X trees: zeroed, then built per table by VP5_CreateMvTrees() from VP5_XMvFrequencyCounts */
+UINT32 XMvPatternTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS]; /* X code words: passed as the code array to VP5_CreateCodeArray() by VP5_CreateMvCodeArrays(), which is currently under #if 0 */
+UINT8 XMvBitsTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS]; /* X code lengths: passed as the length array in the same (currently disabled) call */
+
+static UINT32 VP5_XMvFrequencyCounts[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS] =
+{
+ 67 , 15 , 32 , 4 , 21 , 8 , 33 , 2 , 32 , 6 , 29 , 4 , 25 , 5 , 83 , 16 , 58 , 3 , 59 , 7 , 75 , 11 ,129 , 19 ,126 , 22 ,159 , 29 ,207 , 88 ,636 ,388 ,579 ,125 ,269 , 66 ,223 , 33 ,177 , 15 ,118 , 14 ,130 , 14 , 81 , 10 , 91 , 25 ,106 , 10 , 68 , 12 , 51 , 10 , 61 , 13 , 46 , 8 , 55 , 6 , 63 , 29 ,207 ,
+ 77 , 6 , 23 , 1 , 19 , 1 , 25 , 4 , 38 , 2 , 39 , 6 , 48 , 6 ,116 , 25 ,110 , 16 ,105 , 16 ,125 , 26 ,223 , 28 ,279 , 56 ,407 , 98 ,509 ,238 ,921 ,461 ,1260 ,628 ,857 ,173 ,543 , 74 ,368 , 31 ,284 , 28 ,207 , 21 ,159 , 21 ,180 , 38 ,166 , 14 , 56 , 5 , 42 , 8 , 48 , 5 , 35 , 4 , 29 , 8 , 37 , 10 , 84 ,
+ 7 , 3 , 3 , 1 , 4 , 0 , 3 , 1 , 4 , 0 , 4 , 0 , 10 , 0 , 13 , 7 , 18 , 1 , 17 , 2 , 22 , 5 , 36 , 5 , 52 , 15 , 83 , 15 ,116 , 35 ,220 ,144 ,391 ,262 ,282 ,178 ,235 , 52 ,150 , 23 , 83 , 12 , 74 , 4 , 44 , 8 , 46 , 9 , 36 , 1 , 18 , 1 , 10 , 1 , 7 , 3 , 8 , 0 , 6 , 2 , 6 , 0 , 15 ,
+ 25 , 3 , 17 , 3 , 5 , 2 , 9 , 1 , 23 , 0 , 21 , 5 , 17 , 6 , 51 , 7 , 61 , 9 , 48 , 5 , 76 , 7 , 93 , 11 ,125 , 26 ,190 , 54 ,271 , 98 ,502 ,194 ,604 ,221 ,606 ,258 ,639 ,296 ,527 , 56 ,282 , 32 ,195 , 17 ,138 , 11 ,135 , 18 ,132 , 3 , 50 , 1 , 31 , 6 , 34 , 3 , 23 , 3 , 17 , 1 , 20 , 3 , 49 ,
+ 10 , 0 , 3 , 1 , 4 , 0 , 2 , 1 , 3 , 1 , 2 , 1 , 4 , 0 , 7 , 7 , 11 , 0 , 12 , 2 , 16 , 2 , 21 , 2 , 31 , 10 , 40 , 10 , 46 , 20 ,115 , 59 ,140 ,106 ,222 ,106 ,225 , 76 ,199 , 82 ,126 , 9 , 67 , 4 , 38 , 2 , 36 , 12 , 34 , 0 , 8 , 0 , 7 , 1 , 9 , 0 , 7 , 3 , 8 , 2 , 12 , 0 , 17 ,
+ 28 , 5 , 11 , 1 , 12 , 2 , 8 , 1 , 14 , 2 , 19 , 2 , 19 , 2 , 38 , 8 , 38 , 2 , 47 , 4 , 54 , 5 , 81 , 11 ,103 , 16 ,124 , 23 ,183 , 56 ,376 ,178 ,451 ,142 ,487 ,125 ,549 ,185 ,552 , 89 ,443 ,170 ,358 , 34 ,180 , 16 ,167 , 31 ,152 , 12 , 49 , 7 , 39 , 5 , 37 , 5 , 33 , 4 , 20 , 4 , 25 , 10 , 59 ,
+ 22 , 3 , 18 , 0 , 6 , 2 , 8 , 1 , 15 , 0 , 10 , 0 , 17 , 6 , 40 , 6 , 48 , 6 , 50 , 3 , 55 , 7 , 83 , 14 ,100 , 11 , 99 , 14 ,151 , 39 ,309 ,159 ,370 ,124 ,487 ,176 ,686 ,188 ,704 ,121 ,606 , 79 ,385 , 68 ,368 ,126 ,331 , 62 ,200 , 16 ,101 , 5 , 74 , 6 , 79 , 5 , 57 , 9 , 47 , 6 , 47 , 6 , 75 ,
+ 15 , 2 , 10 , 0 , 5 , 2 , 7 , 3 , 11 , 3 , 13 , 0 , 15 , 0 , 25 , 1 , 27 , 2 , 35 , 3 , 46 , 7 , 48 , 7 , 62 , 15 , 77 , 13 ,112 , 32 ,220 ,104 ,251 , 64 ,242 , 72 ,347 , 95 ,525 , 79 ,699 , 65 ,533 , 41 ,240 , 13 ,254 , 77 ,264 , 72 ,179 , 8 , 89 , 7 , 65 , 3 , 61 , 3 , 45 , 2 , 49 , 8 , 68 ,
+ 14 , 3 , 7 , 0 , 7 , 1 , 7 , 0 , 8 , 3 , 10 , 0 , 7 , 0 , 21 , 4 , 28 , 2 , 17 , 0 , 33 , 7 , 26 , 2 , 40 , 4 , 48 , 9 , 70 , 13 ,147 , 84 ,160 , 41 ,167 , 41 ,202 , 42 ,301 , 32 ,475 , 29 ,410 , 38 ,333 , 17 ,265 , 31 ,171 , 10 ,117 , 16 ,156 , 72 ,146 , 6 , 64 , 5 , 48 , 2 , 49 , 12 , 80 ,
+ 14 , 2 , 6 , 0 , 2 , 0 , 3 , 1 , 2 , 0 , 6 , 1 , 4 , 1 , 14 , 5 , 12 , 0 , 13 , 1 , 24 , 2 , 20 , 5 , 20 , 3 , 32 , 7 , 43 , 11 , 93 , 46 , 96 , 25 , 96 , 18 ,117 , 22 ,137 , 14 ,140 , 17 ,256 , 14 ,388 , 13 ,330 , 20 ,112 , 4 , 81 , 7 , 63 , 6 , 68 , 16 , 79 , 43 , 71 , 7 , 43 , 10 , 80 ,
+ 19 , 2 , 3 , 0 , 6 , 0 , 3 , 0 , 11 , 1 , 8 , 2 , 7 , 0 , 18 , 4 , 11 , 1 , 17 , 3 , 20 , 3 , 15 , 2 , 33 , 9 , 33 , 11 , 51 , 14 ,127 , 64 ,148 , 35 ,106 , 16 ,107 , 21 ,119 , 11 ,133 , 18 ,195 , 19 ,287 , 11 ,267 , 15 ,200 , 3 ,101 , 7 , 86 , 7 , 56 , 3 , 49 , 8 , 65 , 11 ,100 , 63 ,163 ,
+ 25 , 5 , 12 , 0 , 8 , 0 , 7 , 0 , 8 , 1 , 18 , 0 , 21 , 0 , 42 , 5 , 33 , 1 , 22 , 5 , 23 , 5 , 29 , 4 , 51 , 17 , 47 , 14 , 69 , 28 ,189 ,104 ,200 , 15 ,131 , 33 ,121 , 18 ,146 , 26 ,162 , 18 ,155 , 12 ,175 , 12 ,278 , 21 ,331 , 13 ,240 , 7 ,148 , 9 ,104 , 11 , 84 , 15 , 83 , 8 ,163 , 71 ,339 ,
+ 17 , 4 , 8 , 0 , 5 , 0 , 6 , 0 , 5 , 0 , 13 , 0 , 3 , 1 , 12 , 4 , 14 , 1 , 8 , 2 , 14 , 1 , 20 , 0 , 16 , 0 , 23 , 7 , 34 , 10 , 63 , 44 , 85 , 19 , 53 , 13 , 66 , 12 , 63 , 10 , 70 , 5 , 66 , 12 , 49 , 5 ,106 , 8 , 93 , 11 , 81 , 6 , 87 , 2 ,101 , 3 , 78 , 3 , 58 , 4 , 93 , 32 ,155 ,
+ 13 , 3 , 4 , 1 , 8 , 0 , 3 , 1 , 6 , 1 , 10 , 0 , 8 , 0 , 11 , 3 , 14 , 2 , 9 , 2 , 13 , 4 , 14 , 3 , 13 , 4 , 18 , 6 , 20 , 10 , 66 , 16 , 57 , 10 , 27 , 9 , 44 , 10 , 35 , 4 , 30 , 7 , 42 , 5 , 38 , 1 , 42 , 9 , 48 , 2 , 42 , 1 , 41 , 6 , 52 , 2 , 70 , 2 , 71 , 3 , 77 , 16 ,130 ,
+ 3 , 2 , 2 , 1 , 2 , 1 , 1 , 0 , 3 , 0 , 4 , 0 , 2 , 0 , 6 , 4 , 9 , 0 , 6 , 3 , 7 , 1 , 6 , 1 , 8 , 0 , 12 , 2 , 17 , 4 , 50 , 16 , 50 , 3 , 22 , 8 , 24 , 4 , 20 , 3 , 20 , 3 , 27 , 1 , 24 , 2 , 41 , 1 , 31 , 1 , 29 , 1 , 24 , 0 , 30 , 0 , 31 , 2 , 29 , 1 , 36 , 15 , 76 ,
+ 35 , 7 , 15 , 1 , 6 , 0 , 3 , 1 , 9 , 1 , 11 , 0 , 10 , 1 , 24 , 4 , 28 , 0 , 9 , 1 , 15 , 6 , 23 , 2 , 29 , 5 , 32 , 3 , 40 , 15 ,121 , 64 ,136 , 17 , 49 , 11 , 49 , 12 , 50 , 10 , 64 , 4 , 53 , 7 , 42 , 9 , 57 , 8 , 75 , 3 , 47 , 3 , 39 , 7 , 38 , 2 , 35 , 7 , 52 , 3 , 87 , 40 ,194 ,
+
+
+/*
+ 95 ,284 , 30 , 82 , 36 ,143 , 37 ,148 , 34 ,174 , 45 ,164 , 56 ,241 ,101 ,690 ,145 ,296 ,101 ,283 ,118 ,313 ,129 ,346 ,136 ,448 ,185 ,543 ,283 ,1035 ,725 ,3842 ,1009 ,1273 ,347 ,757 ,243 ,543 ,225 ,381 ,162 ,362 ,130 ,294 ,124 ,358 ,188 ,722 ,128 ,260 , 90 ,200 , 64 ,200 , 64 ,160 , 50 ,153 , 34 ,129 , 60 ,374 ,171 ,
+ 90 ,162 , 21 , 68 , 15 , 76 , 26 , 83 , 25 ,123 , 34 ,119 , 46 ,158 , 80 ,374 , 87 ,232 , 84 ,204 , 90 ,247 ,138 ,283 ,155 ,398 ,217 ,453 ,244 ,771 ,556 ,2108 ,915 ,1372 ,406 ,579 ,239 ,393 ,169 ,273 ,156 ,246 , 88 ,183 , 84 ,197 ,108 ,383 , 72 ,178 , 35 ,105 , 42 ,127 , 31 , 97 , 25 , 68 , 20 , 61 , 23 ,122 , 78 ,
+ 88 ,255 , 38 ,112 , 31 ,139 , 47 ,134 , 46 ,215 , 47 ,205 , 78 ,250 ,123 ,672 ,133 ,328 ,117 ,295 ,137 ,344 ,139 ,371 ,197 ,472 ,212 ,620 ,322 ,980 ,694 ,2402 ,1096 ,2773 ,1079 ,1699 ,549 ,971 ,317 ,650 ,277 ,552 ,185 ,381 ,147 ,428 ,206 ,799 ,125 ,293 , 82 ,212 , 64 ,211 , 60 ,151 , 71 ,163 , 29 ,126 , 33 ,280 ,120 ,
+ 40 , 78 , 25 , 29 , 19 , 57 , 18 , 53 , 14 , 58 , 33 , 85 , 36 ,100 , 56 ,224 , 52 ,109 , 47 ,133 , 44 ,122 , 70 ,134 , 70 ,200 , 81 ,208 ,142 ,343 ,223 ,827 ,373 ,1018 ,512 ,933 ,314 ,509 ,170 ,300 ,111 ,189 , 80 ,164 , 76 ,143 , 77 ,298 , 78 ,112 , 34 , 85 , 40 , 57 , 20 , 59 , 20 , 47 , 11 , 50 , 22 , 73 , 56 ,
+ 65 ,161 , 25 , 59 , 18 ,103 , 37 , 88 , 45 ,106 , 39 ,105 , 51 ,172 , 76 ,492 ,103 ,214 , 59 ,212 , 65 ,246 ,104 ,281 ,126 ,340 ,153 ,381 ,214 ,604 ,357 ,1262 ,444 ,1463 ,679 ,1863 ,785 ,1296 ,449 ,685 ,307 ,517 ,162 ,314 ,161 ,366 ,132 ,606 ,112 ,232 , 68 ,187 , 70 ,161 , 41 ,123 , 38 ,119 , 36 ,112 , 35 ,194 , 99 ,
+ 46 , 76 , 18 , 37 , 16 , 52 , 12 , 54 , 7 , 48 , 14 , 69 , 28 ,112 , 38 ,219 , 57 ,116 , 56 , 96 , 37 , 93 , 38 ,121 , 82 ,178 ,107 ,177 , 78 ,259 ,181 ,491 ,214 ,554 ,302 ,717 ,382 ,788 ,306 ,459 ,199 ,311 ,134 ,235 ,120 ,207 , 85 ,308 , 70 ,135 , 45 , 79 , 41 , 80 , 23 , 72 , 12 , 54 , 7 , 56 , 23 ,113 , 59 ,
+111 ,226 , 21 ,100 , 36 ,125 , 50 ,158 , 62 ,172 , 55 ,177 , 71 ,288 ,113 ,633 ,129 ,326 , 91 ,268 ,113 ,362 ,133 ,321 ,136 ,424 ,156 ,461 ,240 ,643 ,470 ,1336 ,464 ,1271 ,503 ,1739 ,898 ,2410 ,961 ,1777 ,643 ,1105 ,332 ,709 ,304 ,619 ,294 ,973 ,213 ,386 ,123 ,282 , 78 ,254 , 79 ,222 , 54 ,185 , 57 ,160 , 57 ,264 ,153 ,
+ 80 ,188 , 28 , 87 , 31 ,117 , 35 ,108 , 38 ,147 , 54 ,184 , 58 ,188 ,121 ,584 ,108 ,312 , 93 ,231 , 89 ,272 ,109 ,303 ,132 ,326 ,146 ,377 ,156 ,496 ,297 ,1039 ,320 ,825 ,281 ,1010 ,459 ,1403 ,696 ,2078 ,895 ,1575 ,441 ,800 ,260 ,567 ,315 ,891 ,239 ,357 ,118 ,240 , 91 ,212 , 93 ,184 , 69 ,174 , 56 ,129 , 52 ,284 ,120 ,
+ 89 ,177 , 17 , 70 , 23 , 77 , 39 ,110 , 31 ,147 , 44 ,138 , 64 ,183 , 81 ,461 , 87 ,234 , 79 ,236 , 70 ,253 ,113 ,244 ,125 ,290 ,111 ,241 ,124 ,384 ,221 ,940 ,251 ,599 ,220 ,602 ,320 ,840 ,336 ,1185 ,638 ,1811 ,664 ,1197 ,307 ,751 ,344 ,942 ,213 ,377 ,131 ,259 , 69 ,201 , 66 ,164 , 48 ,171 , 53 ,136 , 36 ,272 ,142 ,
+ 91 ,130 , 27 , 69 , 21 , 85 , 23 , 93 , 36 ,142 , 26 ,121 , 30 ,182 , 65 ,427 , 83 ,201 , 59 ,175 , 64 ,211 , 67 ,198 ,100 ,257 ,107 ,245 ,139 ,322 ,184 ,723 ,214 ,430 ,169 ,445 ,208 ,572 ,228 ,650 ,328 ,1034 ,483 ,1618 ,676 ,1115 ,348 ,1025 ,233 ,413 ,137 ,271 , 81 ,227 , 77 ,170 , 45 ,144 , 56 ,111 , 45 ,178 ,115 ,
+ 89 ,200 , 30 , 88 , 21 ,110 , 35 ,102 , 39 ,130 , 46 ,148 , 35 ,234 , 95 ,522 ,128 ,236 , 85 ,188 , 81 ,233 , 84 ,251 , 84 ,282 ,117 ,285 ,121 ,370 ,193 ,806 ,217 ,484 ,162 ,469 ,161 ,494 ,205 ,518 ,241 ,622 ,259 ,918 ,594 ,1431 ,675 ,1505 ,338 ,455 ,129 ,273 , 84 ,236 , 82 ,201 , 80 ,142 , 44 ,142 , 31 ,294 ,162 ,
+237 ,546 , 61 ,167 , 63 ,238 , 85 ,255 , 71 ,357 , 81 ,323 , 97 ,417 ,186 ,1285 ,302 ,588 ,189 ,469 ,174 ,517 ,224 ,561 ,207 ,619 ,234 ,593 ,285 ,800 ,469 ,1935 ,468 ,983 ,329 ,845 ,281 ,927 ,328 ,944 ,378 ,980 ,372 ,1070 ,523 ,1544 ,857 ,3091 ,995 ,1478 ,465 ,811 ,245 ,613 ,188 ,442 ,166 ,445 ,132 ,334 ,123 ,759 ,360 ,
+ 86 ,220 , 34 , 85 , 27 , 93 , 35 ,102 , 39 ,113 , 57 ,138 , 45 ,182 , 71 ,489 , 98 ,221 , 84 ,170 , 78 ,230 , 93 ,248 , 81 ,237 ,109 ,255 ,126 ,365 ,188 ,752 ,209 ,422 ,133 ,331 ,163 ,400 ,133 ,401 ,151 ,410 ,155 ,425 ,153 ,468 ,194 ,916 ,246 ,491 ,205 ,413 ,201 ,373 ,153 ,278 , 96 ,197 , 76 ,162 , 72 ,342 ,210 ,
+ 68 ,184 , 17 , 52 , 12 , 86 , 35 , 70 , 18 ,100 , 38 ,102 , 35 ,134 , 76 ,373 , 55 ,185 , 48 ,130 , 65 ,200 , 68 ,177 , 77 ,225 , 90 ,186 , 73 ,271 ,128 ,509 ,126 ,343 ,103 ,286 ,103 ,297 ,133 ,252 ,107 ,269 , 87 ,244 ,114 ,289 ,160 ,621 ,145 ,281 ,101 ,270 ,120 ,265 ,132 ,257 ,142 ,320 ,115 ,201 , 58 ,329 ,193 ,
+ 52 , 96 , 4 , 25 , 12 , 39 , 9 , 36 , 15 , 45 , 22 , 32 , 22 , 64 , 36 ,154 , 34 , 79 , 23 , 61 , 28 , 92 , 18 , 66 , 30 , 85 , 41 , 90 , 48 ,140 , 55 ,250 , 57 ,112 , 42 ,103 , 38 ,128 , 44 , 99 , 43 ,112 , 45 ,115 , 38 ,131 , 63 ,288 , 57 ,138 , 31 , 99 , 31 , 94 , 37 , 96 , 30 ,131 , 45 ,117 , 52 ,161 , 94 ,
+147 ,381 , 25 ,105 , 21 ,147 , 37 ,119 , 43 ,177 , 37 ,168 , 49 ,204 ,106 ,620 ,102 ,238 , 65 ,200 , 57 ,231 , 97 ,191 ,101 ,224 , 89 ,222 , 95 ,394 ,199 ,748 ,171 ,384 ,125 ,322 ,100 ,301 ,118 ,289 ,125 ,304 , 92 ,234 ,100 ,331 ,163 ,834 ,175 ,334 , 81 ,274 , 64 ,266 , 85 ,212 ,113 ,285 , 63 ,206 , 96 ,690 ,402 ,
+ {
+ 36, 69, 8, 21, 8, 28, 15, 29,
+ 11, 32, 14, 33, 15, 41, 25, 109,
+ 22, 61, 32, 62, 32, 78, 49, 91,
+ 64, 126, 102, 165, 207, 615, 1860, 2163,
+ 1514, 598, 254, 180, 105, 131, 75, 70,
+ 43, 60, 38, 48, 31, 60, 36, 105,
+ 26, 47, 29, 35, 17, 32, 16, 25,
+ 7, 29, 8, 21, 7, 49, 49,
+ },
+ {
+ 11, 22, 6, 8, 8, 11, 5, 11,
+ 1, 14, 5, 16, 13, 23, 14, 32,
+ 18, 29, 25, 32, 26, 37, 32, 52,
+ 55, 83, 94, 145, 201, 458, 1233, 2600,
+ 1790, 1313, 420, 221, 125, 126, 94, 71,
+ 38, 41, 30, 40, 26, 29, 22, 47,
+ 24, 23, 12, 17, 15, 14, 14, 14,
+ 8, 9, 9, 9, 2, 17, 23,
+ },
+ {
+ 29, 32, 9, 22, 7, 21, 10, 11,
+ 5, 27, 16, 17, 13, 23, 20, 59,
+ 14, 35, 14, 32, 25, 41, 43, 53,
+ 40, 75, 61, 135, 146, 298, 635, 1241,
+ 2473, 1648, 1042, 437, 182, 146, 94, 94,
+ 55, 49, 37, 52, 28, 51, 34, 49,
+ 22, 26, 13, 22, 15, 23, 10, 20,
+ 14, 26, 10, 14, 5, 27, 37,
+ },
+ {
+ 18, 18, 10, 10, 16, 20, 6, 22,
+ 16, 30, 14, 38, 16, 24, 12, 59,
+ 28, 61, 36, 53, 18, 59, 59, 57,
+ 43, 86, 100, 137, 178, 291, 560, 1069,
+ 1243, 1919, 1102, 866, 346, 270, 153, 137,
+ 90, 63, 43, 34, 38, 61, 36, 57,
+ 24, 47, 18, 30, 26, 24, 10, 24,
+ 14, 36, 8, 12, 4, 34, 34,
+ },
+ {
+ 38, 61, 15, 22, 12, 33, 22, 33,
+ 17, 35, 10, 33, 12, 27, 15, 86,
+ 25, 63, 33, 78, 35, 71, 55, 89,
+ 55, 165, 83, 132, 172, 256, 462, 778,
+ 671, 989, 1261, 1045, 803, 524, 259, 231,
+ 109, 109, 61, 89, 48, 76, 66, 101,
+ 55, 58, 20, 45, 20, 33, 27, 40,
+ 12, 33, 10, 25, 2, 71, 53,
+ },
+ {
+ 39, 35, 7, 39, 10, 31, 24, 21,
+ 35, 46, 35, 46, 21, 46, 17, 78,
+ 24, 49, 24, 99, 67, 74, 42, 92,
+ 56, 127, 95, 202, 191, 414, 475, 613,
+ 652, 656, 652, 1042, 680, 769, 574, 333,
+ 205, 195, 88, 74, 88, 74, 70, 81,
+ 53, 95, 46, 39, 24, 46, 14, 67,
+ 24, 17, 7, 28, 14, 63, 28,
+ },
+ {
+ 57, 58, 7, 29, 7, 36, 7, 58,
+ 27, 47, 22, 58, 23, 53, 33, 91,
+ 29, 84, 38, 57, 66, 69, 79, 84,
+ 77, 97, 93, 169, 139, 285, 364, 597,
+ 480, 388, 448, 636, 774, 995, 691, 551,
+ 320, 241, 160, 169, 99, 125, 79, 123,
+ 68, 79, 33, 60, 20, 47, 34, 62,
+ 22, 62, 25, 51, 27, 84, 77,
+ },
+ {
+ 91, 88, 16, 40, 13, 32, 10, 42,
+ 16, 53, 21, 50, 13, 66, 53, 107,
+ 34, 72, 29, 58, 32, 74, 64, 88,
+ 85, 109, 96, 168, 149, 326, 323, 495,
+ 374, 425, 364, 471, 412, 554, 589, 787,
+ 527, 460, 313, 214, 163, 168, 149, 184,
+ 104, 109, 61, 93, 66, 80, 37, 64,
+ 29, 40, 40, 29, 13, 72, 66,
+ },
+ {
+ 76, 83, 6, 56, 16, 46, 36, 73,
+ 26, 69, 36, 69, 53, 66, 56, 103,
+ 66, 106, 49, 79, 36, 93, 83, 133,
+ 79, 119, 89, 136, 119, 233, 226, 442,
+ 319, 306, 236, 312, 266, 409, 372, 532,
+ 502, 675, 496, 386, 236, 262, 239, 206,
+ 163, 143, 93, 96, 59, 99, 53, 93,
+ 53, 59, 33, 53, 36, 99, 123,
+ },
+ {
+ 60, 86, 18, 33, 18, 63, 11, 60,
+ 18, 105, 30, 48, 56, 90, 33, 82,
+ 78, 101, 30, 101, 67, 116, 75, 123,
+ 45, 108, 97, 131, 71, 213, 217, 330,
+ 292, 255, 180, 210, 243, 371, 258, 390,
+ 240, 562, 446, 675, 480, 498, 318, 348,
+ 225, 247, 112, 135, 93, 82, 48, 90,
+ 67, 90, 52, 45, 22, 90, 97,
+ },
+ {
+ 76, 129, 10, 45, 3, 48, 20, 66,
+ 20, 73, 17, 59, 17, 76, 31, 125,
+ 55, 94, 38, 80, 41, 108, 80, 111,
+ 87, 104, 55, 108, 76, 160, 234, 451,
+ 290, 248, 199, 223, 139, 297, 150, 318,
+ 199, 342, 321, 489, 433, 605, 563, 510,
+ 391, 286, 171, 139, 115, 108, 73, 108,
+ 59, 115, 48, 31, 20, 139, 132,
+ },
+ {
+ 93, 100, 15, 36, 24, 51, 17, 58,
+ 26, 58, 15, 68, 20, 58, 55, 201,
+ 79, 96, 39, 85, 49, 125, 53, 85,
+ 51, 89, 83, 112, 96, 186, 224, 440,
+ 224, 190, 115, 178, 100, 193, 114, 250,
+ 171, 229, 157, 294, 247, 421, 437, 859,
+ 482, 530, 336, 285, 186, 231, 142, 155,
+ 77, 112, 76, 74, 47, 125, 144,
+ },
+ {
+ 137, 99, 12, 54, 15, 54, 25, 60,
+ 25, 76, 25, 70, 44, 54, 76, 127,
+ 60, 70, 19, 99, 67, 127, 54, 127,
+ 79, 95, 60, 76, 60, 172, 134, 338,
+ 213, 213, 89, 150, 137, 210, 111, 182,
+ 95, 255, 105, 255, 150, 322, 265, 427,
+ 255, 472, 408, 443, 415, 495, 306, 287,
+ 178, 175, 134, 111, 83, 185, 252,
+ },
+ {
+ 105, 149, 28, 64, 32, 72, 24, 72,
+ 12, 64, 24, 64, 12, 64, 48, 157,
+ 80, 76, 40, 76, 56, 117, 64, 109,
+ 76, 141, 64, 109, 76, 198, 145, 307,
+ 133, 137, 109, 125, 93, 226, 80, 153,
+ 113, 214, 97, 234, 88, 218, 258, 384,
+ 234, 234, 214, 283, 222, 432, 311, 465,
+ 384, 343, 258, 283, 190, 323, 404,
+ },
+ {
+ 263, 160, 22, 80, 11, 148, 11, 91,
+ 34, 137, 22, 91, 80, 137, 34, 171,
+ 22, 57, 22, 102, 45, 160, 80, 91,
+ 80, 171, 45, 114, 80, 137, 171, 286,
+ 57, 148, 80, 286, 125, 160, 125, 171,
+ 102, 102, 57, 217, 125, 160, 80, 286,
+ 114, 102, 148, 251, 217, 297, 137, 343,
+ 251, 514, 228, 400, 331, 526, 675,
+ },
+ {
+ 188, 256, 21, 68, 21, 108, 25, 119,
+ 32, 101, 21, 54, 10, 83, 61, 151,
+ 68, 94, 43, 126, 43, 108, 43, 151,
+ 65, 137, 68, 159, 86, 209, 144, 368,
+ 159, 188, 72, 155, 83, 188, 97, 137,
+ 65, 155, 97, 144, 57, 195, 97, 249,
+ 104, 213, 61, 169, 101, 278, 101, 209,
+ 144, 365, 173, 285, 216, 824, 1356,
+ }
+*/
+};
+
+
+HUFF_NODE YMvHuffTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS-1]; /* Y trees: zeroed, then built per table by VP5_CreateMvTrees() from VP5_YMvFrequencyCounts */
+UINT32 YMvPatternTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS]; /* Y code words: passed as the code array to VP5_CreateCodeArray() by VP5_CreateMvCodeArrays(), which is currently under #if 0 */
+UINT8 YMvBitsTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS]; /* Y code lengths: passed as the length array in the same (currently disabled) call */
+
+static UINT32 VP5_YMvFrequencyCounts[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS] =
+{
+ 19 , 12 , 31 , 74 , 10 , 11 , 33 , 66 , 10 , 14 , 26 , 82 , 14 , 27 , 22 ,118 , 27 , 28 , 49 ,170 , 32 , 54 , 70 ,313 , 94 ,101 ,190 ,999 ,326 ,643 ,695 ,1793 ,370 ,583 ,290 ,393 ,100 , 72 , 94 ,215 , 48 , 45 , 59 ,122 , 30 , 33 , 29 , 93 , 14 , 21 , 24 , 78 , 6 , 19 , 20 , 43 , 7 , 12 , 15 , 66 , 17 , 8 , 15 ,
+ 4 , 5 , 2 , 11 , 3 , 2 , 2 , 10 , 5 , 1 , 4 , 17 , 5 , 8 , 9 , 28 , 6 , 20 , 26 , 68 , 22 , 43 , 51 , 94 , 63 , 75 , 84 ,179 ,203 ,308 ,352 ,1106 ,655 ,1312 ,535 ,643 ,219 ,117 ,108 ,153 , 74 , 53 , 48 , 77 , 31 , 31 , 23 , 39 , 16 , 12 , 6 , 21 , 10 , 15 , 9 , 9 , 5 , 4 , 2 , 17 , 7 , 11 , 6 ,
+ 6 , 6 , 2 , 10 , 4 , 9 , 4 , 17 , 5 , 10 , 5 , 16 , 9 , 13 , 11 , 44 , 16 , 31 , 17 , 46 , 24 , 39 , 39 , 89 , 55 , 48 , 62 ,139 ,126 ,276 ,210 ,715 ,742 ,1649 ,361 ,945 ,265 ,242 ,110 ,206 , 93 , 78 , 55 ,101 , 31 , 45 , 23 , 68 , 22 , 25 , 16 , 22 , 6 , 14 , 9 , 23 , 3 , 17 , 9 , 26 , 2 , 10 , 8 ,
+ 2 , 5 , 2 , 6 , 3 , 3 , 2 , 11 , 3 , 8 , 3 , 12 , 2 , 12 , 7 , 23 , 11 , 13 , 8 , 28 , 16 , 31 , 30 , 65 , 28 , 43 , 69 ,110 ,120 ,149 ,183 ,502 ,403 ,627 ,370 ,750 ,259 ,180 ,136 ,173 , 71 , 50 , 39 , 67 , 34 , 27 , 23 , 44 , 15 , 13 , 6 , 21 , 9 , 11 , 3 , 12 , 3 , 2 , 2 , 13 , 2 , 9 , 4 ,
+ 6 , 5 , 3 , 22 , 3 , 6 , 2 , 15 , 4 , 4 , 3 , 24 , 4 , 8 , 10 , 32 , 13 , 12 , 12 , 48 , 15 , 20 , 26 ,117 , 33 , 45 , 58 ,181 , 87 ,146 ,146 ,891 ,341 ,639 ,524 ,1885 ,524 ,313 ,300 ,1314 ,194 ,108 , 74 ,252 , 48 , 53 , 30 ,140 , 31 , 42 , 14 , 58 , 13 , 15 , 10 , 39 , 5 , 11 , 4 , 31 , 6 , 14 , 7 ,
+ 1 , 4 , 1 , 9 , 0 , 0 , 6 , 10 , 2 , 2 , 1 , 9 , 2 , 1 , 8 , 13 , 4 , 7 , 4 , 13 , 10 , 8 , 8 , 32 , 19 , 21 , 24 , 58 , 54 , 52 , 57 ,163 , 87 ,164 ,199 ,599 ,183 ,127 ,168 ,557 ,119 ,104 , 59 ,108 , 49 , 24 , 20 , 53 , 19 , 11 , 7 , 17 , 6 , 3 , 3 , 10 , 7 , 4 , 4 , 6 , 4 , 3 , 0 ,
+ 4 , 5 , 1 , 11 , 1 , 3 , 5 , 6 , 2 , 9 , 3 , 8 , 3 , 5 , 4 , 19 , 4 , 9 , 8 , 18 , 6 , 12 , 10 , 35 , 16 , 29 , 26 , 60 , 47 , 82 , 64 ,120 , 70 ,185 ,145 ,335 ,155 ,118 ,104 ,405 ,100 , 85 , 57 ,192 , 49 , 81 , 45 , 83 , 22 , 20 , 16 , 42 , 15 , 17 , 6 , 35 , 7 , 14 , 6 , 22 , 9 , 8 , 5 ,
+ 6 , 7 , 1 , 12 , 3 , 8 , 4 , 14 , 7 , 7 , 3 , 23 , 8 , 5 , 8 , 33 , 6 , 15 , 4 , 33 , 13 , 21 , 20 , 69 , 23 , 38 , 40 ,135 , 57 , 91 , 84 ,287 ,133 ,196 ,172 ,730 ,229 ,174 ,234 ,1521 ,194 ,140 ,124 ,895 ,130 ,115 , 87 ,443 , 73 , 77 , 36 ,109 , 32 , 34 , 19 , 82 , 16 , 16 , 6 , 79 , 11 , 19 , 23 ,
+ 1 , 2 , 3 , 3 , 2 , 3 , 0 , 3 , 1 , 2 , 3 , 4 , 4 , 3 , 1 , 10 , 2 , 3 , 3 , 11 , 6 , 5 , 5 , 12 , 8 , 12 , 18 , 31 , 24 , 39 , 39 , 68 , 50 , 61 , 45 , 74 , 36 , 43 , 32 ,133 , 25 , 37 , 16 ,101 , 15 , 29 , 20 , 50 , 10 , 26 , 15 , 50 , 12 , 23 , 12 , 21 , 6 , 8 , 4 , 19 , 8 , 7 , 4 ,
+ 2 , 3 , 2 , 8 , 2 , 2 , 2 , 9 , 2 , 2 , 2 , 10 , 3 , 4 , 4 , 18 , 2 , 10 , 2 , 31 , 6 , 12 , 9 , 29 , 12 , 22 , 19 , 54 , 37 , 46 , 34 ,131 , 65 , 78 , 64 ,149 , 54 , 65 , 68 ,388 , 46 , 26 , 57 ,620 , 59 , 33 , 31 ,471 , 37 , 37 , 29 ,158 , 21 , 39 , 25 ,171 , 21 , 36 , 8 , 70 , 14 , 12 , 14 ,
+ 0 , 3 , 1 , 4 , 1 , 3 , 1 , 5 , 1 , 6 , 2 , 6 , 3 , 2 , 2 , 8 , 2 , 9 , 4 , 6 , 2 , 2 , 3 , 18 , 8 , 4 , 9 , 29 , 19 , 34 , 28 , 38 , 22 , 28 , 19 , 27 , 12 , 18 , 14 , 68 , 13 , 15 , 8 , 36 , 6 , 26 , 3 , 34 , 6 , 22 , 3 , 16 , 4 , 19 , 3 , 11 , 8 , 14 , 4 , 48 , 15 , 23 , 7 ,
+ 1 , 3 , 1 , 12 , 1 , 1 , 3 , 12 , 3 , 4 , 7 , 13 , 2 , 5 , 4 , 21 , 5 , 14 , 6 , 20 , 7 , 10 , 14 , 29 , 8 , 25 , 22 , 50 , 27 , 54 , 33 ,115 , 57 , 89 , 43 ,121 , 33 , 56 , 32 ,149 , 19 , 30 , 22 ,295 , 22 , 30 , 26 ,411 , 28 , 21 , 16 ,267 , 25 , 28 , 15 , 99 , 12 , 32 , 17 ,128 , 29 , 61 , 52 ,
+ 2 , 4 , 4 , 12 , 1 , 4 , 1 , 7 , 1 , 1 , 3 , 11 , 2 , 7 , 5 , 13 , 2 , 9 , 3 , 19 , 5 , 8 , 9 , 17 , 11 , 8 , 12 , 34 , 24 , 41 , 25 , 55 , 31 , 58 , 29 , 54 , 17 , 26 , 21 , 52 , 14 , 17 , 18 , 68 , 4 , 18 , 13 ,170 , 13 , 22 , 14 ,179 , 18 , 15 , 5 ,120 , 14 , 18 , 12 ,102 , 26 , 49 , 29 ,
+ 5 , 3 , 0 , 8 , 2 , 2 , 2 , 4 , 0 , 5 , 0 , 5 , 0 , 2 , 0 , 8 , 0 , 3 , 3 , 12 , 1 , 3 , 3 , 23 , 8 , 16 , 7 , 26 , 13 , 20 , 18 , 45 , 22 , 28 , 22 , 41 , 14 , 16 , 16 , 49 , 11 , 15 , 11 , 73 , 3 , 10 , 7 , 61 , 7 , 9 , 5 , 90 , 12 , 12 , 7 , 63 , 14 , 8 , 4 , 83 , 13 , 19 , 15 ,
+ 3 , 4 , 3 , 9 , 0 , 2 , 0 , 3 , 0 , 2 , 3 , 7 , 0 , 3 , 1 , 8 , 2 , 8 , 1 , 5 , 3 , 6 , 2 , 18 , 3 , 12 , 8 , 34 , 15 , 19 , 12 , 47 , 19 , 25 , 21 , 35 , 15 , 16 , 10 , 33 , 6 , 12 , 6 , 39 , 4 , 15 , 2 , 48 , 6 , 10 , 8 , 42 , 3 , 5 , 4 , 73 , 4 , 1 , 5 ,152 , 11 , 15 , 18 ,
+ 0 , 2 , 1 , 1 , 0 , 4 , 0 , 3 , 0 , 3 , 0 , 4 , 0 , 4 , 1 , 2 , 1 , 3 , 0 , 5 , 0 , 3 , 0 , 7 , 4 , 7 , 6 , 15 , 9 , 13 , 8 , 20 , 8 , 16 , 10 , 10 , 5 , 6 , 2 , 12 , 2 , 11 , 1 , 10 , 0 , 10 , 2 , 11 , 0 , 2 , 0 , 7 , 3 , 2 , 3 , 9 , 2 , 5 , 0 , 6 , 3 , 10 , 4 ,
+/*
+ 36 , 94 ,136 ,248 , 42 , 90 , 62 ,284 , 73 , 93 , 87 ,430 ,119 ,147 ,180 ,888 ,157 ,216 ,203 ,716 ,131 ,208 ,243 ,1216 ,272 ,346 ,457 ,2737 ,863 ,1978 ,2840 ,8045 ,1609 ,1342 ,538 ,1273 ,316 ,287 ,195 ,794 ,177 ,199 ,134 ,584 ,193 ,198 ,163 ,755 ,145 ,162 , 99 ,370 ,106 , 76 , 49 ,313 , 64 , 71 , 38 ,252 ,165 , 75 , 39 ,
+ 23 , 53 , 71 ,150 , 34 , 48 , 45 ,176 , 57 , 65 , 76 ,251 , 75 ,115 ,138 ,556 ,130 ,153 ,128 ,428 , 94 ,122 ,172 ,606 ,188 ,219 ,287 ,917 ,407 ,804 ,1000 ,5395 ,2602 ,2741 ,1082 ,2329 ,428 ,261 ,215 ,727 ,174 ,154 ,128 ,431 ,149 ,138 ,143 ,593 ,155 ,108 , 95 ,263 , 85 , 62 , 46 ,193 , 49 , 60 , 43 ,168 , 81 , 65 , 33 ,
+ 37 , 50 , 64 ,112 , 30 , 51 , 35 ,186 , 44 , 69 , 50 ,251 , 58 ,106 ,126 ,484 , 95 ,167 ,105 ,314 , 97 ,115 ,138 ,527 ,131 ,203 ,185 ,621 ,298 ,678 ,761 ,3375 ,2087 ,2890 ,1483 ,2215 ,482 ,432 ,260 ,745 ,180 ,208 ,128 ,411 ,152 ,134 ,136 ,568 ,124 ,152 , 78 ,226 , 72 , 92 , 51 ,181 , 39 , 66 , 28 ,149 ,102 , 71 , 22 ,
+ 16 , 30 , 45 , 84 , 14 , 51 , 29 ,100 , 28 , 49 , 38 ,159 , 48 , 78 , 70 ,326 , 81 , 77 , 71 ,243 , 65 , 64 ,100 ,330 ,100 ,135 ,149 ,378 ,210 ,370 ,370 ,1619 ,907 ,1450 ,1128 ,2217 ,513 ,260 ,240 ,710 ,200 ,128 ,118 ,320 ,128 , 94 , 86 ,353 , 92 , 98 , 80 ,147 , 50 , 36 , 31 ,127 , 45 , 27 , 24 , 78 , 63 , 38 , 22 ,
+ 30 , 76 ,122 ,219 , 38 , 74 , 58 ,308 , 50 , 97 ,116 ,409 , 89 ,168 ,143 ,810 ,146 ,169 ,189 ,626 ,150 ,194 ,199 ,809 ,177 ,230 ,261 ,1073 ,322 ,567 ,620 ,3684 ,1380 ,1976 ,1723 ,7569 ,1900 ,854 ,742 ,3046 ,468 ,334 ,267 ,990 ,273 ,249 ,215 ,1029 ,273 ,190 ,133 ,524 ,120 ,107 , 58 ,358 , 77 , 84 , 52 ,292 ,139 , 82 , 49 ,
+ 7 , 23 , 47 , 61 , 22 , 23 , 21 ,103 , 14 , 22 , 33 ,136 , 54 , 45 , 57 ,229 , 48 , 54 , 62 ,169 , 52 , 63 , 76 ,241 , 68 , 68 ,115 ,272 ,140 ,153 ,185 ,772 ,302 ,420 ,445 ,1755 ,642 ,242 ,319 ,1349 ,276 ,128 ,105 ,421 ,151 , 86 , 83 ,362 , 77 , 57 , 66 ,167 , 64 , 27 , 34 ,134 , 32 , 29 , 14 , 90 , 61 , 38 , 24 ,
+ 23 , 17 , 46 , 77 , 18 , 40 , 28 ,109 , 20 , 37 , 34 ,157 , 36 , 72 , 47 ,242 , 50 , 60 , 69 ,205 , 63 , 75 , 77 ,240 , 77 , 98 ,118 ,363 ,181 ,266 ,269 ,889 ,305 ,457 ,344 ,1010 ,388 ,304 ,365 ,1372 ,317 ,167 ,131 ,572 ,191 ,109 ,110 ,438 , 98 , 93 , 75 ,213 , 83 , 58 , 57 ,157 , 45 , 46 , 32 , 96 , 63 , 78 , 31 ,
+ 38 , 89 , 80 ,239 , 44 , 71 , 68 ,286 , 59 , 98 , 76 ,355 , 94 ,145 ,134 ,677 ,142 ,149 ,163 ,540 ,135 ,154 ,157 ,706 ,143 ,212 ,239 ,882 ,307 ,403 ,489 ,1903 ,579 ,795 ,614 ,2498 ,984 ,648 ,760 ,5794 ,1295 ,394 ,465 ,2504 ,467 ,394 ,289 ,1266 ,232 ,250 ,164 ,561 ,146 ,128 , 74 ,383 ,129 ,103 , 47 ,265 ,156 , 96 , 58 ,
+ 11 , 31 , 34 , 51 , 12 , 19 , 22 , 67 , 14 , 25 , 46 , 78 , 32 , 41 , 44 ,145 , 28 , 37 , 39 ,130 , 26 , 37 , 46 ,159 , 67 , 47 , 61 ,206 ,102 ,138 ,116 ,429 ,131 ,207 ,121 ,325 ,119 , 98 ,106 ,347 ,108 , 77 , 62 ,330 , 83 , 72 , 51 ,242 , 56 , 44 , 28 ,145 , 32 , 34 , 27 , 78 , 31 , 24 , 15 , 81 , 52 , 27 , 15 ,
+ 39 , 72 ,103 ,179 , 29 , 52 , 53 ,207 , 40 , 62 , 66 ,258 , 75 , 80 ,114 ,420 , 81 , 89 ,125 ,392 ,105 , 88 ,120 ,488 ,115 ,119 ,184 ,614 ,194 ,321 ,286 ,1271 ,327 ,367 ,316 ,1021 ,303 ,287 ,238 ,1478 ,452 ,159 ,231 ,2465 ,662 ,191 ,265 ,1859 ,292 ,161 ,158 ,523 ,132 , 82 , 76 ,326 ,108 , 82 , 60 ,247 ,182 , 79 , 44 ,
+ 15 , 21 , 32 , 40 , 12 , 15 , 15 , 63 , 15 , 16 , 28 , 72 , 24 , 35 , 31 ,154 , 38 , 35 , 44 ,120 , 37 , 27 , 51 ,141 , 46 , 43 , 61 ,227 ,109 ,149 ,128 ,410 ,143 ,176 ,104 ,288 , 73 , 99 , 56 ,259 , 83 , 55 , 47 ,206 , 73 , 51 , 59 ,315 , 64 , 43 , 32 ,146 , 37 , 40 , 23 , 84 , 22 , 23 , 23 , 64 , 48 , 25 , 14 ,
+ 34 , 80 , 79 ,191 , 41 , 62 , 73 ,272 , 41 , 73 , 67 ,295 , 64 ,116 ,137 ,579 ,129 ,141 ,166 ,443 ,102 ,132 ,132 ,535 ,151 ,183 ,209 ,800 ,282 ,476 ,461 ,1521 ,406 ,526 ,351 ,1005 ,285 ,248 ,223 ,1036 ,341 ,172 ,215 ,1113 ,427 ,204 ,254 ,2027 ,432 ,172 ,189 ,828 ,198 ,118 , 99 ,437 ,127 ,108 , 89 ,355 ,204 ,114 , 50 ,
+ 13 , 70 , 76 ,116 , 28 , 41 , 57 ,139 , 32 , 37 , 48 ,203 , 57 , 66 , 74 ,308 , 60 , 62 ,131 ,273 , 89 , 66 , 83 ,322 , 79 , 81 ,138 ,423 ,180 ,250 ,246 ,760 ,214 ,276 ,174 ,540 ,165 ,102 , 86 ,610 ,155 , 82 ,109 ,509 ,164 , 87 ,121 ,723 ,212 , 81 ,101 ,637 ,168 , 46 , 69 ,296 , 76 , 54 , 61 ,209 ,182 , 78 , 66 ,
+ 14 , 36 , 53 ,132 , 17 , 38 , 21 ,121 , 24 , 34 , 40 ,153 , 38 , 39 , 45 ,290 , 39 , 64 , 68 ,185 , 49 , 54 , 61 ,256 , 53 , 88 , 71 ,330 ,135 ,213 ,177 ,577 ,159 ,162 ,164 ,401 ,113 , 96 ,114 ,415 ,138 , 91 , 73 ,369 ,148 , 79 , 75 ,502 ,112 , 69 , 87 ,295 , 97 , 63 , 40 ,274 ,105 , 51 , 34 ,194 ,156 , 70 , 54 ,
+ 14 , 46 , 63 , 89 , 14 , 50 , 17 , 97 , 21 , 24 , 30 ,116 , 22 , 41 , 44 ,181 , 35 , 37 , 43 ,185 , 32 , 55 , 62 ,201 , 39 , 47 , 82 ,273 ,112 ,159 ,131 ,512 ,110 ,163 ,117 ,303 ,103 , 67 , 86 ,322 , 90 , 65 , 46 ,304 , 94 , 55 , 88 ,376 , 89 , 50 , 61 ,248 , 89 , 36 , 58 ,234 , 85 , 44 , 40 ,210 ,293 , 73 , 44 ,
+ 17 , 16 , 21 , 40 , 8 , 10 , 14 , 45 , 8 , 14 , 7 , 42 , 13 , 13 , 15 , 70 , 17 , 29 , 20 , 64 , 19 , 10 , 29 , 69 , 23 , 16 , 20 , 90 , 25 , 64 , 58 ,167 , 72 , 75 , 41 ,121 , 35 , 29 , 15 ,118 , 25 , 30 , 25 , 90 , 28 , 29 , 20 , 95 , 19 , 25 , 16 , 73 , 17 , 18 , 12 , 54 , 17 , 31 , 14 , 66 , 43 , 27 , 23 ,
+ {
+ 25, 61, 10, 20, 8, 25, 9, 24,
+ 10, 23, 9, 23, 13, 35, 19, 77,
+ 18, 56, 21, 38, 21, 49, 35, 65,
+ 45, 86, 80, 141, 197, 521, 1533, 2830,
+ 1891, 605, 232, 160, 91, 100, 63, 71,
+ 51, 57, 26, 43, 27, 36, 23, 68,
+ 22, 39, 13, 25, 14, 25, 10, 21,
+ 6, 23, 5, 19, 3, 49, 29,
+ },
+ {
+ 17, 14, 2, 15, 4, 10, 3, 16,
+ 5, 11, 6, 17, 8, 23, 17, 38,
+ 20, 34, 14, 35, 17, 34, 29, 42,
+ 47, 75, 83, 115, 145, 372, 910, 2907,
+ 2317, 1284, 375, 198, 116, 93, 52, 52,
+ 35, 46, 18, 33, 23, 22, 24, 34,
+ 10, 23, 9, 16, 9, 15, 8, 10,
+ 8, 7, 4, 9, 4, 17, 10,
+ },
+ {
+ 12, 34, 9, 24, 2, 26, 6, 15,
+ 7, 22, 18, 28, 11, 31, 17, 51,
+ 18, 29, 20, 35, 21, 51, 53, 68,
+ 53, 85, 68, 120, 151, 310, 525, 1339,
+ 2210, 1761, 988, 498, 210, 167, 96, 93,
+ 76, 54, 32, 44, 30, 53, 25, 60,
+ 26, 40, 24, 30, 6, 34, 13, 22,
+ 4, 26, 10, 18, 7, 35, 17,
+ },
+ {
+ 27, 33, 5, 15, 6, 15, 5, 41,
+ 12, 24, 15, 19, 12, 48, 12, 24,
+ 17, 29, 8, 34, 26, 60, 47, 69,
+ 40, 95, 94, 121, 151, 303, 468, 1041,
+ 1205, 1641, 1136, 986, 548, 304, 212, 195,
+ 123, 83, 55, 76, 27, 41, 33, 60,
+ 31, 36, 29, 24, 15, 27, 10, 17,
+ 6, 33, 8, 12, 3, 48, 26,
+ },
+ {
+ 42, 61, 6, 36, 16, 40, 8, 20,
+ 30, 20, 4, 30, 28, 40, 18, 47,
+ 26, 53, 22, 44, 26, 77, 53, 79,
+ 61, 128, 77, 108, 145, 265, 298, 776,
+ 670, 1079, 1128, 1269, 840, 603, 265, 226,
+ 165, 167, 63, 75, 49, 79, 42, 94,
+ 36, 44, 14, 59, 20, 30, 16, 28,
+ 12, 38, 22, 38, 12, 67, 34,
+ },
+ {
+ 22, 45, 8, 19, 5, 31, 8, 16,
+ 16, 28, 31, 14, 19, 36, 16, 56,
+ 31, 48, 59, 59, 22, 76, 50, 59,
+ 33, 96, 96, 127, 118, 266, 379, 651,
+ 577, 722, 835, 1152, 1019, 864, 535, 325,
+ 186, 184, 118, 101, 79, 73, 59, 127,
+ 50, 70, 45, 33, 25, 39, 22, 39,
+ 8, 28, 14, 8, 8, 45, 33,
+ },
+ {
+ 38, 53, 15, 34, 5, 25, 8, 44,
+ 20, 32, 19, 20, 19, 51, 25, 74,
+ 34, 55, 36, 76, 39, 67, 60, 74,
+ 36, 95, 82, 120, 131, 190, 352, 606,
+ 400, 509, 463, 769, 888, 966, 762, 570,
+ 330, 316, 162, 174, 120, 159, 86, 127,
+ 67, 86, 44, 53, 29, 70, 31, 38,
+ 24, 31, 10, 34, 12, 53, 53,
+ },
+ {
+ 46, 80, 7, 43, 4, 24, 7, 41,
+ 7, 48, 12, 43, 24, 51, 17, 73,
+ 38, 68, 29, 68, 34, 90, 14, 109,
+ 55, 126, 58, 146, 126, 221, 250, 630,
+ 370, 462, 370, 489, 430, 613, 737, 890,
+ 747, 535, 233, 197, 163, 185, 109, 99,
+ 65, 94, 29, 80, 51, 70, 31, 53,
+ 12, 48, 14, 41, 19, 87, 60,
+ },
+ {
+ 35, 86, 6, 41, 16, 25, 9, 38,
+ 45, 38, 28, 48, 22, 51, 28, 83,
+ 41, 73, 38, 41, 32, 112, 80, 80,
+ 70, 112, 96, 119, 141, 292, 337, 521,
+ 299, 347, 241, 340, 289, 488, 402, 649,
+ 572, 810, 440, 385, 295, 273, 167, 183,
+ 90, 102, 57, 102, 54, 73, 48, 61,
+ 32, 67, 25, 64, 12, 99, 115,
+ },
+ {
+ 47, 95, 21, 39, 13, 69, 8, 52,
+ 17, 65, 21, 60, 26, 43, 21, 95,
+ 82, 112, 21, 47, 43, 138, 52, 99,
+ 82, 125, 60, 130, 143, 273, 225, 555,
+ 286, 416, 251, 316, 234, 390, 268, 360,
+ 338, 507, 438, 646, 446, 386, 308, 281,
+ 182, 186, 86, 138, 26, 143, 34, 47,
+ 43, 121, 21, 69, 13, 30, 82,
+ },
+ {
+ 90, 129, 12, 60, 8, 51, 17, 17,
+ 30, 124, 38, 60, 34, 90, 47, 120,
+ 64, 99, 38, 94, 47, 163, 64, 112,
+ 73, 133, 77, 116, 133, 323, 262, 478,
+ 288, 353, 168, 185, 150, 336, 202, 232,
+ 232, 340, 284, 422, 469, 577, 478, 409,
+ 245, 189, 120, 163, 94, 155, 51, 112,
+ 64, 68, 51, 64, 21, 137, 107,
+ },
+ {
+ 67, 111, 29, 35, 24, 65, 16, 73,
+ 8, 51, 35, 70, 10, 97, 65, 189,
+ 59, 105, 40, 94, 51, 97, 62, 97,
+ 84, 127, 86, 151, 121, 311, 281, 512,
+ 289, 349, 178, 257, 151, 257, 214, 241,
+ 176, 273, 173, 252, 235, 344, 355, 650,
+ 376, 401, 214, 254, 140, 168, 84, 102,
+ 43, 97, 65, 97, 32, 140, 140,
+ },
+ {
+ 60, 174, 34, 60, 26, 87, 30, 43,
+ 17, 100, 26, 78, 47, 104, 52, 126,
+ 39, 108, 60, 113, 78, 113, 82, 143,
+ 60, 143, 69, 169, 91, 283, 239, 522,
+ 261, 274, 139, 217, 148, 174, 117, 226,
+ 156, 204, 143, 174, 191, 283, 248, 413,
+ 261, 331, 222, 383, 243, 296, 222, 239,
+ 100, 178, 74, 156, 34, 265, 222,
+ },
+ {
+ 160, 183, 41, 41, 5, 89, 29, 100,
+ 41, 89, 65, 47, 47, 77, 59, 136,
+ 53, 100, 47, 183, 47, 94, 35, 100,
+ 77, 142, 89, 142, 106, 272, 332, 522,
+ 249, 302, 178, 178, 178, 189, 106, 172,
+ 100, 261, 112, 178, 94, 290, 160, 284,
+ 136, 249, 106, 231, 172, 272, 201, 403,
+ 178, 308, 148, 213, 100, 338, 332,
+ },
+ {
+ 114, 158, 1, 86, 57, 86, 28, 100,
+ 86, 57, 28, 114, 57, 86, 43, 129,
+ 14, 86, 43, 57, 86, 114, 57, 158,
+ 57, 186, 129, 186, 158, 215, 215, 474,
+ 158, 316, 186, 172, 129, 316, 114, 114,
+ 201, 201, 71, 100, 186, 201, 114, 272,
+ 100, 172, 86, 143, 86, 330, 244, 301,
+ 158, 201, 129, 445, 86, 502, 675,
+ },
+ {
+ 173, 195, 21, 75, 37, 75, 5, 102,
+ 43, 113, 21, 86, 27, 92, 37, 173,
+ 37, 140, 54, 108, 48, 124, 48, 146,
+ 70, 173, 108, 195, 119, 227, 292, 596,
+ 238, 265, 151, 157, 130, 195, 54, 184,
+ 86, 249, 65, 130, 54, 178, 102, 254,
+ 124, 216, 54, 162, 86, 216, 92, 222,
+ 113, 205, 86, 216, 146, 807, 970,
+ }
+*/};
+
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_CreateMvTrees
+ *
+ *  INPUTS        :     None (reads VP5_XMvFrequencyCounts and VP5_YMvFrequencyCounts).
+ *
+ *  OUTPUTS       :     XMvHuffTables and YMvHuffTables are zeroed and rebuilt.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Builds the VP5 huffman trees used for decoding motion vectors,
+ *                      one X tree and one Y tree for each of the MV_ENTROPY_TABLES tables.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_CreateMvTrees ()
+{
+    UINT32 Table = 0;
+    // Wipe the node storage first, then rebuild the X and Y tree of every table.
+    memset( XMvHuffTables, 0, (sizeof(HUFF_NODE) * MV_ENTROPY_TABLES * (MV_ENTROPY_TOKENS-1)));
+    memset( YMvHuffTables, 0, (sizeof(HUFF_NODE) * MV_ENTROPY_TABLES * (MV_ENTROPY_TOKENS-1)));
+    while ( Table < MV_ENTROPY_TABLES )
+    {
+        VP5_BuildHuffTree( XMvHuffTables[Table], VP5_XMvFrequencyCounts[Table], MV_ENTROPY_TOKENS );
+        VP5_BuildHuffTree( YMvHuffTables[Table], VP5_YMvFrequencyCounts[Table], MV_ENTROPY_TOKENS );
+        Table++;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_CreateMvCodeArrays
+ *
+ *  INPUTS        :     None (reads XMvHuffTables and YMvHuffTables).
+ *
+ *  OUTPUTS       :     XMvPatternTables, XMvBitsTables, YMvPatternTables, YMvBitsTables.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Creates the VP5 Mv huffman code arrays from the VP5
+ *                      Mv huffman trees, one table at a time.
+ *
+ *  SPECIAL NOTES :     The routine is currently excluded from the build by
+ *                      the #if 0 that surrounds it.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+// NOTE: compiled out; the original author (sjl) queried whether this code is still used.
+#if 0
+void VP5_CreateMvCodeArrays()
+{
+    UINT32 Table = 0;
+    while ( Table < MV_ENTROPY_TABLES )
+    {
+        VP5_CreateCodeArray( XMvHuffTables[Table], 0, XMvPatternTables[Table], XMvBitsTables[Table], 0, 0 );
+        VP5_CreateCodeArray( YMvHuffTables[Table], 0, YMvPatternTables[Table], YMvBitsTables[Table], 0, 0 );
+        Table++;
+    }
+}
+#endif
+
+
+/* Decoder specific functions */
+#ifdef PBDLL
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_ExtractMVectorComponent
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance (bools are decoded from pbi->br).
+ *                      HUFF_NODE *hn    : Node array of the tree to walk, root at index 0.
+ *                      BOOL Invert      : Non-zero to negate the decoded component.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     INT32 : Decoded component (leaf value minus 31), negated if Invert.
+ *
+ *  FUNCTION      :     Extracts a motion vector component for VP5 by walking the tree.
+ *
+ *  SPECIAL NOTES :     The walk only ends when a leaf is reached; no limit on the
+ *                      number of steps is enforced here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+INT32 VP5_ExtractMVectorComponent(PB_INSTANCE *pbi, HUFF_NODE * hn, BOOL Invert )
+{
+    int Index = 0;      // node being visited; holds the leaf value once the walk ends
+    int IsLeaf = 0;     // selector of the branch just taken
+    INT32 Value;
+    // Each decoded bool picks the right (non-zero) or left (zero) branch of the
+    // current node; keep descending until the chosen branch is flagged as a leaf.
+    while ( !IsLeaf )
+    {
+        HUFF_NODE *Node = &hn[Index];
+        if ( DecodeBool(&pbi->br, Node->freq) )
+        {
+            IsLeaf = Node->rightunion.right.selector;
+            Index  = Node->rightunion.right.value;
+        }
+        else
+        {
+            IsLeaf = Node->leftunion.left.selector;
+            Index  = (int) Node->leftunion.left.value;
+        }
+    }
+    // Remove the bias of 31 from the leaf value, then apply the requested sign.
+    Value = (INT32)(Index - 31);
+    if ( Invert )
+        Value = -Value;
+    return Value;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     ExtractMVectorComponentA
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance (bits are read from pbi->br).
+ *                      HUFF_NODE *hn    : Tree root (not used by this method).
+ *                      BOOL Invert      : Invert sign flag (not used by this method).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     INT32 : Decoded component; magnitude is at most 16 + 15.
+ *
+ *  FUNCTION      :     Extracts a motion vector component coded with method A:
+ *                      a 3 bit group code, then any offset and sign bits it calls for.
+ *
+ *  SPECIAL NOTES :     An unexpected group code yields 0, not an indeterminate value.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+static INT32 ExtractMVectorComponentA(PB_INSTANCE *pbi, HUFF_NODE * hn, BOOL Invert )
+{
+    INT32 MVectComponent = 0;   // decoded component; defined even if no case below matches
+    UINT32 MVCode = 0;          // 3 bit group code
+    UINT32 ExtraBits = 0;       // offset within the group (groups 5-7 only)
+
+    // Get group to which coded component belongs
+    MVCode = VP5_bitread( &pbi->br, 3 );
+
+    // Groups 0-2 are complete on their own, groups 3 and 4 add a sign bit, and
+    // groups 5-7 add 2, 3 or 4 offset bits followed by a sign bit.
+    switch ( MVCode )
+    {
+    case 0:
+        MVectComponent = 0;
+        break;
+    case 1:
+        MVectComponent = 1;
+        break;
+    case 2:
+        MVectComponent = -1;
+        break;
+    case 3:
+        MVectComponent = VP5_bitread1( &pbi->br ) ? -2 : 2;
+        break;
+    case 4:
+        MVectComponent = VP5_bitread1( &pbi->br ) ? -3 : 3;
+        break;
+    case 5:
+        ExtraBits = VP5_bitread( &pbi->br, 2 );
+        MVectComponent = 4 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -MVectComponent;
+        break;
+    case 6:
+        ExtraBits = VP5_bitread( &pbi->br, 3 );
+        MVectComponent = 8 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -MVectComponent;
+        break;
+    case 7:
+        ExtraBits = VP5_bitread( &pbi->br, 4 );
+        MVectComponent = 16 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -MVectComponent;
+        break;
+    default:
+        // A 3 bit read should never land here; return a zero component rather
+        // than an indeterminate value if it ever does.
+        MVectComponent = 0;
+        break;
+    }
+
+    return MVectComponent;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     ExtractMVectorComponentB
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi      : Decoder instance (bits are read from pbi->br).
+ *                      HUFF_NODE *MvNodePtr  : Tree root (not used by this method).
+ *                      BOOL Invert           : Invert sign flag (not used by this method).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     INT32 : Decoded component (5 bit magnitude with sign applied).
+ *
+ *  FUNCTION      :     Extracts an MV component coded using the fallback method.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+static INT32 ExtractMVectorComponentB(PB_INSTANCE *pbi, HUFF_NODE * MvNodePtr, BOOL Invert )
+{
+    INT32 Magnitude;
+
+    // Fixed-length code: a 5 bit magnitude followed by a single sign bit.
+    // The magnitude is read first so the bits are consumed in stream order.
+    Magnitude = VP5_bitread( &pbi->br, 5 );
+    if ( !VP5_bitread1( &pbi->br ) )
+        return Magnitude;
+    return -Magnitude;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c
new file mode 100644
index 00000000..c06fb611
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c
@@ -0,0 +1,439 @@
+/****************************************************************************
+*
+* Module Title : TokenEntropy.c
+*
+* Description : Video CODEC: Coefficient token entropy module.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 PGW 27 Jun 01 Created
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Header Frames
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "systemdependant.h"
+#include "codec_common.h"
+#include "codec_common_interface.h"
+#include "tokenentropy.h"
+#include "pbdll.h"
+
+
+/****************************************************************************
+* Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+#ifndef MAPCA
+
+// ProbCost[p]: cost in bits, multiplied by 256, for a probability p expressed in the range 0-255.
+// Entries stay within about two units of 256 * -log2(p/256); entry 0 repeats entry 1 (2047).
+const UINT32 ProbCost[256] =
+{
+ 2047,
+ 2047,1791,1641,1535,1452,1385,1328,1279,1235,1196,
+ 1161,1129,1099,1072,1046,1023,1000,979,959,940,
+ 922,905,889,873,858,843,829,816,803,790,
+ 778,767,755,744,733,723,713,703,693,684,
+ 675,666,657,649,641,633,625,617,609,602,
+ 594,587,580,573,567,560,553,547,541,534,
+ 528,522,516,511,505,499,494,488,483,477,
+ 472,467,462,457,452,447,442,437,433,428,
+ 424,419,415,410,406,401,397,393,389,385,
+ 381,377,373,369,365,361,357,353,349,346,
+ 342,338,335,331,328,324,321,317,314,311,
+ 307,304,301,297,294,291,288,285,281,278,
+ 275,272,269,266,263,260,257,255,252,249,
+ 246,243,240,238,235,232,229,227,224,221,
+ 219,216,214,211,208,206,203,201,198,196,
+ 194,191,189,186,184,181,179,177,174,172,
+ 170,168,165,163,161,159,156,154,152,150,
+ 148,145,143,141,139,137,135,133,131,129,
+ 127,125,123,121,119,117,115,113,111,109,
+ 107,105,103,101,99,97,95,93,92,90,
+ 88,86,84,82,81,79,77,75,73,72,
+ 70,68,66,65,63,61,60,58,56,55,
+ 53,51,50,48,46,45,43,41,40,38,
+ 37,35,33,32,30,29,27,25,24,22,
+ 21,19,18,16,15,13,12,10,9,7,
+ 6,4,3,1,
+ 1,
+};
+#endif
+// Index category for the previous token in this block: 0 for tokens 0 and 11, 1 for token 1, 2 for tokens 2-10.
+const UINT8 PrevTokenIndex[MAX_ENTROPY_TOKENS] = { 0,1,2,2,2,2,2,2,2,2,2,0 };
+// Per-token tables: number of extra bits read for the token, and the smallest value in the token's range.
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+const UINT8 ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 4, 5, 6, 12, 0 };
+const UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0 };
+// NOTE(review): presumably the probabilities used when reading DC node probability updates, indexed [plane][node]; the consumer is not in this file - confirm.
+const UINT8 DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1] =
+{
+ { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+ { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 }
+};
+// NOTE(review): presumably the AC counterpart of DcUpdateProbs, indexed [preceding-token case][plane][band][node]; the consumer is not in this file - confirm.
+const UINT8 AcUpdateProbs[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1] =
+{
+ { // preceded by 0
+ {
+ { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 },
+ { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 },
+ { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 },
+ { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 },
+ { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ {
+ { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+ { // preceded by 1
+ {
+ { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 },
+ { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 },
+ { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 },
+ { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 },
+ { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ {
+ { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+ { // preceded by > 1
+ {
+ { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 },
+ { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 },
+ { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 },
+ { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ {
+ { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 },
+ { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 },
+ { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+};
+
+/*
+{
+ { // preceded by 0
+ {
+ { 234, 246, 250, 249, 244, 254, 254, 254, 254, 254, 254 },
+ { 225, 254, 242, 238, 234, 253, 252, 254, 254, 254, 254 },
+ { 230, 254, 248, 243, 238, 254, 254, 254, 254, 254, 254 },
+ { 244, 254, 254, 252, 247, 254, 254, 254, 254, 254, 254 },
+ { 253, 254, 254, 254, 253, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+
+ 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254,
+ 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254,
+ 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254,
+ 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254,
+ 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254,
+ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+ },
+ {
+ { 251, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 243, 254, 254, 253, 253, 254, 254, 254, 254, 254, 254 },
+ { 252, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+ { // preceded by 1
+ {
+ { 211, 216, 233, 233, 234, 252, 251, 254, 254, 254, 254 },
+ { 224, 219, 236, 237, 236, 252, 250, 254, 254, 253, 254 },
+ { 227, 230, 245, 241, 238, 253, 254, 254, 254, 254, 254 },
+ { 237, 235, 253, 252, 250, 254, 254, 254, 254, 254, 254 },
+ { 252, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ {
+ { 237, 242, 253, 253, 253, 254, 254, 254, 254, 254, 254 },
+ { 248, 250, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+ { // preceded by > 1
+ {
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ {
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+ },
+ },
+};
+*/
+// DC context equations, indexed [Node][i][j]: ConfigureContexts evaluates each as ((prob * M + 128) >> 8) + C and limits the result to 1..254. Initializer order is presumably { M, C } - confirm against LINE_EQ.
+LINE_EQ DcNodeEqs[CONTEXT_NODES][TOKEN_CONTEXTS][TOKEN_CONTEXTS] =
+{
+ { // zero
+ { { 154, 61 },{ 141, 54 },{ 90, 45 },{ 54, 34 },{ 54, 13 },{ 128, 109 }, },
+ { { 136, 54 },{ 148, 45 },{ 92, 41 },{ 54, 33 },{ 51, 15 },{ 87, 113 }, },
+ { { 87, 44 },{ 97, 40 },{ 67, 36 },{ 46, 29 },{ 41, 15 },{ 64, 80 }, },
+ { { 59, 33 },{ 61, 31 },{ 51, 28 },{ 44, 22 },{ 33, 12 },{ 49, 63 }, },
+ { { 69, 12 },{ 59, 16 },{ 46, 14 },{ 31, 13 },{ 26, 6 },{ 92, 26 }, },
+ { { 128, 108 },{ 77, 119 },{ 54, 84 },{ 26, 71 },{ 87, 19 },{ 95, 155 }, },
+ },
+ { // eob
+ { { 154, 4 },{ 182, 0 },{ 159, -8 },{ 128, -5 },{ 143, -5 },{ 187, 55 }, },
+ { { 182, 0 },{ 228, -3 },{ 187, -7 },{ 174, -9 },{ 189, -11 },{ 169, 79 }, },
+ { { 161, -9 },{ 192, -8 },{ 187, -9 },{ 169, -10 },{ 136, -9 },{ 184, 40 }, },
+ { { 164, -11 },{ 179, -10 },{ 174, -10 },{ 161, -10 },{ 115, -7 },{ 197, 20 }, },
+ { { 195, -11 },{ 195, -11 },{ 146, -10 },{ 110, -6 },{ 95, -4 },{ 195, 39 }, },
+ { { 182, 55 },{ 172, 77 },{ 177, 37 },{ 169, 29 },{ 172, 52 },{ 92, 162 }, },
+ },
+ { // one
+ { { 174, 80 },{ 164, 80 },{ 95, 80 },{ 46, 66 },{ 56, 24 },{ 36, 193 }, },
+ { { 164, 80 },{ 166, 77 },{ 105, 76 },{ 49, 68 },{ 46, 31 },{ 49, 186 }, },
+ { { 97, 78 },{ 110, 74 },{ 72, 72 },{ 44, 60 },{ 33, 30 },{ 69, 131 }, },
+ { { 61, 61 },{ 69, 63 },{ 51, 57 },{ 31, 48 },{ 26, 27 },{ 64, 89 }, },
+ { { 67, 23 },{ 51, 32 },{ 36, 33 },{ 26, 28 },{ 20, 12 },{ 44, 68 }, },
+ { { 26, 197 },{ 41, 189 },{ 61, 129 },{ 28, 103 },{ 49, 52 },{ -12, 245 }, },
+ },
+ { // low value
+ { { 102, 141 },{ 79, 166 },{ 72, 162 },{ 97, 125 },{ 179, 4 },{ 307, 0 }, },
+ { { 72, 168 },{ 69, 175 },{ 84, 160 },{ 105, 127 },{ 148, 34 },{ 310, 0 }, },
+ { { 84, 151 },{ 82, 161 },{ 87, 153 },{ 87, 135 },{ 115, 51 },{ 317, 0 }, },
+ { { 97, 125 },{ 102, 131 },{ 105, 125 },{ 87, 122 },{ 84, 64 },{ 54, 184 }, },
+ { { 166, 18 },{ 146, 43 },{ 125, 51 },{ 90, 64 },{ 95, 7 },{ 38, 154 }, },
+ { { 294, 0 },{ 13, 225 },{ 10, 225 },{ 67, 168 },{ 0, 167 },{ 161, 94 }, },
+ },
+ { // two
+ { { 172, 76 },{ 172, 75 },{ 136, 80 },{ 64, 98 },{ 74, 67 },{ 315, 0 }, },
+ { { 169, 76 },{ 207, 56 },{ 164, 66 },{ 97, 80 },{ 67, 72 },{ 328, 0 }, },
+ { { 136, 80 },{ 187, 53 },{ 154, 62 },{ 72, 85 },{ -2, 105 },{ 305, 0 }, },
+ { { 74, 91 },{ 128, 64 },{ 113, 64 },{ 61, 77 },{ 41, 75 },{ 259, 0 }, },
+ { { 46, 84 },{ 51, 81 },{ 28, 89 },{ 31, 78 },{ 23, 77 },{ 202, 0 }, },
+ { { 323, 0 },{ 323, 0 },{ 300, 0 },{ 236, 0 },{ 195, 0 },{ 328, 0 }, },
+ },
+};
+// AC context equations, indexed [Prec][Band][Node][i]: ConfigureContexts applies them the same way as the DC ones, but only for the first VP5_AC_BANDS-3 bands.
+LINE_EQ AcNodeEqs[PREC_CASES][VP5_AC_BANDS-3][CONTEXT_NODES][TOKEN_CONTEXTS] =
+{
+ { // Preceded by 0
+ { // Band 0
+ { { 276, 0 },{ 238, 0 },{ 195, 0 },{ 156, 0 },{ 113, 0 },{ 274, 0 }, },
+ { { 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 }, },
+ { { 192, 59 },{ 182, 50 },{ 141, 48 },{ 110, 40 },{ 92, 19 },{ 125, 128 }, },
+ { { 169, 87 },{ 169, 83 },{ 184, 62 },{ 220, 16 },{ 184, 0 },{ 264, 0 }, },
+ { { 212, 40 },{ 212, 36 },{ 169, 49 },{ 174, 27 },{ 8, 120 },{ 182, 71 }, },
+ },
+ { // Band 1
+ { { 259, 10 },{ 197, 19 },{ 143, 22 },{ 123, 16 },{ 110, 8 },{ 133, 88 }, },
+ { { 0, 1 },{ 256, 0 },{ 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 }, },
+ { { 207, 46 },{ 187, 50 },{ 97, 83 },{ 23, 100 },{ 41, 56 },{ 56, 188 }, },
+ { { 166, 90 },{ 146, 108 },{ 161, 88 },{ 136, 95 },{ 174, 0 },{ 266, 0 }, },
+ { { 264, 7 },{ 243, 18 },{ 184, 43 },{ -14, 154 },{ 20, 112 },{ 20, 199 }, },
+ },
+ { // Band 2
+ { { 230, 26 },{ 197, 22 },{ 159, 20 },{ 146, 12 },{ 136, 4 },{ 54, 162 }, },
+ { { 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 },{ 0, 1 }, },
+ { { 192, 59 },{ 156, 72 },{ 84, 101 },{ 49, 101 },{ 79, 47 },{ 79, 167 }, },
+ { { 138, 115 },{ 136, 116 },{ 166, 80 },{ 238, 0 },{ 195, 0 },{ 261, 0 }, },
+ { { 225, 33 },{ 205, 42 },{ 159, 61 },{ 79, 96 },{ 92, 66 },{ 28, 195 }, },
+ },
+ },
+ { // Preceded by 1
+ { // Band 0
+ { { 200, 37 },{ 197, 18 },{ 159, 13 },{ 143, 7 },{ 102, 5 },{ 123, 126 }, },
+ { { 197, 3 },{ 220, -9 },{ 210, -12 },{ 187, -6 },{ 151, -2 },{ 174, 80 }, },
+ { { 200, 53 },{ 187, 47 },{ 159, 40 },{ 118, 38 },{ 100, 18 },{ 141, 111 }, },
+ { { 179, 78 },{ 166, 86 },{ 197, 50 },{ 207, 27 },{ 187, 0 },{ 115, 139 }, },
+ { { 218, 34 },{ 220, 29 },{ 174, 46 },{ 128, 61 },{ 54, 89 },{ 187, 65 }, },
+ },
+ { // Band 1
+ { { 238, 14 },{ 197, 18 },{ 125, 26 },{ 90, 25 },{ 82, 13 },{ 161, 86 }, },
+ { { 189, 1 },{ 205, -2 },{ 156, -4 },{ 143, -4 },{ 146, -4 },{ 172, 72 }, },
+ { { 230, 31 },{ 192, 45 },{ 102, 76 },{ 38, 85 },{ 56, 41 },{ 64, 173 }, },
+ { { 166, 91 },{ 141, 111 },{ 128, 116 },{ 118, 109 },{ 177, 0 },{ 23, 222 }, },
+ { { 253, 14 },{ 236, 21 },{ 174, 49 },{ 33, 118 },{ 44, 93 },{ 23, 187 }, },
+ },
+ { // Band 2
+ { { 218, 28 },{ 179, 28 },{ 118, 35 },{ 95, 30 },{ 72, 24 },{ 128, 108 }, },
+ { { 187, 1 },{ 174, -1 },{ 125, -1 },{ 110, -1 },{ 108, -1 },{ 202, 52 }, },
+ { { 197, 53 },{ 146, 75 },{ 46, 118 },{ 33, 103 },{ 64, 50 },{ 118, 126 }, },
+ { { 138, 114 },{ 128, 122 },{ 161, 86 },{ 243, -6 },{ 195, 0 },{ 38, 210 }, },
+ { { 215, 39 },{ 179, 58 },{ 97, 101 },{ 95, 85 },{ 87, 70 },{ 69, 152 }, },
+ },
+ },
+ { // Preceded by 2
+ { // Band 0
+ { { 236, 24 },{ 205, 18 },{ 172, 12 },{ 154, 6 },{ 125, 1 },{ 169, 75 }, },
+ { { 187, 4 },{ 230, -2 },{ 228, -4 },{ 236, -4 },{ 241, -2 },{ 192, 66 }, },
+ { { 200, 46 },{ 187, 42 },{ 159, 34 },{ 136, 25 },{ 105, 10 },{ 179, 62 }, },
+ { { 207, 55 },{ 192, 63 },{ 192, 54 },{ 195, 36 },{ 177, 1 },{ 143, 98 }, },
+ { { 225, 27 },{ 207, 34 },{ 200, 30 },{ 131, 57 },{ 97, 60 },{ 197, 45 }, },
+ },
+ { // Band 1
+ { { 271, 8 },{ 218, 13 },{ 133, 19 },{ 90, 19 },{ 72, 7 },{ 182, 51 }, },
+ { { 179, 1 },{ 225, -1 },{ 154, -2 },{ 110, -1 },{ 92, 0 },{ 195, 41 }, },
+ { { 241, 26 },{ 189, 40 },{ 82, 64 },{ 33, 60 },{ 67, 17 },{ 120, 94 }, },
+ { { 192, 68 },{ 151, 94 },{ 146, 90 },{ 143, 72 },{ 161, 0 },{ 113, 128 }, },
+ { { 256, 12 },{ 218, 29 },{ 166, 48 },{ 44, 99 },{ 31, 87 },{ 148, 78 }, },
+ },
+ { // Band 2
+ { { 238, 20 },{ 184, 22 },{ 113, 27 },{ 90, 22 },{ 74, 9 },{ 192, 37 }, },
+ { { 184, 0 },{ 215, -1 },{ 141, -1 },{ 97, 0 },{ 49, 0 },{ 264, 13 }, },
+ { { 182, 51 },{ 138, 61 },{ 95, 63 },{ 54, 59 },{ 64, 25 },{ 200, 45 }, },
+ { { 179, 75 },{ 156, 87 },{ 174, 65 },{ 177, 44 },{ 174, 0 },{ 164, 85 }, },
+ { { 195, 45 },{ 148, 65 },{ 105, 79 },{ 95, 72 },{ 87, 60 },{ 169, 63 }, },
+ },
+ },
+};
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ *  ROUTINE       :     ConfigureContexts
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance; DcProbs and AcProbs are read.
+ *
+ *  OUTPUTS       :     pbi->DcNodeContexts and pbi->AcNodeContexts are filled in.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Configures the context dependent entropy probabilities by
+ *                      passing each baseline probability through a line equation.
+ *
+ *  SPECIAL NOTES :     NOTE(review): the tables hold some negative values, so the
+ *                      >> 8 may act on a negative number - confirm operand types.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ConfigureContexts(PB_INSTANCE *pbi)
+{
+    UINT32 CtxA, CtxB;
+    UINT32 Band, Node, Plane, Prec;
+    INT32 Prob;
+    const LINE_EQ *Eq;
+
+    // Clear MMX state so floating point can work again
+#if !defined(MACPPC) && !defined(MAPCA)
+    ClearSysState();
+#endif
+
+    // DC node probabilities: every (plane, context pair, node) entry is the plane's
+    // baseline node probability pushed through that entry's line equation.
+    for ( Plane = 0; Plane < 2; Plane++ )
+    {
+        for ( CtxA = 0; CtxA < TOKEN_CONTEXTS; CtxA++ )
+        {
+            for ( CtxB = 0; CtxB < TOKEN_CONTEXTS; CtxB++ )
+            {
+                for ( Node = 0; Node < CONTEXT_NODES; Node++ )
+                {
+                    Eq   = &DcNodeEqs[Node][CtxA][CtxB];
+                    Prob = ( ( pbi->DcProbs[DCProbOffset(Plane,Node)] * Eq->M + 128 ) >> 8)
+                           + Eq->C;
+
+                    // Clamp to the 1..254 range before narrowing to a byte.
+                    if ( Prob > 254 )
+                        Prob = 254;
+                    else if ( Prob < 1 )
+                        Prob = 1;
+
+                    pbi->DcNodeContexts[DCContextOffset(Plane,CtxA,CtxB,Node)] = (UINT8)Prob;
+                }
+            }
+        }
+    }
+
+    // AC node probabilities: same calculation, repeated for every
+    // preceding-token case and plane.
+    for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+    {
+        for ( Plane = 0; Plane < 2; Plane++ )
+        {
+            // Higher AC bands do not use contexts.
+            for ( Band = 0; Band < VP5_AC_BANDS-3; Band++ )
+            {
+                for ( CtxA = 0; CtxA < TOKEN_CONTEXTS; CtxA++ )
+                {
+                    for ( Node = 0; Node < CONTEXT_NODES; Node++ )
+                    {
+                        Eq   = &AcNodeEqs[Prec][Band][Node][CtxA];
+                        Prob = ( ( pbi->AcProbs[ACProbOffset(Plane,Prec,Band,Node)] * Eq->M + 128 ) >> 8)
+                               + Eq->C;
+
+                        // Same 1..254 clamp as the DC case.
+                        if ( Prob > 254 )
+                            Prob = 254;
+                        else if ( Prob < 1 )
+                            Prob = 1;
+
+                        pbi->AcNodeContexts[ACContextOffset(Plane,Prec,Band,CtxA,Node)] = (UINT8)Prob;
+                    }
+                }
+            }
+        }
+    }
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c
new file mode 100644
index 00000000..22d45ed6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c
@@ -0,0 +1,815 @@
+
+/****************************************************************************
+*
+* Module Title : boolhuff.c
+*
+* Description : Video CODEC
+*
+* AUTHOR : James Bankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 01JUN01 Configuration baseline
+*
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#include "boolhuff.h"
+#ifdef MAPCA
+#include "eti/mm.h"
+#endif
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Forward references.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+// In the bool coder defined herein a probability of 4 means a 4/256 chance that the bit is a 0 and a 252/256 chance that it is a 1,
+// so the Shannon cost of a 0 given prob x = 8 - log2(x) | the Shannon cost of a 1 given prob x = 8 - log2(256-x)
+#ifndef MAPCA
+double shannonCost0[256]=
+{
+8.000000000,8.000000000,7.000000000,6.415037499,6.000000000,5.678071905,5.415037499,5.192645078,5.000000000,4.830074999,4.678071905,4.540568381,4.415037499,4.299560282,4.192645078,4.093109404,
+4.000000000,3.912537159,3.830074999,3.752072487,3.678071905,3.607682577,3.540568381,3.476438044,3.415037499,3.356143810,3.299560282,3.245112498,3.192645078,3.142019005,3.093109404,3.045803690,
+3.000000000,2.955605881,2.912537159,2.870716983,2.830074999,2.790546634,2.752072487,2.714597781,2.678071905,2.642447995,2.607682577,2.573735245,2.540568381,2.508146904,2.476438044,2.445411148,
+2.415037499,2.385290156,2.356143810,2.327574658,2.299560282,2.272079545,2.245112498,2.218640286,2.192645078,2.167109986,2.142019005,2.117356951,2.093109404,2.069262662,2.045803690,2.022720077,
+2.000000000,1.977632187,1.955605881,1.933910810,1.912537159,1.891475543,1.870716983,1.850252880,1.830074999,1.810175441,1.790546634,1.771181310,1.752072487,1.733213459,1.714597781,1.696219252,
+1.678071905,1.660149997,1.642447995,1.624960569,1.607682577,1.590609064,1.573735245,1.557056504,1.540568381,1.524266569,1.508146904,1.492205360,1.476438044,1.460841189,1.445411148,1.430144392,
+1.415037499,1.400087158,1.385290156,1.370643380,1.356143810,1.341788517,1.327574658,1.313499473,1.299560282,1.285754482,1.272079545,1.258533014,1.245112498,1.231815675,1.218640286,1.205584134,
+1.192645078,1.179821038,1.167109986,1.154509949,1.142019005,1.129635280,1.117356951,1.105182237,1.093109404,1.081136763,1.069262662,1.057485495,1.045803690,1.034215715,1.022720077,1.011315313,
+1.000000000,0.988772745,0.977632187,0.966576998,0.955605881,0.944717564,0.933910810,0.923184403,0.912537159,0.901967917,0.891475543,0.881058927,0.870716983,0.860448648,0.850252880,0.840128663,
+0.830074999,0.820090910,0.810175441,0.800327655,0.790546634,0.780831480,0.771181310,0.761595261,0.752072487,0.742612157,0.733213459,0.723875595,0.714597781,0.705379251,0.696219252,0.687117045,
+0.678071905,0.669083122,0.660149997,0.651271846,0.642447995,0.633677786,0.624960569,0.616295708,0.607682577,0.599120564,0.590609064,0.582147485,0.573735245,0.565371772,0.557056504,0.548788888,
+0.540568381,0.532394450,0.524266569,0.516184223,0.508146904,0.500154113,0.492205360,0.484300162,0.476438044,0.468618539,0.460841189,0.453105540,0.445411148,0.437757576,0.430144392,0.422571172,
+0.415037499,0.407542963,0.400087158,0.392669686,0.385290156,0.377948181,0.370643380,0.363375379,0.356143810,0.348948309,0.341788517,0.334664083,0.327574658,0.320519900,0.313499473,0.306513043,
+0.299560282,0.292640868,0.285754482,0.278900811,0.272079545,0.265290380,0.258533014,0.251807150,0.245112498,0.238448768,0.231815675,0.225212940,0.218640286,0.212097441,0.205584134,0.199100100,
+0.192645078,0.186218809,0.179821038,0.173451513,0.167109986,0.160796212,0.154509949,0.148250959,0.142019005,0.135813855,0.129635280,0.123483053,0.117356951,0.111256751,0.105182237,0.099133192,
+0.093109404,0.087110664,0.081136763,0.075187496,0.069262662,0.063362061,0.057485495,0.051632768,0.045803690,0.039998068,0.034215715,0.028456446,0.022720077,0.017006425,0.011315313,0.005646563
+};
+double shannonCost1[256]=
+{
+0.000000000,0.005646563,0.011315313,0.017006425,0.022720077,0.028456446,0.034215715,0.039998068,0.045803690,0.051632768,0.057485495,0.063362061,0.069262662,0.075187496,0.081136763,0.087110664,
+0.093109404,0.099133192,0.105182237,0.111256751,0.117356951,0.123483053,0.129635280,0.135813855,0.142019005,0.148250959,0.154509949,0.160796212,0.167109986,0.173451513,0.179821038,0.186218809,
+0.192645078,0.199100100,0.205584134,0.212097441,0.218640286,0.225212940,0.231815675,0.238448768,0.245112498,0.251807150,0.258533014,0.265290380,0.272079545,0.278900811,0.285754482,0.292640868,
+0.299560282,0.306513043,0.313499473,0.320519900,0.327574658,0.334664083,0.341788517,0.348948309,0.356143810,0.363375379,0.370643380,0.377948181,0.385290156,0.392669686,0.400087158,0.407542963,
+0.415037499,0.422571172,0.430144392,0.437757576,0.445411148,0.453105540,0.460841189,0.468618539,0.476438044,0.484300162,0.492205360,0.500154113,0.508146904,0.516184223,0.524266569,0.532394450,
+0.540568381,0.548788888,0.557056504,0.565371772,0.573735245,0.582147485,0.590609064,0.599120564,0.607682577,0.616295708,0.624960569,0.633677786,0.642447995,0.651271846,0.660149997,0.669083122,
+0.678071905,0.687117045,0.696219252,0.705379251,0.714597781,0.723875595,0.733213459,0.742612157,0.752072487,0.761595261,0.771181310,0.780831480,0.790546634,0.800327655,0.810175441,0.820090910,
+0.830074999,0.840128663,0.850252880,0.860448648,0.870716983,0.881058927,0.891475543,0.901967917,0.912537159,0.923184403,0.933910810,0.944717564,0.955605881,0.966576998,0.977632187,0.988772745,
+1.000000000,1.011315313,1.022720077,1.034215715,1.045803690,1.057485495,1.069262662,1.081136763,1.093109404,1.105182237,1.117356951,1.129635280,1.142019005,1.154509949,1.167109986,1.179821038,
+1.192645078,1.205584134,1.218640286,1.231815675,1.245112498,1.258533014,1.272079545,1.285754482,1.299560282,1.313499473,1.327574658,1.341788517,1.356143810,1.370643380,1.385290156,1.400087158,
+1.415037499,1.430144392,1.445411148,1.460841189,1.476438044,1.492205360,1.508146904,1.524266569,1.540568381,1.557056504,1.573735245,1.590609064,1.607682577,1.624960569,1.642447995,1.660149997,
+1.678071905,1.696219252,1.714597781,1.733213459,1.752072487,1.771181310,1.790546634,1.810175441,1.830074999,1.850252880,1.870716983,1.891475543,1.912537159,1.933910810,1.955605881,1.977632187,
+2.000000000,2.022720077,2.045803690,2.069262662,2.093109404,2.117356951,2.142019005,2.167109986,2.192645078,2.218640286,2.245112498,2.272079545,2.299560282,2.327574658,2.356143810,2.385290156,
+2.415037499,2.445411148,2.476438044,2.508146904,2.540568381,2.573735245,2.607682577,2.642447995,2.678071905,2.714597781,2.752072487,2.790546634,2.830074999,2.870716983,2.912537159,2.955605881,
+3.000000000,3.045803690,3.093109404,3.142019005,3.192645078,3.245112498,3.299560282,3.356143810,3.415037499,3.476438044,3.540568381,3.607682577,3.678071905,3.752072487,3.830074999,3.912537159,
+4.000000000,4.093109404,4.192645078,4.299560282,4.415037499,4.540568381,4.678071905,4.830074999,5.000000000,5.192645078,5.415037499,5.678071905,6.000000000,6.415037499,7.000000000,8.000000000
+};
+
+unsigned int shannon64Cost0[256]={
+512,512,448,411,384,363,347,332,320,309,299,291,283,275,268,262,
+256,250,245,240,235,231,227,222,219,215,211,208,204,201,198,195,
+192,189,186,184,181,179,176,174,171,169,167,165,163,161,158,157,
+155,153,151,149,147,145,144,142,140,139,137,136,134,132,131,129,
+128,127,125,124,122,121,120,118,117,116,115,113,112,111,110,109,
+107,106,105,104,103,102,101,100,99,98,97,96,94,93,93,92,
+91,90,89,88,87,86,85,84,83,82,81,81,80,79,78,77,
+76,76,75,74,73,72,72,71,70,69,68,68,67,66,65,65,
+64,63,63,62,61,60,60,59,58,58,57,56,56,55,54,54,
+53,52,52,51,51,50,49,49,48,48,47,46,46,45,45,44,
+43,43,42,42,41,41,40,39,39,38,38,37,37,36,36,35,
+35,34,34,33,33,32,32,31,30,30,29,29,29,28,28,27,
+27,26,26,25,25,24,24,23,23,22,22,21,21,21,20,20,
+19,19,18,18,17,17,17,16,16,15,15,14,14,14,13,13,
+12,12,12,11,11,10,10,9,9,9,8,8,8,7,7,6,
+6,6,5,5,4,4,4,3,3,3,2,2,1,1,1,0,
+};
+unsigned int shannon64Cost1[256]={
+0,0,1,1,1,2,2,3,3,3,4,4,4,5,5,6,
+6,6,7,7,8,8,8,9,9,9,10,10,11,11,12,12,
+12,13,13,14,14,14,15,15,16,16,17,17,17,18,18,19,
+19,20,20,21,21,21,22,22,23,23,24,24,25,25,26,26,
+27,27,28,28,29,29,29,30,30,31,32,32,33,33,34,34,
+35,35,36,36,37,37,38,38,39,39,40,41,41,42,42,43,
+43,44,45,45,46,46,47,48,48,49,49,50,51,51,52,52,
+53,54,54,55,56,56,57,58,58,59,60,60,61,62,63,63,
+64,65,65,66,67,68,68,69,70,71,72,72,73,74,75,76,
+76,77,78,79,80,81,81,82,83,84,85,86,87,88,89,90,
+91,92,93,93,94,96,97,98,99,100,101,102,103,104,105,106,
+107,109,110,111,112,113,115,116,117,118,120,121,122,124,125,127,
+128,129,131,132,134,136,137,139,140,142,144,145,147,149,151,153,
+155,157,158,161,163,165,167,169,171,174,176,179,181,184,186,189,
+192,195,198,201,204,208,211,215,219,222,227,231,235,240,245,250,
+256,262,268,275,283,291,299,309,320,332,347,363,384,411,448,512,
+};
+#endif
+// TEMP STATS VARIABLES
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+
+#ifdef NOTNORMALIZED
+/****************************************************************************
+ *
+ * ROUTINE : StartDecode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * buffer ptr to data to start decoding
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function initializes the boolean coder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StartDecode(BOOL_CODER *bc, unsigned char *buffer)
+{
+    /* Attach the input stream and rewind to its first byte. */
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    /* Start with an empty range and a cleared value. */
+    bc->range  = 0;
+    bc->value  = 0;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeBool
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * prob probability of getting a 0 normalized to 8 bits
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : 0 or 1
+ *
+ * FUNCTION : This function determines the next value stored in the
+ * boolean coder based upon the probability passed in.
+ * It uses a simple probability model to approximate
+ * an arithmetic coder.
+ *
+ *
+ * SPECIAL NOTES : The accuracy of this encoder gets worse as the range
+ * approaches 0. This can be avoided with more complex
+ * normalization functions (as in a standard arithmetic)
+ * coder. I chose to avoid this for speed reasons.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+int DecodeBool(
+ BOOL_CODER *bc,
+ int probability)
+{
+ unsigned int split;
+
+ // we don't have enough in our range to tell between a 0 and 1 so get
+ // a new 3 bytes.
+ // NOTE(review): the refill writes bc->v[0..2] while the comparison below
+ // reads bc->value; presumably v[] overlays the bytes of value in
+ // BOOL_CODER (declared in boolhuff.h) -- confirm.
+ if( bc->range < 2)
+ {
+ unsigned char *spot = bc->buffer+bc->pos;
+ bc->v[0] = spot[0];
+ bc->v[1] = spot[1];
+ bc->v[2] = spot[2];
+
+ // range is reset to 0x01000000 (2^24, one more than the largest 3 byte
+ // value) to avoid having the range * probability calculation
+ // outrange ( this can be handled differently at the cost of an extra if ).
+ bc->range = 0x01000000;
+ bc->pos+=3;
+ }
+
+ // calculate the decision point: split = (((range - 1) * probability) >> 8) + 1
+ // black magic: This code works better than if I calculate probability *
+ // range and then truncating to 1 ( I can't explain why)
+ split = bc->range;
+ split --; // we always have to maintain
+ split *= probability;
+ split >>= 8;
+ split ++;
+
+ // values below the split decode as 0 and shrink the range to the split;
+ // otherwise decode 1 and rebase both range and value past the split.
+ if( bc->value < split )
+ {
+ bc->range = split;
+ return 0;
+ }
+ else
+ {
+ bc->range-=split;
+ bc->value-=split;
+ return 1;
+ }
+}
+/****************************************************************************
+ *
+ * ROUTINE : StopDecode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function does clean up for boolean decoder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StopDecode(BOOL_CODER *bc)
+{
+    /* Nothing to clean up for this decoder. */
+    (void)bc;
+}
+/****************************************************************************
+ *
+ * ROUTINE : StartEncode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * buffer ptr to hold encoded data
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function initializes the boolean coder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StartEncode(BOOL_CODER *bc, unsigned char *buffer)
+{
+    /* Attach the output buffer and start writing at its first byte. */
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    /* Begin with a cleared value and the full 2^24 range. */
+    bc->value  = 0;
+    bc->range  = 0x01000000;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeBool
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * x value to encode
+ * prob probability of getting a 0 normalized to 8 bits
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function encodes a boolean value using the
+ * boolean coder.
+ *
+ *
+ * SPECIAL NOTES : The accuracy of this encoder gets worse as the range
+ * approaches 0. This can be avoided with more complex
+ * normalization functions (as in a standard arithmetic)
+ * coder. I chose to avoid this for speed reasons.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void EncodeBool(BOOL_CODER *bc, int x, int probability)
+{
+ unsigned int split;
+
+ // the range is too small to tell between a 0 and 1, so flush the 3
+ // pending bytes to the output buffer and start a fresh range.
+ // NOTE(review): the flush reads bc->v[0..2] while the coding below updates
+ // bc->value; presumably v[] overlays the bytes of value in BOOL_CODER
+ // (declared in boolhuff.h) -- confirm.
+ if( bc->range < 2 )
+ {
+ bc->buffer[bc->pos] = bc->v[0];
+ bc->buffer[bc->pos+1] = bc->v[1];
+ bc->buffer[bc->pos+2] = bc->v[2];
+ bc->pos+=3;
+
+ // range is reset to 0x01000000 (2^24, one more than the largest 3 byte
+ // value) to avoid having the range * probability calculation
+ // outrange ( this can be handled differently at the cost of an extra if ).
+ bc->range = 0x01000000;
+ bc->value = 0;
+ }
+
+ // calculate the decision point: split = (((range - 1) * probability) >> 8) + 1
+ // black magic: This code works better than if I calculate probability *
+ // range and then truncating to 1 ( I can't explain why)
+ split = bc->range;
+ split --;
+ split *= probability;
+ split >>= 8;
+ split ++;
+
+ // a 1 takes the part of the range above the split (value moves up by split);
+ // a 0 keeps the part below it.
+ if( x )
+ {
+ bc->range-=split;
+ bc->value+=split;
+ }
+ else
+ {
+ bc->range = split;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : StopEncode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function does clean up for boolean encoder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StopEncode(BOOL_CODER *bc)
+{
+    /* View the pending value as raw bytes, in the host's memory order. */
+    const unsigned char *pending = (const unsigned char *)&bc->value;
+    unsigned char *out = bc->buffer + bc->pos;
+
+    /* Flush the first three of those bytes to the output buffer. */
+    out[0] = pending[0];
+    out[1] = pending[1];
+    out[2] = pending[2];
+
+    bc->pos += 3;
+}
+
+#else
+
+#ifndef MAPCA
+/****************************************************************************
+ *
+ * ROUTINE : StartEncode
+ *
+ * INPUTS : br ptr to instance of our boolean coder
+ * source ptr to data to start decoding
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function initializes the boolean coder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StartEncode
+(
+    BOOL_CODER *br,
+    unsigned char *source
+)
+{
+    /* Output goes to the caller's buffer, starting at its first byte. */
+    br->buffer   = source;
+    br->pos      = 0;
+
+    /* Coder state: empty low value, range of 255, count at -24. */
+    br->lowvalue = 0;
+    br->value    = 0;
+    br->range    = 255;
+    br->count    = -24;
+}
+/****************************************************************************
+ *
+ * ROUTINE : StopEncode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function does clean up for boolean encoder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StopEncode
+(
+    BOOL_CODER *br
+)
+{
+    unsigned char *out = br->buffer + br->pos;
+    int shift;
+
+    /* Pick the base shift from the count, then remove the bits
+       already consumed in the current byte (count & 7). */
+    if (br->count < -16)
+        shift = 24;
+    else if (br->count < -8)
+        shift = 16;
+    else
+        shift = 8;
+
+    shift -= (br->count & 7);
+    br->lowvalue <<= shift;
+
+    /* Emit the low value as four bytes, most significant first. */
+    out[0] = (br->lowvalue >> 24);
+    out[1] = (br->lowvalue >> 16) & 0xff;
+    out[2] = (br->lowvalue >> 8) & 0xff;
+    out[3] = (br->lowvalue) & 0xff;
+    br->pos += 4;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : EncodeBool
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * x value to encode
+ * prob probability of getting a 0 normalized to 8 bits
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function encodes a boolean value using the
+ * boolean coder.
+ *
+ *
+ * SPECIAL NOTES : This encoder uses normalizations, and is fairly accurate,
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void EncodeBool
+(
+    BOOL_CODER * br,
+    int bit,
+    int probability
+)
+{
+    unsigned int split;
+
+    // Decision point: a 0 keeps [0, split), a 1 keeps [split, range).
+    split = 1 + (((br->range-1) * probability) >> 8);
+    if(bit)
+    {
+        br->lowvalue += split;
+        br->range -= split;
+    }
+    else
+    {
+        br->range = split;
+    }
+
+    // Renormalize: shift out one bit at a time until range is back in [0x80, 0xff].
+    while(br->range < 0x80)
+    {
+        br->range <<= 1;
+
+        // Bit 31 of lowvalue is a carry into the bytes already written:
+        // turn the trailing run of 0xff bytes into 0x00 and increment the
+        // byte before that run.
+        if((br->lowvalue & 0x80000000 ))
+        {
+            int x = br->pos-1;
+            while(x>=0 && br->buffer[x] == 0xff)
+            {
+                br->buffer[x] =(unsigned char)0;
+                x--;
+            }
+
+            // Guard: if every written byte was 0xff (or none has been written)
+            // x is now -1; never write before the start of the buffer.
+            if(x >= 0)
+                br->buffer[x]+=1;
+        }
+
+        br->lowvalue <<= 1;
+
+        // count reaches zero once a whole byte is ready to be emitted.
+        if (!++br->count)
+        {
+            br->count = -8;
+            br->buffer[br->pos++]=(br->lowvalue >> 24);
+            br->lowvalue &= 0xffffff;
+        }
+    }
+}
+
+
+
+
+// TEMP
+
+extern const unsigned long ProbCost[256];
+
+// Cost-only variant of EncodeBool: nothing is written to the bitstream.
+// Adds the ProbCost table entry for coding `bit` at `probability` to
+// br->BitCounter. A 1 is costed with the mirrored index 255-probability.
+// NOTE(review): probability is used unchecked as a table index; callers are
+// presumably expected to pass 0..255 -- confirm.
+void EncodeBool2
+(
+    BOOL_CODER * br,
+    int bit,
+    int probability
+)
+{
+    if (bit)
+        br->BitCounter += ProbCost[255-probability];
+    else
+        br->BitCounter += ProbCost[probability];
+}
+
+#endif
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeBool
+ *
+ * INPUTS : br ptr to instance of our boolean coder
+ * prob probability of getting a 0 normalized to 8 bits
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : 0 or 1
+ *
+ * FUNCTION : This function determines the next value stored in the
+ * boolean coder based upon the probability passed in.
+ * It uses a simple probability model to approximate
+ * an arithmetic coder.
+ *
+ *
+ * ERRORS : None.
+ *
+ * SPECIAL NOTES : DecodeBool128() is a special case of this
+ * function that assumes the input probability is 128
+ *
+ ****************************************************************************/
+#ifdef MAPCA
+
+int DecodeBool
+(
+ BOOL_CODER * br,
+ int probability
+)
+{
+
+ unsigned int bit;
+ unsigned int split;
+ unsigned int bigsplit;
+ unsigned int lmbdoffset;
+ int count = br->count;
+ unsigned int range = br->range;
+ unsigned int value = br->value;
+
+ // decision point in range units, and the same point moved up into the
+ // top byte so it can be compared against the 32 bit value window
+ split = 1 + (((range-1) * probability) >> 8);
+ bigsplit = (split<<24);
+
+ if(value >= bigsplit)
+ {
+ range = range-split;
+ value = value-bigsplit;
+ bit = 1;
+ }
+ else
+ {
+ range = split;
+ bit = 0;
+ }
+
+
+ // still normalized: count is unchanged, so only value and range are stored
+ if(range>=0x80)
+ {
+ br->value = value;
+ br->range = range;
+ return bit;
+
+ }
+
+ // renormalize in one step instead of looping.
+ // NOTE(review): hmpv_lmo_32 is a MAPCA intrinsic from eti/mm.h; presumably
+ // it returns the bit index of the leftmost set bit of range, so that
+ // lmbdoffset is the shift that brings range back to >= 0x80 -- confirm.
+ lmbdoffset = 7 - hmpv_lmo_32(range);
+ value <<= lmbdoffset;
+ range <<= lmbdoffset;
+ count -= lmbdoffset;
+
+ // at most one new byte is merged in per call; it is placed (8-count) bits
+ // up so that it lines up with the bits already shifted out
+ if(count<=0)
+ {
+ count +=8;
+ value |= ((unsigned int)br->buffer[br->pos]<<(8-count));
+ br->pos++;
+
+ }
+
+ br->count = count;
+ br->value = value;
+ br->range = range;
+ return bit;
+}
+
+
+#else
+int DecodeBool
+(
+    BOOL_CODER * br,
+    int probability
+)
+{
+    unsigned int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    unsigned int split;
+    unsigned int bigsplit;
+    unsigned int bit;
+
+    // Decision point, and the same point aligned with the top byte of value.
+    split = 1 + (((range-1) * probability) >> 8);
+    bigsplit = (split<<24);
+
+    if(value < bigsplit)
+    {
+        range = split;
+        bit = 0;
+    }
+    else
+    {
+        range -= split;
+        value -= bigsplit;
+        bit = 1;
+    }
+
+    // Fast exit: range is still normalized, so count has not changed.
+    if(range >= 0x80)
+    {
+        br->value = value;
+        br->range = range;
+        return bit;
+    }
+
+    // Shift in one bit at a time, pulling a new byte every eighth shift.
+    do
+    {
+        range <<= 1;
+        value <<= 1;
+
+        if (--count == 0)
+        {
+            count = 8;
+            value |= br->buffer[br->pos];
+            br->pos++;
+        }
+    } while(range < 0x80);
+
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+}
+#endif
+/****************************************************************************
+ *
+ * ROUTINE : DecodeBool128
+ *
+ * INPUTS : br ptr to instance of our boolean coder
+ *
+ * RETURNS : 0 or 1
+ *
+ * FUNCTION : This function determines the next value stored in the
+ * boolean coder based upon the probability passed in.
+ * It uses a simple probability model to approximate
+ * an arithmetic coder.
+ *
+ * ERRORS : None.
+ *
+ * SPECIAL NOTES : DecodeBool128() is a special case of the DecodeBool()
+ * function that assumes the input probability is 128
+ *
+ ****************************************************************************/
+int DecodeBool128
+(
+    BOOL_CODER * br
+)
+{
+    unsigned int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    unsigned int split = ( range + 1) >> 1;
+    unsigned int bigsplit = (split<<24);
+    unsigned int bit;
+
+    // Pick the half of the range that value falls in.
+    if(value >= bigsplit)
+    {
+        range -= split;
+        value -= bigsplit;
+        bit = 1;
+    }
+    else
+    {
+        range = split;
+        bit = 0;
+    }
+
+    // Exactly one renormalization shift is applied on every call.
+    range <<= 1;
+    value <<= 1;
+
+    if(--count == 0)
+    {
+        count = 8;
+        value |= br->buffer[br->pos];
+        br->pos++;
+    }
+
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : StartDecode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ * buffer ptr to data to start decoding
+ *
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function initializes the boolean coder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StartDecode
+(
+    BOOL_CODER *br,
+    unsigned char *source
+)
+{
+    br->lowvalue = 0;
+    br->range = 255;
+    br->count = 8;
+    br->buffer = source;
+
+    // Prime the value window with the first four bytes, most significant first.
+    // Each byte is widened to unsigned before shifting: an unsigned char
+    // promotes to (signed) int, and shifting a byte >= 0x80 left by 24 would
+    // overflow it, which is undefined behaviour.
+    // The first four bytes of source are read unconditionally.
+    br->value = ((unsigned int)source[0] << 24)
+              | ((unsigned int)source[1] << 16)
+              | ((unsigned int)source[2] << 8)
+              |  (unsigned int)source[3];
+    br->pos = 4;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : StopDecode
+ *
+ * INPUTS : bc ptr to instance of our boolean coder
+ *
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function does clean up for boolean decoder
+ *
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void StopDecode(BOOL_CODER *bc)
+{
+    /* Nothing to clean up for this decoder. */
+    (void)bc;
+}
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c
new file mode 100644
index 00000000..0c7c5192
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c
@@ -0,0 +1,410 @@
+#include "pbdll.h"
+#include "misc_common.h"
+
+
+//#define OVERLAY_MOTION_VECTORS
+#include "xprintf.h"
+#if defined OVERLAY_MOTION_VECTORS
+/****************************************************************************
+ *
+ * ROUTINE : DrawVector
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * UINT8 *BlockPtr
+ * INT32 x
+ * INT32 y
+ * UINT8 VectorColour
+ * UINT8 DotColour
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None .
+ *
+ * FUNCTION : Draws motion vector into reconstruction buffer
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void DrawVector( PB_INSTANCE *pbi, UINT8 *BlockPtr, INT32 x, INT32 y, UINT8 VectorColour, UINT8 DotColour )
+{
+    UINT8 *PixelPtr;
+    double Xpos, Ypos;
+    double Xdelta, Ydelta;
+    INT32 x0, x1, y0, y1;
+    // Signed copy of the stride so that negative row offsets stay negative
+    // (assumes the stride fits in an INT32 -- TODO confirm its declared type).
+    INT32 Stride = (INT32)pbi->Configuration.YStride;
+
+    // Draws the line from BlockPtr to BlockPtr + (x,y), one pixel per step
+    // along the longer axis. Diagonals (|x| == |y|) are stepped along x; they
+    // were previously skipped by both branches. The x != 0 test keeps the
+    // zero vector out of the division below.
+    if ( (x != 0) && (abs(x) >= abs(y)) )
+    {
+        // Step along x axis
+        if ( x < 0 )
+        {
+            x0 = x;
+            x1 = 0;
+            Ypos = (double)y;
+        }
+        else
+        {
+            x0 = 0;
+            x1 = x;
+            Ypos = 0.0;
+        }
+
+        Ydelta = (double)y / (double)x;
+
+        for ( x=x0; x<=x1; x++ )
+        {
+            // Round to nearest, away from zero. The cast must be signed:
+            // converting a negative double to an unsigned type is undefined.
+            y = (INT32)( Ypos<0.0 ? (Ypos-0.5) : (Ypos+0.5) );
+            PixelPtr = BlockPtr + y*Stride + x;
+            *PixelPtr = VectorColour;
+            Ypos += Ydelta;
+        }
+    }
+    else if ( abs(y) > abs(x) )
+    {
+        // Step along y axis
+        if ( y < 0 )
+        {
+            y0 = y;
+            y1 = 0;
+            Xpos = (double)x;
+        }
+        else
+        {
+            y0 = 0;
+            y1 = y;
+            Xpos = 0.0;
+        }
+
+        Xdelta = (double)x / (double)y;
+
+        for ( y=y0; y<=y1; y++ )
+        {
+            x = (INT32)( Xpos<0.0 ? (Xpos-0.5) : (Xpos+0.5) );
+            PixelPtr = BlockPtr + y*Stride + x;
+            *PixelPtr = VectorColour;
+            Xpos += Xdelta;
+        }
+    }
+
+    // Indicate current position in specified colour
+    *BlockPtr = DotColour;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DisplayMotionVectors
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None .
+ *
+ * FUNCTION : Overlays colour coded motion vectors into reconstruction buffer
+ *
+ * SPECIAL NOTES : This routine will only display motion vectors when Post-processing
+ * is enabled since it draws into the PostProcessBuffer.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Overlays one colour coded vector per motion compensated block by calling
+// DrawVector() on pbi->PostProcessBuffer. Returns at once on key frames.
+// NOTE(review): this routine is only compiled when OVERLAY_MOTION_VECTORS is
+// defined and is currently incomplete: FragIndex, x and y are read without
+// ever being assigned (the code that set x and y is commented out below),
+// BlockOffset is never used, and BlockPtr always points at the start of the
+// buffer, so every vector would be drawn at the same place.
+void DisplayMotionVectors ( PB_INSTANCE *pbi )
+{
+ INT32 FragIndex; // Fragment number
+ UINT32 MB, B; // Macro-Block, Block indices
+ UINT32 CodingMethod; // Temp Storage for coding mode.
+ INT32 x, y;
+ UINT32 Blocks;
+ UINT32 BlockOffset[4] = {0, 1, pbi->HFragments, pbi->HFragments + 1};
+ UINT8 *BlockPtr;
+ UINT8 DotColour;
+ UINT8 VectorColour;
+
+ // Nothing to display if keyframe
+ if ( VP5_GetFrameType(pbi) == BASE_FRAME )
+ {
+ return;
+ }
+
+ // Traverse the quad-tree
+ for ( MB=0; MB<pbi->YMacroBlocks; MB++ )
+ {
+ // Is the Macro-Block coded:
+// if ( pbi->MBCodedFlags[MB] )
+ {
+ // NOTE(review): FragIndex is uninitialised here and is not derived from MB.
+ CodingMethod = pbi->FragInfo[FragIndex].FragCodingMode;
+
+ if ( VP5_ModeUsesMC[CodingMethod] )
+ {
+ // Indicate previous/golden frame predictor
+ if ( CodingMethod == CODE_GOLDEN_MV )
+ {
+ DotColour = 0x00; // Black dot
+ VectorColour = 0x7F; // Mid-Grey Vector
+ }
+ else if( (CodingMethod == CODE_INTER_LAST_MV) || (CodingMethod == CODE_INTER_PRIOR_LAST) )
+ {
+ DotColour = 0xFF; // White dot
+ VectorColour = 0x00; // Black Vector
+ }
+ else
+ {
+ DotColour = 0x00; // Black dot
+ VectorColour = 0xFF; // White Vector
+ }
+
+ // Four-vector mode draws one vector per block, other modes draw one.
+ if ( CodingMethod == CODE_INTER_FOURMV )
+ Blocks = 4;
+ else
+ Blocks = 1;
+
+ for ( B=0; B<Blocks; B++ )
+ {
+ // Pointer to top LH-corner of block
+ BlockPtr = pbi->PostProcessBuffer ;// sorry adrian I'll fix it soon (removing getfragindex)
+ //+ ReconGetFragIndex(pbi->recon_pixel_index_table, FragIndex+BlockOffset[B]);
+
+ // Motion vector ( oops motion vectors only remembered at the macroblock level now!!
+ /*
+ x = pbi->FragInfo[FragIndex + BlockOffset[B]].MVectorX;
+ y = pbi->FragInfo[FragIndex + BlockOffset[B]].MVectorY;
+ */
+ // NOTE(review): x and y are uninitialised at this call.
+ DrawVector( pbi, BlockPtr, x/2, y/2, VectorColour, DotColour );
+ }
+ }
+ }
+ }
+}
+#endif
+/****************************************************************************
+ Debugging Aid Only
+*/
+
+/****************************************************************************
+ Debugging Aid Only
+*/
+#ifdef _MSC_VER
+#include <stdio.h>
+// Debug aid: writes ReconYPlaneSize + 2*ReconUVPlaneSize bytes starting at
+// address to a new file named "y<x>.raw" (x zero padded to 4 digits).
+// Does nothing if the file cannot be created.
+void vp5_writeframe(PB_INSTANCE *pbi, char * address,int x)
+{
+    FILE *yframe;
+    char filename[255];
+
+    sprintf(filename,"y%04d.raw",x);
+    yframe=fopen(filename,"wb");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    fwrite(address,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+    fclose(yframe);
+}
+// Debug aid: writes YPlaneSize bytes starting at address to a new file
+// named "y<x>.raw" (x not padded). Does nothing if the file cannot be created.
+void vp5_writeframe2(PB_INSTANCE *pbi, char * address,int x)
+{
+    FILE *yframe;
+    char filename[255];
+
+    sprintf(filename,"y%d.raw",x);
+    yframe=fopen(filename,"wb");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    fwrite(address,pbi->YPlaneSize,1,yframe);
+    fclose(yframe);
+}
+// Debug aid: writes size bytes starting at address to a new file named
+// "<prefix><frame>.raw" (frame zero padded to 4 digits).
+// Does nothing if the file cannot be created.
+void vp5_draw(unsigned char *prefix, int frame, char * address,int size)
+{
+    FILE *yframe;
+    char filename[255];
+
+    // The prefix is capped at 230 characters so that prefix + frame number +
+    // ".raw" + terminator always fits in filename[].
+    sprintf(filename,"%.230s%04d.raw",(const char *)prefix,frame);
+    yframe=fopen(filename,"wb");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    fwrite(address,size,1,yframe);
+    fclose(yframe);
+}
+// Debug aid: writes a width x height window of an image to a new file named
+// "<prefix><frame>.raw" (frame zero padded to 4 digits). Rows are read from
+// address, pitch bytes apart. Does nothing if the file cannot be created.
+void vp5_drawb(unsigned char *prefix, int frame, char * address,int pitch,int width,int height)
+{
+    FILE *yframe;
+    int i;
+    char filename[255];
+
+    // The prefix is capped at 230 characters so that prefix + frame number +
+    // ".raw" + terminator always fits in filename[].
+    sprintf(filename,"%.230s%04d.raw",(const char *)prefix,frame);
+    yframe=fopen(filename,"wb");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    for(i=0;i<height;i++)
+    {
+        fwrite(address,width,1,yframe);
+        address+=pitch;
+    }
+    fclose(yframe);
+}
+// Debug aid: appends a width x height window of an image to the named file.
+// Rows are read from address, pitch bytes apart.
+// Does nothing if the file cannot be opened.
+void vp5_drawc(char *filename, char * address,int pitch,int width,int height)
+{
+    FILE *yframe;
+    int i;
+
+    yframe=fopen(filename,"ab");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    for(i=0;i<height;i++)
+    {
+        fwrite(address,width,1,yframe);
+        address+=pitch;
+    }
+    fclose(yframe);
+}
+
+// Debug aid: formats one line of per-frame decoder statistics and passes it
+// to vp5_xprintf() together with the offset YStride * 32 + 32.
+// NOTE(review): presumably vp5_xprintf renders the text into the frame at that
+// byte offset (see xprintf.h) -- confirm.
+void vp5_showinfo2(PB_INSTANCE *pbi)
+{
+// int i;
+// for (i=0;i<pbi->PostProcessingLevel;i++)
+// pbi->PostProcessBuffer[pbi->Configuration.YStride * 32 + 32 + +4 +4*i] = 255;
+
+ // Arguments follow the format string order: F, Q, S, W, H, V, Decode, Blit, PP, P.
+ // NOTE(review): only element 8 of avgPPTime is reported; the reason for that
+ // index is not visible here.
+ vp5_xprintf(pbi,
+ pbi->Configuration.YStride * 32 + 32,
+ "F:%d Q:%d S:%d W:%d H:%d V:%d Decode:%8d, Blit:%8d, PP:%8d, P:%d",
+ pbi->FrameType,
+ pbi->quantizer->ThisFrameQuantizerValue,
+ pbi->CurrentFrameSize,
+ pbi->HFragments,
+ pbi->VFragments,
+ pbi->Vp3VersionNo,
+ pbi->avgDecodeTime,
+ pbi->avgBlitTime,
+ pbi->avgPPTime[8],
+ pbi->PostProcessingLevel);
+
+}
+// Debug aid: appends ReconYPlaneSize + 2*ReconUVPlaneSize bytes of
+// pbi->LastFrameRecon to "test.raw". Does nothing if the file cannot be opened.
+void vp5_appendframe(PB_INSTANCE *pbi)
+{
+    FILE *yframe;
+
+    yframe=fopen("test.raw","ab");
+    if ( yframe == NULL )
+        return;     // fwrite/fclose on a NULL stream would crash
+
+    fwrite(pbi->LastFrameRecon,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+    fclose(yframe);
+}
+
+void vp5_showinfo(PB_INSTANCE *pbi)
+{
+    UINT32 MBRows = pbi->MBRows;
+    UINT32 MBCols = pbi->MBCols;
+    UINT32 MBrow;
+    UINT32 MBcol;
+
+    // Visit every macroblock and print its prediction mode at the offset
+    // computed from (row+1)*16+5 lines and (col+1)*16+5 columns.
+    for ( MBrow = 0; MBrow < MBRows; MBrow++ )
+    {
+        for ( MBcol = 0; MBcol < MBCols; MBcol++ )
+        {
+            vp5_xprintf(pbi,
+                ((MBrow+1)* 16+5) * pbi->Configuration.YStride + (MBcol+1)*16+5,
+                "%d",
+                pbi->predictionMode[MBOffset(MBrow,MBcol)]);
+        }
+    }
+}
+/****************************************************************************
+ *
+ * ROUTINE : PredictBlockToPostProcessBuffer
+ *
+ * INPUTS :
+ *
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Reconstructs the current block into the post-processing
+ * buffer after clearing ReconDataBuffer.
+ *
+ * Motion vectors and modes assumed to be defined at the MB level.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void PredictBlockToPostProcessBuffer
+(
+ PB_INSTANCE *pbi,
+ BLOCK_POSITION bp
+)
+{
+
+ // Zero all 64 coefficients so that every Recon* call below is handed an
+ // empty ReconDataBuffer.
+ memset(pbi->ReconDataBuffer,0,64*sizeof(short));
+
+ // Action depends on decode mode. Every branch targets the same location,
+ // PostProcessBuffer[mbi.Recon], and differs only in the reference data used.
+ if ( pbi->mbi.Mode == CODE_INTER_NO_MV ) // Inter with no motion vector
+ {
+ // Reference is the co-located block of the last reconstructed frame.
+ ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ (UINT8 *)&pbi->LastFrameRecon[pbi->mbi.Recon],
+ pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride);
+
+ }
+ else if ( VP5_ModeUsesMC[pbi->mbi.Mode] ) // The mode uses a motion vector.
+ {
+ // For the compressor we did this already ( possible optimization).
+ // PredictFilteredBlock fills TmpDataBuffer, which ReconBlock then consumes.
+ PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+ ReconBlock(
+ pbi->TmpDataBuffer,
+ pbi->ReconDataBuffer,
+ (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ pbi->mbi.CurrentReconStride );
+ }
+ else if ( pbi->mbi.Mode == CODE_USING_GOLDEN ) // Golden frame, co-located block
+ {
+ // Reconstruct the pixel data using the golden frame reconstruction and change data
+ ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+ (UINT8 *)&pbi->GoldenFrame[ pbi->mbi.Recon ],
+ pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+ }
+ else // Simple Intra coding
+ {
+ // No reference frame is involved for intra blocks.
+ ReconIntra( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], (UINT16 *)pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+ }
+}
+
+
+// Debug aid: appends one text block per call to "modes.txt". Each line covers
+// one macroblock row and lists, for columns 2 .. MBCols-3, the interlaced
+// flags (only when Configuration.Interlaced == 1), the prediction modes and
+// the motion vectors as x:y. Rows and columns 0, 1 and the last two are skipped.
+// NOTE(review): MBRows-2 / MBCols-2 would wrap if those fields are unsigned
+// and smaller than 2 -- confirm they never are.
+void printmodes(PB_INSTANCE *pbi)
+{
+    static int nFrame = 0; // PB_INSTANCE doesn't provide a frame number, does it?
+    FILE *f=fopen("modes.txt","a");
+    unsigned int i,j;
+
+    if ( f == NULL )
+    {
+        // Cannot log this frame; still count it so later frame numbers stay aligned.
+        ++nFrame;
+        return;
+    }
+
+    fprintf(f, "Frame %d\n\n", nFrame);
+
+    for(i=2;i<pbi->MBRows-2;i++)
+    {
+        if(pbi->Configuration.Interlaced == 1)
+        {
+            for(j=2;j<pbi->MBCols-2;j++)
+            {
+                fprintf(f,"%d",pbi->MBInterlaced[MBOffset(i,j)]);
+            }
+            fprintf(f," ");
+        }
+        for(j=2;j<pbi->MBCols-2;j++)
+        {
+            fprintf(f,"%d",pbi->predictionMode[MBOffset(i,j)]);
+        }
+        fprintf(f," ");
+        for(j=2;j<pbi->MBCols-2;j++)
+        {
+            fprintf(f,"%3d:%-3d",pbi->MBMotionVector[MBOffset(i,j)].x,pbi->MBMotionVector[MBOffset(i,j)].y);
+        }
+        fprintf(f,"\n");
+    }
+
+    fprintf(f,"\n");
+    fprintf(f,"\n");
+    fclose(f);
+
+    ++nFrame;
+
+    return;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c
new file mode 100644
index 00000000..8347299d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c
@@ -0,0 +1,1071 @@
+/****************************************************************************
+*
+* Module Title : Decodembs.c
+*
+* Description : Compressor functions for block order transmittal
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+* 1.28 YWX 27-Dec-01 Rewrote ReadTokensPredictA()
+* 1.27 YWX 06-Nov-01 Removed Warning errors
+* 1.26 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.25 AWG 08-JUN-01 Added support for DCT16.
+* 1.24 AWG 22-MAY-01 Removed HExtra/VExtra from call to QuadCodeComponent2
+* 1.23 JBB 01-MAY-01 VP5 Functionality
+* 1.22 JBB 09-Apr-01 first pass file clean up
+* 1.21 JBB 23-Mar-01 New DC prediction
+* 1.20 JBB 30 NOV 00 Configuration BaseLine
+*****************************************************************************
+*/
+#define STRICT /* Strict type checking. */
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+//#include "compdll.h"
+//#include "misc_common.h"
+#include "pbdll.h"
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "codec_common_interface.h"
+#include "tokenentropy.h"
+#include "decodemode.h"
+#include "decodemv.h"
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+#define DCT_MAX_VALUE 2048
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+// Describes how the magnitude of one entropy token is reconstructed.
+typedef struct
+{
+ // Base magnitude of the token; the extra bits read from the stream are added to it.
+ UINT16 MinVal;
+ // Index of the most significant extra bit. ReadTokensPredictA reads bits
+ // Length, Length-1, ... 0, i.e. Length+1 bits in total.
+ INT16 Length;
+ // Probs[n] is the probability handed to nDecodeBool when extra bit n is read.
+ UINT8 Probs[11];
+} TOKENEXTRABITS;
+
+// Per-token extra-bit description, indexed by token number.
+// In this file only the DCT_VAL_CATEGORY1..6 rows are read (by
+// ReadTokensPredictA); the smaller tokens are decoded without extra bits.
+const TOKENEXTRABITS TokenExtraBits2[ MAX_ENTROPY_TOKENS]=
+{
+ { 0, -1,{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
+ { 5, 0, { 159,0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
+ { 7, 1, { 145,165,0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
+ { 11,2, { 140,148,173,0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
+ { 19,3, { 135,140,155,176,0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
+ { 35,4, { 130,134,141,157,180,0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
+ { 67,10,{ 129,130,133,140,153,177,196,230,243,254,254 } }, //DCT_VAL_CATEGORY6
+ { 0, -1,{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+};
+
+// Maps a token number to the value stored in a block context's Tokens[] array.
+// DecodeBlock uses LTIndex[DCT_EOB_TOKEN] to fill the positions past the end of block.
+const UINT32 LTIndex[MAX_ENTROPY_TOKENS] = { 0,1, 2, 3,3,4,4,4,4,4,4, 5 };
+// Maps a coefficient position (0..64) to its AC probability band.
+// Position 0 (the DC coefficient) has no band (-1). ReadTokensPredictA looks
+// up the entry for the NEXT position, so index 64 is reachable; its value (7)
+// is >= 3 and therefore routes into the "EncodedCoeffs >= BlockSize" exit test.
+const INT32 CoeffToBand[65] =
+{ -1,0,1,1,2,1,1,2,
+ 2,1,1,2,2,2,1,2,
+ 2,2,2,2,1,1,2,2,
+ 3,3,4,3,4,4,4,3,
+ 3,3,3,3,4,3,3,3,
+ 4,4,4,4,4,3,3,4,
+ 4,4,3,4,4,4,4,4,
+ 4,4,5,5,5,5,5,5,7
+};
+
+
+// NOTE(review): not referenced anywhere in this file; it has external linkage,
+// so confirm no other translation unit uses it before removing.
+const UINT32 toggleBand3[]= { 4,5,7,9,11,14,15,20,22 };
+
+// Maps a coding mode to the reference-frame class stored in a block context's
+// Frame field and used to index the LastDC arrays:
+// 0 = intra, 1 = inter modes, 2 = golden modes.
+// NOTE(review): the per-entry descriptions after "=>" look partly copy-pasted
+// (entries 4, 8 and 9 repeat text from other modes) - confirm against the
+// CODING_MODE definition before relying on them.
+const int VP5_Mode2Frame[] =
+{
+ 1, // CODE_INTER_NO_MV 0 => Encoded diff from same MB last frame
+ 0, // CODE_INTRA 1 => DCT Encoded Block
+ 1, // CODE_INTER_PLUS_MV 2 => Encoded diff from included MV MB last frame
+ 1, // CODE_INTER_LAST_MV 3 => Encoded diff from MRU MV MB last frame
+ 1, // CODE_INTER_PRIOR_MV 4 => Encoded diff from included 4 separate MV blocks
+ 2, // CODE_USING_GOLDEN 5 => Encoded diff from same MB golden frame
+ 2, // CODE_GOLDEN_MV 6 => Encoded diff from included MV MB golden frame
+ 1, // CODE_INTER_FOUR_MV 7 => Encoded diff from included 4 separate MV blocks
+ 2, // CODE_GOLD_NEAREST_MV 8 => Encoded diff from MRU MV MB last frame
+ 2, // CODE_GOLD_NEAR_MV 9 => Encoded diff from included 4 separate MV blocks
+};
+/****************************************************************************
+* Explicit imports
+*****************************************************************************
+*/
+extern UINT32 LoopFilterLimitValuesV2[Q_TABLE_SIZE];
+extern void decodeModeAndMotionVector(PB_INSTANCE *pbi,UINT32 MBrow,UINT32 MBcol);
+
+
+// Decodes one bit from the bool coder using a fixed half-range split
+// (no probability argument). Exactly one renormalisation shift is applied
+// per call, and a fresh byte is pulled from br->buffer every 8th shift.
+//
+// br      : bool coder state (count/range/value/buffer/pos are updated).
+// returns : the decoded bit, 0 or 1.
+INLINE
+int nDecodeBool128
+(
+    BOOL_CODER * br
+)
+{
+    unsigned int shiftsLeft = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    unsigned int half = ( range + 1) >> 1;
+    unsigned int threshold = (half<<24);
+    unsigned int decoded;
+
+    if ( value >= threshold )
+    {
+        // upper half of the interval => bit is 1
+        decoded = 1;
+        range -= half;
+        value -= threshold;
+    }
+    else
+    {
+        // lower half of the interval => bit is 0
+        decoded = 0;
+        range = half;
+    }
+
+    // single doubling step
+    value <<= 1;
+    range <<= 1;
+
+    shiftsLeft--;
+    if ( shiftsLeft == 0 )
+    {
+        // current byte is used up: merge in the next one
+        shiftsLeft = 8;
+        value |= br->buffer[br->pos];
+        br->pos++;
+    }
+
+    br->count = shiftsLeft;
+    br->value = value;
+    br->range = range;
+    return decoded;
+}
+
+// Decodes one bit from the bool coder.
+//
+// br          : bool coder state (count/range/value/buffer/pos are updated).
+// probability : weight used to place the split point inside the current
+//               range: split = 1 + (((range-1) * probability) >> 8).
+// returns     : 0 when the value lies below the split, 1 otherwise.
+//
+// After the decision the range is doubled until it is at least 0x80; a new
+// byte is merged in from br->buffer after every 8 doublings.
+INLINE
+int nDecodeBool
+(
+    BOOL_CODER * br,
+    int probability
+)
+{
+    int shiftsLeft = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    unsigned int split = 1 + (((range-1) * probability) >> 8);
+    unsigned int bigsplit = (split<<24);
+    unsigned int decoded = 0;
+
+    // perform the actual decoding
+    if ( value < bigsplit )
+    {
+        range = split;
+    }
+    else
+    {
+        decoded = 1;
+        range -= split;
+        value -= bigsplit;
+    }
+
+    // renormalise
+    while ( range < 0x80 )
+    {
+        range <<= 1;
+        value <<= 1;
+
+        if ( --shiftsLeft == 0 )
+        {
+            shiftsLeft = 8;
+            value |= br->buffer[br->pos];
+            br->pos++;
+        }
+    }
+
+    br->count = shiftsLeft;
+    br->value = value;
+    br->range = range;
+    return decoded;
+}
+
+
+/****************************************************************************
+*
+* ROUTINE : ConfigureEntropyDecoder
+*
+* INPUTS : None
+*
+* OUTPUTS : None
+*
+* RETURNS : None.
+*
+* FUNCTION : Configure entropy subsystem for decode
+*
+* SPECIAL NOTES : None.
+*
+*
+* ERRORS : None.
+*
+****************************************************************************/
+// Reads the per-frame token probability updates from the bitstream and then
+// rebuilds the derived context tables via ConfigureContexts().
+//
+// pbi       : decoder instance; pbi->br is consumed, DcProbs/AcProbs are written.
+// FrameType : when BASE_FRAME, every slot that receives no update is still
+//             (re)written with the running value described below.
+//
+// LastProb[] holds, per token index, the most recently transmitted value
+// (128 until the first update). It is shared by the DC and AC passes, so a
+// value read for one plane/band carries over to the following ones.
+void ConfigureEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+    UINT8 LastProb[MAX_ENTROPY_TOKENS-1];
+    UINT8 *Slot;
+    UINT32 Plane;
+    UINT32 Band;
+    UINT32 Tok;
+    INT32 Prec;
+
+    // Start every running probability at 128
+    memset( LastProb, 128, MAX_ENTROPY_TOKENS-1 );
+
+    // DC probabilities: Y plane first, then UV
+    for ( Plane = 0; Plane < 2; Plane++ )
+    {
+        for ( Tok = 0; Tok < MAX_ENTROPY_TOKENS-1; Tok++ )
+        {
+            Slot = &pbi->DcProbs[DCProbOffset(Plane,Tok)];
+
+            if ( nDecodeBool(&pbi->br, DcUpdateProbs[Plane][Tok] ) )
+            {
+                // Sent at reduced resolution; 0 is not a legal value.
+                LastProb[Tok] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+                if ( LastProb[Tok] == 0 )
+                    LastProb[Tok] = 1;
+                *Slot = LastProb[Tok];
+            }
+            else if ( FrameType == BASE_FRAME )
+            {
+                *Slot = LastProb[Tok];
+            }
+        }
+    }
+
+    // AC probabilities: one set per preceding-token case, plane and band
+    for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+    {
+        for ( Plane = 0; Plane < 2; Plane++ )
+        {
+            for ( Band = 0; Band < VP5_AC_BANDS; Band++ )
+            {
+                for ( Tok = 0; Tok < MAX_ENTROPY_TOKENS-1; Tok++ )
+                {
+                    Slot = &pbi->AcProbs[ACProbOffset(Plane,Prec,Band,Tok)];
+
+                    if ( nDecodeBool(&pbi->br, AcUpdateProbs[Prec][Plane][Band][Tok] ) )
+                    {
+                        // Sent at reduced resolution; 0 is not a legal value.
+                        LastProb[Tok] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+                        if ( LastProb[Tok] == 0 )
+                            LastProb[Tok] = 1;
+                        *Slot = LastProb[Tok];
+                    }
+                    else if ( FrameType == BASE_FRAME )
+                    {
+                        *Slot = LastProb[Tok];
+                    }
+                }
+            }
+        }
+    }
+
+    // Derive the context specific probabilities from the new baseline data
+    ConfigureContexts(pbi);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ResetLeftContext
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Updates the left contexts
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Puts the four "left neighbour" block contexts (two luma rows, U and V)
+// back into their start-of-row state: all bytes zero, then Mode = -1,
+// Frame = 4 and EOBPos = 24.
+//
+// pbi : decoder instance whose fc.LeftY[0..1], fc.LeftU and fc.LeftV are reset.
+void ResetLeftContext
+(
+    PB_INSTANCE *pbi
+)
+{
+    BLOCK_CONTEXT *Ctx[4];
+    int k;
+
+    Ctx[0] = &pbi->fc.LeftY[0];
+    Ctx[1] = &pbi->fc.LeftY[1];
+    Ctx[2] = &pbi->fc.LeftU;
+    Ctx[3] = &pbi->fc.LeftV;
+
+    for ( k = 0; k < 4; k++ )
+    {
+        memset((void *) Ctx[k], 0, sizeof(BLOCK_CONTEXT));
+
+        Ctx[k]->Mode = (CODING_MODE)-1;
+        Ctx[k]->Frame = 4;
+        Ctx[k]->EOBPos = 24;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ResetAboveContext
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Updates the above contexts
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Resets the "above neighbour" contexts and the last-DC trackers at the start
+// of a frame.
+//
+// pbi : decoder instance. The first HFragments+8 entries of fc.AboveY and the
+//       first HFragments/2+8 entries of fc.AboveU / fc.AboveV get Mode = -1,
+//       Frame = 4, Dc = 0 and Tokens[0] = 0. For Vp3VersionNo < 6, entry 1
+//       of AboveU and AboveV is then given Mode = 0 and Frame = 0.
+//       LastDcY is cleared; LastDcU / LastDcV get 128 in slot 0 and 0 in
+//       slots 1 and 2.
+void ResetAboveContext
+(
+    PB_INSTANCE *pbi
+)
+{
+    UINT32 n;
+
+    // luma contexts
+    for ( n = 0; n < pbi->HFragments+8; n++ )
+    {
+        pbi->fc.AboveY[n].Tokens[0] = 0;
+        pbi->fc.AboveY[n].Dc = 0;
+        pbi->fc.AboveY[n].Frame = 4;
+        pbi->fc.AboveY[n].Mode = -1;
+    }
+
+    // chroma U contexts
+    for ( n = 0; n < pbi->HFragments/2 + 8; n++ )
+    {
+        pbi->fc.AboveU[n].Tokens[0] = 0;
+        pbi->fc.AboveU[n].Dc = 0;
+        pbi->fc.AboveU[n].Frame = 4;
+        pbi->fc.AboveU[n].Mode = -1;
+    }
+
+    // chroma V contexts
+    for ( n = 0; n < pbi->HFragments/2 + 8; n++ )
+    {
+        pbi->fc.AboveV[n].Tokens[0] = 0;
+        pbi->fc.AboveV[n].Dc = 0;
+        pbi->fc.AboveV[n].Frame = 4;
+        pbi->fc.AboveV[n].Mode = -1;
+    }
+
+    // streams older than version 6 start chroma entry 1 as mode 0 / frame 0
+    if ( pbi->Vp3VersionNo < 6 )
+    {
+        pbi->fc.AboveU[1].Frame = 0;
+        pbi->fc.AboveU[1].Mode = 0;
+        pbi->fc.AboveV[1].Frame = 0;
+        pbi->fc.AboveV[1].Mode = 0;
+    }
+
+    // last-DC trackers, one slot per reference-frame class
+    pbi->fc.LastDcY[0] = 0;
+    pbi->fc.LastDcY[1] = 0;
+    pbi->fc.LastDcY[2] = 0;
+
+    pbi->fc.LastDcU[0] = 128;
+    pbi->fc.LastDcU[1] = 0;
+    pbi->fc.LastDcU[2] = 0;
+
+    pbi->fc.LastDcV[0] = 128;
+    pbi->fc.LastDcV[1] = 0;
+    pbi->fc.LastDcV[2] = 0;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateContext
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Updates the frame context
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Records the block that was just decoded in a (left) block context.
+//
+// pbi : decoder instance holding the current macroblock state (pbi->mbi).
+// c   : context to update.
+// bp  : position of the block inside the macroblock.
+//
+// Note that Mode comes from the per-block mode table while Frame is derived
+// from the macroblock-level mode.
+void UpdateContext
+(
+    PB_INSTANCE *pbi,
+    BLOCK_CONTEXT *c,
+    BLOCK_POSITION bp
+)
+{
+    // reference-frame class of the current macroblock
+    c->Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+
+    // DC coefficient and coding mode of this block
+    c->Dc = pbi->mbi.Coeffs[bp][0];
+    c->Mode = pbi->mbi.BlockMode[bp];
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateContextA
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Updates the frame context
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Records the block that was just decoded in an (above) block context.
+// Same assignments as UpdateContext, but for the BLOCK_CONTEXTA type.
+//
+// pbi : decoder instance holding the current macroblock state (pbi->mbi).
+// c   : context to update.
+// bp  : position of the block inside the macroblock.
+void UpdateContextA
+(
+    PB_INSTANCE *pbi,
+    BLOCK_CONTEXTA *c,
+    BLOCK_POSITION bp
+)
+{
+    // reference-frame class of the current macroblock
+    c->Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+
+    // DC coefficient and coding mode of this block
+    c->Dc = pbi->mbi.Coeffs[bp][0];
+    c->Mode = pbi->mbi.BlockMode[bp];
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PredictDC
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Predicts coefficients in this block based on the
+ * contexts we have
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#define HIGHBITDUPPED(X) (((signed short) X) >> 15)
+// Adds a predicted DC value to pbi->mbi.Coeffs[bp][0] and stores the result
+// in LastDC[Frame], where Frame is the reference-frame class of the current
+// macroblock mode.
+//
+// pbi    : decoder instance (current macroblock state is read from pbi->mbi).
+// bp     : block whose DC coefficient is adjusted.
+// LastDC : per-frame-class last DC values; read as fallback, always written.
+// Above  : context of the block above; Above[-1] and Above[+1] are read too.
+// Left   : context of the block to the left.
+//
+// Neighbours are considered in the order Left, Above, Above[-1], Above[+1];
+// only those whose Frame equals the current Frame count, and at most two are
+// used. Two matches -> their average (truncated towards zero), one match ->
+// its Dc, none -> LastDC[Frame]. The disabled "#if 0" branch is the
+// straightforward form of the goto state machine below.
+void PredictDC
+(
+ PB_INSTANCE *pbi,
+ BLOCK_POSITION bp,
+ Q_LIST_ENTRY *LastDC,
+ BLOCK_CONTEXTA *Above,
+ BLOCK_CONTEXT *Left
+)
+{
+ UINT8 Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+ // Count is only used by the disabled "#if 0" variant
+ UINT8 Count = 0;
+#if 0
+ INT32 Avg = 0;
+ if( Frame==Left->Frame)
+ {
+ Avg += Left->Dc;
+ Count ++;
+ }
+ if( Frame==Above->Frame)
+ {
+ Avg += Above->Dc;
+ Count ++;
+ }
+
+ if( Count < 2 && Frame == Above[-1].Frame)
+ {
+ Avg += Above[-1].Dc;
+ Count ++;
+ }
+
+ if( Count < 2 && Frame == Above[+1].Frame)
+ {
+ Avg += Above[+1].Dc;
+ Count ++;
+ }
+ if(Count==0)
+ {
+ Avg = LastDC[Frame];
+ }
+ else if(Count==2)
+ {
+ // trick to determine when to add 1 if negative (for proper truncation)
+ Avg += (HIGHBITDUPPED(Avg)&1);
+ Avg >>= 1;
+ }
+
+#else
+INT32 Avg ;
+ // Entry state: nothing matched yet, test Left first.
+//state_L:
+ if( Frame != Left->Frame)
+ goto state_A0;
+ Avg = Left->Dc;
+// goto state_A1;
+
+ // One match so far (Left): test Above.
+//state_A1:
+ if(Frame != Above->Frame)
+ goto state_AM1;
+ Avg += Above->Dc;
+// goto state_TWO;
+
+ // Two matches summed in Avg: add 1 when the sum is negative, then halve.
+state_TWO:
+ Avg += (HIGHBITDUPPED(Avg)&1);
+ Avg >>= 1;
+ goto state_done;
+
+ // No match so far: test Above.
+state_A0:
+ if(Frame != Above->Frame)
+ goto state_AM0;
+ Avg = Above->Dc;
+// goto state_AM1;
+
+ // One match so far: test Above[-1].
+state_AM1:
+ if(Frame == Above[-1].Frame)
+ {
+ Avg += Above[-1].Dc;
+ goto state_TWO;
+ }
+// goto state_AP1;
+
+ // One match so far: test Above[+1]; without it the single value is used as is.
+state_AP1:
+ if(Frame != Above[+1].Frame)
+ goto state_done;
+ Avg += Above[+1].Dc;
+ goto state_TWO;
+
+
+ // No match so far: test Above[-1].
+state_AM0:
+ if(Frame == Above[-1].Frame)
+ {
+ Avg = Above[-1].Dc;
+ goto state_AP1;
+ }
+ //goto state_AP0;
+
+ // Still no match: use Above[+1] if it matches, else fall back to LastDC.
+//state_AP0:
+ if(Frame != Above[+1].Frame)
+ Avg = LastDC[Frame];
+ else
+ Avg = Above[+1].Dc;
+
+state_done:
+
+#endif
+
+ pbi->mbi.Coeffs[bp][0] += Avg;
+ LastDC[Frame] = pbi->mbi.Coeffs[bp][0];
+
+ return ;
+}
+
+
+#define TI(x) (TransIndex[x])
+
+/****************************************************************************
+*
+* ROUTINE : ReadTokensPredictA
+*
+* INPUTS : None
+*
+* OUTPUTS : None
+*
+* RETURNS : None
+*
+* FUNCTION : Fills CoeffData with one blocks worth of coefficients
+* decoded from the bitstream.
+*
+* SPECIAL NOTES :
+*
+*
+* ERRORS : None.
+*
+****************************************************************************/
+// Decodes the tokens of one block from pbi->br and writes the resulting
+// signed coefficient values into CoeffData at the positions given by
+// pbi->quantizer->transIndex. Zero tokens write nothing, so the caller must
+// supply a CoeffData that is already zeroed.
+//
+// pbi       : decoder instance (bool coder, probability tables, quantizer).
+// CoeffData : output coefficient array for the block.
+// BlockSize : number of coefficient positions; decoding stops once this many
+//             positions have been consumed.
+// Plane     : plane selector used for the probability table offsets.
+// Above     : context of the block above; only Tokens[0] is read.
+// Left      : context of the block to the left; Tokens[] is read for context
+//             selection and overwritten with the class of each decoded token
+//             (0 zero, 1 one, 2 two, 3 three/four, 4 value category).
+//
+// Returns the position at which decoding stopped: the index of the
+// end-of-block token, or BlockSize-1 when the block ran to its full length.
+UINT8 ReadTokensPredictA(
+ PB_INSTANCE *pbi,
+ INT16 * CoeffData,
+ UINT32 BlockSize,
+ UINT32 Plane,
+ BLOCK_CONTEXTA *Above,
+ BLOCK_CONTEXT *Left
+)
+{
+ INT32 token;
+ BOOL_CODER * br = &pbi->br;
+ UINT8 EncodedCoeffs = 0;
+ UINT8 LeftContext;
+ UINT8 AboveContext;
+ UINT8 *BaselineProbsPtr;
+ UINT8 *ContextProbsPtr;
+ BOOL LastTokenNonZero; // Was last token in this block non-zero
+ UINT8 PrecTokenIndex; // Preceding token index (0 = zero, 1 = one, 2 = larger than one)
+ UINT32 Band;
+ INT32 SignBit;
+ INT32 BitsCount ;
+ UINT8 *AcProbsPtr = pbi->AcProbs + ACProbOffset(Plane,0,0,0);
+ UINT8 *AcContextPtr = pbi->AcNodeContexts + ACContextOffset(Plane,0,0,0,0);
+ BOOL EOB = FALSE; // never read in this function
+ UINT32 *TransIndex = pbi->quantizer->transIndex;
+ INT32 value;
+
+
+ // determine the contexts for dc
+ // (starting with LastTokenNonZero = TRUE allows an end-of-block at position 0)
+ LastTokenNonZero = TRUE;
+ LeftContext = Left->Tokens[EncodedCoeffs];
+ AboveContext = Above->Tokens[EncodedCoeffs];
+
+ BaselineProbsPtr = pbi->DcProbs+DCProbOffset(Plane,0);
+ ContextProbsPtr = pbi->DcNodeContexts+DCContextOffset(Plane,LeftContext,AboveContext,0);
+
+ do
+ {
+ // First test for the ! ZeroContext
+ if ( !nDecodeBool(br, ContextProbsPtr[ZERO_CONTEXT_NODE] ) )
+ {
+ // Zero or EOB
+ // An EOB decision is only coded directly after a non-zero token;
+ // after a zero token the token is implicitly another zero.
+ if ( LastTokenNonZero )
+ {
+ // A set bit here means "zero token", a clear bit means end of block.
+ if ( nDecodeBool(br, ContextProbsPtr[EOB_CONTEXT_NODE]) )
+ {
+ PrecTokenIndex = 0;
+ Left->Tokens[EncodedCoeffs] = 0;
+ }
+ else
+ {
+ // Pre-increment so that the decrement after the loop yields the EOB position.
+ EncodedCoeffs++;
+ break;
+ }
+ }
+ else
+ {
+ PrecTokenIndex = 0;
+ Left->Tokens[EncodedCoeffs] = 0;
+ }
+ LastTokenNonZero = FALSE;
+ }
+ else
+ {
+
+ // A set bit means the magnitude is larger than 1, a clear bit means it is exactly 1
+ if ( nDecodeBool(br, ContextProbsPtr[ONE_CONTEXT_NODE]) )
+ {
+ // Value token > 1
+ if ( nDecodeBool(br, ContextProbsPtr[LOW_VAL_CONTEXT_NODE]) )
+ {
+ // High value (value category) token
+ Left->Tokens[EncodedCoeffs] = 4;
+ if ( nDecodeBool(br, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE]) )
+ {
+ // Cat3,Cat4 or Cat5
+ if ( nDecodeBool(br, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE]) )
+ {
+ token = DCT_VAL_CATEGORY5 + nDecodeBool(br, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE]);
+ }
+ else
+ {
+ token = DCT_VAL_CATEGORY3 + nDecodeBool(br, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE]);
+ }
+ }
+ else
+ {
+ // Either Cat1 or Cat2
+ token = DCT_VAL_CATEGORY1 + nDecodeBool(br, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE]);
+ }
+
+
+ // Get the Sign Bit
+ SignBit = nDecodeBool128(br);
+
+ value = TokenExtraBits2[token].MinVal;
+
+ // Read the extra bits
+ // (most significant first; Length is the index of the top bit, so Length+1 bits are read)
+ BitsCount = TokenExtraBits2[token].Length;
+
+ do
+ {
+ value += (nDecodeBool(br, TokenExtraBits2[token].Probs[BitsCount])<<BitsCount);
+ BitsCount -- ;
+ }
+ while( BitsCount >= 0);
+
+
+ // Combine the sign and value: (v ^ -s) + s negates v when s is 1
+ CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((value ^ -SignBit) + SignBit);
+
+ }
+ else
+ {
+ // Low value token
+ if ( nDecodeBool(br, ContextProbsPtr[TWO_CONTEXT_NODE]) )
+ {
+ // Either a 3 or a 4
+ Left->Tokens[EncodedCoeffs] = 3;
+ token = THREE_TOKEN + nDecodeBool(br, BaselineProbsPtr[THREE_CONTEXT_NODE]);
+ }
+ else
+ {
+ // Is it a 2
+ token = TWO_TOKEN;
+ Left->Tokens[EncodedCoeffs] = 2;
+ }
+
+ // Get the Sign Bit and store the result in our coeff array
+ // (the token number itself is used as the magnitude here)
+ SignBit = nDecodeBool128(br);
+ CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((token ^ -SignBit) + SignBit);
+
+ }
+ PrecTokenIndex = 2;
+ }
+ else
+ {
+ PrecTokenIndex = 1;
+ Left->Tokens[EncodedCoeffs] = 1;
+
+ // Get the Sign Bit
+ SignBit = nDecodeBool128(br);
+
+ // Combine the sign and value
+ CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((1 ^ -SignBit) + SignBit);
+ }
+ LastTokenNonZero = TRUE;
+
+ }
+
+ // calculate the context for the next token.
+ EncodedCoeffs ++;
+ Band = CoeffToBand [ EncodedCoeffs ];
+ BaselineProbsPtr = AcProbsPtr + ACProbOffset(0,PrecTokenIndex,Band,0);
+ // Bands 0-2 use context tables keyed by the left block's token at the
+ // same position; higher bands use the baseline probabilities directly.
+ if(Band < 3)
+ {
+ ContextProbsPtr = AcContextPtr + ACContextOffset(0,PrecTokenIndex,Band,Left->Tokens[EncodedCoeffs],0);
+ }
+ else
+ {
+ // The end-of-block-by-length test is only made here; CoeffToBand[64]
+ // is >= 3, so position 64 always reaches it.
+ if(EncodedCoeffs >= BlockSize)
+ break;
+
+ ContextProbsPtr = BaselineProbsPtr;
+ }
+
+
+ } while ( 1 );
+ // Both exits leave EncodedCoeffs one past the stop position
+ EncodedCoeffs --;
+
+ return EncodedCoeffs;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeBlock
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Decodes A Block
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Decodes and reconstructs the single block selected by the current
+// pbi->mbi state (Above/Left/LastDc/Plane/Recon must be set up by the caller).
+//
+// pbi   : decoder instance.
+// MBrow : macroblock row (not used inside this function).
+// MBcol : macroblock column (not used inside this function).
+// bp    : block position inside the macroblock.
+//
+// Steps: read tokens, pad the left token context, predict DC, inverse
+// transform, reconstruct, update the above/left contexts, and finally clear
+// the coefficient buffer again for the next block.
+void DecodeBlock
+(
+ PB_INSTANCE *pbi,
+ UINT32 MBrow,
+ UINT32 MBcol,
+ BLOCK_POSITION bp
+)
+{
+
+ // End position of the previous block that used this left context, capped at 24
+ unsigned int lastEOB = pbi->mbi.Left->EOBPos;
+
+ if(lastEOB >24)
+ lastEOB =24;
+
+ // read tokens from the bitstream and convert to coefficients.
+ pbi->mbi.Left->EOBPos = ReadTokensPredictA(pbi, pbi->mbi.Coeffs[bp], 64, (pbi->mbi.Plane!=0), pbi->mbi.Above, pbi->mbi.Left);
+
+ // Update LEFT and ABOVE Contexts
+ // If this block ended earlier than the previous one, the token entries
+ // between the two end positions are stale: overwrite them with the EOB value.
+ if(pbi->mbi.Left->EOBPos < lastEOB )
+ memset (&pbi->mbi.Left->Tokens[pbi->mbi.Left->EOBPos], LTIndex[DCT_EOB_TOKEN], lastEOB - pbi->mbi.Left->EOBPos);
+
+ pbi->mbi.Above->Tokens[0] = pbi->mbi.Left->Tokens[0];
+
+ // predict our dc values from the surrounding guys
+ PredictDC(pbi, bp, pbi->mbi.LastDc, pbi->mbi.Above, pbi->mbi.Left);
+
+ // do the inverse transform
+ // (the idct routine is selected by the end position of the block)
+ pbi->idct[pbi->mbi.Left->EOBPos]( pbi->mbi.Coeffs[bp], pbi->quantizer->dequant_coeffs[QTableSelect[bp]], pbi->ReconDataBuffer );
+
+ // put it into our reconstruction buffer
+ ReconstructBlock(pbi,bp);
+
+ // update the context info for the next block
+ UpdateContextA(pbi,pbi->mbi.Above,bp);
+ UpdateContext(pbi,pbi->mbi.Left,bp);
+
+ // Default clear data area down to 0s
+ // Only as much as the end position requires: the DC entry alone, entries
+ // 0-7, 8-11, 16-19 and 24-27, or all 64.
+ // NOTE(review): the partial clear presumably covers every position the
+ // first 10 coefficients can map to via transIndex - confirm.
+ if(pbi->mbi.Left->EOBPos <= 1)
+ {
+ pbi->mbi.Coeffs[bp][0] = 0;
+ }
+ else if(pbi->mbi.Left->EOBPos <= 10)
+ {
+ memset(pbi->mbi.Coeffs[bp], 0,8*sizeof(Q_LIST_ENTRY));
+ memset(pbi->mbi.Coeffs[bp]+8, 0,4*sizeof(Q_LIST_ENTRY));
+ memset(pbi->mbi.Coeffs[bp]+16, 0,4*sizeof(Q_LIST_ENTRY));
+ memset(pbi->mbi.Coeffs[bp]+24, 0,4*sizeof(Q_LIST_ENTRY));
+ }
+ else
+ {
+ memset(pbi->mbi.Coeffs[bp], 0,64*sizeof(Q_LIST_ENTRY));
+ }
+
+}
+
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeMacroBlock
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Decodes A MacroBlock
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Decodes one macroblock: the optional interlaced flag, the coding mode and
+// motion vector (non-base frames only), then four Y blocks followed by one U
+// and one V block. Before each DecodeBlock call the relevant pbi->mbi fields
+// (Recon, Above, Left, SourceX/Y, LastDc, Plane) are pointed at that block.
+//
+// pbi   : decoder instance.
+// MBrow : macroblock row, in macroblock units.
+// MBcol : macroblock column, in macroblock units.
+void DecodeMacroBlock
+(
+ PB_INSTANCE *pbi,
+ UINT32 MBrow,
+ UINT32 MBcol
+)
+{
+ UINT32 MBPointer;
+ INT32 NextBlock;
+
+ //***********************************************************************
+ // Copy the existing structures into what we have now I'll fix this next.
+
+ // dumb way to encode the interlaced decision but it works!!!
+
+ if(pbi->Configuration.Interlaced)
+ {
+ UINT8 prob = pbi->probInterlaced;
+ // super simple context adjustment
+ // (skipped for the first decoded column of a row, which is column 2)
+ if(MBcol>2)
+ {
+ // adjust the probability per the last one we did
+ // pbi->mbi.Interlaced still holds the previous macroblock's flag here
+ if(pbi->mbi.Interlaced)
+ prob=prob-(prob>>1);
+ else
+ prob=prob+((256-prob)>>1);
+ }
+ pbi->mbi.Interlaced = nDecodeBool( &pbi->br, prob);
+ }
+ else
+ pbi->mbi.Interlaced = 0;
+
+ if(pbi->FrameType == BASE_FRAME )
+ {
+ pbi->mbi.Mode = CODE_INTRA;
+ }
+ else
+ {
+ decodeModeAndMotionVector(pbi, MBrow, MBcol );
+ }
+
+ // NextBlock is the row offset from blocks 0/1 to blocks 2/3: 8 rows
+ // normally, 1 row with a doubled stride when the macroblock is interlaced.
+ if(pbi->mbi.Interlaced == 0)
+ {
+ NextBlock = 8;
+ pbi->mbi.CurrentReconStride = pbi->Configuration.YStride ;
+ }
+ else
+ {
+ NextBlock = 1;
+ pbi->mbi.CurrentReconStride = pbi->Configuration.YStride * 2;
+ }
+
+ // y plane values
+ pbi->mbi.FrameReconStride = pbi->Configuration.YStride;
+ pbi->mbi.MvShift = 1;
+ pbi->mbi.MvModMask = 1;
+ pbi->mbi.LastDc = pbi->fc.LastDcY;
+ pbi->mbi.Plane = 0;
+ pbi->mbi.SourceY = MBrow * 16;
+ pbi->mbi.SourceX = MBcol * 16;
+ MBPointer = pbi->ReconYDataOffset
+ + pbi->mbi.SourceY * pbi->Configuration.YStride
+ + pbi->mbi.SourceX;
+
+ // Block 0
+ pbi->mbi.Recon = MBPointer;
+ pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2];
+ pbi->mbi.Left = &pbi->fc.LeftY[0];
+ DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)0);
+
+ // Block 1
+ pbi->mbi.Recon += 8;
+ pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2+1];
+ pbi->mbi.Left = &pbi->fc.LeftY[0];
+ pbi->mbi.SourceX += 8;
+ DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)1);
+
+ // Block 2
+ pbi->mbi.Recon = MBPointer + NextBlock * pbi->Configuration.YStride;
+ pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2];
+ pbi->mbi.Left = &pbi->fc.LeftY[1];
+ pbi->mbi.SourceX -= 8;
+ pbi->mbi.SourceY += NextBlock;
+ DecodeBlock(pbi, MBrow, MBcol, 2);
+
+ // Block 3
+ pbi->mbi.Recon += 8;
+ pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2+1];
+ pbi->mbi.Left = &pbi->fc.LeftY[1];
+ pbi->mbi.SourceX += 8;
+ DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)3);
+
+ // uv plane values
+ // (the chroma stride is never doubled, whatever the interlaced flag says)
+ pbi->mbi.FrameReconStride = pbi->Configuration.UVStride;
+ pbi->mbi.CurrentReconStride = pbi->Configuration.UVStride;
+ pbi->mbi.SourceY = MBrow * 8;
+ pbi->mbi.SourceX = MBcol * 8;
+ pbi->mbi.MvShift = 2;
+ pbi->mbi.MvModMask = 3;
+
+ // Block 4
+ pbi->mbi.Recon = pbi->ReconUDataOffset + pbi->mbi.SourceY * pbi->mbi.CurrentReconStride + pbi->mbi.SourceX;
+ pbi->mbi.Above = &pbi->fc.AboveU[MBcol];
+ pbi->mbi.Left = &pbi->fc.LeftU;
+ pbi->mbi.LastDc = pbi->fc.LastDcU;
+ pbi->mbi.Plane = 1;
+ DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)4);
+
+ // Block 5
+ pbi->mbi.Above = &pbi->fc.AboveV[MBcol];
+ pbi->mbi.Left = &pbi->fc.LeftV;
+ pbi->mbi.Recon = pbi->ReconVDataOffset + pbi->mbi.SourceY * pbi->mbi.CurrentReconStride + pbi->mbi.SourceX;
+ pbi->mbi.LastDc = pbi->fc.LastDcV;
+ // Plane is 2 for V; DecodeBlock only distinguishes zero from non-zero
+ pbi->mbi.Plane = 2;
+ DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)5);
+
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeFrameMbs
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Decodes MacroBlocks of a Frame
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+// Decodes every macroblock of the current frame.
+//
+// pbi : decoder instance; pbi->br must be positioned at the start of the
+//       frame's macroblock data and pbi->FrameType must already be set.
+//
+// Non-base frames first read the mode probabilities and the motion vector
+// entropy configuration from the stream. Base frames instead load the
+// built-in defaults for the mode / motion vector probability tables.
+// The token probabilities and (for interlaced streams) the interlaced
+// probability are then read, the above contexts and the coefficient buffer
+// are reset, and the macroblocks inside the 2-macroblock border are decoded
+// row by row, with the left contexts reset at the start of each row.
+void DecodeFrameMbs
+(
+    PB_INSTANCE *pbi
+)
+{
+    UINT32 MBrow, MBcol;
+    UINT32 MBRows = pbi->MBRows;
+    UINT32 MBCols = pbi->MBCols;
+
+    if(pbi->FrameType != BASE_FRAME )
+    {
+        DecodeModeProbs(pbi);
+        ConfigureMvEntropyDecoder( pbi, pbi->FrameType );
+        pbi->LastMode = CODE_INTER_NO_MV;
+    }
+    else
+    {
+        memcpy ( pbi->probXmitted,BaselineXmittedProbs,sizeof(pbi->probXmitted));
+        // For now these are just 128
+        memset ( pbi->MvSignProbs, 128, sizeof(pbi->MvSignProbs) );
+        memset ( pbi->MvZeroProbs, 128, sizeof(pbi->MvZeroProbs) );
+        memset ( pbi->MvHalfPixelProbs, DEFAULT_HALF_PIXEL_PROB, sizeof(pbi->MvHalfPixelProbs) );
+        memset ( pbi->MvLowBitProbs, 128, sizeof(pbi->MvLowBitProbs) );
+        memset ( pbi->MvSizeProbs, 128, sizeof(pbi->MvSizeProbs) );
+        memset ( pbi->MBModeProb,128,sizeof(pbi->MBModeProb));
+        // Size the fill by the destination itself; the original used
+        // sizeof(pbi->MBModeProb) here, which is only right if both tables
+        // happen to have the same size.
+        // NOTE(review): assumes BModeProb is an array member like its siblings - confirm in pbdll.h.
+        memset ( pbi->BModeProb,128,sizeof(pbi->BModeProb));
+        memset ( pbi->predictionMode,1,sizeof(char)*pbi->MacroBlocks );
+    }
+
+    ConfigureEntropyDecoder( pbi, pbi->FrameType );
+
+    if(pbi->Configuration.Interlaced == 1)
+        pbi->probInterlaced = ((UINT8)VP5_bitread( &pbi->br, 8 ));
+
+    // since we are on a new frame reset the above contexts
+    ResetAboveContext(pbi);
+
+    // Default clear data area down to 0s
+    memset(pbi->mbi.Coeffs, 0,6*72*sizeof(Q_LIST_ENTRY));
+
+    // for each row of macroblocks
+    // (written as "x+2 < N" so that a frame smaller than the border cannot
+    // wrap the unsigned bound around to a huge value)
+    for ( MBrow=2; MBrow+2<MBRows; MBrow++ )
+    {
+
+        ResetLeftContext(pbi);
+
+        // for each macroblock within a row of macroblocks
+        for ( MBcol=2; MBcol+2<MBCols; MBcol++ )
+        {
+
+            // Decode the macroblock
+            DecodeMacroBlock(pbi,MBrow,MBcol);
+
+        } // mb col
+
+
+    } // mbrow
+
+//  printmodes(pbi);
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c
new file mode 100644
index 00000000..2f3a565b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c
@@ -0,0 +1,799 @@
+/****************************************************************************
+*
+* Module Title : Decodemode.c
+*
+* Description : functions for decoding modes and motionvectors
+*
+* AUTHOR : James Bankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 30OCT01 New Configuration baseline.
+* 1.01 JBB 04AP402 Reworked lower footprint mode compression scheme
+*
+*****************************************************************************
+*/
+//************************************************************************************
+// Decoding the Modes:
+//
+// Decode Mode Tree Looks like this :
+//
+//
+//
+//
+// zz
+//
+// 0 Mode Same As Last
+//
+//
+// 1 2
+//
+// 3 4 5 6
+//
+// NoMV +MV Nest Near Intra FourMV 7 8
+//
+// 00Gold GoldMV GNrst GNear
+//
+//
+// 30 probability contexts are set up at each branch (in probMode) corresponding to
+//
+// 3 for what situation we are in at the mode level ( all modes available,
+// no nearest mv found, and no near mv found)
+//
+// 10 one for each possible last mode
+//
+// Note: if the last mode was near then the probability of getting near at position 4
+// above is set to 0 (it would have been coded as same as last). Note also that the
+// probability of getting near when no near mv is available is also always set to 0.
+//
+// These probs are created from the 20 that can be xmitted in the bitstream (probXmitted)
+// For each mode 2 probabilities can be transmitted:
+// probability that the mode will appear if the last mode was the same
+// probability that the mode will appear if the last mode is not that mode
+//
+//************************************************************************************
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+
+/****************************************************************************
+* Implicit Imports
+*****************************************************************************
+*/
+#define STRICT /* Strict type checking. */
+
+#ifdef MAPCA
+ #include <eti/mm.h>
+#endif
+
+/****************************************************************************
+* Exported data structures.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+//*****************************************************************************
+// ModeVQ: This structure holds a table of probability vectors for encoding modes
+// To build this table a number of clips were run through and allowed to
+// select each of the probabilities that were best for them on each frame. These
+// choices were output and a vector quantizer was used to optimize the selection
+// of 16 vectors for each MODETYPE (allmodes available, nonearest, and no near)
+//*****************************************************************************
+UINT8 ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2]= // flat initializer: each source line below holds 20 values, i.e. one vector if MAX_MODES is 10 (TODO confirm); DecodeModeProbs copies value 2*m to probXmitted[type][1][m] and value 2*m+1 to probXmitted[type][0][m]
+{ // first group of 16 vectors (ModeVq[0][..]); no entry pair is zero across the whole group -- presumably the "all modes available" context, confirm against the MODETYPE values
+9,15,32,25,7,19,9,21,1,12,14,12,3,18,14,23,3,10,0,4,
+48,39,1,2,11,27,29,44,7,27,1,4,0,3,1,6,1,2,0,0,
+21,32,1,2,4,10,32,43,6,23,2,3,1,19,1,6,12,21,0,7,
+69,83,0,0,0,2,10,29,3,12,0,1,0,3,0,3,2,2,0,0,
+11,20,1,4,18,36,43,48,13,35,0,2,0,5,3,12,1,2,0,0,
+70,44,0,1,2,10,37,46,8,26,0,2,0,2,0,2,0,1,0,0,
+8,15,0,1,8,21,74,53,22,42,0,1,0,2,0,3,1,2,0,0,
+141,42,0,0,1,4,11,24,1,11,0,1,0,1,0,2,0,0,0,0,
+8,19,4,10,24,45,21,37,9,29,0,3,1,7,11,25,0,2,0,1,
+46,42,0,1,2,10,54,51,10,30,0,2,0,2,0,1,0,1,0,0,
+28,32,0,0,3,10,75,51,14,33,0,1,0,2,0,1,1,2,0,0,
+100,46,0,1,3,9,21,37,5,20,0,1,0,2,1,2,0,1,0,0,
+27,29,0,1,9,25,53,51,12,34,0,1,0,3,1,5,0,2,0,0,
+80,38,0,0,1,4,69,33,5,16,0,1,0,1,0,0,0,1,0,0,
+16,20,0,0,2,8,104,49,15,33,0,1,0,1,0,1,1,1,0,0,
+194,16,0,0,1,1,1,9,1,3,0,0,0,1,0,1,0,0,0,0,
+ // second group of 16 vectors (ModeVq[1][..]); values 6..9 (mode indices 3 and 4) are zero in every row -- presumably the "no nearest MV" context, confirm against the MODETYPE values
+41,22,1,0,1,31,0,0,0,0,0,1,1,7,0,1,98,25,4,10,
+123,37,6,4,1,27,0,0,0,0,5,8,1,7,0,1,12,10,0,2,
+26,14,14,12,0,24,0,0,0,0,55,17,1,9,0,36,5,7,1,3,
+209,5,0,0,0,27,0,0,0,0,0,1,0,1,0,1,0,0,0,0,
+2,5,4,5,0,121,0,0,0,0,0,3,2,4,1,4,2,2,0,1,
+175,5,0,1,0,48,0,0,0,0,0,2,0,1,0,2,0,1,0,0,
+83,5,2,3,0,102,0,0,0,0,1,3,0,2,0,1,0,0,0,0,
+233,6,0,0,0,8,0,0,0,0,0,1,0,1,0,0,0,1,0,0,
+34,16,112,21,1,28,0,0,0,0,6,8,1,7,0,3,2,5,0,2,
+159,35,2,2,0,25,0,0,0,0,3,6,0,5,0,1,4,4,0,1,
+75,39,5,7,2,48,0,0,0,0,3,11,2,16,1,4,7,10,0,2,
+212,21,0,1,0,9,0,0,0,0,1,2,0,2,0,0,2,2,0,0,
+4,2,0,0,0,172,0,0,0,0,0,1,0,2,0,0,2,0,0,0,
+187,22,1,1,0,17,0,0,0,0,3,6,0,4,0,1,4,4,0,1,
+133,6,1,2,1,70,0,0,0,0,0,2,0,4,0,3,1,1,0,0,
+251,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ // third group of 16 vectors (ModeVq[2][..]); values 8..9 (mode index 4) are zero in every row -- presumably the "no near MV" context, confirm against the MODETYPE values
+2,3,2,3,0,2,0,2,0,0,11,4,1,4,0,2,3,2,0,4,
+49,46,3,4,7,31,42,41,0,0,2,6,1,7,1,4,2,4,0,1,
+26,25,1,1,2,10,67,39,0,0,1,1,0,14,0,2,31,26,1,6,
+103,46,1,2,2,10,33,42,0,0,1,4,0,3,0,1,1,3,0,0,
+14,31,9,13,14,54,22,29,0,0,2,6,4,18,6,13,1,5,0,1,
+85,39,0,0,1,9,69,40,0,0,0,1,0,3,0,1,2,3,0,0,
+31,28,0,0,3,14,130,34,0,0,0,1,0,3,0,1,3,3,0,1,
+171,25,0,0,1,5,25,21,0,0,0,1,0,1,0,0,0,0,0,0,
+17,21,68,29,6,15,13,22,0,0,6,12,3,14,4,10,1,7,0,3,
+51,39,0,1,2,12,91,44,0,0,0,2,0,3,0,1,2,3,0,1,
+81,25,0,0,2,9,106,26,0,0,0,1,0,1,0,1,1,1,0,0,
+140,37,0,1,1,8,24,33,0,0,1,2,0,2,0,1,1,2,0,0,
+14,23,1,3,11,53,90,31,0,0,0,3,1,5,2,6,1,2,0,0,
+123,29,0,0,1,7,57,30,0,0,0,1,0,1,0,1,0,1,0,0,
+13,14,0,0,4,20,175,20,0,0,0,1,0,1,0,1,1,1,0,0,
+202,23,0,0,1,3,2,9,0,0,0,1,0,1,0,1,0,0,0,0
+};
+
+// These are the probabilities that we reset to after each keyframe.
+// It was created as the average probabilities of the trees.
+UINT8 BaselineXmittedProbs[4][2][10]= // flat initializer: each source line below fills one [k] slice, i.e. [k][0][0..9] followed by [k][1][0..9]
+{
+ 42, 2, 7, 42, 22, 3, 2, 5, 1, 0, 69, 1, 1, 44, 6, 1, 0, 1, 0, 0, // k = 0
+ 8, 1, 8, 0, 0, 2, 1, 0, 1, 0,229, 1, 0, 0, 0, 1, 0, 0, 1, 0, // k = 1
+ 35, 1, 6, 34, 0, 2, 1, 1, 1, 0,122, 1, 1, 46, 0, 1, 0, 0, 1, 0, // k = 2
+ 64, 0, 64, 64, 64, 0, 0, 0, 0, 0, 64, 0, 64, 64, 64, 0, 0, 0, 0, 0, // k = 3; NOTE(review): the mode loops in this file run k < MODETYPES -- confirm whether this fourth slice is ever read
+};
+
+
+/****************************************************************************
+ *
+ * ROUTINE : BuildModeTree
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : Fills in probabilities at each branch of the huffman tree
+ * based upon the frequencies transmitted in the bitstream.
+ * probXmitted
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void BuildModeTree
+(
+    PB_INSTANCE *pbi
+)
+{
+    // Converts the transmitted mode frequencies (pbi->probXmitted) into
+    //   pbi->probModeSame[type][last]   - value used for "same mode as last"
+    //   pbi->probMode[type][last][0..8] - one value per branch of the mode tree
+    // for every mode type and every possible previous mode.
+    int lastMode, modeType, m;
+
+    // One tree per previous mode; the previous mode itself is excluded from
+    // its own tree because it is signalled through probModeSame instead.
+    for ( lastMode = 0; lastMode < 10; lastMode++ )
+    {
+        unsigned int Counts[MAX_MODES];
+
+        for ( modeType = 0; modeType < MODETYPES; modeType++ )
+        {
+            UINT8 *tree = pbi->probMode[modeType][lastMode];
+            unsigned int total = 0;
+            unsigned int noMvOrPlusMv;      // NO_MV + PLUS_MV
+            unsigned int nearestOrNear;     // NEAREST_MV + NEAR_MV
+            unsigned int intraOrFourMv;     // INTRA + FOURMV
+            unsigned int goldenOrGoldenMv;  // USING_GOLDEN + GOLDEN_MV
+            unsigned int goldNearestOrNear; // GOLD_NEAREST_MV + GOLD_NEAR_MV
+
+            // Scaled frequency of every mode, with the previous mode zeroed out.
+            for ( m = 0; m < 10; m++ )
+            {
+                Counts[m] = ( m == lastMode ) ? 0 : 100 * pbi->probXmitted[modeType][0][m];
+                total += Counts[m];
+            }
+
+            pbi->probModeSame[modeType][lastMode] = 255 -
+                255 * pbi->probXmitted[modeType][1][lastMode]
+                /
+                ( 1 +
+                  pbi->probXmitted[modeType][1][lastMode] +
+                  pbi->probXmitted[modeType][0][lastMode]
+                );
+
+            // Partial sums shared by several branches below.
+            noMvOrPlusMv      = Counts[CODE_INTER_NO_MV]      + Counts[CODE_INTER_PLUS_MV];
+            nearestOrNear     = Counts[CODE_INTER_NEAREST_MV] + Counts[CODE_INTER_NEAR_MV];
+            intraOrFourMv     = Counts[CODE_INTRA]            + Counts[CODE_INTER_FOURMV];
+            goldenOrGoldenMv  = Counts[CODE_USING_GOLDEN]     + Counts[CODE_GOLDEN_MV];
+            goldNearestOrNear = Counts[CODE_GOLD_NEAREST_MV]  + Counts[CODE_GOLD_NEAR_MV];
+
+            // Each branch value is 1 + 255 * (weight of one side) / (1 + weight
+            // of everything below the branch), in unsigned integer arithmetic.
+            tree[0] = 1 + 255 * ( noMvOrPlusMv + nearestOrNear ) / ( 1 + total );
+            tree[1] = 1 + 255 * noMvOrPlusMv / ( 1 + noMvOrPlusMv + nearestOrNear );
+            tree[2] = 1 + 255 * intraOrFourMv / ( 1 + intraOrFourMv + goldenOrGoldenMv + goldNearestOrNear );
+            tree[3] = 1 + 255 * Counts[CODE_INTER_NO_MV] / ( 1 + noMvOrPlusMv );
+            tree[4] = 1 + 255 * Counts[CODE_INTER_NEAREST_MV] / ( 1 + nearestOrNear );
+            tree[5] = 1 + 255 * Counts[CODE_INTRA] / ( 1 + intraOrFourMv );
+            tree[6] = 1 + 255 * goldenOrGoldenMv / ( 1 + goldenOrGoldenMv + goldNearestOrNear );
+            tree[7] = 1 + 255 * Counts[CODE_USING_GOLDEN] / ( 1 + goldenOrGoldenMv );
+            tree[8] = 1 + 255 * Counts[CODE_GOLD_NEAREST_MV] / ( 1 + goldNearestOrNear );
+        }
+    }
+}
+/****************************************************************************
+ *
+ * ROUTINE : decodeModeDiff
+ *
+ * INPUTS :
+ *
+ * OUTPUTS : diff -> the probability difference value decoded from the bitstream
+ *
+ * RETURNS :
+ *
+ * FUNCTION : this function returns a probability difference value
+ * -256 to +256 in steps of 4 transmitted in the bitstream
+ * using a fixed tree and hardcoded probabilities
+ *
+ * SPECIAL NOTES : The hard coded probabilities for the difference tree
+ * were calculated by taking the average number of times a
+ * branch was taken on some sample material ie
+ * (bond,bike,beautifulmind)
+ *
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+int decodeModeDiff
+(
+    PB_INSTANCE *pbi
+)
+{
+    // Reads one signed probability delta from the bitstream using a fixed
+    // tree with hard-coded branch probabilities. Every non-zero result is
+    // a multiple of 4.
+    int sign;
+
+    // First branch: no change at all.
+    if ( DecodeBool(&pbi->br, 205) == 0 )
+    {
+        return 0;
+    }
+
+    // +1 when the decoded bit is 0, -1 when it is 1 (assumes DecodeBool128
+    // yields 0 or 1).
+    sign = 1 + -2 * DecodeBool128(&pbi->br);
+
+    if ( !DecodeBool(&pbi->br, 171) )
+    {
+        // Magnitude 4 when the next bit is set, 8 when it is clear.
+        // The shift is applied to a positive constant and the sign is
+        // multiplied in afterwards: the former "sign << n" left-shifted a
+        // negative int whenever sign was -1, which is undefined behaviour.
+        return sign * ( 1 << ( 3 - DecodeBool(&pbi->br, 83) ) );
+    }
+
+    if ( !DecodeBool(&pbi->br, 199) )
+    {
+        // Short codes for the magnitudes 12, 16, 20 and 24.
+        if ( DecodeBool(&pbi->br, 140) )
+            return sign * 12;
+
+        if ( DecodeBool(&pbi->br, 125) )
+            return sign * 16;
+
+        if ( DecodeBool(&pbi->br, 104) )
+            return sign * 20;
+
+        return sign * 24;
+    }
+    else
+    {
+        // Escape: a raw 7-bit read gives the magnitude in units of 4.
+        int diff = VP5_bitread(&pbi->br, 7);
+        return sign * diff * 4;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DecodeModeProbs
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : This function parses the probabilities xmitted in
+ * the bitstream. The bitstream may either use the
+ * lastframes baselines, or transmit a pointer to a
+ * vector of new probabilities. It may then also
+ * contain updates to each of these probabilities.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void DecodeModeProbs
+(
+    PB_INSTANCE *pbi
+)
+{
+    // Updates pbi->probXmitted from the bitstream for every mode type and
+    // then rebuilds the derived mode trees with BuildModeTree.
+    int modeType;
+
+    for ( modeType = 0; modeType < MODETYPES; modeType++ )
+    {
+        int m;
+
+        // Optional step 1: replace the whole set with one of the ModeVq vectors.
+        if ( DecodeBool( &pbi->br, PROBVECTORXMIT) )
+        {
+            // 4-bit index of the vector to load
+            int whichVector = VP5_bitread(&pbi->br, 4);
+            const UINT8 *vector = ModeVq[modeType][whichVector];
+
+            // Vector entries are interleaved: [1][m] first, then [0][m].
+            for ( m = 0; m < MAX_MODES; m++ )
+            {
+                pbi->probXmitted[modeType][1][m] = vector[2 * m];
+                pbi->probXmitted[modeType][0][m] = vector[2 * m + 1];
+            }
+        }
+
+        // Optional step 2: per-entry deltas on top of the current values.
+        if ( !DecodeBool( &pbi->br, PROBIDEALXMIT) )
+            continue;
+
+        for ( m = 0; m < 10; m++ )
+        {
+            int which;
+
+            // The delta for [1][m] is read before the one for [0][m].
+            for ( which = 1; which >= 0; which-- )
+            {
+                int updated = decodeModeDiff(pbi);
+                updated += pbi->probXmitted[modeType][which][m];
+
+                // keep the result inside 0..255
+                if ( updated < 0 )
+                    updated = 0;
+                else if ( updated > 255 )
+                    updated = 255;
+
+                pbi->probXmitted[modeType][which][m] = updated;
+            }
+        }
+    }
+
+    BuildModeTree(pbi);
+}
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : decodeModeandMotionVector
+ *
+ * INPUTS : MBrow -> row
+ MBcol -> column
+
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : decodes a macroblock's mode and motion vectors from
+ the bitstream
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void decodeModeAndMotionVector
+(
+ PB_INSTANCE *pbi,
+ UINT32 MBrow,
+ UINT32 MBcol
+ )
+{ // Decodes the coding mode and motion vector(s) of macroblock (MBrow, MBcol); results go to pbi->mbi, pbi->predictionMode, pbi->MBMotionVector and pbi->LastMode.
+ CODING_MODE mode;//lastmode;
+ int type,type2; // type selects the probability context for DecodeMode; type2 only receives the output of the golden-frame search and is not read afterwards
+ UINT32 k;
+ MOTION_VECTORA NearestInterMVect,NearInterMVect;
+ MOTION_VECTORA NearestGoldMVect,NearGoldMVect;
+ MOTION_VECTOR mv;
+ int x, y;
+ // Candidate vectors for frame id 1 (presumably the previous frame, versus 2 for the golden frame below -- confirm); also yields the context type.
+ FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestInterMVect,&NearInterMVect,1,&type);
+
+ mode = DecodeMode(pbi,pbi->LastMode,type);
+ pbi->LastMode = mode;
+
+ pbi->predictionMode[MBOffset(MBrow,MBcol)] = mode;
+ pbi->mbi.Mode = mode;
+ if(mode ==CODE_INTER_FOURMV)
+ {
+ pbi->mbi.BlockMode[0] = DecodeBlockMode(pbi); // blocks 0..3 each get their own mode
+ pbi->mbi.BlockMode[1] = DecodeBlockMode(pbi);
+ pbi->mbi.BlockMode[2] = DecodeBlockMode(pbi);
+ pbi->mbi.BlockMode[3] = DecodeBlockMode(pbi);
+
+ pbi->mbi.BlockMode[4] = CODE_INTER_FOURMV; // blocks 4 and 5 are tagged with the macroblock mode
+ pbi->mbi.BlockMode[5] = CODE_INTER_FOURMV;
+
+ x=0; // x,y accumulate the sum of the four block vectors
+ y=0;
+ for(k=0;k<4;k++)
+ {
+ if(pbi->mbi.BlockMode[k]==CODE_INTER_NO_MV)
+ {
+ pbi->mbi.Mv[k].x = 0;
+ pbi->mbi.Mv[k].y = 0;
+ }
+ else if( pbi->mbi.BlockMode[k]==CODE_INTER_NEAREST_MV)
+ {
+ pbi->mbi.Mv[k].x = NearestInterMVect.x;
+ pbi->mbi.Mv[k].y = NearestInterMVect.y;
+ x+=NearestInterMVect.x;
+ y+=NearestInterMVect.y;
+ }
+ else if( pbi->mbi.BlockMode[k]==CODE_INTER_NEAR_MV)
+ {
+ pbi->mbi.Mv[k].x = NearInterMVect.x;
+ pbi->mbi.Mv[k].y = NearInterMVect.y;
+ x+=NearInterMVect.x;
+ y+=NearInterMVect.y;
+ }
+ else if ( pbi->mbi.BlockMode[k]==CODE_INTER_PLUS_MV)
+ {
+ decodeMotionVector(pbi,&mv,NULL); // explicit vector read from the bitstream
+ pbi->mbi.Mv[k].x = mv.x;
+ pbi->mbi.Mv[k].y = mv.y;
+ x+=mv.x;
+ y+=mv.y;
+ }
+ }
+ x = (x+1+(x>=0))>>2; // sum of four vectors divided by 4 with rounding; relies on >> of a negative int behaving as an arithmetic shift (implementation-defined)
+ y = (y+1+(y>=0))>>2;
+ // The per-macroblock vector used as context for later macroblocks is the vector of block 3.
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = pbi->mbi.Mv[3].x;
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = pbi->mbi.Mv[3].y;
+ // Blocks 4 and 5 use the rounded average computed above.
+ pbi->mbi.Mv[4].x = x;
+ pbi->mbi.Mv[4].y = y;
+
+ pbi->mbi.Mv[5].x = x;
+ pbi->mbi.Mv[5].y = y;
+
+
+ }
+ else
+ { // every other mode: a single vector shared by all six blocks
+ if(mode == CODE_INTER_NEAREST_MV)
+ {
+ x = NearestInterMVect.x;
+ y = NearestInterMVect.y;
+ }
+ else if(mode == CODE_INTER_NEAR_MV)
+ {
+ x = NearInterMVect.x;
+ y = NearInterMVect.y;
+ }
+ else
+ {
+ switch(mode)
+ {
+ /*
+ case CODE_INTER_NEAREST_MV:
+ x = NearestInterMVect.x;
+ y = NearestInterMVect.y;
+ break;
+ case CODE_INTER_NEAR_MV:
+ x = NearInterMVect.x;
+ y = NearInterMVect.y;
+ break;
+ */
+ case CODE_GOLD_NEAREST_MV: // the search for frame id 2 is only run when one of the two golden nearest/near modes needs it
+ FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestGoldMVect,&NearGoldMVect,2,&type2);
+ x = NearestGoldMVect.x;
+ y = NearestGoldMVect.y;
+ break;
+ case CODE_GOLD_NEAR_MV:
+ FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestGoldMVect,&NearGoldMVect,2,&type2);
+ x = NearGoldMVect.x;
+ y = NearGoldMVect.y;
+ break;
+ case CODE_INTER_PLUS_MV:
+ case CODE_GOLDEN_MV:
+ decodeMotionVector(pbi,&mv,NULL); // explicit vector read from the bitstream
+ x = mv.x;
+ y = mv.y;
+ break;
+ default: // all remaining modes carry a zero vector
+ x =0;
+ y =0;
+ }
+ }
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = x;
+ pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = y;
+ for(k=0;k<6;k++)
+ {
+
+ pbi->mbi.Mv[k].x = x;
+ pbi->mbi.Mv[k].y = y;
+ pbi->mbi.BlockMode[k] = mode;
+ }
+ }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : decodeBlockMode
+ *
+ * INPUTS : mode -> mode we are trying to encode
+ *
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : decodes a block mode from the bitstream as 2 bits
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+CODING_MODE
+DecodeBlockMode
+(
+    PB_INSTANCE *pbi
+)
+{
+    // Reads two equiprobable bits (first bit is the high one) and maps the
+    // resulting 2-bit code to one of the four per-block modes.
+    int highBit;
+    int lowBit;
+    int code;
+
+    highBit = DecodeBool128(&pbi->br);
+    lowBit  = DecodeBool128(&pbi->br);
+    code    = (highBit << 1) + lowBit;
+
+    if ( code == 0 )
+        return CODE_INTER_NO_MV;        //0
+    if ( code == 1 )
+        return CODE_INTER_PLUS_MV;      //2
+    if ( code == 2 )
+        return CODE_INTER_NEAREST_MV;   //3
+    if ( code == 3 )
+        return CODE_INTER_NEAR_MV;      //4
+
+    // Any other value falls back to mode 0.
+    return (CODING_MODE)0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : decodeMode
+ *
+ * INPUTS : lastmode -> mode of the last coded macroblock
+ * mode -> mode we are trying to encode
+ * type -> MODE_TYPE (all modes available, nonearest
+ * macroblock, no near macroblock)
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : decodes a MBmode from the bitstream using modecodearray
+ * and probabilities that the value is the same as
+ * lastmode stored in probModeSame, and the probability
+ * of mode occuring if lastmode != mode stored in
+ * probMode
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+CODING_MODE DecodeMode
+(
+    PB_INSTANCE *pbi,
+    CODING_MODE lastmode,
+    UINT32 type
+)
+{
+    // Decodes one macroblock mode. A first bit (pbi->probModeSame) says
+    // whether the mode equals lastmode; otherwise the mode tree is walked
+    // using the branch values in pbi->probMode[type][lastmode].
+    UINT8 *branch;
+
+    if ( DecodeBool(&pbi->br, pbi->probModeSame[type][lastmode]) )
+        return lastmode;
+
+    branch = pbi->probMode[type][lastmode];
+
+    // node 0
+    if ( !DecodeBool(&pbi->br, branch[0]) )
+    {
+        // node 1
+        if ( DecodeBool(&pbi->br, branch[1]) )
+        {
+            // node 4: bit clear -> CODE_INTER_NEAREST_MV, bit set -> CODE_INTER_NEAR_MV
+            return CODE_INTER_NEAREST_MV + DecodeBool(&pbi->br, branch[4]);
+        }
+
+        // node 3: bit clear -> CODE_INTER_NO_MV, bit set -> CODE_INTER_PLUS_MV
+        return CODE_INTER_NO_MV + 2 * DecodeBool(&pbi->br, branch[3]);
+    }
+
+    // node 2
+    if ( !DecodeBool(&pbi->br, branch[2]) )
+    {
+        // node 5: bit clear -> CODE_INTRA, bit set -> CODE_INTER_FOURMV
+        if ( DecodeBool(&pbi->br, branch[5]) )
+            return CODE_INTER_FOURMV;
+
+        return CODE_INTRA;
+    }
+
+    // node 6
+    if ( DecodeBool(&pbi->br, branch[6]) )
+    {
+        // node 8: bit clear -> CODE_GOLD_NEAREST_MV, bit set -> CODE_GOLD_NEAR_MV
+        return CODE_GOLD_NEAREST_MV + DecodeBool(&pbi->br, branch[8]);
+    }
+
+    // node 7: bit clear -> CODE_USING_GOLDEN, bit set -> CODE_GOLDEN_MV
+    return CODE_USING_GOLDEN + DecodeBool(&pbi->br, branch[7]);
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c
new file mode 100644
index 00000000..37254267
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c
@@ -0,0 +1,366 @@
+/****************************************************************************
+*
+* Module Title : Decodemv.c
+*
+* Description : functions for decoding modes and motionvectors
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 30OCT01 New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#include "pbdll.h"
+#include "boolhuff.h"
+#include "huffman.h"
+#include "stdio.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+* Implicit Imports
+*****************************************************************************
+*/
+#define STRICT /* Strict type checking. */
+
+#ifdef MAPCA
+ #include <eti/mm.h>
+#endif
+
+
+
+/****************************************************************************
+* Exported data structures.
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+
+UINT8 MvUpdateProbs[2][MV_NODES] = // probabilities passed to DecodeBool in ConfigureMvEntropyDecoder when reading each "update this MV probability" flag; columns: 0 zero, 1 sign, 2 half pixel, 3 low bit, 4..10 size nodes 0..6
+{
+ { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 }, // row 0: first vector component (written to mv->x by decodeMotionVector)
+ { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 } // row 1: second vector component (written to mv->y by decodeMotionVector)
+};
+
+/****************************************************************************
+ *
+ * ROUTINE : ConfigureMvEntropyDecoder
+ *
+ * INPUTS :
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : Build the MV entropy decoding tree
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+***************************************************************************/
+void ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+    // Reads the optional motion vector probability updates from the
+    // bitstream. For every probability a flag is decoded first (using the
+    // matching MvUpdateProbs entry); when it is set, the new value is read
+    // with PROB_UPDATE_BASELINE_COST bits, doubled, and forced to 1 if the
+    // stored result is 0.
+    //
+    // FrameType is not consulted: per the original note this function is not
+    // called at all for a BASE_FRAME, whose defaults are set up by the caller.
+    int comp;   // 0 / 1: index of the vector component being configured
+    int node;   // index into the size (magnitude) tree
+
+    // Zero, sign, half pixel and low order bit probabilities.
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        // Zero probability
+        if ( DecodeBool(&pbi->br, MvUpdateProbs[comp][0]) )
+        {
+            pbi->MvZeroProbs[comp] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( pbi->MvZeroProbs[comp] == 0 )
+            {
+                pbi->MvZeroProbs[comp] = 1;
+            }
+        }
+
+        // Sign probability
+        if ( DecodeBool(&pbi->br, MvUpdateProbs[comp][1]) )
+        {
+            pbi->MvSignProbs[comp] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( pbi->MvSignProbs[comp] == 0 )
+            {
+                pbi->MvSignProbs[comp] = 1;
+            }
+        }
+
+        // Half pixel bit probability
+        if ( DecodeBool(&pbi->br, MvUpdateProbs[comp][2]) )
+        {
+            pbi->MvHalfPixelProbs[comp] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( pbi->MvHalfPixelProbs[comp] == 0 )
+            {
+                pbi->MvHalfPixelProbs[comp] = 1;
+            }
+        }
+
+        // Low order magnitude bit probability
+        if ( DecodeBool(&pbi->br, MvUpdateProbs[comp][3]) )
+        {
+            pbi->MvLowBitProbs[comp] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( pbi->MvLowBitProbs[comp] == 0 )
+            {
+                pbi->MvLowBitProbs[comp] = 1;
+            }
+        }
+    }
+
+    // Vector magnitude probabilities: size nodes 0..6 are governed by
+    // MvUpdateProbs columns 4..10 and are read in ascending order.
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        for ( node = 0; node < 7; node++ )
+        {
+            if ( !DecodeBool(&pbi->br, MvUpdateProbs[comp][4 + node]) )
+                continue;
+
+            pbi->MvSizeProbs[comp][node] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+            if ( pbi->MvSizeProbs[comp][node] == 0 )
+            {
+                pbi->MvSizeProbs[comp][node] = 1;
+            }
+        }
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : decodeMotionVector
+ *
+ * INPUTS : *mv -> returned motion vector
+ *nearestMv -> passed in mv acting as context
+
+ *
+ * OUTPUTS :
+ *
+ * RETURNS :
+ *
+ * FUNCTION : decodes a motion vector from the bitstream
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void decodeMotionVector
+(
+    PB_INSTANCE *pbi,
+    MOTION_VECTOR *mv,
+    MOTION_VECTOR *nearestMv
+)
+{
+    // Decodes both components of one motion vector into *mv (x first, then
+    // y). nearestMv is accepted but not used by this implementation.
+    UINT32 comp;
+
+    for ( comp = 0; comp < 2; comp++ )
+    {
+        INT32 value = 0;
+
+        // A clear bit here means the component is zero and nothing else is read.
+        if ( DecodeBool(&pbi->br, pbi->MvZeroProbs[comp]) )
+        {
+            INT32 negative;
+            INT32 halfPixelBit;
+            INT32 lowOrderBit;
+            int   midNode;        // size node deciding bit 3
+            int   lowNodeIfSet;   // size node deciding bit 2 when bit 3 is set
+            int   lowNodeIfClear; // size node deciding bit 2 when bit 3 is clear
+
+            // Sign, half pixel and low order bits precede the magnitude bits.
+            negative     = DecodeBool(&pbi->br, pbi->MvSignProbs[comp]);
+            halfPixelBit = DecodeBool(&pbi->br, pbi->MvHalfPixelProbs[comp]);
+            lowOrderBit  = DecodeBool(&pbi->br, pbi->MvLowBitProbs[comp]);
+
+            // Size node 0 decides bit 4 and which sub-tree supplies bits 3 and 2.
+            if ( DecodeBool(&pbi->br, pbi->MvSizeProbs[comp][0]) )
+            {
+                value          = 1 << 4;
+                midNode        = 4;
+                lowNodeIfSet   = 6;
+                lowNodeIfClear = 5;
+            }
+            else
+            {
+                midNode        = 1;
+                lowNodeIfSet   = 3;
+                lowNodeIfClear = 2;
+            }
+
+            if ( DecodeBool(&pbi->br, pbi->MvSizeProbs[comp][midNode]) )
+            {
+                value |= (1 << 3);
+                value |= DecodeBool(&pbi->br, pbi->MvSizeProbs[comp][lowNodeIfSet]) << 2;
+            }
+            else
+            {
+                value |= DecodeBool(&pbi->br, pbi->MvSizeProbs[comp][lowNodeIfClear]) << 2;
+            }
+
+            // Merge in bits 0 and 1, then apply the sign.
+            value |= halfPixelBit;
+            value |= (lowOrderBit << 1);
+            if ( negative )
+                value = -value;
+        }
+
+        if ( comp == 0 )
+            mv->x = value;
+        else
+            mv->y = value;
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FindNearestandNextNearest
+ *
+ * INPUTS :
+ MBrow row of macroblock to check
+ MBcol col of macroblock to check
+ *nearest returns nearest motion vector if found 0,0 otherwise
+ *near returns next nearest motion vector if found 0,0 otherwise
+ frame which frame motion vector should come from (gold or last)
+ *
+ * OUTPUTS :
+ *
+ * RETURNS : None (the function is void; results are written
+ through *nearest, *nextnearest and *type)
+ *
+ * FUNCTION : search through the existing motion vectors for two different MVs
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void FindNearestandNextNearest
+(
+ PB_INSTANCE *pbi,
+ UINT32 MBrow,
+ UINT32 MBcol,
+ MOTION_VECTORA *nearest,
+ MOTION_VECTORA *nextnearest,
+ UINT8 Frame,
+ int *type
+)
+{ // Scans up to 12 neighbouring macroblocks (offsets from pbi->mvNearOffset) for the first two distinct non-zero vectors whose mode maps to Frame; outputs default to (0,0) and NONEAREST_MACROBLOCK.
+ UINT32 BaseMB = MBOffset(MBrow,MBcol);
+ UINT32 OffsetMB;
+ int i;
+
+ nearest->x=0;
+ nearest->y=0;
+ nextnearest->x=0;
+ nextnearest->y=0;
+ *type = NONEAREST_MACROBLOCK;
+ // Pass 1: find the first candidate with a non-zero vector.
+ for(i=0;i<12;i++)
+ {
+
+ OffsetMB = pbi->mvNearOffset[i]+BaseMB;
+
+ if(VP5_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame) // candidate's mode maps to a different frame id
+ continue;
+ // NOTE(review): the vector is read as one unsigned int -- assumes the element is exactly sizeof(unsigned int) and suitably aligned; this cast is also a strict-aliasing hazard. Confirm against the type definitions.
+ if(*((unsigned int *) &pbi->MBMotionVector[OffsetMB]) == 0)
+ continue;
+
+ *((unsigned int *) nearest) = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+ *type = NONEAR_MACROBLOCK;
+
+ break;
+
+ }
+ // Pass 2 (only if pass 1 found something): continue after the candidate that matched, looking for a different non-zero vector.
+ if(*((unsigned int *) nearest))
+ {
+ for(i=i+1;i<12;i++)
+ {
+
+ OffsetMB = pbi->mvNearOffset[i]+BaseMB;
+
+ if(VP5_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame)
+ continue;
+
+ if( *((unsigned int *) &pbi->MBMotionVector[OffsetMB])
+ == *((unsigned int *) nearest) ) // same vector as nearest: not a distinct candidate
+ continue;
+
+ if(*((unsigned int *) &pbi->MBMotionVector[OffsetMB]) == 0)
+ continue;
+
+ *((unsigned int *) nextnearest) = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+ *type = MACROBLOCK;
+
+ break;
+ }
+
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c
new file mode 100644
index 00000000..52a36eae
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c
@@ -0,0 +1,438 @@
+/****************************************************************************
+*
+* Module Title : vp5dxv.c
+*
+* Description : VP50 interface to DXV.
+*
+* AUTHOR : SJL
+*
+*****************************************************************************
+* Revision History
+*
+* 1.03 SJL 17/10/02 Up the version to 1.0.0.3, added new dxv interface
+* 1.02 YWX 30/09/02 Up the version to 1.0.0.2, added support of scaling
+* 1.01 YWX 19/09/02 Fixed bug in blit and up the version to 1.0.0.1
+* 1.00 SJL 17/06/02 Base
+*
+*****************************************************************************
+*/
+//#include <stdlib.h>
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+
+#include "pbdll.h"
+
+
+const char* VP5LIBVERSION="ON2 VP5 Decode Library for MAC Version 1.0.0.3";
+
+typedef unsigned int FourCC;
+
+#define VP50_FOURCC DXL_MKFOURCC( 'V', 'P', '5', '0')
+
+
+static dxvBitDepth bitDepths[] =
+{
+ DXYV12,DXRGBNULL
+};
+
+
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned int Parameter );
+
+extern void VP5_VPInitLibrary(void);
+extern void VP5_VPDeInitLibrary(void);
+
+#include "duck_dxl.h"
+
+typedef struct tFrameInfo /* header fields filled in by vp50_GetInfo() */
+{
+ int KeyFrame; /* non-zero when source[0] is not above 0x7f */
+ int Version; /* (source[2] >> 3) & 0x1f on key frames, 0 otherwise */
+ int Quality; /* source[0] >> 2 */
+ int vp30Flag; /* raw value of source[1] */
+} FrameInfo;
+
+void
+vp50_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+    /* The first byte decides the frame type: a value of 0x7f or below is a
+     * key frame, anything above is an inter frame. */
+    const unsigned char firstByte = source[0];
+    const int isKeyFrame = (firstByte <= 0x7f);
+
+    frameInfo->KeyFrame = isKeyFrame;
+    frameInfo->Quality = firstByte >> 2;
+
+    /* The version field is only read from the third byte on key frames. */
+    frameInfo->Version = isKeyFrame ? ((source[2] >> 3) & 0x1f) : 0;
+
+    frameInfo->vp30Flag = (int)source[1];
+}
+
+
+// YUV buffer configuration structure.
+// NOTE(review): vp50_decompress casts this to YUV_BUFFER_CONFIG *, so the leading
+// fields presumably have to mirror that type's layout - confirm against its header.
+typedef struct
+{
+ int YWidth;
+ int YHeight; /* used with YStride by vp50_decompress to move YBuffer to the last luma row */
+ int YStride;
+
+ int UVWidth;
+ int UVHeight; /* used with UVStride by vp50_decompress to move UBuffer/VBuffer to their last row */
+ int UVStride;
+
+ char * YBuffer;
+ char * UBuffer;
+ char * VBuffer;
+
+ char * uvStart; /* set by vp50_decompress for DXYV12 / DXI420 destinations only */
+ int uvDstArea; /* set by vp50_decompress for DXYV12 / DXI420 destinations only */
+ int uvUsedArea; /* set by vp50_decompress for DXYV12 / DXI420 destinations only */
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an algorithm base container: the per-xImage state this codec stores behind DXL_GetAlgorithmBasePtr() */
+typedef struct tXImageCODEC
+{
+ FourCC myFourCC; /* set to VP50_FOURCC by vp50_xImageReCreate */
+ DXV_YUV_BUFFER_CONFIG FrameBuffer; /* refreshed through VP5_GetYUVConfig before every blit */
+ PB_INSTANCE *myPBI; /* decoder instance: VP5_StartDecoder creates it, VP5_StopDecoder releases it */
+} vp50_XIMAGE, *vp50_XIMAGE_HANDLE;
+
+/* Blitter signature as called by vp50_decompress: (destination pointer, destination pitch, source frame buffers). */
+typedef void ((*VP5BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp5_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     vp50_decompress
+ *
+ *  INPUTS        :     src     - xImage holding the compressed frame and the
+ *                                algorithm base (decoder instance)
+ *                      vScreen - destination surface; may be NULL, in which
+ *                                case the frame is decoded but not blitted
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK on success
+ *                      DXL_VERSION_CONFLICT if the decoder returns -1
+ *                      DXL_BAD_DATA for any other non-zero decoder result
+ *                      DXL_INVALID_BLIT if no blitter exists for the
+ *                      requested blit quality / bit depth
+ *
+ *  FUNCTION      :     Decodes the compressed frame (when one is present) and
+ *                      blits the reconstructed frame to vScreen.
+ *
+ *  SPECIAL NOTES :     The xImage width/height are held in a single pair of
+ *                      locals and are (re)fetched in the blit path, because
+ *                      the YV12/I420 chroma placement below needs the height
+ *                      even when no frame was decoded on this call.
+ *
+ ****************************************************************************/
+static int
+vp50_decompress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+    vp50_XIMAGE_HANDLE thisAlgorithmBase = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    unsigned char *cAddr;
+    int cSize;
+    int retVal;
+    int w = 0, h = 0;
+
+    // if we have a compressed frame decompress it ( otherwise we'll just redo
+    // the scaling and postprocessing from the last frame )
+    cAddr = DXL_GetXImageCDataAddr(src);
+    cSize = DXL_GetXImageCSize(src);
+
+    if(cAddr)
+    {
+        if((cSize != 0) && (cAddr[0]>=1 || cAddr[1]>=1 || cAddr[2] >=1))
+        {
+            DXL_GetXImageXYWH(src, NULL, NULL, &w, &h);
+
+            // decode the frame
+            retVal = VP5_DecodeFrameToYUV(thisAlgorithmBase->myPBI, (char *)cAddr, cSize, w, h);
+            if(retVal != 0 )
+            {
+                if(retVal == -1)
+                    return DXL_VERSION_CONFLICT;
+                else
+                    return DXL_BAD_DATA;
+            }
+        }
+    }
+
+    if (vScreen)    /* if there is a vScreen, blit to it */
+    {
+        unsigned char * ptrScrn;
+        short thisPitch, vsHeight;
+        dxvBlitQuality bq;
+        dxvBitDepth bd;
+        VP5BLIT_FUNC blitter;
+
+        DXL_GetVScreenAttributes(vScreen, (void **)&ptrScrn, &bq, &bd, &thisPitch, &vsHeight);
+
+        if(ptrScrn)
+        {
+            int x, y, pSize;
+            int viewX, viewY;
+
+            DXL_GetVScreenView(vScreen, &viewX, &viewY, NULL, NULL);
+
+            /* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+            VP5_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &(thisAlgorithmBase->FrameBuffer));
+
+            pSize = VPX_GetSizeOfPixel(bd);
+
+            /* fetch the size together with the position: h is needed for the
+               chroma plane placement below even if nothing was decoded above */
+            DXL_GetXImageXYWH(src, &x, &y, &w, &h);
+
+            /* remember to offset if requested */
+            y += viewY;
+            x += viewX;
+
+            ptrScrn += (x * pSize) + (y * thisPitch);
+
+            /* setup ptrs so we can work backwards through Paul's frame buffers */
+            thisAlgorithmBase->FrameBuffer.YBuffer = thisAlgorithmBase->FrameBuffer.YBuffer +
+                ((thisAlgorithmBase->FrameBuffer.YHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.YStride));
+
+            thisAlgorithmBase->FrameBuffer.UBuffer = thisAlgorithmBase->FrameBuffer.UBuffer +
+                ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.UVStride));
+
+            thisAlgorithmBase->FrameBuffer.VBuffer = thisAlgorithmBase->FrameBuffer.VBuffer +
+                ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) *
+                 (thisAlgorithmBase->FrameBuffer.UVStride));
+
+            if((bd != DXYUY2) && (bd != DXYV12))
+            {
+                if(bq == DXBLIT_STRETCH)
+                {
+                    thisPitch *= 2;
+                }
+            }
+
+            /* planar destinations: tell the blitter where the chroma planes live */
+            if(bd == DXYV12 || bd == DXI420)
+            {
+                if(thisPitch < 0)
+                {
+                    thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 );
+                    thisAlgorithmBase->FrameBuffer.uvDstArea = abs((thisPitch * h)/4);
+                    thisAlgorithmBase->FrameBuffer.uvUsedArea = 0;
+                }
+                else
+                {
+                    thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + (thisPitch * h));
+                    thisAlgorithmBase->FrameBuffer.uvDstArea = ((thisPitch * h)/4);
+                    thisAlgorithmBase->FrameBuffer.uvUsedArea = ((thisPitch * thisAlgorithmBase->FrameBuffer.UVHeight)/2);
+                }
+            }
+
+            blitter = (VP5BLIT_FUNC)VPX_GetBlitter(bq, bd);
+
+            /* VPX_GetBlitter signals "no such blitter" with (void *)-1 */
+            if ((void *)blitter != (void *)-1)
+            {
+                blitter(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(&thisAlgorithmBase->FrameBuffer));
+            }
+            else
+            {
+                return DXL_INVALID_BLIT;
+            }
+        }
+    }
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     vp50_xImageDestroy
+ *
+ *  INPUTS        :     src - xImage whose algorithm base is to be released
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK in every case.
+ *
+ *  FUNCTION      :     Closes down a decompressor: stops the decoder held by
+ *                      the algorithm base and frees the base itself.
+ *
+ *  SPECIAL NOTES :     Nothing is done when no algorithm base is attached.
+ *
+ ****************************************************************************/
+static int
+vp50_xImageDestroy(DXL_XIMAGE_HANDLE src)
+{
+    vp50_XIMAGE_HANDLE base = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    if(!base)
+        return DXL_OK;
+
+    VP5_StopDecoder(&(base->myPBI));
+    duck_free(base);
+
+    return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     vp50_xImageReCreate
+ *
+ *  INPUTS        :     src      - xImage being (re)initialised
+ *                      data     - not used by this routine
+ *                      type     - fourCC of the data; must be VP50_FOURCC
+ *                      bitDepth - not used by this routine
+ *                      w, h     - dimensions handed to VP5_StartDecoder
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     The new algorithm base, or NULL when the type is not
+ *                      VP50, the container cannot be allocated, or the
+ *                      decoder fails to start.
+ *
+ *  FUNCTION      :     Replaces any existing algorithm base with a fresh one,
+ *                      registers this codec's callbacks on src and starts a
+ *                      new decoder instance.
+ *
+ *  SPECIAL NOTES :
+ *      called during initialization and/or when xImage (decompressor)
+ *      attributes change.
+ *
+ *      !!!!!!
+ *      This function should be prepared to get data that is NOT of the
+ *      type native to the decoder. It should do its best to verify it
+ *      as valid data and should clean up after itself and return NULL
+ *      if it doesn't recognize the format of the data.
+ *
+ ****************************************************************************/
+static void *
+vp50_xImageReCreate(DXL_XIMAGE_HANDLE src, unsigned char *data, int type, enum BITDEPTH bitDepth, int w, int h)
+{
+    vp50_XIMAGE_HANDLE previousBase = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+    vp50_XIMAGE_HANDLE freshBase;
+
+    (void) bitDepth;
+
+    /* only VP50 data is handled here */
+    if(type != VP50_FOURCC)
+        return NULL;
+
+    /* tear down the container left over from an earlier create, if any */
+    if(previousBase != NULL)
+    {
+        VP5_StopDecoder(&(previousBase->myPBI));
+        duck_free(previousBase);
+    }
+
+    /* zero-filled replacement container */
+    freshBase = (vp50_XIMAGE_HANDLE)duck_calloc(1,sizeof(vp50_XIMAGE),DMEM_GENERAL);
+    if(freshBase == NULL)
+        return NULL;
+
+    /* hook this codec's entry points onto the xImage */
+    DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp50_xImageReCreate);
+    DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp50_xImageDestroy);
+    DXL_RegisterXImageDx(src, (DX_FUNC) vp50_decompress);
+    DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp50_SetParameter);
+
+    freshBase->myFourCC = VP50_FOURCC;
+
+    /* create new PBI; on success hand the container back to the caller */
+    if(VP5_StartDecoder( &(freshBase->myPBI), w, h))
+        return (DXL_HANDLE) freshBase;
+
+    /* the decoder could not be started: release the container again */
+    duck_free(freshBase);
+    freshBase = NULL;
+
+    return (DXL_HANDLE) freshBase;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     vp50_xImageCreate
+ *
+ *  INPUTS        :     src  - xImage being created
+ *                      data - passed straight through to vp50_xImageReCreate
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     Whatever vp50_xImageReCreate returns.
+ *
+ *  FUNCTION      :     "Glue" create entry point: forwards to the re-create
+ *                      function with the VP50 fourCC, a bit depth of 0 and
+ *                      the default 320x240 frame size.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+static DXL_HANDLE
+vp50_xImageCreate(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    /* our default wxh is always 320x240 */
+    const int defaultWidth = 320;
+    const int defaultHeight = 240;
+
+    return vp50_xImageReCreate(src, data, VP50_FOURCC, (enum BITDEPTH ) 0, defaultWidth, defaultHeight);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp50_Init
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : DXL_OK, unconditionally.
+ *
+ * FUNCTION : Registers the VP50 create function, calls vp3SetBlit() and initializes the decoder library globals.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+int
+vp50_Init(void)
+{
+ DXL_RegisterXImage((CREATE_FUNC) vp50_xImageCreate, VP50_FOURCC);
+
+ /* NOTE(review): vp3SetBlit has no declaration in this file; presumably it comes from an included header - confirm. */
+ vp3SetBlit();
+
+ /* initialize all the global variables */
+ VP5_VPInitLibrary();
+
+ return DXL_OK;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : vp50_Exit
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : DXL_OK, unconditionally.
+ *
+ * FUNCTION : main exit routine, called during DXL_ExitVideo()
+ * clean up any global information if necessary
+ *
+ * SPECIAL NOTES : Only calls VP5_VPDeInitLibrary().
+ *
+ ****************************************************************************/
+int
+vp50_Exit(void)
+{
+ VP5_VPDeInitLibrary();
+
+ return DXL_OK;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       :     vp50_SetParameter
+ *
+ *  INPUTS        :     src       - xImage whose decoder is to be configured
+ *                      Command   - parameter selector, passed on as a
+ *                                  PB_COMMAND_TYPE
+ *                      Parameter - new value, passed on as a UINT32
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Forwards a set-parameter request to VP5_SetPbParam for
+ *                      the decoder instance attached to src.
+ *
+ *  SPECIAL NOTES :     The request is ignored when src has no algorithm base
+ *                      attached, matching the NULL checks in
+ *                      vp50_xImageDestroy and vp50_xImageReCreate.
+ *
+ ****************************************************************************/
+void
+vp50_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter)
+{
+    vp50_XIMAGE_HANDLE thisAlgorithmBase = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+    /* nothing to configure if no decoder container is attached */
+    if(thisAlgorithmBase == NULL)
+        return;
+
+    VP5_SetPbParam(thisAlgorithmBase->myPBI, (PB_COMMAND_TYPE) Command, (UINT32) Parameter );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c
new file mode 100644
index 00000000..ac683b0d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c
@@ -0,0 +1,389 @@
+/****************************************************************************
+*
+* Module Title : PB_Globals.c
+*
+* Description : Video CODEC Demo: playback dll global declarations
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.21 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.20 AWG 08-Jun-01 Added support for DCT16
+* 1.19 JBB 01-MAY-01 VP5 Functionality (set up coefftoband array)
+* 1.18 YWX 26-Apr 01 Added global "CPUFrequency" and its initializing
+* in VPInitlibrary()
+* 1.17 JBB 06 Apr 01 new cpu free variable initialized
+* 1.16 SJL 30 Mar 01 Added #if defined(POSTPROCESS) around InitPostProcessing();
+* 1.15 PGW 25 Jan 01 Add code to create and destroy MV huffman trees.
+* 1.15 JBB 26 Jan 01 No need to destroy huffman trees
+* 1.14 JBB 22 Aug 00 Ansi C conversion
+* 1.13 JBB 21 Aug 00 New More Blurry in high variance area deringer
+* 1.12 YWX 2 Aug 00 Removed redundant kernel modifiers
+* 1.11 JBB 27 Jul 00 Moved kernel modifiers to pbi mallocs -> duck_malloc
+* for scott added malloc checks
+* 1.10 YWX 15/05/00 change the initialization of PostProcessLevel
+* 1.09 JBB 27/01/99 Globals Removed, use of PB_INSTANCE, added PB_Instance
+* allocation and deletion functions
+* 1.08 PGW 17/12/99 Draw dib functionality removed.
+* 1.07 PGW 16/12/99 Added support for VP3 version id.
+* 1.06 PGW 15/12/99 Added key frame type variable
+* 1.05 PGW 22/11/99 Changes relating to restructuring of block map stuff.
+* 1.04 PGW 14/10/99 Changes to reduce unnecessary dependencies.
+* 1.05 PGW 06/09/99 DivBySix changed to UINT8 [].
+* 1.04 PGW 24/08/99 Removal of EOF token and associated data structures etc.
+* Deleted COrderList[].
+* 1.03 PGW 15/07/99 Added bit extraction variables.
+* 1.02 PGW 14/07/99 Changes to interface to idct and reconstruction functions.
+* Added ModeUsesMC[] truth table. Added (*ReconIntra) function
+* pointer.
+* 1.01 PGW 09/07/99 Added code to support profile timing
+* 1.00 PGW 22/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include <stdint.h> /* uintptr_t, used by ROUNDUP32 */
+
+#include "pbdll.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+* Explicit imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imported functions.
+*****************************************************************************
+*/
+extern unsigned long VP5_GetProcessorFrequency();
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+//extern Q_LIST_ENTRY VP5_DcScaleFactorTableV1[ Q_TABLE_SIZE ] ;
+extern Q_LIST_ENTRY VP5_DcQuant[ Q_TABLE_SIZE ];
+
+UINT32 DCQuantScaleP[Q_TABLE_SIZE];
+
+//****************************************************************
+// Function Pointers now library globals!
+//****************************************************************
+
+// Process Frequency
+unsigned int CPUFrequency;
+
+// Truth table to indicate if the given mode uses motion estimation
+BOOL VP5_ModeUsesMC[MAX_MODES] = { FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE };
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_DeleteTmpBuffers
+ *
+ *
+ *  INPUTS        :     pbi - playback instance whose temporary buffers are
+ *                            to be released
+ *
+ *  OUTPUTS       :     All temporary buffer pointers in pbi are reset to 0.
+ *
+ *  RETURNS       :     None.
+ *
+ *
+ *  FUNCTION      :     Frees every temporary buffer that is currently
+ *                      allocated and clears both the owning (*Alloc) pointers
+ *                      and the working pointers derived from them.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeleteTmpBuffers(PB_INSTANCE * pbi)
+{
+    /* reconstruction data buffer */
+    if(pbi->ReconDataBufferAlloc)
+        duck_free(pbi->ReconDataBufferAlloc);
+    pbi->ReconDataBufferAlloc = 0;
+    pbi->ReconDataBuffer = 0;
+
+    /* loop filtered block */
+    if(pbi->LoopFilteredBlockAlloc)
+        duck_free(pbi->LoopFilteredBlockAlloc);
+    pbi->LoopFilteredBlockAlloc = 0;
+    pbi->LoopFilteredBlock = 0;
+
+    /* temporary data buffer */
+    if(pbi->TmpDataBufferAlloc)
+        duck_free(pbi->TmpDataBufferAlloc);
+    pbi->TmpDataBufferAlloc = 0;
+    pbi->TmpDataBuffer = 0;
+
+    /* temporary reconstruction buffer */
+    if(pbi->TmpReconBufferAlloc)
+        duck_free(pbi->TmpReconBufferAlloc);
+    pbi->TmpReconBufferAlloc = 0;
+    pbi->TmpReconBuffer = 0;
+
+    /* scale buffer */
+    if(pbi->ScaleBufferAlloc)
+        duck_free(pbi->ScaleBufferAlloc);
+    pbi->ScaleBufferAlloc = 0;
+    pbi->ScaleBuffer = 0;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_AllocateTmpBuffers
+ *
+ *
+ *  INPUTS        :     pbi - playback instance whose temporary buffers are
+ *                            to be (re)allocated
+ *
+ *  OUTPUTS       :     pbi->ReconDataBuffer, pbi->TmpDataBuffer,
+ *                      pbi->LoopFilteredBlock and pbi->TmpReconBuffer point at
+ *                      32-byte aligned storage inside the matching *Alloc
+ *                      allocations.
+ *
+ *  RETURNS       :     TRUE on success. FALSE if any allocation fails, in
+ *                      which case all temporary buffers have been released.
+ *
+ *
+ *  FUNCTION      :     Releases any existing temporary buffers and allocates
+ *                      a fresh set, each over-allocated by 32 bytes so the
+ *                      working pointer can be rounded up to a 32-byte
+ *                      boundary.
+ *
+ *  SPECIAL NOTES :     ROUNDUP32 does its arithmetic in uintptr_t so that no
+ *                      address bits are lost on targets where pointers are
+ *                      wider than 32 bits.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 ) & ~( (uintptr_t) 31 ) )
+BOOL VP5_AllocateTmpBuffers(PB_INSTANCE * pbi)
+{
+    // clear any existing info
+    VP5_DeleteTmpBuffers(pbi);
+
+#ifdef MAPCA
+    pbi->ReconDataBufferAlloc = (INT16 (*)[64])duck_malloc(32+64*sizeof(INT16)*6, DMEM_GENERAL);
+    if(!pbi->ReconDataBufferAlloc)
+        goto alloc_failed;
+    pbi->ReconDataBuffer = (INT16 (*)[64])ROUNDUP32(pbi->ReconDataBufferAlloc);
+#else
+    pbi->ReconDataBufferAlloc = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+    if(!pbi->ReconDataBufferAlloc)
+        goto alloc_failed;
+    pbi->ReconDataBuffer = (INT16 *)ROUNDUP32(pbi->ReconDataBufferAlloc);
+#endif
+
+    pbi->TmpDataBufferAlloc = (INT16 *)duck_malloc(32 + 64 * sizeof(INT16), DMEM_GENERAL);
+    if(!pbi->TmpDataBufferAlloc)
+        goto alloc_failed;
+    pbi->TmpDataBuffer = (INT16 *)ROUNDUP32(pbi->TmpDataBufferAlloc);
+
+    pbi->LoopFilteredBlockAlloc = (UINT8 *)duck_malloc(32 + 256 * sizeof(UINT8), DMEM_GENERAL);
+    if(!pbi->LoopFilteredBlockAlloc)
+        goto alloc_failed;
+    pbi->LoopFilteredBlock = (UINT8 *)ROUNDUP32(pbi->LoopFilteredBlockAlloc);
+
+    pbi->TmpReconBufferAlloc = (INT16 *)duck_malloc(32 + 64 * sizeof(INT16), DMEM_GENERAL);
+    if(!pbi->TmpReconBufferAlloc)
+        goto alloc_failed;
+    pbi->TmpReconBuffer = (INT16 *)ROUNDUP32(pbi->TmpReconBufferAlloc);
+
+    return TRUE;
+
+alloc_failed:
+    /* release whatever was allocated before the failure */
+    VP5_DeleteTmpBuffers(pbi);
+    return FALSE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_DeletePBInstance
+ *
+ *
+ *  INPUTS        :     pbi - address of the playback instance pointer
+ *
+ *  OUTPUTS       :     *pbi is set to 0.
+ *
+ *  RETURNS       :     None.
+ *
+ *
+ *  FUNCTION      :     Releases the temporary buffers, the quantizer and
+ *                      (unless MAPCA is defined) the post-processor owned by
+ *                      the instance, then frees the instance itself.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeletePBInstance(PB_INSTANCE **pbi)
+{
+    PB_INSTANCE *instance = *pbi;
+
+    if(instance)
+    {
+        // Release everything the instance owns before the instance itself
+        VP5_DeleteTmpBuffers(instance);
+        VP5_DeleteQuantizer(&instance->quantizer);
+#ifndef MAPCA
+        DeletePostProcInstance(&instance->postproc);
+#endif
+    }
+
+    duck_free(instance);
+    *pbi = 0;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_CreatePBInstance
+ *
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     A newly allocated playback instance, or 0 if either
+ *                      the instance or its temporary buffers cannot be
+ *                      allocated.
+ *
+ *
+ *  FUNCTION      :     Allocates a zero-filled playback instance, installs
+ *                      the default configuration, allocates the temporary
+ *                      buffers and sets the initial key frame type, CPUFree
+ *                      value and (unless MAPCA is defined) idct pointer.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+PB_INSTANCE * VP5_CreatePBInstance(void)
+{
+    CONFIG_TYPE defaultConfig =
+    {
+        0,0,0,0,
+        8,8,
+    };
+    int instanceBytes = sizeof(PB_INSTANCE);
+    PB_INSTANCE *instance = (PB_INSTANCE *) duck_malloc(instanceBytes, DMEM_GENERAL);
+
+    if(instance == 0)
+        return 0;
+
+    // start from an all-zero structure, then drop in the default configuration
+    memset((unsigned char *) instance, 0, sizeof(PB_INSTANCE));
+    memcpy((void *) &instance->Configuration, (void *) &defaultConfig, sizeof(CONFIG_TYPE));
+
+    // the temporary buffers clean up after themselves on failure
+    if(VP5_AllocateTmpBuffers(instance) == 0)
+    {
+        duck_free(instance);
+        return 0;
+    }
+
+    instance->KeyFrameType = DCT_KEY_FRAME;
+    instance->CPUFree = 70;
+#ifndef MAPCA
+    instance->idct = idct;
+#endif
+
+    // Initialise Entropy related data structures.
+    memset( instance->DcProbs, 0, sizeof(instance->DcProbs) );
+    memset( instance->AcProbs, 0, sizeof(instance->AcProbs) );
+
+    return instance;
+}
+
+
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_VPInitLibrary
+ *
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     CPUFrequency (except on __POWERPC__ builds) and
+ *                      DCQuantScaleP[] are filled in.
+ *
+ *  RETURNS       :     None.
+ *
+ *
+ *  FUNCTION      :     Fully initializes the playback library: machine
+ *                      specific configuration, the DC quantizer scale table
+ *                      and the post-processing module.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_VPInitLibrary(void)
+{
+    int qIndex;
+
+#if !defined(__POWERPC__)
+    CPUFrequency = VP5_GetProcessorFrequency();
+#endif
+
+    VP5_DMachineSpecificConfig();
+
+    // each scale entry is half the DC quantizer value at the same Q index
+    qIndex = 0;
+    while( qIndex < Q_TABLE_SIZE )
+    {
+        INT32 halfDcQuant = VP5_DcQuant[qIndex]/2;
+
+        DCQuantScaleP[qIndex] = halfDcQuant;
+        qIndex++;
+    }
+
+#ifndef MAPCA
+    // the same scale table is supplied for all three table arguments
+    InitPostProcessing(
+        DCQuantScaleP,
+        DCQuantScaleP,
+        DCQuantScaleP,
+        CURRENT_DECODE_VERSION);
+    InitVPUtil();
+#else
+    VP5_InitPostProcess();
+#endif
+}
+
+/*********************************************************/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_VPDeInitLibrary
+ *
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None.
+ *
+ *
+ * FUNCTION : Shuts the playback library down; closes post-processing on MAPCA builds only.
+ *
+ * SPECIAL NOTES : Does nothing when MAPCA is not defined.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_VPDeInitLibrary(void)
+{
+#ifdef MAPCA
+ VP5_ClosePostProcess();
+#endif
+
+
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c
new file mode 100644
index 00000000..15df1f30
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c
@@ -0,0 +1,845 @@
+/****************************************************************************
+*
+* Module Title : Quantise
+*
+* Description : Quantisation and dequantisation of an 8x8 dct block.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+*
+* 1.18 PGW 03 Dec 01 Changes to available Q values.
+* 1.17 PGW 14 Sep 01 Added support for ZB varying on zero-run.
+* 1.16 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+* 1.15 PGW 19 Oct 00 Added select_InterUV_quantiser and related data structures
+* to support use of different DC behaviour for UV.
+* 1.11 PGW 18 Sep 00 QThreshTable[] and Inter_coeffs[] made instance specific.
+* 1.10 PGW 14 Sep 00 Added support for different Q, ZB and Rounding tables
+* in different encoder versions.
+* 1.09 PGW 04 Sep 00 Fixed bugs in code to set up rounding and zero bins
+* Added support for ZB to change with Q and coefficient.
+* 1.08 PGW 29 Aug 00 Correction to UpdateQ() and UpdateQC() re. Q limits.
+* Changes to rounding and ZBF.
+* 1.08 JBB 22 Aug 00 Ansi C conversion
+* 1.07 SJL 14/04/00 Added the BuildQuantIndex function.
+* 1.06 PGW 18/02/00 Rate targeting changes.
+* 1.05 JBB 27/01/99 Globals Removed, use of QUANTIZER, Dequant no longer
+* used
+* 1.04 PGW 05/11/99 Changes to support AC range entropy tables
+* 1.03 PGW 12/10/99 Removal of spurious windows dependancies.
+* 1.02 PGW 14/09/99 Removal of some floating point code.
+* 1.01 PGW 13/07/99 Changes to keep dequant output to 16 bit
+* 1.01 PGW 07/07/99 Tweaks to baseline matrix.
+* 1.00 PGW 18/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#define STRICT /* Strict type checking. */
+#include <string.h>
+#include "quantize.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+#define MIN16 ((1<<16)-1)
+
+// DC quantizer characteristics
+#define VP5_MIN_QUANT 1
+
+#define UV_Q_ADJUSTMENT 0
+
+// Scale factors used to improve precision of DCT/IDCT
+#define IDCT_SCALE_FACTOR 2 // Shift left bits to improve IDCT precision
+
+/****************************************************************************
+* Imported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+void (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+UINT8 QTableSelect[6] = { 0,0,0,0,1,1 }; // Controls selection of Q Table,rounding,zero bin etc for Y, U & V blocks
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+void VP5_InitQTables( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+void VP5_UpdateQ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+void VP5_UpdateQC( QUANTIZER *pbi,UINT8 Vp3VersionNo );
+void VP5_init_quantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+void (*VP5_quantize)( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+void VP5_init_dequantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+QUANTIZER * VP5_CreateQuantizer(void);
+void VP5_DeleteQuantizer(QUANTIZER **pbi);
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+// AC Quantizer Tables
+static UINT32 VP5_QThreshTable[Q_TABLE_SIZE] =
+{ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1
+};
+static UINT32 VP5_UvQThreshTable[Q_TABLE_SIZE] =
+{ 94, 92, 90, 88, 86, 82, 78, 74,
+ 70, 66, 62, 58, 54, 53, 52, 51,
+ 50, 49, 48, 47, 46, 45, 44, 43,
+ 42, 40, 39, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1
+};
+
+// AC Zero Bin and Rounding Tables (include fdct normalisation)
+UINT32 VP5_ZBinTable[Q_TABLE_SIZE] =
+{
+ 330,314,298,284,264,246,228,213,
+ 201,190,178,167,156,153,149,146,
+ 144,141,138,135,132,130,127,124,
+ 121,115,110,104,99, 96, 94, 90,
+ 85, 82, 79, 76, 74, 71, 69, 66,
+ 63, 61, 58, 55, 53, 50, 47, 45,
+ 43, 40, 38, 36, 33, 31, 28, 24,
+ 21, 18, 16, 13, 10, 7, 4, 2,
+};
+UINT32 VP5_UvZBinTable[Q_TABLE_SIZE] =
+{
+ 330,314,298,284,264,246,228,213,
+ 201,190,178,167,156,153,149,146,
+ 144,141,138,135,132,130,127,124,
+ 121,115,110,104,99, 96, 94, 90,
+ 85, 82, 79, 76, 74, 71, 69, 66,
+ 63, 61, 58, 55, 53, 50, 47, 45,
+ 43, 40, 38, 36, 33, 31, 28, 24,
+ 21, 18, 16, 13, 10, 7, 4, 2,
+};
+UINT32 VP5_RTable[Q_TABLE_SIZE] =
+{
+ 48, 56, 64, 70, 78, 82, 86, 88,
+ 91, 92, 94, 94, 99,103,102,100,
+ 99, 97, 95, 93, 91, 89, 87, 85,
+ 83, 79, 77, 73, 71, 69, 67, 65,
+ 64, 62, 60, 58, 56, 54, 52, 50,
+ 48, 46, 44, 42, 40, 38, 36, 34,
+ 32, 30, 28, 26, 24, 22, 20, 18,
+ 16, 14, 12, 10, 8, 6, 4, 2,
+};
+UINT32 VP5_UvRTable[Q_TABLE_SIZE] =
+{
+ 48, 56, 64, 70, 78, 82, 86, 88,
+ 91, 92, 94, 94, 99,103,102,100,
+ 99, 97, 95, 93, 91, 89, 87, 85,
+ 83, 79, 77, 73, 71, 69, 67, 65,
+ 64, 62, 60, 58, 56, 54, 52, 50,
+ 48, 46, 44, 42, 40, 38, 36, 34,
+ 32, 30, 28, 26, 24, 22, 20, 18,
+ 16, 14, 12, 10, 8, 6, 4, 2,
+};
+
+// DC Quantizer tables
+Q_LIST_ENTRY VP5_DcQuant[ Q_TABLE_SIZE ] =
+{
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2,
+};
+Q_LIST_ENTRY VP5_UvDcQuant[ Q_TABLE_SIZE ] =
+{
+ 47, 47, 47, 47, 45, 43, 43, 43,
+ 43, 43, 42, 41, 41, 40, 40, 40,
+ 40, 35, 35, 35, 35, 33, 33, 33,
+ 33, 32, 32, 32, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 19, 19,
+ 19, 19, 18, 18, 17, 16, 16, 16,
+ 16, 16, 15, 11, 11, 11, 10, 10,
+ 9, 8, 7, 5, 3, 3, 2, 2,
+};
+// DC Zero Bin and Rounding Tables (include fdct normalisation)
+UINT32 VP5_DcZBinTable[Q_TABLE_SIZE] =
+{
+ 170,162,152,150,140,130,125,121,
+ 121,118,113,111,110,108,108,106,
+ 105,96, 93, 87, 86, 83, 83, 83,
+ 83, 78, 78, 78, 66, 66, 63, 63,
+ 61, 61, 58, 58, 56, 56, 46, 46,
+ 46, 46, 43, 43, 41, 38, 38, 38,
+ 38, 38, 35, 24, 24, 24, 23, 23,
+ 20, 19, 16, 13, 6, 6, 4, 4,
+};
+UINT32 VP5_UvDcZBinTable[Q_TABLE_SIZE] =
+{
+ 170,162,152,150,140,130,125,121,
+ 121,118,113,111,110,108,108,106,
+ 105,96, 93, 87, 86, 83, 83, 83,
+ 83, 78, 78, 78, 66, 66, 63, 63,
+ 61, 61, 58, 58, 56, 56, 46, 46,
+ 46, 46, 43, 43, 41, 38, 38, 38,
+ 38, 38, 35, 24, 24, 24, 23, 23,
+ 20, 19, 16, 13, 6, 6, 4, 4,
+};
+
+UINT32 VP5_DcRTable[Q_TABLE_SIZE] =
+{
+ 20, 28, 38, 40, 44, 46, 50, 50,
+ 51, 57, 59, 61, 62, 64, 66, 67,
+ 67, 62, 63, 64, 64, 62, 62, 62,
+ 62, 62, 62, 62, 54, 54, 52, 52,
+ 50, 50, 48, 48, 46, 46, 38, 38,
+ 38, 38, 36, 36, 34, 32, 32, 32,
+ 32, 32, 30, 22, 22, 22, 20, 20,
+ 18, 16, 14, 10, 6, 6, 4, 4,
+};
+UINT32 VP5_UvDcRTable[Q_TABLE_SIZE] =
+{
+ 20, 30, 38, 40, 44, 46, 50, 50,
+ 51, 57, 59, 61, 62, 64, 66, 67,
+ 67, 62, 63, 64, 64, 62, 62, 62,
+ 62, 62, 62, 62, 54, 54, 52, 52,
+ 50, 50, 48, 48, 46, 46, 38, 38,
+ 38, 38, 36, 36, 34, 32, 32, 32,
+ 32, 32, 30, 22, 22, 22, 20, 20,
+ 18, 16, 14, 10, 6, 6, 4, 4,
+};
+
+/* Inverse fast DCT index */
+/* This contains the offsets needed to convert zigzag order into */
+/* x, y order for decoding. It is generated from the input zigzag */
+/* index at run time. */
+
+/* For maximum speed during both quantisation and dequantisation */
+/* we maintain separate quantisation and zigzag tables for each */
+/* operation. */
+
+/* pbi->quant_index: the zigzag index used during quantisation */
+/* dequant_index: zigzag index used during dequantisation */
+/* the pbi->quant_index is the inverse of dequant_index */
+/* and is calculated during initialisation */
+
+static UINT32 dequant_index[64] =
+{ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static UINT32 transIndexC[64] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63
+};
+
+static UINT32 quant_indexC[64] =
+{
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_InitQTables
+ *
+ * INPUTS : pbi - quantizer whose QThreshTable is filled; Vp3VersionNo - not used here
+ *
+ * OUTPUTS : pbi->QThreshTable holds a copy of VP5_QThreshTable.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Initialises the instance's AC threshold table from the module-level table
+ *
+ * SPECIAL NOTES : The copy length is sizeof(pbi->QThreshTable).
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_InitQTables( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+ // Make version specific assignments (currently the same table for every version).
+ memcpy ( pbi->QThreshTable, VP5_QThreshTable, sizeof( pbi->QThreshTable ) );
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VP5_BuildQuantIndex_Generic
+ *
+ *  INPUTS        :     pbi - quantizer whose index tables are set up
+ *
+ *
+ *  OUTPUTS       :     pbi->transIndex and pbi->quant_index[]
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Points transIndex at the C transform index table and
+ *                      builds quant_index[] as the inverse of dequant_index[].
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi)
+{
+    INT32 zigzagPos;
+
+    pbi->transIndex = transIndexC;
+
+    // quant_index is the inverse permutation of dequant_index
+    for ( zigzagPos = 0; zigzagPos < BLOCK_SIZE; zigzagPos++ )
+    {
+        INT32 rasterPos = dequant_index[zigzagPos];
+
+        pbi->quant_index[rasterPos] = zigzagPos;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_UpdateQ
+ *
+ * INPUTS : QUANTIZER *pbi      : quantizer instance; pbi->FrameQIndex selects
+ *                                the quantizer level.
+ *          UINT8 Vp3VersionNo  : forwarded to VP5_init_dequantizer.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Decoder-side update. Unless the threshold for the current
+ *            FrameQIndex equals pbi->LastQuantizerValue, records the new
+ *            quantizer value, rebuilds the quant index and re-initialises
+ *            the dequantizer tables.
+ *
+ * SPECIAL NOTES : NOTE(review): the early-out test compares
+ *                 QThreshTable[FrameQIndex] but the value remembered is
+ *                 ThisFrameQuantizerValue; the check only works as a cache
+ *                 if the caller keeps those two equal - confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_UpdateQ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+    if ( pbi->QThreshTable[pbi->FrameQIndex] != pbi->LastQuantizerValue )
+    {
+        // Remember the quantizer value the tables are about to be built for.
+        pbi->LastQuantizerValue = pbi->ThisFrameQuantizerValue;
+
+        // The decoder's coefficient order differs from the compressor's,
+        // so the index is rebuilt through the VP5_BuildQuantIndex hook.
+        VP5_BuildQuantIndex(pbi);
+
+        // Refresh the dequantisation coefficients for the new level.
+        VP5_init_dequantizer ( pbi, Vp3VersionNo );
+    }
+}
+
+/********************* COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_UpdateQC (compressor's update q)
+ *
+ * INPUTS : QUANTIZER *pbi      : quantizer instance; pbi->FrameQIndex selects
+ *                                the quantizer level.
+ *          UINT8 Vp3VersionNo  : forwarded to the table initialisers.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Compressor-side update. Unless the threshold for the current
+ *            FrameQIndex equals pbi->LastQuantizerValue, records the new
+ *            quantizer value, rebuilds the quant index with the generic
+ *            builder and re-initialises both the forward quantizer and the
+ *            dequantizer tables.
+ *
+ * SPECIAL NOTES : NOTE(review): the early-out test compares
+ *                 QThreshTable[FrameQIndex] but the value remembered is
+ *                 ThisFrameQuantizerValue; the check only works as a cache
+ *                 if the caller keeps those two equal - confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_UpdateQC( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+    if ( pbi->QThreshTable[pbi->FrameQIndex] != pbi->LastQuantizerValue )
+    {
+        // Remember the quantizer value the tables are about to be built for.
+        pbi->LastQuantizerValue = pbi->ThisFrameQuantizerValue;
+
+        // The compressor always uses the generic (C) index ordering.
+        VP5_BuildQuantIndex_Generic(pbi);
+
+        // Forward tables first, then the matching inverse tables.
+        VP5_init_quantizer ( pbi, Vp3VersionNo );
+        VP5_init_dequantizer ( pbi, Vp3VersionNo );
+    }
+}
+
+/****************************************************************************
+*
+*   Routine:    VP5_init_quantizer
+*
+*   Purpose:    Fills the forward quantizer tables (QuantCoeffs, QuantRound,
+*               ZeroBinSize) for the Y plane (index 0) and the UV planes
+*               (index 1) from the VP5_* tables, using the row selected by
+*               pbi->FrameQIndex, then seeds the 8-entry round/mult/zbin
+*               arrays from the Y AC values.
+*
+*   Parameters :
+*       Input :
+*           QUANTIZER *pbi
+*               Quantizer instance to initialise.
+*
+*           UINT8 Vp3VersionNo ::
+*               Not read by this routine.
+*   Return value :
+*       None.
+*
+****************************************************************************
+*/
+#define SHIFT16 (1<<16)
+void VP5_init_quantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+    int i;                  // coefficient / lane counter
+    double reciprocal;      // 1 / (4 * quantizer step)
+    INT32 acMultiplier;     // AC multiplier shared by coefficients 1..63 of a plane
+
+    // Each quantizer step is multiplied by 4 (forward DCT normalisation),
+    // inverted, and stored scaled by SHIFT16 with 0.5 added before truncation.
+
+    // ******************* Y : DC *********************
+    reciprocal = ( VP5_DcQuant[pbi->FrameQIndex] * 4 );
+    reciprocal = 1.0 / reciprocal;
+    pbi->QuantCoeffs[0][0] = (INT32) (0.5 + SHIFT16 * reciprocal);
+    pbi->QuantRound[0][0]  = VP5_DcRTable[pbi->FrameQIndex];
+    pbi->ZeroBinSize[0][0] = VP5_DcZBinTable[pbi->FrameQIndex];
+
+    // ******************* Y : AC *********************
+    // All 63 AC coefficients share one step, so the multiplier is computed once.
+    reciprocal = (double)(VP5_QThreshTable[pbi->FrameQIndex] * 4);
+    reciprocal = 1.0 / reciprocal;
+    acMultiplier = (INT32) (0.5 + SHIFT16 * reciprocal);
+
+    for ( i = 1; i < 64; i++ )
+    {
+        pbi->QuantCoeffs[0][i] = acMultiplier;
+        pbi->QuantRound[0][i]  = VP5_RTable[pbi->FrameQIndex];
+        pbi->ZeroBinSize[0][i] = VP5_ZBinTable[pbi->FrameQIndex];
+    }
+
+    // ******************* UV : DC *********************
+    reciprocal = ( VP5_UvDcQuant[pbi->FrameQIndex] * 4 );
+    reciprocal = 1.0 / reciprocal;
+    pbi->QuantCoeffs[1][0] = (INT32) (0.5 + SHIFT16 * reciprocal);
+    pbi->QuantRound[1][0]  = VP5_UvDcRTable[pbi->FrameQIndex];
+    pbi->ZeroBinSize[1][0] = VP5_UvDcZBinTable[pbi->FrameQIndex];
+
+    // ******************* UV : AC *********************
+    reciprocal = (double)(VP5_UvQThreshTable[pbi->FrameQIndex] * 4);
+    reciprocal = 1.0 / reciprocal;
+    acMultiplier = (INT32) (0.5 + SHIFT16 * reciprocal);
+
+    for ( i = 1; i < 64; i++ )
+    {
+        pbi->QuantCoeffs[1][i] = acMultiplier;
+        pbi->QuantRound[1][i]  = VP5_UvRTable[pbi->FrameQIndex];
+        pbi->ZeroBinSize[1][i] = VP5_UvZBinTable[pbi->FrameQIndex];
+    }
+
+    // Replicate the Y AC parameters into the 8-entry arrays; the zero bin is
+    // stored one smaller than the table value.
+    for ( i = 0; i < 8; i++ )
+    {
+        pbi->round[i] = pbi->QuantRound[0][1];
+        pbi->mult[i]  = pbi->QuantCoeffs[0][1];
+        pbi->zbin[i]  = pbi->ZeroBinSize[0][1]-1;
+    }
+}
+
+
+/***************************************************************************
+*
+*   Routine:    VP5_quantize_c
+*
+*   Purpose:    Quantizes an 8x8 block of DCT coefficients. Each retained
+*               coefficient has a rounding value added (or subtracted when
+*               negative), is multiplied by the reciprocal step held in
+*               QuantCoeffs and is shifted right by 16. Output is a list
+*               of values in zig-zag order.
+*
+*   Parameters :
+*       Input :
+*           pbi            -- Quantizer instance holding the tables
+*           DCT_block      -- The 64 coefficients to be quantized (raster order)
+*           bp             -- Block position; QTableSelect[bp] picks the table set
+*       Output :
+*           quantized_list -- The 64 quantized values in zig-zag order
+*
+*   Return value :
+*       None.
+*
+*   Persistent data referenced :
+*       dequant_index       Module static array     read
+*       QTableSelect        Defined elsewhere       read
+*
+*   Notes :
+*       The DC coefficient is thresholded against its rounding value rather
+*       than its zero bin (zero binning is disabled for DC). AC coefficients
+*       whose magnitude is below the zero bin stay 0.
+*       MIN16 (defined elsewhere) is added before shifting negative products;
+*       presumably this makes the shift round toward zero - confirm against
+*       its definition.
+*
+****************************************************************************
+*/
+#define HIGHBITDUPPED(X) (((signed short) X) >> 15)
+void VP5_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+    UINT32 i, j;
+    INT32 temp;
+
+    // Table set for this block position.
+    INT32 * QuantRoundPtr  = pbi->QuantRound[QTableSelect[bp]];
+    INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+    INT32 * ZBinPtr        = pbi->ZeroBinSize[QTableSelect[bp]];
+
+    // Anything that falls inside a dead zone below is left at 0.
+    memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
+
+    // DC quantization: the rounding value doubles as the dead-zone threshold.
+    temp = 0;
+    if ( DCT_block[0] >= QuantRoundPtr[0] )
+        temp = QuantCoeffsPtr[0] * ( DCT_block[0] + QuantRoundPtr[0] ) ;
+    else if ( DCT_block[0] <= -QuantRoundPtr[0] )
+        temp = QuantCoeffsPtr[0] * ( DCT_block[0] - QuantRoundPtr[0] ) + MIN16;
+    quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+    // AC quantization, walking the output in zig-zag order.
+    for ( i = 1; i < 64; i++ )
+    {
+        // Raster position of the i-th coefficient in scan order.
+        j = dequant_index[i];
+
+        if ( DCT_block[j] >= ZBinPtr[j] )
+        {
+            temp = QuantCoeffsPtr[j] * ( DCT_block[j] + QuantRoundPtr[j] ) ;
+            quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+        }
+        else if ( DCT_block[j] <= -ZBinPtr[j] )
+        {
+            temp = QuantCoeffsPtr[j] * ( DCT_block[j] - QuantRoundPtr[j] ) + MIN16;
+            quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+        }
+    }
+}
+/**************************** END COMPRESSOR SPECIFIC **********************************/
+/***************************************************************************************
+* Dequantiser code for decode loop
+/***************************************************************************************/
+
+/****************************************************************************
+*
+*   Routine:    VP5_init_dequantizer
+*
+*   Purpose:    Fills pbi->dequant_coeffs for the Y plane (index 0) and the
+*               UV planes (index 1) from the VP5_* tables, using the row
+*               selected by pbi->FrameQIndex. Every value is shifted left by
+*               IDCT_SCALE_FACTOR before it is stored.
+*
+*   Parameters :
+*       Input :
+*           QUANTIZER *pbi
+*               Quantizer instance; quant_index must already be built and
+*               the dequant_coeffs buffers allocated.
+*
+*           UINT8 Vp3VersionNo ::
+*               Not read by this routine.
+*   Return value :
+*       None.
+*
+****************************************************************************
+*/
+
+void VP5_init_dequantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+    int scanPos;
+
+    // AC entries: scan positions 1..63 are routed through quant_index so the
+    // values land in the coefficient order the decoder works in.
+    for ( scanPos = 1; scanPos < 64; scanPos++ )
+    {
+        int slot = pbi->quant_index[scanPos];
+
+        pbi->dequant_coeffs[0][slot] = VP5_QThreshTable[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+        pbi->dequant_coeffs[1][slot] = VP5_UvQThreshTable[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+    }
+
+    // DC entries are written last and always occupy slot 0.
+    pbi->dequant_coeffs[0][0] = VP5_DcQuant[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+    pbi->dequant_coeffs[1][0] = VP5_UvDcQuant[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+}
+
+/****************************************************************************/
+/* */
+/* Select Quantisation Parameters */
+/* */
+/* void select_Y_dequantiser ( void ) */
+/* sets dequantiser to use for intra Y */
+/* */
+/* void select_Inter_dequantiser ( void ) */
+/* sets dequantiser to use for inter Y */
+/* */
+/* void select_UV_dequantiser ( void ) */
+/* sets dequantiser to use UV compression constants */
+/* */
+/****************************************************************************/
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DeleteQuantizerBuffers
+ *
+ * INPUTS : QUANTIZER *pbi : quantizer whose coefficient buffers are released.
+ *
+ * OUTPUTS : dequant_coeffsAlloc[] and dequant_coeffs[] are reset to 0.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Frees both dequantizer coefficient allocations (if present)
+ *            and clears the raw and aligned pointers, so the routine can be
+ *            called again safely.
+ *
+ * SPECIAL NOTES : None.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+static void DeleteQuantizerBuffers(QUANTIZER *pbi)
+{
+    int plane;
+
+    // Plane 0 is Y, plane 1 is UV.
+    for ( plane = 0; plane < 2; plane++ )
+    {
+        if ( pbi->dequant_coeffsAlloc[plane] )
+            duck_free(pbi->dequant_coeffsAlloc[plane]);
+
+        pbi->dequant_coeffsAlloc[plane] = 0;
+        pbi->dequant_coeffs[plane] = 0;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AllocateQuantizerBuffers
+ *
+ * INPUTS : QUANTIZER *pbi : quantizer that receives the buffers.
+ *
+ * OUTPUTS : pbi->dequant_coeffsAlloc[0..1] hold the raw allocations and
+ *           pbi->dequant_coeffs[0..1] point at the first 32-byte aligned
+ *           address inside them.
+ *
+ * RETURNS : TRUE on success, FALSE if an allocation failed (in which case
+ *           everything allocated so far has been released again).
+ *
+ * FUNCTION : Releases any existing buffers, then allocates one buffer of
+ *            64 INT16 coefficients (plus 32 bytes of alignment slack) for
+ *            each of the two planes.
+ *
+ * SPECIAL NOTES : Alignment is done with pointer arithmetic. The ROUNDUP32
+ *                 macro casts the pointer to unsigned long and masks it with
+ *                 a 32-bit constant, which discards the upper address bits
+ *                 on 64-bit targets; it is kept only so the macro name stays
+ *                 defined.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) )
+
+// Returns the first 32-byte aligned address at or after raw.
+static INT16 *AlignCoeffBuffer32( INT16 *raw )
+{
+    unsigned char *bytes = (unsigned char *) raw;
+
+    // Only the low five address bits are needed, so a narrowing cast is harmless.
+    unsigned long misalign = ( (unsigned long) bytes ) & 31;
+
+    return (INT16 *) ( bytes + ( ( 32 - misalign ) & 31 ) );
+}
+
+static INT32 AllocateQuantizerBuffers(QUANTIZER *pbi)
+{
+    int plane;
+
+    // Start from a clean state so repeated calls do not leak.
+    DeleteQuantizerBuffers(pbi);
+
+    for ( plane = 0; plane < 2; plane++ )
+    {
+        pbi->dequant_coeffsAlloc[plane] = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+        if ( !pbi->dequant_coeffsAlloc[plane] )
+        {
+            DeleteQuantizerBuffers(pbi);
+            return FALSE;
+        }
+
+        pbi->dequant_coeffs[plane] = AlignCoeffBuffer32(pbi->dequant_coeffsAlloc[plane]);
+    }
+
+    return TRUE;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_DeleteQuantizer
+ *
+ * INPUTS : QUANTIZER **pbi : address of the quantizer pointer to destroy.
+ *
+ * OUTPUTS : *pbi is set to 0.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Frees the quantizer's coefficient buffers and then the
+ *            QUANTIZER structure itself. Does nothing if *pbi is already 0.
+ *
+ * SPECIAL NOTES : None.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void VP5_DeleteQuantizer(QUANTIZER **pbi)
+{
+    // Nothing to release.
+    if ( !*pbi )
+        return;
+
+    // Owned buffers go first, then the structure that tracks them.
+    DeleteQuantizerBuffers(*pbi);
+    duck_free(*pbi);
+    *pbi=0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_CreateQuantizer
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Pointer to a new zero-initialised QUANTIZER with its coefficient
+ *           buffers allocated, or 0 if any allocation failed.
+ *
+ * FUNCTION : Allocates and clears a QUANTIZER, then allocates its
+ *            dequantizer coefficient buffers. A partially built instance is
+ *            destroyed before returning 0.
+ *
+ * SPECIAL NOTES : Release the result with VP5_DeleteQuantizer.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+QUANTIZER * VP5_CreateQuantizer(void)
+{
+    int quantizer_size = sizeof(QUANTIZER);
+    QUANTIZER *quantizer = (QUANTIZER *) duck_malloc(quantizer_size, DMEM_GENERAL);
+
+    if ( quantizer )
+    {
+        // Start with every field, including the buffer pointers, at 0.
+        memset((unsigned char *) quantizer, 0, sizeof(QUANTIZER));
+
+        // On failure the delete call also resets quantizer to 0.
+        if ( !AllocateQuantizerBuffers(quantizer) )
+            VP5_DeleteQuantizer(&quantizer);
+    }
+
+    return quantizer;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c
new file mode 100644
index 00000000..c863af9f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c
@@ -0,0 +1,338 @@
+/****************************************************************************
+*
+* Module Title : recon.c
+*
+* Description : reconstruction code
+*
+* AUTHOR : jimb b
+*
+*****************************************************************************
+* Revision History
+*
+* 1.19 JBB 18 Mar 01 Reorganized code created this file
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#include "pbdll.h"
+#include "codec_common_interface.h"
+#include <string.h>
+
+/****************************************************************************
+* Explicit imports
+*****************************************************************************
+*/
+extern void AverageBlockBicubic_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void NewAverageBlock( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void UvAverageBlock( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT8 ModX, INT8 ModY );
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+#define MIN(a, b) ( ( (a) < (b) ) ? (a) : (b) ) /* arguments parenthesised; each is still evaluated more than once */
+#define Mod8(a) ( ((a) & 7)) /* low three bits of a */
+
+/****************************************************************************
+ *
+ * ROUTINE : PredictFiltered
+ *
+ * INPUTS : PB_INSTANCE *pbi : decoder instance (supplies mbi strides,
+ *                             LoopFilteredBlock and quantizer->FrameQIndex).
+ *          UINT8 *SrcPtr    : position of the current block in the reference
+ *                             frame.
+ *          INT32 mx, my     : motion vector components.
+ *          INT32 MvShift    : right shift that converts a vector component
+ *                             to whole pixels.
+ *
+ * OUTPUTS : pbi->LoopFilteredBlock receives a 12x12 region (stride 16).
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies the 12x12 region around the motion-compensated block
+ *            (the 8x8 block plus a 2 pixel border on every side) into the
+ *            temporary buffer. If the region straddles an 8x8 block
+ *            boundary of the reference frame, the loop filter is applied
+ *            across that boundary inside the temporary buffer.
+ *
+ * SPECIAL NOTES : The source offset is signed: vectors pointing up or left
+ *                 give a negative offset, which must not be carried in an
+ *                 unsigned 32-bit value when pointers are 64 bits wide.
+ *                 NOTE(review): the vector offset uses FrameReconStride
+ *                 while the border and copy use CurrentReconStride - confirm
+ *                 that this is intended.
+ *
+ * ERRORS : None.
+ *
+ *****************************************************************************/
+void PredictFiltered
+(
+    PB_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    INT32 mx,
+    INT32 my,
+    INT32 MvShift
+)
+{
+    INT32 BoundaryX, BoundaryY;
+    INT32 mVx, mVy;
+    INT32 ReconIndex;
+    MACROBLOCK_INFO *mbi=&pbi->mbi;
+    UINT8 *TempBuffer = pbi->LoopFilteredBlock;
+
+    // Whole-pixel part of the motion vector, truncated toward zero.
+    if(mx > 0 )
+        mVx = (mx >> MvShift);
+    else
+        mVx = -((-mx) >> MvShift);
+
+    if(my > 0 )
+        mVy = (my >> MvShift);
+    else
+        mVy = -((-my) >> MvShift);
+
+    // Offset in the reference frame matching the motion vector.
+    ReconIndex = (INT32) mbi->FrameReconStride * mVy + mVx;
+
+    // Step back 2 rows and 2 columns for the border (loop filter and half pixel moves).
+    ReconIndex -= 2 * (INT32) mbi->CurrentReconStride;
+    ReconIndex -= 2;
+
+    // Copy the 12x12 region starting at that offset into the temp buffer.
+    Copy12x12( SrcPtr + ReconIndex, TempBuffer, mbi->CurrentReconStride, 16);
+
+    // Distance from the block origin to the next 8-pixel boundary (0 = aligned).
+    BoundaryX = (8 - Mod8(mVx))&7;
+    BoundaryY = (8 - Mod8(mVy))&7;
+
+    // Apply the loop filter at the boundary selected in x.
+    if(BoundaryX)
+        FilteringHoriz_12(
+            pbi ->quantizer->FrameQIndex,
+            TempBuffer + 2 + BoundaryX,
+            16);
+
+    // Apply the loop filter at the boundary selected in y.
+    if(BoundaryY)
+        FilteringVert_12(
+            pbi->quantizer->FrameQIndex,
+            TempBuffer + 2 * 16 + BoundaryY * 16,
+            16);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PredictFilteredBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi   : decoder instance.
+ *          INT16 *OutputPtr   : receives the 8x8 prediction.
+ *          BLOCK_POSITION bp  : block whose motion vector (pbi->mbi.Mv[bp])
+ *                               is used.
+ *
+ * OUTPUTS : OutputPtr is filled; pbi->LoopFilteredBlock is overwritten.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Builds the motion prediction for one 8x8 block. The reference
+ *            (golden or last frame, chosen through VP5_Mode2Frame) is
+ *            fetched and boundary-filtered by PredictFiltered. If the
+ *            vector has a fractional part in x and/or y, the block at the
+ *            whole-pixel position is averaged with the block one step away
+ *            in the direction of the vector; otherwise the whole-pixel
+ *            block is unpacked directly.
+ *
+ * SPECIAL NOTES : Offset 2*16+2 is the block origin inside the bordered
+ *                 temporary buffer (2 rows and 2 columns in, stride 16).
+ *                 Blocks with bp >= 4 currently use AverageBlock; the
+ *                 UV_AVERAGE_ROUTINE call is disabled.
+ *
+ * ERRORS : None.
+ *
+ *****************************************************************************/
+#define AVERAGE_ROUTINE AverageBlock
+//#define AVERAGE_ROUTINE AverageBlockBicubic_C
+//#define AVERAGE_ROUTINE NewAverageBlock
+
+//#define UV_AVERAGE_ROUTINE AverageBlock
+#define UV_AVERAGE_ROUTINE UvAverageBlock
+
+void PredictFilteredBlock
+(
+    PB_INSTANCE *pbi,
+    INT16* OutputPtr,
+    BLOCK_POSITION bp
+)
+{
+    MACROBLOCK_INFO *mbi=&pbi->mbi;
+    UINT8 *TempBuffer = pbi->LoopFilteredBlock;
+    UINT8 *SrcPtr;
+
+    INT32 MvX = pbi->mbi.Mv[bp].x;
+    INT32 MvY = pbi->mbi.Mv[bp].y;
+
+    UINT32 WholePelOffset = 2*16+2;         // block origin inside the temp buffer
+    UINT32 SteppedOffset = WholePelOffset;  // origin moved by the fractional step
+
+    // Which reference buffer are we predicting from?
+    if ( VP5_Mode2Frame[pbi->mbi.Mode] == 2 )
+        SrcPtr = pbi->GoldenFrame;
+    else
+        SrcPtr = pbi->LastFrameRecon;
+
+    PredictFiltered( pbi, SrcPtr+mbi->Recon, MvX, MvY, pbi->mbi.MvShift) ;
+
+    // Fractional motion in x: step one column in the direction of the vector.
+    if ( MvX & pbi->mbi.MvModMask )
+    {
+        if ( MvX > 0 )
+            SteppedOffset += 1;
+        else
+            SteppedOffset -= 1;
+    }
+
+    // Fractional motion in y: step one row (16 bytes) in the direction of the vector.
+    if ( MvY & pbi->mbi.MvModMask )
+    {
+        if ( MvY > 0 )
+            SteppedOffset += 16;
+        else
+            SteppedOffset -= 16;
+    }
+
+    // Whole-pixel vector: the filtered block is the prediction as it stands.
+    if ( SteppedOffset == WholePelOffset )
+    {
+        UnpackBlock(&TempBuffer[WholePelOffset], OutputPtr, 16);
+        return;
+    }
+
+    // Fractional vector: average the two candidate blocks.
+    if ( bp < 4 )
+    {
+        AVERAGE_ROUTINE(&TempBuffer[WholePelOffset], &TempBuffer[SteppedOffset], (unsigned short *)OutputPtr, 16);
+    }
+    else
+    {
+        AverageBlock(&TempBuffer[WholePelOffset], &TempBuffer[SteppedOffset], (unsigned short *)OutputPtr, 16);
+    }
+}
+
+#ifndef RECONSTRUCTMBATONCE
+/****************************************************************************
+ *
+ * ROUTINE : ReconstructBlock
+ *
+ * INPUTS : PB_INSTANCE *pbi  : decoder instance; pbi->mbi.Mode selects the
+ *                              reconstruction path.
+ *          BLOCK_POSITION bp : block being reconstructed (used for the
+ *                              motion-compensated path).
+ *
+ * OUTPUTS : The block at pbi->ThisFrameRecon[pbi->mbi.Recon] is written.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Reconstructs one block into the current frame. The path is
+ *            chosen in this order:
+ *              CODE_INTER_NO_MV    - ReconInter against the last frame
+ *              VP5_ModeUsesMC[Mode] - PredictFilteredBlock + ReconBlock
+ *              CODE_USING_GOLDEN   - ReconInter against the golden frame
+ *              anything else       - ReconIntra
+ *
+ * Motion vectors and modes are assumed to be defined at the MB level.
+ *
+ * SPECIAL NOTES : None.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ReconstructBlock
+(
+    PB_INSTANCE *pbi,
+    BLOCK_POSITION bp
+)
+{
+    // Destination of every path: this block in the frame being rebuilt.
+    UINT8 *ThisRecon = (UINT8 *)&pbi->ThisFrameRecon[pbi->mbi.Recon];
+
+    // Inter with no motion vector: predict from the same spot in the last frame.
+    if ( pbi->mbi.Mode == CODE_INTER_NO_MV )
+    {
+        ReconInter( pbi->TmpDataBuffer, ThisRecon,
+                    (UINT8 *)&pbi->LastFrameRecon[pbi->mbi.Recon],
+                    pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride);
+        return;
+    }
+
+    // Modes with a motion vector: build the filtered prediction, then add the residual.
+    if ( VP5_ModeUsesMC[pbi->mbi.Mode] )
+    {
+        // For the compressor this was done already ( possible optimization).
+        PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+        ReconBlock(
+            pbi->TmpDataBuffer,
+            pbi->ReconDataBuffer,
+            ThisRecon,
+            pbi->mbi.CurrentReconStride );
+        return;
+    }
+
+    // Golden frame without motion compensation: predict from the same spot there.
+    if ( pbi->mbi.Mode == CODE_USING_GOLDEN )
+    {
+        ReconInter( pbi->TmpDataBuffer, ThisRecon,
+                    (UINT8 *)&pbi->GoldenFrame[ pbi->mbi.Recon ],
+                    pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+        return;
+    }
+
+    // Simple intra coding: no reference frame involved.
+    ReconIntra( pbi->TmpDataBuffer, ThisRecon, (UINT16 *)pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+}
+#endif
+
+/************************************************************************** *
+ * ROUTINE : CopyBlockC
+ *
+ * INPUTS : unsigned char *src     : top-left byte of the source block.
+ *          unsigned char *dest    : top-left byte of the destination block.
+ *          unsigned int srcstride : distance in bytes between rows; applied
+ *                                   to BOTH src and dest.
+ *
+ * OUTPUTS : 8 rows of 8 bytes are written starting at dest.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies an 8x8 block of bytes from source to destination.
+ *
+ * SPECIAL NOTES : Rows are copied with memcpy instead of through UINT32
+ *                 pointer casts, so src and dest need not be 4-byte aligned.
+ *                 The source and destination rows must not overlap.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void CopyBlockC(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+    int row;
+
+    for ( row = 0; row < 8; row++ )
+    {
+        // One row of the block is 8 bytes wide.
+        memcpy( dest, src, 8 );
+
+        src += srcstride;
+        dest += srcstride;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c
new file mode 100644
index 00000000..e18b8e93
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c
@@ -0,0 +1,750 @@
+/****************************************************************************
+*
+* Module Title : vfwpbdll_if.c
+*
+* Description : Video codec demo playback dll interface
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.29 YWX 17/dec/02 Added support of deinterlace
+* 1.28 YWX 05/08/02 Changed postprocess level setup for interlaced material
+* 1.27 AWG 20 Jun 01 Added code to overlay Motion Vectors onto display
+* 1.26 JBB 13 Jun 01 VP4 Code Clean Out
+* 1.25 YWX 26-Apr-01 Removed call of SetPbParam() in StartDecoder()
+* And set CPUFree as 70 when PostProcessingLevel=9
+* 1.24 JBB 25-apr-01 clear sysstate added at end of frame blit
+* 1.23 JBB 06-Apr-01 CPU Free variable respond
+* 1.22 SJL 22-Mar-01 Fixed MAC compile errors
+* 1.21 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
+* 1.20 SJL 01 Dec 00 Fixed MAC compile errors
+* 1.19 JBB 30 Nov 00 Version number changes
+* 1.18 JBB 14 Nov 00 Added version information function and pragma and cleaned
+* out unused code
+* 1.17 JBB 17-oct-00 Ifdefs around version information
+* 1.16 SJL 25 Aug 00 Fixed Mac compile error
+* 1.15 JBB 25 Aug 00 Better versioning
+* 1.14 JBB 22 Aug 00 Ansi C conversion
+* 1.13 SJL 14 Aug00 Moved SetPbParam into another file for the MAC
+* 1.12 YWX 2 Aug00 Changed Postprocessing level initialization
+* 1.11 JBB 31Jul00 Changed requirements for postprocessing due to new
+* optimiztions
+* 1.10 JBB 27Jul00 Added malloc checks
+* 1.09 YWX 15/05/00 Check Processor and Frame size to enable/disable
+* postprocessor
+* 1.08 YWX 08/05/00 Added #if defined directives for postprocess
+* 1.07 JBB 05/05/00 Added PostProcessing Parameter
+* 1.06 JBB 27/01/99 Globals Removed, use of PB_INSTANCE, must be created
+* 1.05 PGW 05/11/99 Changes to support AC range entropy tables and to output
+* the appropriate stats to tune them.
+* 1.04 PGW 01/09/99 Modified to simulate Tim's DxReference interface.
+* 1.03 PGW 30/07/99 Added exception handlers and some code to try and insure
+* decoder is initialised before any frames are decoded.
+* 1.02 PGW 09/07/99 Added code to support profile timing
+* 1.01 PGW 29/06/99 Changes in DecodeFrame() to handle inversion of DIB when
+* requested plus offsets into and pitch of the output image
+* buffer.
+* 1.00 PGW 28/06/99 New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+#include <stdio.h>
+
+#ifndef _MSC_VER
+
+#define __try
+
+#endif
+
+#include "huffman.h"
+#include "pbdll.h"
+#include <math.h>
+#include "vp50dversion.h"
+#include "decodemode.h"
+#include "postproc_if.h"
+
+#ifndef MAPCA
+ #define CommentString "\nON2.COM VERSION VP50D " VP50DVERSION "\n"
+ #pragma comment(exestr,CommentString)
+#endif
+/****************************************************************************
+ * Explicit Imports
+ *****************************************************************************
+ */
+
+extern void DecodeFrameMbs(PB_INSTANCE *pbi);
+extern unsigned int CPUFrequency;
+
+/****************************************************************************
+* Module statics.
+*****************************************************************************
+*/
+
+
+#ifdef PBSTATS1
+INT32 TotQ = 0;
+INT32 PBFrameNumber = 0;
+#endif
+static const char vp31dVersion[] = VP50DVERSION;
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+#if defined(_MSC_VER)
+#if defined(POSTPROCESS)
+static const unsigned long PP_MACHINE_LOWLIMIT = 350; //Lowest CPU (MHz) to enable PostProcess
+static const unsigned long PP_MACHINE_MIDLIMIT = 400; //Lowest CPU (MHz) to enable PostProcess
+static const unsigned long PP_MACHINE_TOPLIMIT = 590; //Lowest CPU (MHz) to enable PostProcess
+#endif
+#endif
+
+extern void VP5_InitialiseConfiguration(PB_INSTANCE *pbi);
+#ifdef PBSTATS1
+// TEMP diagnostic variables
+INT32 TotBlocksCoded;
+#endif
+
+
+/****************************************************************************
+* Forward references
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ * ROUTINE : VP50D_GetVersionNumber
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : Pointer to the module's static version string (initialised
+ *           from VP50DVERSION). The caller must not modify or free it.
+ *
+ * FUNCTION : Returns a pointer to the version string
+ *
+ * SPECIAL NOTES : The backing array is still named vp31dVersion.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+const char * CCONV VP50D_GetVersionNumber(void)
+{
+ return vp31dVersion;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_StartDecoder
+ *
+ * INPUTS : PB_INSTANCE **pbi  : receives the newly created playback instance.
+ *          UINT32 ImageWidth  : frame width.
+ *          UINT32 ImageHeight : frame height.
+ *
+ * OUTPUTS : *pbi points at the new instance on success.
+ *
+ * RETURNS : TRUE if succeeds else FALSE.
+ *
+ * FUNCTION : Creates a playback instance, records the frame dimensions,
+ *            creates the post processor (non-MAPCA builds) and quantizer,
+ *            initialises the frame details and the configuration.
+ *
+ * SPECIAL NOTES : Failure to create the playback instance or the quantizer
+ *                 is reported as FALSE instead of being dereferenced.
+ *                 NOTE(review): cleanup relies on VP5_DeletePBInstance
+ *                 coping with a partially initialised instance, as the
+ *                 existing VP5_InitFrameDetails failure path already does.
+ *                 On MSVC builds any structured exception raised inside is
+ *                 trapped, passed to VP5_ErrorTrap and reported as FALSE.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+BOOL CCONV VP5_StartDecoder( PB_INSTANCE **pbi, UINT32 ImageWidth, UINT32 ImageHeight )
+{
+    __try
+    {
+        // set up our structure holding all formerly global information about a playback instance
+        *pbi = VP5_CreatePBInstance();
+        if ( !*pbi )
+            return FALSE;
+
+        // Scaled and output sizes start out equal to the image size.
+        (*pbi)->ScaleWidth = ImageWidth;
+        (*pbi)->ScaleHeight = ImageHeight;
+        (*pbi)->OutputWidth = ImageWidth;
+        (*pbi)->OutputHeight = ImageHeight;
+        (*pbi)->OutputStride = ImageWidth + 32;
+
+        // Frame size used by the configuration (and by the post processor below).
+        (*pbi)->Configuration.VideoFrameWidth = ImageWidth;
+        (*pbi)->Configuration.VideoFrameHeight = ImageHeight;
+
+#ifndef MAPCA
+        (*pbi)->postproc = CreatePostProcInstance(&(*pbi)->Configuration);
+#endif
+        (*pbi)->quantizer = VP5_CreateQuantizer();
+        if ( !(*pbi)->quantizer )
+        {
+            // The quantizer is dereferenced below; give up cleanly instead.
+            VP5_DeletePBInstance(pbi);
+            return FALSE;
+        }
+
+        (*pbi)->ProcessorFrequency = CPUFrequency;
+        (*pbi)->DeInterlaceMode = 1;
+
+        // Fills in fragment counts as well
+        if(!VP5_InitFrameDetails(*pbi) )
+        {
+            VP5_DeletePBInstance(pbi);
+            return FALSE;
+        }
+
+        /* Set LastQuantizerValue to an illegal value to make sure the
+         * Q tables are initialised for the new video sequence.
+         */
+        (*pbi)->quantizer->LastQuantizerValue = -1;
+
+        // Set up various configuration parameters.
+        VP5_InitialiseConfiguration(*pbi);
+
+        #ifdef MAPCA
+        InitDMAWriteReconDS(*pbi);
+        InitDMAReadReferenceDS(*pbi);
+        #endif
+
+        return TRUE;
+    }
+
+#if defined(_MSC_VER)
+    __except( TRUE )
+    {
+        VP5_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+
+}
+/****************************************************************************
+ *
+ * ROUTINE : VP5_GetPbParam
+ *
+ * INPUTS : PB_INSTANCE *pbi        : decoder instance to query.
+ *          PB_COMMAND_TYPE Command : which parameter to read.
+ *          UINT32 *Parameter       : receives the value.
+ *
+ * OUTPUTS : *Parameter is written only for a recognised command.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Generalised query interface to the decoder. When POSTPROCESS
+ *            is defined, PBC_SET_POSTPROC returns the current
+ *            pbi->PostProcessingLevel; every other command is ignored.
+ *
+ * SPECIAL NOTES : None.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CCONV VP5_GetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 *Parameter )
+{
+#if defined(POSTPROCESS)
+    // The post-processing level is the only value that can be read back.
+    if ( Command == PBC_SET_POSTPROC )
+        *Parameter = pbi->PostProcessingLevel;
+#endif
+}
+
+
+#define CRITICALWATERMARK (int) (31000 * pbi->CPUFree / 100)
+#define DOWNWATERMARK (int) (30000 * pbi->CPUFree / 100)
+#define UPWATERMARK (int) (28000 * pbi->CPUFree / 100)
+/*
+ * Chooses a post-processing level from the measured decode, blit and
+ * post-process times.
+ *
+ * Returns the current level when pbi->CPUFree is 0 or when this frame's time
+ * lies strictly between UPWATERMARK and DOWNWATERMARK. Otherwise it returns
+ * the first of levels 6, 5, 4, 8 whose (estimated) cost fits the budget, or
+ * 0 when none fits.
+ *
+ * Side effects: zero entries of pbi->avgPPTime[4], [5], [6] and [8] are
+ * replaced by estimates derived from the average decode + blit time, and
+ * when the frame is over the critical watermark pbi->avgDecodeTime is reset
+ * to pbi->thisDecodeTime.
+ *
+ * NOTE(review): minimumTime and thisTime are sampled before the estimates
+ * are filled in, so they can still see a zero avgPPTime[8].
+ * NOTE(review): avgPPTime is indexed by PostProcessingLevel; confirm the
+ * array is large enough for every level a caller can set.
+ */
+int PickPostProcessingLevel(PB_INSTANCE *pbi)
+{
+    // Levels tried in this order, most expensive first as estimated below.
+    static const int candidates[4] = { 6, 5, 4, 8 };
+
+    int minimumTime = pbi->thisDecodeTime + pbi->avgBlitTime + pbi->avgPPTime[8];
+    int thisTime = minimumTime + pbi->avgPPTime[pbi->PostProcessingLevel];
+    int avgTime = pbi->avgDecodeTime + pbi->avgBlitTime;
+    int baseTime;   // time spent before the post processor is added
+    int budget;     // watermark the total has to stay under
+    int k;
+
+    // Estimate the cost of any post processor that has not been timed yet.
+    if(pbi->avgPPTime[6]==0)
+        pbi->avgPPTime[6] = avgTime>>1;
+
+    if(pbi->avgPPTime[5]==0)
+        pbi->avgPPTime[5] = avgTime>>1;
+
+    if(pbi->avgPPTime[4]==0)
+        pbi->avgPPTime[4] = (avgTime ) >> 2;
+
+    if(pbi->avgPPTime[8]==0)
+        pbi->avgPPTime[8] = avgTime>>3;
+
+    // No CPU budget configured: leave the level alone.
+    if(pbi->CPUFree == 0 )
+        return pbi->PostProcessingLevel;
+
+    if(thisTime > (int) (CRITICALWATERMARK))
+    {
+        // This frame is taking too long; it is better to show an ugly frame
+        // than none at all. Base the search on this frame's own decode time
+        // so time can be made up on the subsequent frames.
+        pbi->avgDecodeTime = pbi->thisDecodeTime;
+
+        baseTime = minimumTime;
+        budget = CRITICALWATERMARK;
+    }
+    else if(thisTime < DOWNWATERMARK && thisTime > UPWATERMARK)
+    {
+        // Inside the hold band: keep the current level.
+        return pbi->PostProcessingLevel;
+    }
+    else
+    {
+        // Outside the band but not critical: search against the lower
+        // watermark using the average times.
+        baseTime = avgTime;
+        budget = UPWATERMARK;
+    }
+
+    // First candidate that fits wins.
+    for ( k = 0; k < 4; k++ )
+    {
+        if(pbi->avgPPTime[candidates[k]] + baseTime < budget )
+            return candidates[k];
+    }
+
+    return 0;
+
+}
+#ifndef MAPCA
+/****************************************************************************
+ *
+ * ROUTINE : VP5_GetYUVConfig
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Decoder instance whose last reconstructed frame is
+ * to be presented.
+ *
+ * OUTPUTS : YUV_BUFFER_CONFIG * YuvConfig
+ * Filled with the width, height, stride and plane
+ * pointers of the frame to display.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Gets details of the reconstruction buffer.
+ * Under _MSC_VER a post-processing level is picked first.
+ * PostProcess runs when the level is non-zero or when an
+ * interlaced stream has a deinterlace mode set. If the
+ * requested output is larger than the coded frame the
+ * image is scaled or centred into pbi->ScaleBuffer;
+ * otherwise YuvConfig points straight into the
+ * post-process buffer or the last reconstruction.
+ *
+ * SPECIAL NOTES : Under _MSC_VER the time spent in PostProcess is folded
+ * into pbi->avgPPTime[level % 10] as a 7/8 running average.
+ *
+ * ERRORS : Under _MSC_VER any structured exception is swallowed and
+ * reported through VP5_ErrorTrap.
+ *
+ ****************************************************************************/
+void CCONV VP5_GetYUVConfig( PB_INSTANCE (*pbi), YUV_BUFFER_CONFIG * YuvConfig )
+{
+    __try
+    {
+#ifdef _MSC_VER
+        unsigned int duration;
+        unsigned int starttsc,endtsc;
+        VP5_readTSC(&starttsc);
+        // Default the end stamp: when no PostProcess branch runs below, the
+        // timing code at the end must see a duration of 0 rather than read an
+        // uninitialised variable.
+        endtsc = starttsc;
+        pbi->PostProcessingLevel = PickPostProcessingLevel(pbi);
+#endif
+        if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+        {
+#ifdef _MSC_VER
+            extern void vp5_showinfo2(PB_INSTANCE *pbi);
+            extern void vp5_showinfo(PB_INSTANCE *pbi);
+
+            // levels above 200: run level-200, then call vp5_showinfo
+            if(pbi->PostProcessingLevel > 200 )
+            {
+                PostProcess
+                (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel-200,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001
+                );
+                VP5_readTSC(&endtsc);
+                vp5_showinfo(pbi);
+            }
+            // levels above 100: run level-100, then call vp5_showinfo2
+            else if(pbi->PostProcessingLevel > 100 )
+            {
+
+                PostProcess
+                (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel-100,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001
+                );
+                VP5_readTSC(&endtsc);
+                vp5_showinfo2(pbi);
+            }
+            else
+#endif
+            {
+                // normal path: post-process with this frame's own Q index
+                pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+
+                PostProcess
+                (
+                    pbi->postproc,
+                    pbi->Vp3VersionNo,
+                    pbi->FrameType,
+                    pbi->PostProcessingLevel,
+                    pbi->AvgFrameQIndex,
+                    pbi->LastFrameRecon,
+                    pbi->PostProcessBuffer,
+                    (unsigned char *) pbi->FragInfo,
+                    sizeof(FRAG_INFO),
+                    0x0001
+                );
+#ifdef _MSC_VER
+                VP5_readTSC(&endtsc);
+#endif
+            }
+
+        }
+
+        if(pbi->BlackClamp)
+        {
+            ClampLevels( pbi->postproc,pbi->BlackClamp,pbi->WhiteClamp,pbi->PostProcessBuffer, pbi->PostProcessBuffer);
+        }
+        if( pbi->Configuration.VideoFrameWidth < pbi->OutputWidth ||
+            pbi->Configuration.VideoFrameHeight < pbi->OutputHeight )
+        {
+            // Output is larger than the coded frame: describe a scale buffer
+            // with a 32 pixel margin in each dimension.
+            YuvConfig->YWidth = pbi->OutputWidth+32;
+            YuvConfig->YHeight = pbi->OutputHeight+32;
+            YuvConfig->YStride = YuvConfig->YWidth;
+
+            YuvConfig->UVWidth = YuvConfig->YWidth / 2;
+            YuvConfig->UVHeight = YuvConfig->YHeight / 2;
+            YuvConfig->UVStride = YuvConfig->YStride / 2;
+
+            // the three planes are laid out back to back in ScaleBuffer
+            YuvConfig->YBuffer = (char *)pbi->ScaleBuffer;
+            YuvConfig->UBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight;
+            YuvConfig->VBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight+YuvConfig->UVWidth*YuvConfig->UVHeight;
+
+            // NOTE(review): this tests only the level, while the PostProcess
+            // call above also runs for deinterlacing with level 0 -- confirm
+            // that the raw reconstruction is intended in that case.
+            if(pbi->PostProcessingLevel)
+            {
+                ScaleOrCenter( pbi->postproc, pbi->PostProcessBuffer, YuvConfig );
+            }
+            else
+            {
+                ScaleOrCenter( pbi->postproc, pbi->LastFrameRecon, YuvConfig );
+            }
+
+            // step the plane pointers past the margin and report the real size
+            YuvConfig->YBuffer +=
+                (YuvConfig->YHeight - pbi->OutputHeight ) / 2 * YuvConfig->YStride
+                +(YuvConfig->YWidth - pbi->OutputWidth) / 2;
+            YuvConfig->YWidth = pbi->OutputWidth;
+            YuvConfig->YHeight = pbi->OutputHeight;
+
+            YuvConfig->UBuffer +=
+                (YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride
+                +(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->VBuffer +=
+                (YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride
+                +(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->UVWidth = pbi->OutputWidth / 2;
+            YuvConfig->UVHeight = pbi->OutputHeight / 2;
+            //YuvConfig->UVStride = pbi->OutputWidth / 2;
+        }
+        else
+        {
+            // No scaling: expose the decoder's own buffer, skipping the
+            // UMV_BORDER that surrounds each plane.
+            YuvConfig->YWidth = pbi->Configuration.VideoFrameWidth;
+            YuvConfig->YHeight = pbi->Configuration.VideoFrameHeight;
+            YuvConfig->YStride = pbi->Configuration.YStride;
+
+            YuvConfig->UVWidth = pbi->Configuration.VideoFrameWidth / 2;
+            YuvConfig->UVHeight = pbi->Configuration.VideoFrameHeight / 2;
+            YuvConfig->UVStride = pbi->Configuration.UVStride;
+
+            //if(pbi->PostProcessingLevel && (pbi->quantizer->FrameQIndex < PPROC_QTHRESH))
+            if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+            {
+                YuvConfig->YBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconYDataOffset+(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+            else
+            {
+                YuvConfig->YBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconYDataOffset+ (pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+        }
+
+#if defined(_MSC_VER)
+        // fold the post-process time into the per-level running average
+        duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ;
+
+        if( pbi->avgPPTime[pbi->PostProcessingLevel%10] == 0)
+        {
+            pbi->avgPPTime[pbi->PostProcessingLevel%10] = duration;
+        }
+        else
+        {
+            pbi->avgPPTime[pbi->PostProcessingLevel%10] = ( 7 * pbi->avgPPTime[pbi->PostProcessingLevel%10] + duration ) >> 3;
+        }
+#endif
+    }
+#if defined(_MSC_VER)
+    __except ( TRUE )
+    {
+        VP5_ErrorTrap( pbi, GEN_EXCEPTIONS );
+    }
+#endif
+}
+#endif
+/****************************************************************************
+Debugging Aid Only */
+
+// Dumps pbi->ReconYPlaneSize bytes starting at address to a file named
+// "MapYF<x>.raw" (MAPCA builds) or "PcYF<x>.raw" (all other builds).
+// Best effort: if the file cannot be opened nothing is written.
+void writeframeYX(PB_INSTANCE *pbi, char * address,int x)
+{ // write the frame
+    FILE *yframe;
+    char filename[255];
+#ifdef MAPCA
+    sprintf(filename,"MapYF%d.raw",x);
+#else
+    sprintf(filename,"PcYF%d.raw",x);
+#endif
+    yframe=fopen(filename,"wb");
+    if(yframe == NULL)
+        return; // cannot create the dump file; skip rather than pass NULL to fwrite/fclose
+
+    fwrite(address,pbi->ReconYPlaneSize,1,yframe);
+    fclose(yframe);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_DecodeFrameToYUV
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Decoder instance.
+ *
+ * char * VideoBufferPtr
+ * Compressed input video data
+ *
+ * unsigned int ByteCount
+ * Number of bytes compressed data in buffer.
+ * Only recorded in pbi->CurrentFrameSize here.
+ *
+ * UINT32 ImageWidth, ImageHeight
+ * Unused (explicitly cast to void).
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : 0 on success.
+ * -1 when VP5_LoadFrame rejects the frame header.
+ * -2 when a structured exception is caught (_MSC_VER only).
+ *
+ * FUNCTION : Decodes a frame into the internal YUV reconstruction buffer.
+ * Details of this buffer can be obtained by calling GetYUVConfig().
+ *
+ * SPECIAL NOTES : Under _MSC_VER the decode time is stored in
+ * pbi->thisDecodeTime and folded into pbi->avgDecodeTime
+ * as a 7/8 running average.
+ *
+ *
+ * ERRORS : See RETURNS.
+ *
+ ****************************************************************************/
+int CCONV VP5_DecodeFrameToYUV( PB_INSTANCE (*pbi), char * VideoBufferPtr, unsigned int ByteCount,
+ UINT32 ImageWidth, UINT32 ImageHeight )
+{
+ unsigned char *tmp;
+ (void) ImageHeight;
+ (void) ImageWidth;
+ __try
+ {
+#ifdef _MSC_VER
+ unsigned int duration;
+ unsigned int starttsc,endtsc;
+ VP5_readTSC(&starttsc);
+#endif
+ pbi->CurrentFrameSize = ByteCount;
+
+ // start the boolean decoder
+ // NOTE(review): no length is passed to StartDecode, so bounds checking (if any) must happen elsewhere -- confirm
+ StartDecode(&pbi->br, (unsigned char*)VideoBufferPtr);
+
+ // decode the frame header
+ // on failure this returns before any buffer swap or timing update below
+ if ( !VP5_LoadFrame(pbi) )
+ return -1;
+
+
+ // decode and reconstruct frame
+ DecodeFrameMbs(pbi);
+
+ // switch pointers so lastframe recon is this frame
+ tmp = pbi->LastFrameRecon;
+ pbi->LastFrameRecon = pbi->ThisFrameRecon;
+ pbi->ThisFrameRecon = tmp;
+
+
+#ifndef MAPCA
+ // update the border
+ UpdateUMVBorder(pbi->postproc, pbi->LastFrameRecon);
+#else
+ VP5_UpdateUMVBorder(pbi, pbi->LastFrameRecon);
+#endif
+
+
+ // a base frame also becomes the new golden frame (Y plane plus both chroma planes)
+ if( pbi->FrameType == BASE_FRAME )
+ {
+ memcpy(pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize);
+ }
+
+#ifdef MAPCA
+ //if(debugme<1)
+ {
+ //EtiSysDcFlushDcache();
+ //writeframeYX(pbi,pbi->LastFrameRecon,debugme);
+ //debugme++;
+ }
+#endif
+ // If appropriate clear the MMX state.
+ ClearSysState();
+
+ //temp
+ //vp5_appendframe(pbi);
+
+ #ifdef PBSTATS1
+ // Update PB stats
+ TotQ += pbi->quantizer->ThisFrameQualityValue;
+ PBFrameNumber += 1;
+ #endif
+
+ // base frames reset the average Q index; other frames blend it 3:1 with rounding
+ if(pbi->FrameType == BASE_FRAME )
+ pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+ else
+ pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;
+
+#ifdef _MSC_VER
+ VP5_readTSC(&endtsc);
+
+ // NOTE(review): a zero pbi->ProcessorFrequency would fault here; the __except below would then return -2
+ duration = (endtsc-starttsc)/ (pbi->ProcessorFrequency) ;
+
+ pbi->thisDecodeTime = duration;
+
+ // first sample seeds the average; later samples are weighted 1/8
+ if( pbi->avgDecodeTime == 0)
+ {
+ pbi->avgDecodeTime = duration;
+ }
+ else
+ {
+ pbi->avgDecodeTime = (7*pbi->avgDecodeTime + duration)>>3;
+ }
+
+#endif
+
+
+ }
+#if defined(_MSC_VER)
+ __except ( TRUE )
+ {
+ VP5_ErrorTrap( pbi, GEN_EXCEPTIONS );
+ return -2;
+ }
+#endif
+ return 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VP5_StopDecoder
+ *
+ * INPUTS : PB_INSTANCE **pbi
+ * Address of the decoder instance handle to tear down.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : TRUE, or FALSE when a structured exception is caught
+ * (_MSC_VER only).
+ *
+ * FUNCTION : Releases the quantizer, the post-processor (non-MAPCA
+ * builds), the fragment and frame info, and finally the
+ * decoder instance itself. Does nothing when *pbi is NULL.
+ *
+ * SPECIAL NOTES : MAPCA builds close the DMA descriptors first.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+int CCONV VP5_StopDecoder(PB_INSTANCE **pbi)
+{
+
+#ifdef MAPCA
+    CloseDMAReadReferenceDS();
+    CloseDMAWriteReconDS();
+#endif
+
+    __try
+    {
+        if(*pbi != NULL)
+        {
+            PB_INSTANCE *decoder = *pbi;
+
+            // release the sub-objects first, the instance last
+            VP5_DeleteQuantizer(&decoder->quantizer);
+#ifndef MAPCA
+            DeletePostProcInstance(&decoder->postproc);
+#endif
+            VP5_DeleteFragmentInfo(decoder);
+            VP5_DeleteFrameInfo(decoder);
+
+            VP5_DeletePBInstance(pbi);
+        }
+    }
+
+#if defined(_MSC_VER)
+    __except ( TRUE )
+    {
+        VP5_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+    // reached both after a teardown and when there was nothing to tear down
+    return TRUE;
+}
+
+#ifndef MAPCA
+/****************************************************************************
+ *
+ * ROUTINE : VP5_ErrorTrap
+ *
+ * INPUTS : PB_INSTANCE *pbi
+ * Decoder instance on which the error occurred (unused).
+ *
+ * int ErrorCode
+ * Error category, e.g. GEN_EXCEPTIONS (unused).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Called when a fatal error is detected.
+ * In this build the body is empty: it sets no flag and
+ * does not loop, so callers simply continue.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+ static void VP5_ErrorTrap( PB_INSTANCE *pbi, int ErrorCode )
+ {
+ }
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c
new file mode 100644
index 00000000..13e2bc55
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c
@@ -0,0 +1,429 @@
+/*
+ vp50dxv.c : Glue code that registers the VP5 decoder as a DXL xImage codec.
+*/
+#include <stdlib.h>
+
+#include "dkpltfrm.h" /* platform specifics */
+#include "duktypes.h" /* predefined general types used at duck */
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_main.h" /* interface to dxv */
+#include "pbdll.h"
+
+typedef unsigned long FourCC;
+
+#define VP50_FOURCC DXL_MKFOURCC( 'V', 'P', '5', '0')
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned long Parameter );
+
+extern void vp3SetBlit(void);
+extern void VP5_VPInitLibrary(void);
+extern void VP5_VPDeInitLibrary(void);
+#ifdef _MSC_VER
+#pragma warning(disable:4055)
+#endif
+
+#include "duck_dxl.h"
+extern void VP5_readTSC(unsigned long *tsc);
+
+/*
+    Fills frameInfo from the first three bytes of a compressed frame:
+    the key-frame flag and quality from byte 0, the raw value of byte 1
+    as vp30Flag, and (key frames only) a 5 bit version from byte 2.
+*/
+void vp50_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+    // the top bit of the first byte is clear on key frames, set on inter frames
+    frameInfo->KeyFrame = (source[0] <= 0x7f);
+    frameInfo->Quality = source[0] >> 2;
+
+    // only key frames carry a version field
+    frameInfo->Version = frameInfo->KeyFrame ? ((source[2]>>3) & 0x1f ) : 0;
+
+    frameInfo->vp30Flag = (int)source[1];
+}
+
+
+// YUV buffer configuration structure
+// Pointers to this struct are cast to YUV_BUFFER_CONFIG * when handed to
+// VP5_GetYUVConfig and to the blitters, so the leading members presumably
+// have to match that type's layout -- do not reorder without checking.
+typedef struct
+{
+ // luma plane dimensions and row stride
+ int YWidth;
+ int YHeight;
+ int YStride;
+
+ // chroma plane dimensions and row stride (shared by U and V)
+ int UVWidth;
+ int UVHeight;
+ int UVStride;
+
+ // start of each plane
+ char * YBuffer;
+ char * UBuffer;
+ char * VBuffer;
+
+ // destination chroma placement, filled in by vp50_blit for DXYV12 / DXI420 targets
+ char * uvStart;
+ int uvDstArea;
+ int uvUsedArea;
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an xImage structure based on the core xImage struct */
+typedef struct tXImageCODEC
+{
+ xImageBaseStruct; /* presumably a macro from dxl_main.h that expands to the common xImage members -- confirm */
+ FourCC myFourCC; /* set to VP50_FOURCC on creation */
+ DXV_YUV_BUFFER_CONFIG FrameBuffer; /* filled by VP5_GetYUVConfig after each decompress */
+ PB_INSTANCE *myPBI; /* decoder instance used by this xImage */
+ int owned; /* non-zero when this xImage created myPBI and must stop it on destroy */
+
+} vp50_XIMAGE,*vp50_XIMAGE_HANDLE;
+
+/* bit depths offered to DXL via bdPrefs; DXRGBNULL terminates the list */
+static dxvBitDepth bitDepths[] =
+{
+ DXRGB32,DXRGB24,DXRGB16,DXRGBNULL
+};
+
+
+/* blitter entry point as called by vp50_blit: (destination address, pitch, source frame description) */
+typedef void ((*vp5BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+/* argument-less hook type used for the vScreen blitSetup / blitExit callbacks */
+typedef void ((*vp5_VSCREEN_FUNC)(void));
+
+
+/* Reports the decoder's native output format; it is YV12 whatever the arguments. */
+DXL_INTERNAL_FORMAT vp50_GetXImageInternalFormat(DXL_XIMAGE_HANDLE xImage,
+    DXL_VSCREEN_HANDLE vScreen)
+{
+    /* neither handle influences the answer */
+    (void) xImage;
+    (void) vScreen;
+
+    return YV12;
+}
+/*
+    Blits the current decoded frame of pbi to vScreen at offset (x, y).
+
+    When src has an internal format, the vScreen blit hooks are refreshed
+    first. The frame description is fetched with VP5_GetYUVConfig into
+    FrameBuffer, adjusted to the view size, and its plane pointers are moved
+    to the last row before the blitter is called. Interlaced streams going to
+    a non-planar destination are blitted as two fields. The time spent in the
+    blitter is folded into pbi->avgBlitTime as a 7/8 running average.
+
+    Returns DXL_INVALID_BLIT when no blitter is available, otherwise DXL_OK
+    (also when vScreen or vScreen->addr is NULL and nothing is drawn).
+*/
+int vp50_blit(PB_INSTANCE *pbi,DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen,DXV_YUV_BUFFER_CONFIG *FrameBuffer,int x, int y )
+{
+    if(vScreen && ((void *)(src->internalFormat) != NULL)) {
+        /* get your hamdy damdy((c)1997 Duck North) registered blitter setup */
+        vScreen->blitSetup = DXL_GetBlitSetupFunc(src,vScreen);
+        vScreen->blitExit = DXL_GetBlitExitFunc(src,vScreen);
+        vScreen->blitter = DXL_GetBlitFunc(src, vScreen);
+
+        if (vScreen->blitter == (void *) -1)
+            return DXL_INVALID_BLIT;
+    }
+
+    if (vScreen) /* if there is a vScreen, blit to it */
+    {
+        if (vScreen->addr)
+        {
+            int pSize;
+            int w,h;
+            unsigned char *ptrScrn;
+            int thisPitch = vScreen->pitch;
+            unsigned int duration;
+            /* must be unsigned long: VP5_readTSC is declared in this file as
+               taking unsigned long *, and an unsigned int would be overrun
+               wherever long is wider than int */
+            unsigned long starttsc,endtsc;
+
+            /* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+            VP5_GetYUVConfig(pbi, (YUV_BUFFER_CONFIG *) FrameBuffer);
+
+            pSize = DXL_GetVScreenSizeOfPixel(vScreen);
+
+            /* remember to offset if requested */
+            y += vScreen->viewY;
+            x += vScreen->viewX ;
+
+            /* for planar destinations */
+            w = vScreen->viewW;//pitch;
+            h = vScreen->height;
+
+            /* the view size overrides the decoder's reported size */
+            if(w != FrameBuffer->YWidth)
+            {
+                FrameBuffer->YWidth = w;
+                FrameBuffer->UVWidth = (w+1)/2;
+            }
+            if(h != FrameBuffer->YHeight)
+            {
+                FrameBuffer->YHeight = h;
+                FrameBuffer->UVHeight = (h+1)/2;
+            }
+            ptrScrn = vScreen->addr;
+            ptrScrn += (x * pSize) + (y * thisPitch);
+
+            /* setup ptrs so we can work backwards through Paul's frame buffers */
+            FrameBuffer->YBuffer = FrameBuffer->YBuffer +
+                ((FrameBuffer->YHeight - 1) *
+                (FrameBuffer->YStride));
+
+            FrameBuffer->UBuffer = FrameBuffer->UBuffer +
+                ((FrameBuffer->UVHeight - 1) *
+                (FrameBuffer->UVStride));
+
+            FrameBuffer->VBuffer = FrameBuffer->VBuffer +
+                ((FrameBuffer->UVHeight - 1) *
+                (FrameBuffer->UVStride));
+
+
+            if((vScreen->bd != DXYUY2) && (vScreen->bd != DXYV12))
+            {
+                if(vScreen->bq == DXBLIT_STRETCH)
+                {
+                    thisPitch *= 2;
+                }
+            }
+
+            /* planar destinations: work out where the chroma planes go */
+            if(vScreen->bd == DXYV12||vScreen->bd == DXI420)
+            {
+                if(thisPitch < 0)
+                {
+                    FrameBuffer->uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 );
+                    FrameBuffer->uvDstArea = abs((thisPitch * h)/4);
+                    FrameBuffer->uvUsedArea = 0;
+                }
+                else
+                {
+                    FrameBuffer->uvStart = (char *) (ptrScrn + (thisPitch * h));
+                    FrameBuffer->uvDstArea = (((thisPitch+1)/2) * (( h+1)/2));
+                    FrameBuffer->uvUsedArea = (((thisPitch+1)/2) * FrameBuffer->UVHeight);
+                }
+
+                // Temporary fix for Scott Kludge Kludge Kludge !!!!!!!!!
+                // ptrScrn -= thisPitch; // fixes a bug in assembly code for some reason the buttnutt is adding pitch to Y buffer
+            }
+
+            /* if a blitter hasn't been set up set one up ! */
+            if (vScreen->blitSetup != (void *)-1)
+                ((vp5_VSCREEN_FUNC)vScreen->blitSetup)();
+
+            /* if its still not set up return that it failed */
+            if ((vp5BLIT_FUNC)vScreen->blitter == (vp5BLIT_FUNC)-1)
+                return DXL_INVALID_BLIT;
+
+            /* blit the screen */
+
+            VP5_readTSC(&starttsc);
+            if(pbi->Configuration.Interlaced==1 && (vScreen->bd != DXYV12 && vScreen->bd != DXI420))
+            {
+                /* blit the two fields separately: double the source strides,
+                   halve the heights, and write every other destination row */
+                int ypitch = FrameBuffer->YStride;
+                int uvpitch = FrameBuffer->UVStride;
+
+                FrameBuffer->YStride <<= 1;
+                FrameBuffer->YHeight >>= 1;
+                FrameBuffer->UVStride <<= 1;
+                FrameBuffer->UVHeight >>= 1;
+
+                ptrScrn+=thisPitch;
+                FrameBuffer->YBuffer -= ypitch;
+                FrameBuffer->UBuffer -= uvpitch;
+                FrameBuffer->VBuffer -= uvpitch;
+                ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch*2, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+
+                ptrScrn-=thisPitch;
+                FrameBuffer->YBuffer += ypitch;
+                FrameBuffer->UBuffer += uvpitch;
+                FrameBuffer->VBuffer += uvpitch;
+                ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch*2, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+
+            }
+            else
+            {
+                ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+            }
+            VP5_readTSC(&endtsc);
+
+            /* first sample seeds the average; later samples are weighted 1/8 */
+            duration = (unsigned int) (( endtsc - starttsc ) / (pbi->ProcessorFrequency)) ;
+            if( pbi->avgBlitTime == 0)
+            {
+                pbi->avgBlitTime = duration;
+            }
+            else
+            {
+
+                pbi->avgBlitTime = (7*pbi->avgBlitTime + duration)>>3;
+            }
+
+            /* blitter cleanup ?*/
+            if ((vp5BLIT_FUNC)vScreen->blitExit != (vp5BLIT_FUNC)-1)
+                ((vp5_VSCREEN_FUNC)vScreen->blitExit)();
+
+        }
+    }
+    return DXL_OK;
+}
+
+
+/*
+    xImage "dx" entry point. Decodes src->addr when it holds a non-empty
+    frame whose first three bytes are not all zero, then refreshes
+    src->FrameBuffer from the decoder. With no new frame it only redoes the
+    scaling and postprocessing of the previous one.
+
+    Returns DXL_VERSION_CONFLICT when the decoder returns -1, DXL_BAD_DATA for
+    any other decoder failure, DXL_OK otherwise.
+*/
+static int vp50_decompress(vp50_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+    if (src->addr
+        && src->fSize != 0
+        && (src->addr[0]>=1 || src->addr[1]>=1 || src->addr[2] >=1))
+    {
+        // decode the frame
+        switch (VP5_DecodeFrameToYUV(src->myPBI, (char *)src->addr, src->fSize,
+                                     src->imWidth, src->imHeight))
+        {
+        case 0:
+            break;
+        case -1:
+            return DXL_VERSION_CONFLICT;
+        default:
+            return DXL_BAD_DATA;
+        }
+    }
+
+    VP5_GetYUVConfig(src->myPBI, (YUV_BUFFER_CONFIG *) &src->FrameBuffer);
+    return DXL_OK;
+}
+
+/*
+    Closes down a decompressor: stops the decoder when this xImage owns it,
+    then frees the xImage itself. A NULL handle is accepted and ignored.
+    Always returns DXL_OK.
+*/
+
+static int vp50_xImageDestroy(vp50_XIMAGE_HANDLE xThis)
+{
+    if (!xThis)
+        return DXL_OK;
+
+    /* a decoder supplied through PBC_SET_PBSTRUCT is not ours to stop */
+    if (xThis->owned)
+        VP5_StopDecoder(&(xThis->myPBI));
+
+    duck_free(xThis);
+    return DXL_OK;
+}
+
+/*
+    called during initialization and/or when xImage (decompressor)
+    attributes change, note that nImage and src are actually
+    synonymous and should be cleared out a bit (to say the least!)
+
+
+    !!!!!!
+    This function should be prepared to get data that is NOT of the
+    type native to the decoder, It should do it's best to verify it
+    as valid data and should clean up after itself and return NULL
+    if it doesn't recognize the format of the data
+
+    Parameters:
+      src      - existing xImage to replace, or NULL; destroyed when non-NULL
+      data     - compressed data pointer stored as the xImage address
+      type     - must be VP50_FOURCC, otherwise NULL is returned
+      bitDepth - unused
+      w, h     - image size; 0 selects the default of 320 x 240
+
+    Returns the new xImage, or NULL when the type is wrong, the allocation
+    fails, or the decoder cannot be started.
+*/
+static DXL_XIMAGE_HANDLE vp50_xImageCreate(unsigned char *data);
+static DXL_XIMAGE_HANDLE vp50_xImageReCreate(vp50_XIMAGE_HANDLE src,unsigned char *data,
+    int type,enum BITDEPTH bitDepth,int w,int h)
+{
+    (void) bitDepth;
+    if (type != VP50_FOURCC)
+        return NULL;
+
+    if (src != NULL) /* if an xImage/decompressor already exists, destroy it */
+        vp50_xImageDestroy(src);
+
+    /* create a new xImage, specific to this type of decoder,
+       (see "vp50_XIMAGE" struct above and dxl_main.h) */
+
+    src = (vp50_XIMAGE_HANDLE)duck_calloc(1,sizeof(vp50_XIMAGE),DMEM_GENERAL);
+
+    if (!src)
+        return NULL;
+
+// duck_memset(nImage,0,sizeof(vp50_XIMAGE));
+
+    /* set up the "vtable" of interface calls */
+    src->create = (DXL_XIMAGE_HANDLE (*)(void *)) vp50_xImageCreate;
+    src->recreate = (DXL_XIMAGE_HANDLE (*)(DXL_XIMAGE_HANDLE,void *,int,int,int,int)) vp50_xImageReCreate;
+
+    src->destroy = (int (*)(DXL_XIMAGE_HANDLE))vp50_xImageDestroy;
+    src->dx = (int (*)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE)) vp50_decompress;
+    src->blit = NULL; /* there is no interleaved blitter for vp5x files */
+    src->setParameter = vp50_SetParameter;
+
+#if !KLUDGE_FOR_NEIL
+    src->internalFormat = (int (*)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE)) vp50_GetXImageInternalFormat;
+#endif
+    src->bdPrefs = bitDepths; /* plug in the list of prefered bit depths */
+
+    src->addr = data;
+    src->dkFlags.inUse = 1;
+
+    src->imWidth = src->w = (short) (w ? w : 320);
+    src->imHeight = src->h = (short) (h ? h : 240);
+
+    src->myFourCC = VP50_FOURCC;
+
+    /* create new PBI */
+    if(!VP5_StartDecoder( &(src->myPBI), src->imWidth, src->imHeight ))
+    {
+        /* owned is still 0 from duck_calloc, so destroy only frees the
+           xImage; return now instead of writing through the freed handle */
+        vp50_xImageDestroy(src);
+        return NULL;
+    }
+
+    /* the decoder was created here, so this xImage must stop it on destroy */
+    src->owned = 1;
+
+    return (DXL_XIMAGE_HANDLE ) src;
+}
+
+/* Factory registered with DXL: builds a fresh VP50 xImage for data by
+   delegating to the recreate function with no previous image and the
+   default size (w = h = 0). */
+
+static DXL_XIMAGE_HANDLE vp50_xImageCreate(unsigned char *data)
+{
+    return vp50_xImageReCreate(
+        NULL,               /* nothing to replace */
+        data,
+        VP50_FOURCC,
+        (enum BITDEPTH ) 0,
+        0,
+        0);
+}
+
+/*
+    main init routine: registers the VP50 xImage factory with DXL (fourcc
+    VP50_FOURCC, format YV12) and then initializes the decoder library.
+    Always returns DXL_OK.
+*/
+int vp50_Init(void)
+{
+    DXL_RegisterXImage((DXL_XIMAGE_HANDLE (*)(unsigned char *)) vp50_xImageCreate, VP50_FOURCC, YV12);
+
+    /* initialize all the global variables */
+    VP5_VPInitLibrary();
+
+    return DXL_OK;
+}
+
+/*
+    main exit routine, called during DXL_ExitVideo():
+    de-initializes the decoder library and reports DXL_OK.
+*/
+
+int vp50_Exit(void)
+{
+    /* clean up the library's global state */
+    VP5_VPDeInitLibrary();
+    return DXL_OK;
+}
+
+/*
+    xImage "setParameter" entry point.
+
+    PBC_SET_PBSTRUCT replaces the decoder instance with the one passed in
+    Parameter: a decoder this xImage owns is stopped first, and the new one is
+    marked as not owned. Every other command is forwarded to VP5_SetPbParam.
+*/
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned long Parameter )
+{
+    vp50_XIMAGE_HANDLE xim = (vp50_XIMAGE_HANDLE) src;
+
+    if(Command != PBC_SET_PBSTRUCT)
+    {
+        VP5_SetPbParam( xim->myPBI, (PB_COMMAND_TYPE) Command, (UINT32) Parameter );
+        return;
+    }
+
+    /* drop our own decoder before adopting the caller's */
+    if(xim->owned)
+        VP5_StopDecoder(&(xim->myPBI));
+
+    xim->owned = 0;
+    xim->myPBI = (PB_INSTANCE *) Parameter;
+}
+
+/* one image plane as reported by GetImageBufs */
+typedef struct {
+ unsigned char* baseAddr; /* start of the plane */
+ long rowBytes; /* stride of the plane, copied from the frame buffer's YStride / UVStride */
+} YV12_PLANE;
+
+/* the three planes of a decoded frame */
+typedef struct {
+ YV12_PLANE y;
+ YV12_PLANE u;
+ YV12_PLANE v;
+} YV12_PLANES;
+
+/* Copies the plane pointers and strides of x's current frame buffer into p.
+   x must be a vp50 xImage; U and V share the chroma stride. */
+void GetImageBufs(DXL_XIMAGE_HANDLE x, YV12_PLANES *p)
+{
+    DXV_YUV_BUFFER_CONFIG *fb = &((vp50_XIMAGE_HANDLE)x)->FrameBuffer;
+
+    /* luma */
+    p->y.baseAddr = (unsigned char *)fb->YBuffer;
+    p->y.rowBytes = fb->YStride;
+
+    /* chroma */
+    p->u.baseAddr = (unsigned char *)fb->UBuffer;
+    p->u.rowBytes = fb->UVStride;
+    p->v.baseAddr = (unsigned char *)fb->VBuffer;
+    p->v.rowBytes = fb->UVStride;
+} \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h b/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h
new file mode 100644
index 00000000..eccbbc67
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h
@@ -0,0 +1,33 @@
+/****************************************************************************
+*
+* Module Title : HuffTables.h
+*
+* Description : Video CODEC
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 JBB 26 Jan 01 New Huffman Code
+* 1.00 PGW 12/10/00 Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef HUFFTAB_H
+#define HUFFTAB_H
+
+#include "type_aliases.h"
+#include "huffman.h"
+
+/****************************************************************************
+* Huffman tables
+*****************************************************************************
+*/
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+// Both tables have one entry per entropy token (MAX_ENTROPY_TOKENS entries).
+// NOTE(review): for entries 1..8 the next DctRangeMinVals entry equals this one
+// plus 2^(ExtraBitLengths_VP5 - 1), which suggests one extra bit is a sign bit -- confirm in huffman.h.
+// NOTE(review): these are non-static definitions in a header, so every source
+// file that includes it defines the arrays -- confirm it is included only once.
+UINT8 ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 5, 6, 12, 0 };
+UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 27, 59, 0 };
+
+#endif \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h b/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h
new file mode 100644
index 00000000..4c1a53b1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h
@@ -0,0 +1,78 @@
+/****************************************************************************
+*
+* Module Title : boolhuff.H
+*
+* Description : Video CODEC
+*
+* AUTHOR : James Bankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 01JUN01 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#ifndef boolhuff_h
+
+#define boolhuff_h
+
+// Two alternative layouts of the boolean coder state, selected at compile
+// time by NOTNORMALIZED.  The functions declared below take a BOOL_CODER *.
+#ifdef NOTNORMALIZED
+typedef struct _boolcoder
+{
+ unsigned char *buffer; // presumably the coded byte stream -- confirm in the coder source
+ unsigned int pos; // presumably the current offset into buffer -- confirm
+ // the same 32 bits, viewable as one word or as four bytes
+ union
+ {
+ unsigned int value;
+ unsigned char v[4];
+ };
+ unsigned int range;
+} BOOL_CODER;
+#else
+// NOTE(review): not referenced by any declaration visible in this header.
+typedef struct
+{
+ unsigned int bits;
+ unsigned int bitpos;
+ unsigned int *source;
+ unsigned int pos;
+} bitpump;
+typedef struct
+{
+ unsigned int lowvalue;
+ unsigned int range;
+ unsigned int value;
+ int count;
+ unsigned int pos; // presumably the current offset into buffer -- confirm
+ unsigned char *buffer; // presumably the coded byte stream -- confirm
+
+ // Variables used to track bit costs without outputing to the bitstream
+ unsigned int MeasureCost;
+ unsigned long BitCounter;
+} BOOL_CODER;
+#endif
+
+extern void StartDecode(BOOL_CODER *bc, unsigned char *buffer);
+
+extern int DecodeBool(BOOL_CODER *bc, int context);
+extern int DecodeBool128(BOOL_CODER *bc);
+
+extern void StopDecode(BOOL_CODER *bc);
+
+extern void StartEncode(BOOL_CODER *bc, unsigned char *buffer);
+
+extern void EncodeBool(BOOL_CODER *bc, int x, int context);
+extern void EncodeBool2(BOOL_CODER *bc, int x, int context);
+extern void StopEncode(BOOL_CODER *bc);
+
+extern double shannonCost0[256];
+extern double shannonCost1[256];
+extern unsigned int shannon64Cost0[256];
+extern unsigned int shannon64Cost1[256];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h b/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h
new file mode 100644
index 00000000..b4b8d78b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h
@@ -0,0 +1,562 @@
+/****************************************************************************
+*
+* Module Title : COMPDLL.H
+*
+* Description : Video CODEC demo compression DLL main header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.34 YWX 09-Dec-02 Added Function pointers for frame/field variance calculation
+* 1.33 YWX 30-Oct-02 Added EncoderLoopFilterOff flag
+* 1.32 YWX 28-Oct-02 Added function pointer for 5 region diamond search
+* 1.31 YWX 28-Oct-02 Added above and left token context and 5 region
+* diamond motion search sites
+* 1.30 YWX 02-Jul-02 Added new function pointers for motion search
+* 1.31 JBB 04 JUL-02 Added preprocessor code
+* 1.29 AWG 20-Jun-01 Removed QuadCodeComponent function prototype & HExtra/VExtra
+* 1.29 AWG 22-May-01 Added support for DCT16
+* 1.28 JBB 05-May-01 Changes for VP5 (new entropytablebits and tokenextra chgs
+* 1.27 JBB 23-Mar-01 Changed QuickCompress datatype from BOOL to INT32
+* 1.26 JBB 11 Feb 01 Merged in: added vars for map ca move ac choice to right after dc
+* 1.25 PGW 31 Jan 01 Added some stats variables and VP5 Mv entropy tables.
+* 1.24 JBB 30 Nov 00 Version number changes
+* 1.23 JBB 15 Nov 00 Cleaned out ifdefs
+* 1.22 JBB 15 Oct 00 Added First Pass Function
+* 1.21 JBB 11 Sep 00 new function pointers for subtract removed transxquant
+* 1.20 JBB 07 Sep 00 Changed error metrics to Unsigned int
+* 1.19 JBB 24 Aug 00 Ansi C compatible
+* 1.18 JBB 27Jul00 added checks on Mallocs
+* 1.17 JBB 24Jul00 Changed error functions to return INT32 instead of double
+* 1.16 PGW 12 Jul 00 Removed CompAutoKeyFrameThreshold.
+* 1.15 PGW 29 Jun 00 Removed instance variable CarryOverAdaptionEnabled.
+* 1.14 PGW 27 Jun 00 Added QTargetModifier[]. Changes to CONFIG_TYPE2.
+* 1.13 JBB 30/05/00 Removed hard coded size limits
+* 1.12 JBB 22/05/00 Added OriginalDC support to remove max_fragments depends
+* 1.11 YX 13/04/00 Add function pointers for new optimizations
+* 1.10 YX 06/04/00 More buffers aligned MMX Fdct
+* 1.09 YX 20/03/00 32 Byte aligned buffers, Back to Integer Forward DCT
+* Additional Function pointers for optimized code
+* 1.08 PGW 17/03/00 Changes to support separate Y and UV entropy tables.
+* Added PreProcFilterLevel to allow control of preprocessor
+* filter level.
+* 1.07 YX 09/03/00 Change to use floating point forward DCT
+* 1.06 PGW 17/12/99 Draw dib functionality removed.
+* 1.05 PGW 05/10/99 Remove some Windows dependencies for VFW compressor.
+* 1.04 PGW 20/07/99 Rate targeting corrections for VFW version of codec
+* 1.03 PGW 15/07/99 Added QuickCompress flag.
+* 1.02 PGW 05/07/99 Added GetFOURMVExhaustiveSearch() function
+* 1.01 PGW 29/06/99 Added GetMBMVExhaustiveSearch() function.
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef __INC_COMPDLL_H
+#define __INC_COMPDLL_H
+
+#define MIN_BPB_FACTOR 0.1 // NOTE(review): presumably the lower clamp for the Bpb correction factors in CP_INSTANCE -- confirm
+#define MAX_BPB_FACTOR 10.0 // NOTE(review): presumably the matching upper clamp -- confirm
+
+#define KEY_FRAME_CONTEXT 5 // Length of the PriorKeyFrameSize[] and PriorKeyFrameDistance[] arrays in CP_INSTANCE
+
+#include "codec_common.h"
+#include "preprocif.h"
+#include "preproc.h"
+#include "pbdll.h"
+#include "vp50_comp_interface.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+// Debug/stats code
+//#define PSNR_ON
+
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+typedef struct CONFIG_TYPE2 // Rate and quantizer settings; embedded in CP_INSTANCE as the Configuration member
+{
+ UINT32 TargetBandwidth; // NOTE(review): units are not stated in this header -- confirm
+ UINT32 OutputFrameRate;
+
+ UINT32 FirstFrameQ;
+ UINT32 BaseQ;
+ UINT32 MaxQ; // Absolute Max Q allowed.
+ UINT32 ActiveWorstQuality; // Reflects worst quality currently allowed (specified as an index where 0 is worst quality)
+ UINT32 ActiveBestQuality; // Reflects best quality currently allowed (specified as an index where 0 is worst quality)
+
+} CONFIG_TYPE2;
+
+
+/* Defines the largest positive integer expressable with a standard int type */
+/****************************************************************************
+* * Type declarations
+****************************************************************************
+*/
+
+typedef enum // Token categories; enumerators take the implicit values 0..3 in declaration order
+{
+ DCT_COEF_TOKEN,
+ MODE_TOKEN,
+ BLOCKMAP_TOKEN,
+ MV_TOKEN
+} TOKENTYPE;
+
+typedef struct _TOKENEXTRA // Element type of the CoeffTokens buffers in CP_INSTANCE
+{
+ INT32 Token;
+ UINT32 Extra; // NOTE(review): presumably the extra bits that accompany Token -- confirm
+} TOKENEXTRA;
+
+
+typedef struct LineEq2 // Pair of double coefficients; not referenced elsewhere in this header
+{
+ double M; // NOTE(review): presumably the slope of a line y = M*x + C -- confirm
+ double C; // NOTE(review): presumably the intercept -- confirm
+
+} LINE_EQ2;
+
+typedef struct // Element type of CP_INSTANCE::MbDcContexts[MAX_MODES][6]
+{
+ BLOCK_CONTEXTA * AbovePtr; // Each context is held as a pointer plus a by-value member of the same type
+ BLOCK_CONTEXTA Above; // NOTE(review): presumably a saved copy of what AbovePtr refers to -- confirm
+ BLOCK_CONTEXT * LeftPtr;
+ BLOCK_CONTEXT Left;
+ Q_LIST_ENTRY * LastDcPtr;
+ Q_LIST_ENTRY LastDc;
+
+} MB_DC_CONTEXT;
+
+/****************************************************************************
+* MACROS
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Global Variables
+*****************************************************************************
+*/
+
+//****************************************************************
+// Function Pointers now library globals!
+extern UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32); // Parameters are unnamed here; their meaning is not visible from this header
+extern UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+extern void (*fdct_short) ( INT16 * InputData, INT16 * OutputData ); // Same signature as fdct_slowf and fdct_short_C declared later in this header
+extern void (*idctc[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData ); // Table of 65 function pointers; NOTE(review): index meaning not visible here -- confirm
+extern UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32) ; // Same signature as GetSAD16
+//extern UINT32 (*GetNextSAD)(UINT8 *, INT32, UINT8 *, UINT32, UINT32 );
+extern UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32 ); // Same signature as GetSadHalfPixel16
+extern UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *, UINT8 *, INT32 );
+extern UINT32 (*GetIntraError)( UINT8 *, INT32);
+extern void (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride ); // Same signature as SUB8 declared later in this header
+extern void (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride ); // Same signature as SUB8_128
+extern void (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride ); // Same signature as SUB8AV2
+
+//****************************************************************
+
+
+
+
+#define HUGE_ERROR (1<<28) // Out of range test value (268435456)
+
+#define MAX_SEARCH_SITES 33 // Number of search sites for a 4-step search (at pixel accuracy); sizes the MV*/DSMV* offset arrays in CP_INSTANCE
+
+typedef struct CP_INSTANCE * xCP_INST; // Pointer alias; used for the cpi argument of the search callbacks at the end of the struct
+typedef struct CP_INSTANCE // Per-encoder instance state; returned by CreateCPInstance() and passed to most functions declared below
+{
+ PB_INSTANCE pb; // playback instance, embedded by value as the first member
+ INT32 DropCounter;
+
+ //****************************************************************************************************
+ // Compressor Configuration
+ CONFIG_TYPE2 Configuration;
+
+ YUV_BUFFER_CONFIG InputConfig;
+ YUV_BUFFER_CONFIG YuvInputData;
+ INT32 SizeStep;
+
+ INT32 QuickCompress;
+ BOOL GoldenFrameEnabled;
+ BOOL InterPrediction;
+ BOOL MotionCompensation;
+ BOOL AutoKeyFrameEnabled ;
+ INT32 ForceKeyFrameEvery ;
+ INT32 AutoKeyFrameThreshold ;
+ UINT32 LastKeyFrame ;
+ UINT32 MinimumDistanceToKeyFrame ;
+ INT32 KeyFrameDataTargetOrig ; // Data rate target for key frames
+ INT32 KeyFrameDataTarget ; // Data rate target for key frames
+ UINT32 KeyFrameFrequency ;
+ BOOL DropFramesAllowed ;
+ BOOL DropFrame;
+ INT32 DropCount ;
+ UINT32 QualitySetting;
+ UINT32 PreProcFilterLevel;
+ BOOL AllowSpatialResampling;
+ UINT8 RdOpt; // 0 - off, 1 - basic rd on, 2 - all rd options on
+
+
+ // Compressor Statistics
+ double TotErrScore;
+ UINT32 InterError;
+ UINT32 MVErrorPerBit;
+ UINT32 ErrorPerBit;
+ UINT32 IntraError;
+ INT64 KeyFrameCount ; // Count of key frames.
+ INT64 TotKeyFrameBytes ;
+ UINT32 LastKeyFrameSize ;
+ UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];
+ UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
+ INT32 FrameQuality[6];
+ int DecoderErrorCode; // Decoder error flag.
+ INT32 ThreshMapThreshold;
+ INT32 TotalMotionScore;
+ INT64 TotalByteCount;
+ INT32 FixedQ;
+
+ // Frame Statistics
+ INT64 CurrentFrame;
+ UINT32 LastFrameSize;
+ UINT32 ThisFrameSize;
+ BOOL ThisIsFirstFrame;
+ BOOL ThisIsKeyFrame;
+ BOOL GfRecoveryFrame;
+
+ INT32 MotionScore;
+ UINT32 FirstSixthBoundary; // Macro block index marking the first sixth of the image
+ UINT32 LastSixthBoundary; // Macro block index marking the last sixth of the image
+
+ /* Rate Targeting variables (PGW 08/05/96). */
+ double BpbCorrectionFactor;
+ double KeyFrameBpbCorrectionFactor;
+
+ // Controlling Block Selection
+ UINT32 MVChangeFactor;
+ UINT32 FourMvChangeFactor;
+ UINT32 ExhaustiveSearchThresh;
+ UINT32 MinImprovementForFourMV;
+ UINT32 FourMVThreshold;
+ UINT32 IntraThresh;
+
+ UINT32 MinErrorForMacroBlockMVSearch;
+ UINT32 MinErrorForBlockMVSearch;
+ UINT32 MinErrorForGoldenMVSearch;
+
+
+ //****************************************************************************************************
+
+
+ //****************************************************************************************************
+ // Frames
+ // Used in the selective convolution filtering of the Y plane.
+ YUV_BUFFER_ENTRY *yuv1ptr;
+ YUV_BUFFER_ENTRY *yuv1ptrAlloc;
+ //****************************************************************************************************
+
+ //****************************************************************************************************
+ // Token Buffers
+ TOKENEXTRA *CoeffTokens;
+ TOKENEXTRA *CoeffTokensAlloc;
+ TOKENEXTRA *CoeffTokenPtr;
+
+ INT16 LastDC[3];
+
+ BOOL_CODER bc;
+
+ //****************************************************************************************************
+
+ //****************************************************************************************************
+ // SuperBlock, MacroBlock and Fragment Information
+ // Coded flag arrays and counters for them
+
+ //****************************************************************************************************
+ // Live Codec Variables
+
+ UINT8 *DataOutputBuffer;
+ //****************************************************************************************************
+
+ //****************************************************************************************
+ // STATICS COPIED FROM C FILES (USED IN MULTIPLE FUNCTIONS BUT ARE NOT REALLY INSTANCE GLOBALS )
+ // copied from cencode.c
+ UINT8 MBCodingMode; // Coding mode flags
+
+ // copied from mcomp.c
+ INT32 MVPixelOffsetY[MAX_SEARCH_SITES];
+ UINT32 InterTripOutThresh;
+ INT32 MVSearchSteps;
+ INT32 MVOffsetX[MAX_SEARCH_SITES];
+ INT32 MVOffsetY[MAX_SEARCH_SITES];
+ INT32 HalfPixelRef2Offset[9]; // Offsets for half pixel compensation
+ INT8 HalfPixelXOffset[9]; // Half pixel MV offsets for X
+ INT8 HalfPixelYOffset[9]; // Half pixel MV offsets for Y
+
+
+ Q_LIST_ENTRY *quantized_list;
+ Q_LIST_ENTRY *quantized_listAlloc;
+
+ MOTION_VECTOR MVector;
+ INT16 *DCT_codes; // Buffer that stores the result of Forward DCT
+ INT16 *DCTDataBuffer; // Input data buffer for Forward DCT
+ INT16 *DCT_codesAlloc;
+ INT16 *DCTDataBufferAlloc;
+
+
+ // Motion compensation related variables
+ UINT32 MvMaxExtent;
+
+ INT32 byte_bit_offset;
+
+ // copied from cbitman.c
+ UINT32 NearestError[4];
+ UINT32 NearError[4];
+ UINT32 ZeroError[4];
+ UINT32 BestError[4];
+
+ UINT32 ErrorBins[128];
+
+ //****************************************************************
+ // instances (used for reconstructing buffers and to hold tokens etc.)
+ xPP_INST pp; // preprocessor
+
+#if defined PSNR_ON
+ double TotPsnr;
+ double MinPsnr;
+ double MaxPsnr;
+ double TotYPsnr;
+ double MinYPsnr;
+ double MaxYPsnr;
+ double TotUPsnr;
+ double MinUPsnr;
+ double MaxUPsnr;
+ double TotVPsnr;
+ double MinVPsnr;
+ double MaxVPsnr;
+#endif
+
+ // Structures for entropy contexts
+ UINT32 FrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+ UINT32 FrameAcTokenDist[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+ // Storage for the first frame entropy probabilities.
+ // These are re-used for all subsequent key frames when we are operating in
+ // error (drop frame) resilient mode.
+ UINT8 FirstFrameDcProbs[2*(MAX_ENTROPY_TOKENS-1)];
+ UINT8 FirstFrameAcProbs[2*PREC_CASES*VP5_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];
+
+ // The Plane Y or UV to which the current block belongs (0 = Y 1 = UV)
+ UINT8 EncoderPlane;
+
+ // Last token coded this block.
+ UINT8 ThisBlockLastToken;
+ UINT8 ZeroCount;
+ //UINT32 MBModeCount[MAX_MODES+1];
+ UINT32 MBModeCount[4][MAX_MODES+1];
+ UINT32 BModeCount[MAX_MODES+1];
+ UINT32 CountModeSameAsLast[4][MAX_MODES+1];
+ UINT32 CountModeDiffFrLast[4][MAX_MODES+1];
+
+ UINT32 ModeCodeArray[4][MAX_MODES+1][MAX_MODES+1];
+ UINT8 ModeLengthArray[4][MAX_MODES+1][MAX_MODES+1];
+
+ // TEMP
+ UINT32 ModeBitCount[2];
+ INT64 ModeComplexity[2];
+ UINT32 ModeBlocks[2];
+
+ UINT32 MBModeCostBoth[11]; // NOTE(review): literal 11 here and in the next three arrays, where sibling arrays use MAX_MODES+1 -- confirm they agree
+ UINT32 MBModeCostNoNear[11];
+ UINT32 MBModeCostNoNearest[11];
+ UINT32 BModeCost[11];
+ UINT32 MvBaselineDist[2][MV_ENTROPY_TOKENS];
+ UINT32 FrameMvCount;
+ UINT32 EstMVCost[2][MV_ENTROPY_TOKENS];
+ UINT32 EstModeCost[2][MAX_MODES]; // NOTE(review): sized MAX_MODES, not MAX_MODES+1 like the mode arrays above -- confirm intended
+
+ UINT32 nExperimentals;
+ INT32 Experimental[C_SET_EXPERIMENTAL_MAX - C_SET_EXPERIMENTAL_MIN + 1];
+
+ // Bandwidth and buffer control variables
+ INT32 PerFrameBandwidth; // Target for average bandwidth per frame.
+ INT32 InterFrameTarget; // Average "inter" frame bit target corrected for key frame costs
+ INT32 ThisFrameTarget; // Modified rate target for this frame
+
+ BOOL BufferedMode; // FALSE = Tight buffering (Video Conferencing mode); TRUE = normal buffered/streaming mode.
+ BOOL ErrorResilliantMode; // A mode used for VC etc. to make the codec more resilient to dropped frames.
+ INT32 StartingBufferLevel; // The initial encoder buffer level
+ INT32 CurrentBufferLevel; // Current decoder buffer fullness state
+ INT32 OptimalBufferLevel; // The buffer level target we strive to reach / maintain.
+ INT32 DropFramesWaterMark; // Buffer fullness watermark for forced drop frames.
+ INT32 ResampleDownWaterMark; // Buffer fullness watermark for downwards spatial re-sampling
+ INT32 ResampleUpWaterMark; // Buffer fullness watermark where returning to larger image size is considered
+ INT32 LastKeyFrameBufferLevel; // Used to monitor changes in buffer level when considering re-sampling.
+
+ INT32 Speed;
+ INT32 CPUUsed;
+
+ UINT32 ModeMvCostEstimate; // Running total of cost estimates for modes and MVs in this frame.
+
+ // Variables used in regulating cost of new motion vectors based upon an estimate of new MV frequency.
+ UINT32 FrameNewMvCounter;
+ UINT32 FrameModeCounter;
+ UINT32 MvEpbCorrection;
+ UINT32 LastFrameNewMvUsage; // 0 = Low 9 = High
+
+ UINT32 * MbBestErr;
+ UINT32 * MbBestErrAlloc;
+
+ UINT32 EstDcTokenCosts[2][MAX_ENTROPY_TOKENS];
+ UINT32 EstAcTokenCosts[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+ // Data structures used to save and restore MB and DC contexts during rate distortion
+ MACROBLOCK_INFO CopyMbi;
+ BLOCK_CONTEXTA AboveCopyY[2];
+ BLOCK_CONTEXTA AboveCopyU;
+ BLOCK_CONTEXTA AboveCopyV;
+ BLOCK_CONTEXT LeftYCopy[2];
+ BLOCK_CONTEXT LeftUCopy;
+ BLOCK_CONTEXT LeftVCopy;
+ Q_LIST_ENTRY LastDcYCopy[3];
+ Q_LIST_ENTRY LastDcUCopy[3];
+ Q_LIST_ENTRY LastDcVCopy[3];
+
+ // Above and left context for encoding
+ UINT8 *aboveDcTokensAlloc[3]; // 0 for y, 1 for u and 2 for v
+ UINT8 *aboveDcTokens[3]; // 0 for y, 1 for u and 2 for v
+ UINT8 leftTokens[4][64]; // 0 and 1 for y, 2 for u and 3 for v
+
+
+ MB_DC_CONTEXT MbDcContexts[MAX_MODES][6]; // Per mode, per block position data structure for an MB
+
+ UINT32 avgPickModeTime;
+ UINT32 avgEncodeTime;
+ UINT32 avgPackVideoTime;
+
+ UINT32 ForceHScale;
+ UINT32 ForceHRatio;
+ UINT32 ForceVScale;
+ UINT32 ForceVRatio;
+ BOOL ForceInternalSize;
+
+ PreProcInstance preproc;
+ INT32 FrameRateInput;
+ INT32 FrameRateDropFrames;
+ INT32 FrameRateDropCount;
+
+
+ // Loop filter switch (revision 1.33 in the history above: "Added EncoderLoopFilterOff flag")
+ UINT32 EncoderLoopFilterOff;
+ // variables for 5 region diamond MV search
+ INT32 DSMVSearchSteps;
+ INT32 DSMVPixelOffsetY[MAX_SEARCH_SITES];
+ INT32 DSMVOffsetX[MAX_SEARCH_SITES];
+ INT32 DSMVOffsetY[MAX_SEARCH_SITES];
+
+ // Per-instance motion search callbacks; both take an xCP_INST as their first argument
+ UINT32 (*FindMvViaSearch)( xCP_INST cpi,
+ UINT8 *SrcPtr,
+ INT32 SourceStride,
+ UINT8 *RefPtr,
+ INT32 ReconStride,
+ MOTION_VECTOR *MV,
+ UINT8 **BestBlockPtr,
+ UINT32 BlockSize);
+
+
+ void (*FindBestHalfPixelMv)(xCP_INST cpi,
+ UINT8 *SrcPtr,
+ INT32 SourceStride,
+ UINT8 *RefPtr,
+ INT32 ReconStride,
+ MOTION_VECTOR *MV,
+ UINT8 **BestBlockPtr,
+ UINT32 BlockSize,
+ UINT32 MinError);
+
+
+
+} CP_INSTANCE;
+
+
+extern UINT32 (*GetMBFrameVertVar)(CP_INSTANCE *cpi); // Frame variance function pointer (revision 1.34). extern: declaration only, exactly one .c file must define it
+extern UINT32 (*GetMBFieldVertVar)(CP_INSTANCE *cpi); // Field variance function pointer (revision 1.34). extern: declaration only, exactly one .c file must define it
+
+/****************************************************************************
+* Functions.
+*****************************************************************************
+*/
+
+
+extern void UpdateFrame(CP_INSTANCE *cpi);
+
+extern UINT32 QuadCodeDisplayFragments (CP_INSTANCE *cpi);
+// NOTE(review): revision 1.29 in the history above says the QuadCodeComponent prototype and HExtra/VExtra were removed -- confirm whether the next declaration is stale
+extern UINT32 QuadCodeComponent ( CP_INSTANCE *cpi, UINT32 FirstSB, UINT32 SBRows, UINT32 SBCols, UINT32 HExtra, UINT32 VExtra, INT32 SourceStride );
+
+extern void AcquireSingleFrame( CP_INSTANCE *cpi, UINT32 CurrFrame );
+extern void AcquireFirstFrame(CP_INSTANCE *cpi);
+extern void AcquireNextFrame( CP_INSTANCE *cpi, UINT32 CurrFrame );
+
+extern void InitFrameTimer( CP_INSTANCE *cpi);
+
+extern UINT32 EncodeData(CP_INSTANCE *cpi); // NOTE(review): meaning of the UINT32 result is not visible from this header -- confirm
+
+// Loop optimizations
+extern void InitMapArrays(void); // (void) makes this a real prototype in C; an empty () would leave call arguments unchecked
+
+// Codec
+extern UINT32 DPCMTokenizeBlock ( CP_INSTANCE *cpi, INT32 FragIndex, INT32 SourceStride );
+extern void SUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, // Same signature as the Sub8 function pointer declared earlier
+ INT32 SourceStride, INT32 ReconStride );
+extern void SUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, // Same signature as the Sub8_128 function pointer
+ INT32 SourceStride );
+extern void SUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, // Same signature as the Sub8Av2 function pointer
+ INT32 SourceStride, INT32 ReconStride );
+
+
+
+extern void PackEOBRun(CP_INSTANCE *cpi);
+extern void ConvertBmpToYUV( PB_INSTANCE *pbi, UINT8 * BmpDataPtr, UINT8 * YuvBufferPtr );
+extern CP_INSTANCE * CreateCPInstance(void); // Returns a pointer to a CP_INSTANCE; pairs by name with DeleteCPInstance
+extern void DeleteCPInstance(CP_INSTANCE **cpi); // Takes the address of the instance pointer; NOTE(review): presumably so the caller's pointer can be cleared -- confirm
+extern void CMachineSpecificConfig(void);
+// Forward DCT routines; same signature as the fdct_short function pointer declared earlier. fdct_short_C is declared once (a duplicate line was dropped).
+// extern void fdct_slow16 ( INT16 * InputData, INT16 * OutputData );
+extern void fdct_slowf ( INT16 * InputData, INT16 * OutputData );
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+
+extern BOOL EAllocateFragmentInfo(CP_INSTANCE *cpi); // Returns BOOL; NOTE(review): presumably TRUE on success -- confirm
+extern BOOL EAllocateFrameInfo(CP_INSTANCE *cpi); // Returns BOOL; NOTE(review): presumably TRUE on success -- confirm
+extern void EDeleteFragmentInfo(CP_INSTANCE *cpi);
+extern void EDeleteFrameInfo(CP_INSTANCE *cpi);
+extern UINT32 PickIntra( CP_INSTANCE *cpi );
+extern UINT32 PickModes( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError); // NOTE(review): InterError/IntraError are presumably outputs -- confirm
+
+extern INT32 GetSpeckSumAbsDiffs( UINT8 * NewDataPtr, UINT8 * RefDataPtr,
+ INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+extern INT32 GetNextSpeckSumAbsDiffs( UINT8 * NewDataPtr, UINT8 * RefDataPtr, // Same signature as GetSpeckSumAbsDiffs
+ INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+
+extern INT32 GetHalfPixelSpeckSumAbsDiffs( UINT8 * SrcData, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, // Takes two reference pointers instead of one
+ INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex) ; // Also declared, with the same signature, in misc_common.h
+
+// Declared for cx\generic\encodembs.c
+extern void EncodeFrameMbs(CP_INSTANCE *cpi);
+
+
+// Declared for cx\generic\vfw_comp_if.c
+extern void CCONV ChangeEncoderSize(CP_INSTANCE* cpi, UINT32 Width, UINT32 Height); // CCONV is defined elsewhere; NOTE(review): presumably a calling-convention macro -- confirm
+extern void CopyOrResize(CP_INSTANCE* cpi);
+
+// Declared for cx\generic\tokenize.c
+extern UINT16 TokenizeFrag(CP_INSTANCE* cpi, INT16* RawData, UINT16 BlockSize, UINT32 Plane, BLOCK_CONTEXTA* Above, BLOCK_CONTEXT* Left);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h
new file mode 100644
index 00000000..c2dfea9a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+* Module Title : decodemode.h
+*
+* Description : functions for decoding modes and motionvectors
+*
+* AUTHOR : James Bankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 30OCT01 New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#ifndef DECODEMODE_H
+#define DECODEMODE_H /* Include guard: the struct and enum types below cannot be defined twice in one translation unit. */
+
+#ifndef STRICT
+#define STRICT /* Strict type checking. */
+#endif
+
+/****************************************************************************
+* Implicit Imports
+*****************************************************************************
+*/
+extern UINT8 Stats[9][4][4][4]; // UINT8, PB_INSTANCE, MAX_MODES etc. must already be declared by the including file
+extern UINT8 NNStats[7][4][4][4];
+extern UINT8 NN2Stats[7][4][4][4];
+extern UINT8 blockStats[3][4][4][4];
+
+#define MODETYPES 3 // NOTE(review): the MODETYPE enum below has four values -- confirm which three this counts
+#define MODEVECTORS 16
+#define PROBVECTORXMIT 174
+#define PROBIDEALXMIT 254
+
+/****************************************************************************
+* Exported data structures.
+*****************************************************************************
+*/
+
+typedef struct _modeContext // Three UINT8 mode values named left, above and last
+{
+ UINT8 left;
+ UINT8 above;
+ UINT8 last;
+} MODE_CONTEXT;
+
+typedef struct _htorp // "Token or pointer" packed into 8 bits; bit-fields on unsigned char are implementation-defined
+{
+ unsigned char selector : 1; // 1 bit selector 0->ptr, 1->token
+ unsigned char value : 7; // 7-bit payload, interpreted according to selector
+} torp;
+
+typedef struct _hnode // Tree node with a left and a right torp branch
+{
+ torp left;
+ torp right;
+} HNODE;
+
+typedef enum _MODETYPE // Enumerators take the implicit values 0..3 in declaration order
+{
+ MACROBLOCK,
+ NONEAREST_MACROBLOCK,
+ NONEAR_MACROBLOCK,
+ BLOCK
+} MODETYPE;
+
+#ifndef MAPCA
+__inline
+#endif
+ int mbClass(int i); // Declared __inline unless MAPCA is defined; the body is not in this header
+
+/****************************************************************************
+* Imports
+*****************************************************************************
+*/
+extern HNODE MBCodingMode[9];
+extern HNODE NN2MBCodingMode[8];
+extern HNODE NNMBCodingMode[7];
+extern HNODE BlockCodingMode[3];
+extern UINT8 BaselineXmittedProbs[4][2][MAX_MODES];
+
+/****************************************************************************
+* Function Prototypes
+*****************************************************************************
+*/
+void DecodeModeProbs(PB_INSTANCE *pbi);
+extern void FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol, // Also declared, with the same signature, in decodemv.h
+ MOTION_VECTORA* nearest, MOTION_VECTORA* nextnearest, UINT8 Frame,int *type);
+
+extern void BuildModeTree(PB_INSTANCE *pbi);
+
+#endif /* DECODEMODE_H */
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h
new file mode 100644
index 00000000..2dcab158
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h
@@ -0,0 +1,45 @@
+/****************************************************************************
+*
+* Module Title : decodemv.h
+*
+* Description : functions for decoding modes and motionvectors
+*
+* AUTHOR : James Bankoski
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 JBB 30OCT01 New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#ifndef STRICT
+#define STRICT /* Strict type checking. */
+#endif
+
+
+#define MV_NODES 11 // Second dimension of the MvUpdateProbs table declared at the end of this header
+
+/****************************************************************************
+* Implicit Imports
+*****************************************************************************
+*/
+/****************************************************************************
+* Exported data structures and functions
+*****************************************************************************
+*/
+
+extern void FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol, // Also declared, with the same signature, in decodemode.h
+ MOTION_VECTORA* nearest, MOTION_VECTORA* nextnearest, UINT8 Frame, int *type);
+
+extern void ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType );
+
+extern void decodeMotionVector( PB_INSTANCE *pbi, MOTION_VECTOR *mv, MOTION_VECTOR *nearestMv); // NOTE(review): mv is presumably the output, nearestMv the predictor -- confirm
+
+extern UINT8 MvUpdateProbs[2][MV_NODES]; // 2 x MV_NODES table defined elsewhere
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h b/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h
new file mode 100644
index 00000000..7c4995a6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h
@@ -0,0 +1,93 @@
+/****************************************************************************
+*
+* Module Title : Huffman.h
+*
+* Description : Video CODEC
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.04 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+* 1.03 JBB 26 Jan 01 New Huffman Code
+* 1.02 PGW 11 Oct 00 Deleted reference to FrequencyCounts[].
+* 1.01 PGW 15/03/00 Changes re. updated entropy tables.
+* 1.00 PGW 12/10/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef HUFFMAN_H
+#define HUFFMAN_H
+
+#include "type_aliases.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+/****************************************************************************/
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+typedef struct _tokenorptr // Selector bit plus 7-bit payload; used for the left/right members of HUFF_NODE
+{
+ unsigned int selector : 1; // 1 bit selector 0->ptr, 1->token
+ unsigned int value : 7; // 7-bit payload, interpreted according to selector
+} tokenorptr;
+
+
+typedef struct _huffnode // Node type taken by the VP5_* Huffman routines declared below
+{
+ union
+ {
+ char l; // NOTE(review): whether l overlays the whole selector/value pair depends on how the compiler lays out unsigned int bit-fields -- confirm
+ tokenorptr left;
+ } leftunion;
+ union
+ {
+ char r; // Same overlay caveat as l
+ tokenorptr right;
+ } rightunion;
+ unsigned char freq;
+
+} HUFF_NODE;
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+extern void VP5_BuildHuffTree( // NOTE(review): presumably fills hn from the counts array -- confirm
+ HUFF_NODE *hn,
+ unsigned int *counts,
+ int values );
+
+extern void VP5_CreateCodeArray( HUFF_NODE *hn, // NOTE(review): codearray/lengtharray are presumably outputs -- confirm
+ int node,
+ unsigned int *codearray,
+ unsigned char *lengtharray,
+ int codevalue,
+ int codelength );
+
+extern void VP5_EncodeValue( // Takes a BOOL_CODER (from boolhuff.h, included above) together with a HUFF_NODE
+ BOOL_CODER *bc,
+ HUFF_NODE *hn,
+ int value,
+ int length);
+
+
+
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h b/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h
new file mode 100644
index 00000000..f72453d3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h
@@ -0,0 +1,53 @@
+/****************************************************************************
+*
+* Module Title : MiscCommon.h
+*
+* Description : Miscellaneous common routines header file
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 PGW 15/10/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+
+#ifndef MISCCOMP_H
+#define MISCCOMP_H
+
+#include "type_aliases.h"
+#include "compdll.h"
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Function Prototypes
+*****************************************************************************
+*/
+extern double GetEstimatedBpb( CP_INSTANCE *cpi, UINT32 TargetQIndex );
+extern void UpdateBpbCorrectionFactor( CP_INSTANCE *cpi, UINT32 FrameSize ); /* NOTE(review): presumably maintains cpi->BpbCorrectionFactor -- confirm */
+extern void UpRegulateMB( CP_INSTANCE *cpi, UINT32 RegulationQ, UINT32 SB, UINT32 MB, BOOL NoCheck );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex ); /* Also declared, with the same signature, in compdll.h (included above) */
+extern void RegulateQ( CP_INSTANCE *cpi, INT32 TargetBits );
+extern void ConfigureQuality( CP_INSTANCE *cpi, UINT32 QualityValue );
+extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
+
+extern void PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, BLOCK_POSITION bp); /* The only prototype here that takes a PB_INSTANCE instead of a CP_INSTANCE */
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h b/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h
new file mode 100644
index 00000000..bea27745
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h
@@ -0,0 +1,535 @@
+/****************************************************************************
+*
+* Module Title : PBDLL
+*
+* Description : Video CODEC DEMO playback dll header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.40 YWX 17-Dec-02 Added DeInterlaceMode
+* 1.39 YWX 06-Nov-01 Changed to align the MB coeffs buffer memory
+* 1.38 AWG 22-MAY-01 Added support for DCT16
+* 1.37 JBB 01-MAY-01 Added features to support vp5
+* 1.36 JBB 06-Apr-01 Added cpufree variable
+* 1.35 JBB 23-Mar-01 New data structure defined for DC prediction
+* 1.34 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+* 1.33.PGW 08 Feb 01 Added LastFrameQIndex.
+* 1.32 PGW 25 Jan 01 Changes to support new motion vector entropy coding in VP5.
+* 1.31 JBB 26-JAN-01 Fixes for New Huffman Strategy
+* 1.30 YWX 27-Nov-00 Added function Pointers for simple deblocker, i.e.
+* Deblocking filter for low end machines
+* 1.29 YWX 02-Nov-00 Added function pointers for new loopfilter
+* 1.28 PGW 16 Nov 00 Deleted redundant data structures.
+* Added BlockPatternPredictor.
+* 1.27 YWX 02-Nov-00 Added function pointers for new loopfilter
+* 1.26 YWX 19_Oct-00 Added function pointers for 1-2 scaling
+* 1.25 JBB 17-oct-00 Ifdefs around version information
+* 1.24 YWX 17-Oct-00 Added *FragCoordinates for new loop filter strategy
+* 1.23 PGW 15 Oct 00 Added select_InterUV_quantiser() and related data structures.
+* 1.22 PGW 11 Oct 00 Added CreateHuffmanTrees() and DestroyHuffmanTrees()
+* Added void SelectHuffmanSet() and Huffman selector variables.
+* 1.23 YWX 11-Oct-00 Added LastFrameNoMvRecon and LastFrameNoMvReconAlloc
+* 1.22 YWX 04 Oct 00 Merged scaling and new loop filtering code
+* 1.21 YWX 06 Sep 00 Added new deringing functions pointers
+* 1.21 PGW 18 Sep 00 QThreshTable[] made instance specific.
+* Added InitQTables().
+* 1.20 JBB 25 Aug 00 Versioning differences
+* 1.19 JBB 21 Aug 00 New More Blurry in high variance area deringer
+* 1.18 YWX 2 Aug 00 Added function pointers for Postproc
+* 1.17 JBB 28 Jul 00 Added Fragment Variance Value for eliminating deringer
+* in some cases...
+* 1.16 JBB 27 Jul 00 Moved kernel modifiers to pbi, malloc checks
+* 1.15 SJL 24Jul00 Changes for Mac
+* 1.14 YWX 15/05/00 More variable and function pointers for postprocessor
+* 1.13 YWX 08/05/00 Added #ifdef s and function pointers for postprocessor
+* 1.12 JYX 05/05/00 Added PostProcessing (PostProcessBuffer + PostProcessLevel)
+* 1.11 SJL 20/04/00 Added ability to enable new dequant code for the dxer.
+* 1.10 JYX 06/04/00 Aligned Small Buffers & Live Codec Reordering
+* 1.09 SJL 22/03/00 Added func ptr for the loop filter.
+* 1.08 JBB 20/03/00 32 Byte aligned buffers, Back to Integer Forward DCT
+* Additional function pointers for optimized code
+* 1.07 PGW 20/03/00 Removed InterIntra.
+* 1.06 PGW 17/03/00 Changes to support separate Y and UV entropy tables.
+* 1.05 JBB 29/01/00 Removed Globals added Playback only function externs !
+* 1.04 PGW 17/12/99 Draw dib functionality removed.
+* 1.03 PGW 22/11/99 Changes relating to restructuring of block map stuff.
+* 1.02 PGW 15/07/99 Added bit extraction variables.
+* 1.01 PGW 09/07/99 Added code to support profile timing
+* 1.00 PGW 28/06/99 New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+#ifndef __INC_PBDLL_H
+#define __INC_PBDLL_H
+
+
+#define VAL_RANGE 256
+
+
+#include "codec_common.h"
+#include "huffman.h"
+#include "tokenentropy.h"
+#include "vfw_pb_interface.h"
+#include "postproc_if.h"
+#include "vputil_if.h"
+#include "quantize.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+#ifdef MAPCA
+// switch to turn on the Data streamer
+#define DMAREADREFERENCE
+#define DMAWRITERECON
+#define RECONSTRUCTMBATONCE
+
+#define __inline
+#endif
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+typedef enum // Block/macroblock coding modes; 0x0-0x9 are ten consecutive values (MAX_MODES is 10).
+{
+ CODE_INTER_NO_MV = 0x0, // INTER prediction, (0,0) motion vector implied.
+ CODE_INTRA = 0x1, // INTRA i.e. no prediction.
+ CODE_INTER_PLUS_MV = 0x2, // INTER prediction, non zero motion vector.
+ CODE_INTER_NEAREST_MV = 0x3, // Use Last Motion vector
+ CODE_INTER_NEAR_MV = 0x4, // Prior last motion vector
+ CODE_USING_GOLDEN = 0x5, // 'Golden frame' prediction (no MV).
+ CODE_GOLDEN_MV = 0x6, // 'Golden frame' prediction plus MV.
+ CODE_INTER_FOURMV = 0x7, // Inter prediction 4MV per macro block.
+ CODE_GOLD_NEAREST_MV = 0x8, // Use Last Motion vector (presumably the golden-frame counterpart of 0x3 - TODO confirm)
+ CODE_GOLD_NEAR_MV = 0x9, // Prior last motion vector (presumably the golden-frame counterpart of 0x4 - TODO confirm)
+ DO_NOT_CODE = 0x10 // Fake Mode. NOTE(review): 0x10 is 16, not 10; it cannot be stored in FRAG_INFO's 4-bit FragCodingMode and it sizes VP5_Mode2Frame[] at 16 entries.
+} CODING_MODE;
+
+typedef struct // Per-fragment record packed into bit-fields (1 + 4 + 6 + 6 bits).
+{
+ unsigned int DisplayFragment : 1; // single-bit flag
+ unsigned int FragCodingMode : 4; // 4 bits: holds 0..15 (presumably a CODING_MODE value - TODO confirm)
+ int MVectorX : 6; // 6-bit field; signedness of a plain 'int' bit-field is implementation-defined in C
+ int MVectorY : 6; // 6-bit field; same signedness caveat as MVectorX
+} FRAG_INFO;
+
+typedef struct _DCINFO
+{
+ Q_LIST_ENTRY dc;
+ short frame;
+} DCINFO;
+
+// defined so i don't have to remember which block goes where
+typedef enum
+{
+ TOP_LEFT_Y_BLOCK = 0,
+ TOP_RIGHT_Y_BLOCK = 1,
+ BOTTOM_LEFT_Y_BLOCK = 2,
+ BOTTOM_RIGHT_Y_BLOCK = 3,
+ U_BLOCK = 4,
+ V_BLOCK = 5
+} BLOCK_POSITION;
+
+
+// all the information gathered from a block to be used as context in the next block
+typedef struct
+{
+ UINT8 Tokens[64];
+ CODING_MODE Mode;
+ UINT16 Frame;
+ Q_LIST_ENTRY Dc;
+ UINT32 EOBPos;
+ UINT32 unused;
+} BLOCK_CONTEXT;
+
+typedef struct
+{
+ UINT32 EOBPos;
+ CODING_MODE Mode;
+ UINT16 Frame;
+ Q_LIST_ENTRY Dc;
+ UINT8 Tokens[1];
+ UINT8 unused[3];
+} BLOCK_CONTEXTA;
+
+typedef struct
+{
+ INT16 x;
+ INT16 y;
+
+} MOTION_VECTORA;
+
+// all the contexts maintained for a frame
+typedef struct
+{
+ BLOCK_CONTEXT LeftY[2]; // 1 for each block row in a macroblock
+ BLOCK_CONTEXT LeftU;
+ BLOCK_CONTEXT LeftV;
+
+ BLOCK_CONTEXTA *AboveY;
+ BLOCK_CONTEXTA *AboveU;
+ BLOCK_CONTEXTA *AboveV;
+
+ BLOCK_CONTEXTA *AboveYAlloc;
+ BLOCK_CONTEXTA *AboveUAlloc;
+ BLOCK_CONTEXTA *AboveVAlloc;
+
+ Q_LIST_ENTRY LastDcY[3]; // 1 for each frame
+ Q_LIST_ENTRY LastDcU[3];
+ Q_LIST_ENTRY LastDcV[3];
+
+} FRAME_CONTEXT;
+
+// Structure to hold last token values at each position in block
+typedef UINT8 TOKENBUFFER[256];
+
+//#define BIT_STATS 1
+#ifdef BIT_STATS
+#define BIT_STAT_CATEGORIES 8
+
+extern UINT32 BitStats[BIT_STAT_CATEGORIES];
+extern UINT8 BitStatCategory;
+#endif
+
+typedef struct // Working state for the macroblock currently being processed (embedded in PB_INSTANCE as 'mbi').
+{
+ Q_LIST_ENTRY (*CoeffsAlloc)[72]; // coefficients 64 per frag 4 y in raster order, u then v. NOTE(review): rows are 72 entries, not 64 - presumably padding for alignment; TODO confirm
+ Q_LIST_ENTRY (*Coeffs)[72]; // coefficients 64 per frag 4 y in raster order, u then v (same 72-entry rows as CoeffsAlloc)
+ CODING_MODE Mode; // mode macroblock coded as
+ CODING_MODE BlockMode[6]; // one coding mode per entry, 6 entries (presumably one per block of the macroblock - TODO confirm)
+ MOTION_VECTOR Mv[6]; // one motion vector per block u and v calculated from rest
+
+ MOTION_VECTOR NearestInterMVect;// nearest mv in last frame
+ MOTION_VECTOR NearInterMVect; // near mv in last frame
+ MOTION_VECTOR NearestGoldMVect; // nearest mv in gold frame
+ MOTION_VECTOR NearGoldMVect; // near mv in gold frame
+ UINT32 MBrow; // mb row
+ UINT32 MBcol; // mb col
+
+ BLOCK_POSITION bp; // block number 0 - 5
+ UINT32 Source; // address for source (compressor only). NOTE(review): typed UINT32; if this holds a pointer value it would truncate on 64-bit builds - confirm it is an offset
+ UINT32 SourceY; // starting row
+ UINT32 SourceX; // starting column
+ INT32 CurrentSourceStride; // pitch of source (compressor only)
+ UINT32 Recon; // address in reconstruction buffer of block (same UINT32-width caveat as Source)
+ INT32 CurrentReconStride; // pitch of reconstruction
+ UINT32 Plane; // plane block is from
+ INT32 MvShift; // motion vector shift value
+ INT32 MvModMask; // motion vector mod mask
+ INT32 FrameSourceStride; // Stride of the frame
+ INT32 FrameReconStride; // Stride of the frame
+
+#ifdef RECONSTRUCTMBATONCE
+ UINT32 ReconIndex[6]; // ReconIndex for each block
+#endif
+
+ UINT32 SourcePtr[6]; // address for source (compressor only)
+ UINT32 ReconPtr[6]; // NOTE(review): comment was a copy of SourcePtr's; by name this is the per-block reconstruction address - TODO confirm
+ UINT32 StripPtr[6]; // 6 entries like SourcePtr/ReconPtr; purpose not visible in this header
+#ifdef DMAREADREFERENCE
+ INT32 Offset[6]; // only present when DMAREADREFERENCE is defined (set under MAPCA)
+ UINT32 BoundaryX[6];
+ UINT32 BoundaryY[6];
+#endif
+ BLOCK_CONTEXTA *Above; // above block context
+ BLOCK_CONTEXT *Left; // left block context
+ Q_LIST_ENTRY *LastDc; // last dc value seen
+
+ INT32 Interlaced; // is the macroblock interlaced?
+
+} MACROBLOCK_INFO;
+
+/****************************************************************************
+* MACROS
+*****************************************************************************
+*/
+
+// Enumeration of how block is coded
+#define CURRENT_ENCODE_VERSION 5
+#define CURRENT_DECODE_VERSION 5
+
+#define UMV_BORDER 32
+#define STRIDE_EXTRA (UMV_BORDER * 2)
+
+
+#define MAX_MV_EXTENT 31 // Max search distance in half pixel increments
+#define MV_ENTROPY_TABLES 16
+#define MV_ENTROPY_TOKENS ((MAX_MV_EXTENT * 2) + 1)
+
+#define PPROC_QTHRESH 64
+
+#define MAX_MODES 10
+
+#define DCT_KEY_FRAME 0
+
+#define DEFAULT_HALF_PIXEL_PROB 85
+
+// 2-D flat index with dimensions [A][MAX_ENTROPY_TOKENS-1].
+#define DCProbOffset(A,B) \
+ ( (A) * (MAX_ENTROPY_TOKENS-1) \
+ + (B) )
+// 4-D flat index with dimensions [A][TOKEN_CONTEXTS][TOKEN_CONTEXTS][CONTEXT_NODES].
+#define DCContextOffset(A,B,C,D) \
+ ( (A) * TOKEN_CONTEXTS * TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (B) * TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (C) * CONTEXT_NODES \
+ + (D) )
+// 4-D flat index with dimensions [A][PREC_CASES][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1].
+#define ACProbOffset(A,B,C,D) \
+ ( (A) * PREC_CASES * VP5_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+ + (B) * VP5_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+ + (C) * (MAX_ENTROPY_TOKENS-1) \
+ + (D) )
+
+// 5-D flat index with dimensions [A][PREC_CASES][VP5_AC_BANDS-3][TOKEN_CONTEXTS][CONTEXT_NODES].
+#define ACContextOffset(A,B,C,D,E) \
+ ( (A) * PREC_CASES * (VP5_AC_BANDS-3) * TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (B) * (VP5_AC_BANDS-3) * TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (C) * TOKEN_CONTEXTS * CONTEXT_NODES \
+ + (D) * CONTEXT_NODES \
+ + (E) )
+// NOTE: MBOffset expands to pbi->MBCols, so a variable named pbi must be in scope at every use.
+#define MBOffset(row,col) ( (row) * pbi->MBCols + (col) )
+
+/****************************************************************************
+* Global Variables
+*****************************************************************************
+*/
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3];
+
+extern BOOL VP5_ModeUsesMC[MAX_MODES]; // table to indicate if the given mode uses motion estimation
+
+extern const int VP5_Mode2Frame[DO_NOT_CODE];
+
+extern const INT32 CoeffToBand[65];
+
+//****************************************************************
+// Function Pointers some probably could be library globals!
+// all the information we ever need about a macroblock
+
+typedef struct PB_INSTANCE
+{
+ // Should be able to delete these entries when VP5 complete
+ INT32 CodedBlockIndex;
+ UINT8 *DataOutputInPtr;
+ FRAG_INFO *FragInfo;
+ FRAG_INFO *FragInfoAlloc;
+
+
+ /* Current access points for input and output buffers */
+ BOOL_CODER br;
+
+ //****************************************************************************************
+ // Decoder and Frame Type Information
+ UINT8 Vp3VersionNo;
+ UINT32 DeInterlaceMode;
+ UINT32 PostProcessingLevel; /* Perform post processing */
+ UINT32 ProcessorFrequency; /* CPU frequency */
+ UINT32 CPUFree;
+ UINT8 FrameType;
+ UINT8 KeyFrameType;
+ //****************************************************************************************
+
+ //****************************************************************************************
+ // Frame Size & Index Information
+
+ CONFIG_TYPE Configuration; // frame configuration
+
+ UINT32 CurrentFrameSize;
+
+ UINT32 YPlaneSize;
+ UINT32 UVPlaneSize;
+ UINT32 VFragments;
+ UINT32 HFragments;
+ UINT32 UnitFragments;
+ UINT32 YPlaneFragments;
+ UINT32 UVPlaneFragments;
+
+ UINT32 ReconYPlaneSize;
+ UINT32 ReconUVPlaneSize;
+
+ UINT32 YDataOffset;
+ UINT32 UDataOffset;
+ UINT32 VDataOffset;
+ UINT32 ReconYDataOffset;
+ UINT32 ReconUDataOffset;
+ UINT32 ReconVDataOffset;
+
+ UINT32 MacroBlocks; // Number of Macro-Blocks in Y component
+ UINT32 MBRows; // Number of rows of MacroBlocks in a Y frame
+ UINT32 MBCols; // Number of cols of MacroBlocks in a Y frame (read by the MBOffset macro)
+ UINT32 ScaleWidth;
+ UINT32 ScaleHeight;
+ UINT32 OutputWidth;
+ UINT32 OutputHeight;
+ UINT32 OutputStride;
+
+ //****************************************************************************************
+
+ //****************************************************************************************
+ // Frames
+ YUV_BUFFER_ENTRY *ThisFrameRecon;
+ YUV_BUFFER_ENTRY *ThisFrameReconAlloc;
+ YUV_BUFFER_ENTRY *GoldenFrame;
+ YUV_BUFFER_ENTRY *GoldenFrameAlloc;
+ YUV_BUFFER_ENTRY *LastFrameRecon;
+ YUV_BUFFER_ENTRY *LastFrameReconAlloc;
+ YUV_BUFFER_ENTRY *PostProcessBuffer;
+ YUV_BUFFER_ENTRY *PostProcessBufferAlloc;
+ YUV_BUFFER_ENTRY *ScaleBuffer; /* new buffer for testing new loop filtering scheme */
+ YUV_BUFFER_ENTRY *ScaleBufferAlloc;
+ //****************************************************************************************
+
+ //****************************************************************************************
+ Q_LIST_ENTRY *quantized_list;
+#ifdef RECONSTRUCTMBATONCE
+ INT16 (*ReconDataBuffer)[64];
+ INT16 (*ReconDataBufferAlloc)[64];
+#else
+ INT16 *ReconDataBuffer;
+ INT16 *ReconDataBufferAlloc;
+#endif
+ UINT8 FragCoefEOB; // Position of last non 0 coef within QFragData
+ INT16 *TmpReconBuffer;
+ INT16 *TmpReconBufferAlloc;
+ INT16 *TmpDataBuffer;
+ INT16 *TmpDataBufferAlloc;
+
+ UINT8 *LoopFilteredBlockAlloc;
+ UINT8 *LoopFilteredBlock;
+
+#ifdef DMAREADREFERENCE
+ UINT8 (*ReferenceBlocksAlloc)[192];
+ UINT8 (*ReferenceBlocks)[192]; // Six Reference Blocks
+ UINT32 mvX[6], mvY[6];
+#endif
+
+#ifdef DMAWRITERECON
+ #ifdef RECONSTRUCTMBATONCE
+ UINT8 *ThisBandReconPtr[6]; //Current Band to write to
+ #else
+ UINT8 *ThisBandReconPtr; //Current Band to write to
+ #endif
+ UINT8 *ReconstructedMBs; //bandbuffer for DMA reconstructed MB row.
+ UINT8 *ReconstructedMBsAlloc;
+ UINT8 *FillMem;
+#endif
+ //****************************************************************
+
+ void (**idct)(INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData ); // pointer to a pointer-to-function (or to an array of them)
+
+ POSTPROC_INST postproc;
+ QUANTIZER *quantizer;
+ MACROBLOCK_INFO mbi; // all the information needed for one macroblock
+ FRAME_CONTEXT fc; // all of the context information needed for a frame
+
+ TOKENBUFFER LastToken; // LTIndex of tokens at each position in block
+
+ CODING_MODE LastMode; // Last Mode decoded;
+
+ UINT8 DcProbs[2*(MAX_ENTROPY_TOKENS-1)]; // size matches DCProbOffset's [2][MAX_ENTROPY_TOKENS-1] layout
+ UINT8 AcProbs[2*PREC_CASES*VP5_AC_BANDS*(MAX_ENTROPY_TOKENS-1)]; // size matches ACProbOffset's layout with a leading dimension of 2
+ UINT8 DcNodeContexts[2*TOKEN_CONTEXTS*TOKEN_CONTEXTS*CONTEXT_NODES]; // Plane, Context, Context, Node per DCContextOffset's strides (Node is the innermost index)
+ UINT8 AcNodeContexts[2*PREC_CASES*(VP5_AC_BANDS-3)*TOKEN_CONTEXTS*CONTEXT_NODES]; // Plane, Prec, AcBand, Context, Node per ACContextOffset's strides (comment previously listed Prec first) - TODO confirm against callers
+
+ UINT8 ZeroCount;
+ UINT8 MBModeProb[11];
+ UINT8 BModeProb[11];
+
+ UINT8 Inter00Prob;
+ UINT32 AvgFrameQIndex;
+
+ BOOL testMode;
+
+ UINT32 mvNearOffset[16];
+
+ int probInterlaced;
+ char *MBInterlaced;
+ char *predictionMode;
+ MOTION_VECTORA *MBMotionVector;
+ char *MBInterlacedAlloc;
+ char *predictionModeAlloc;
+ MOTION_VECTORA *MBMotionVectorAlloc;
+
+ UINT8 MvSignProbs[2];
+ UINT8 MvZeroProbs[2];
+ UINT8 MvHalfPixelProbs[2];
+ UINT8 MvLowBitProbs[2];
+ UINT8 MvSizeProbs[2][((MAX_MV_EXTENT+1) >> 2) - 1]; // second dimension is 7 with MAX_MV_EXTENT == 31
+
+ UINT8 probXmitted[4][2][MAX_MODES];
+ UINT8 probModeSame[4][MAX_MODES];
+ UINT8 probMode[4][MAX_MODES][MAX_MODES-1]; // nearest+near,nearest only, nonearest+nonear, 10 preceding modes, 9 nodes
+
+ UINT32 maxTimePerFrame;
+ UINT32 thisDecodeTime;
+ UINT32 avgDecodeTime;
+ UINT32 avgPPTime[10];
+ UINT32 avgBlitTime;
+ UINT32 BlackClamp;
+ UINT32 WhiteClamp;
+
+} PB_INSTANCE;
+
+/****************************************************************************
+* Functions.
+*****************************************************************************
+*/
+//****************************************************************
+// Function Pointers now library globals!
+//extern void (*ReadTokens)( xPB_INST pbi, UINT32 BlockSize, UINT32 Hpos );
+
+//****************************************************************
+extern PB_INSTANCE * VP5_CreatePBInstance(void);
+extern void VP5_DeletePBInstance(PB_INSTANCE **);
+extern BOOL VP5_LoadFrame(PB_INSTANCE *pbi);
+extern void VP5_SetFrameType(PB_INSTANCE *pbi, UINT8 FrType );
+extern UINT8 VP5_GetFrameType(PB_INSTANCE *pbi);
+extern BOOL VP5_InitFrameDetails(PB_INSTANCE *pbi);
+extern void VP5_ErrorTrap( PB_INSTANCE *pbi, int ErrorCode );
+extern BOOL VP5_AllocateFragmentInfo(PB_INSTANCE * pbi);
+extern BOOL VP5_AllocateFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize);
+extern void VP5_DeleteFragmentInfo(PB_INSTANCE * pbi);
+extern void VP5_DeleteFrameInfo(PB_INSTANCE * pbi);
+extern void VP5_DMachineSpecificConfig(void);
+// Reads one bit: forwards br to DecodeBool128() and returns its result as a UINT32.
+INLINE UINT32 VP5_bitread1(BOOL_CODER *br)
+{
+ return DecodeBool128(br);
+}
+INLINE UINT32 VP5_bitread(BOOL_CODER *br, int bits) // reads 'bits' bits MSB first; bits <= 0 returns 0; caller must keep bits <= 32
+{
+ UINT32 z = 0;
+ int bit;
+ for(bit=bits-1;bit>=0;bit--) // first decoded bit lands at position bits-1, last at position 0
+ {
+  z|=((UINT32)DecodeBool128(br)<<bit); // widen first: shifting a signed int 1 into bit 31 would be undefined
+ }
+ return z;
+}
+extern void vp5_appendframe(PB_INSTANCE *pbi);
+extern void VP5_readTSC(unsigned long *tsc);
+extern void ConfigureContexts(PB_INSTANCE *pbi);
+
+// dx\generic\decodembs.c
+extern void ResetAboveContext(PB_INSTANCE* pbi);
+extern void ResetLeftContext(PB_INSTANCE* pbi);
+extern void UpdateContext(PB_INSTANCE* pbi, BLOCK_CONTEXT* c, BLOCK_POSITION bp);
+extern void UpdateContextA(PB_INSTANCE* pbi, BLOCK_CONTEXTA* c, BLOCK_POSITION bp);
+extern void PredictDC(PB_INSTANCE* pbi, BLOCK_POSITION bp, Q_LIST_ENTRY* LastDC, BLOCK_CONTEXTA* Above, BLOCK_CONTEXT* Left);
+
+// dx\generic\recon.c
+extern void ReconstructBlock(PB_INSTANCE* pbi, BLOCK_POSITION bp);
+
+// dx\generic\decodemode.c
+extern CODING_MODE DecodeBlockMode(PB_INSTANCE *pbi);
+extern CODING_MODE DecodeMode(PB_INSTANCE *pbi, CODING_MODE lastmode, UINT32 type);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h b/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h
new file mode 100644
index 00000000..d2e1a314
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h
@@ -0,0 +1,89 @@
+#ifndef QUANTIZE_H
+#define QUANTIZE_H
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+/****************************************************************************
+* Structures
+*****************************************************************************
+*/
+typedef struct // Quantizer / dequantizer state; PB_INSTANCE holds a pointer to one ('quantizer').
+{
+ UINT32 FrameQIndex; // Quality specified as a table index
+ UINT32 ThisFrameQuantizerValue; // Quality value for this frame
+ short round[8];
+ short mult[8];
+ short zbin[8];
+ UINT32 LastQuantizerValue; // NOTE(review): comment was a copy of ThisFrameQuantizerValue's; by name presumably the previously used value - TODO confirm
+ UINT32 QThreshTable[Q_TABLE_SIZE]; // ac quantizer scale values
+
+ UINT32 *transIndex; // array to reorder zig zag to idct's ordering
+ UINT8 quant_index[64]; // array to reorder from raster to zig zag
+
+ // used by the dequantizer
+ Q_LIST_ENTRY * dequant_coeffs[2]; // pointer to current dequantization tables
+ Q_LIST_ENTRY * dequant_coeffsAlloc[2]; // alloc so we can keep aligned
+
+ INT32 QuantCoeffs[2][64]; // Quantizer values table
+ INT32 QuantRound[2][64]; // Quantizer rounding table
+ INT32 ZeroBinSize[2][64]; // Quantizer zero bin table
+
+
+} QUANTIZER;
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+extern void VP5_InitQTables
+(
+ QUANTIZER *pbi,
+ UINT8 Vp3VersionNo
+);
+
+extern void VP5_UpdateQ
+(
+ QUANTIZER *pbi,
+ UINT8 Vp3VersionNo
+);
+
+extern void VP5_UpdateQC
+(
+ QUANTIZER *pbi,
+ UINT8 Vp3VersionNo
+);
+
+extern void VP5_init_quantizer
+(
+ QUANTIZER *pbi,
+ UINT8 Vp3VersionNo
+);
+
+extern void (*VP5_quantize) // NOTE: declares a function-pointer variable, not a function; it must be assigned before it is called
+(
+ QUANTIZER *pbi, // named pbi but typed QUANTIZER *, not PB_INSTANCE *
+ INT16 * DCT_block,
+ Q_LIST_ENTRY * quantized_list,
+ UINT8 bp
+);
+
+extern void VP5_init_dequantizer
+(
+ QUANTIZER *pbi,
+ UINT8 Vp3VersionNo
+);
+
+extern QUANTIZER * VP5_CreateQuantizer
+(
+ void
+);
+
+extern void VP5_DeleteQuantizer
+(
+ QUANTIZER **pbi
+);
+
+extern UINT8 QTableSelect[6];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h b/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h
new file mode 100644
index 00000000..9408a09e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h
@@ -0,0 +1,52 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.h
+*
+* Description : Miscellaneous system dependant functions header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 PGW 12/10/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+// Test machine config
+
+// Misc.
+extern void VP5_IssueWarning( char * WarningMessage );
+extern void PauseProcess( unsigned int SleepMs );
+
+// System dynamic memory allocation
+char * SytemGlobalAlloc( unsigned int Size ); // NOTE(review): spelled "Sytem" (cf. SystemGlobalFree); confirm it matches the definition before renaming
+void SystemGlobalFree( char * MemPtr );
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h b/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h
new file mode 100644
index 00000000..8052f942
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h
@@ -0,0 +1,129 @@
+/****************************************************************************
+*
+* Module Title : TokenEntropy.h
+*
+* Description : Video CODEC: Coefficient token entropy header.
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.01 PGW 27 Jun 01 Module created.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Header Frames
+*****************************************************************************
+*/
+
+
+#ifndef TOKEN_ENTROPY_H
+#define TOKEN_ENTROPY_H
+
+#include "type_aliases.h"
+#include "boolhuff.h"
+#include "codec_common.h"
+#include "huffman.h"
+
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+// VP5 Huffman table AC bands
+#define VP5_AC_BANDS 6
+
+// Tokens Value Extra Bits (range + sign)
+#define ZERO_TOKEN 0 //0 Extra Bits 0+0
+#define ONE_TOKEN 1 //1 Extra Bits 0+1
+#define TWO_TOKEN 2 //2 Extra Bits 0+1
+#define THREE_TOKEN 3 //3 Extra Bits 0+1
+#define FOUR_TOKEN 4 //4 Extra Bits 0+1
+#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1
+#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1
+#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1
+#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1 NOTE(review): same range as CATEGORY3 above, so one of the two is stale - verify against DctRangeMinVals[] and ExtraBitLengths_VP5[]
+#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1
+#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1
+#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0
+#define MAX_ENTROPY_TOKENS (DCT_EOB_TOKEN + 1) // 12: token ids run 0..DCT_EOB_TOKEN
+#define ILLEGAL_TOKEN 255
+
+
+#define TOKEN_CONTEXTS 6 // EOB, 0, 1, 2, 3-4, x
+#define CONTEXT_NODES (MAX_ENTROPY_TOKENS-7)
+
+#define PREC_CASES 3
+
+#define DC_PROBABILITY_UPDATE_THRESH 100
+
+#define ZERO_CONTEXT_NODE 0
+#define EOB_CONTEXT_NODE 1
+#define ONE_CONTEXT_NODE 2
+#define LOW_VAL_CONTEXT_NODE 3
+#define TWO_CONTEXT_NODE 4
+#define THREE_CONTEXT_NODE 5
+#define HIGH_LOW_CONTEXT_NODE 6
+#define CAT_ONE_CONTEXT_NODE 7
+#define CAT_THREEFOUR_CONTEXT_NODE 8
+#define CAT_THREE_CONTEXT_NODE 9
+#define CAT_FIVE_CONTEXT_NODE 10
+
+#define PROB_UPDATE_BASELINE_COST 7
+
+#define MAX_PROB 254
+#define DCT_MAX_VALUE 2048
+
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+extern const UINT32 ProbCost[256];
+
+extern const UINT8 ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS];
+extern const UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS];
+
+typedef struct LineEq
+{
+ INT32 M;
+ INT32 C;
+} LINE_EQ;
+
+
+extern const UINT8 DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 AcUpdateProbs[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 PrevTokenIndex[MAX_ENTROPY_TOKENS];
+
+extern UINT8 PrecZeroRunLength[BLOCK_SIZE];
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+
+
+// These tables contain the normalized probabilities required to traverse the
+// entropy tree for DC and AC value tokens representing values >= 2.
+// Probabilities are normalized to 8 bits and represent the likelihood of a zero branch.
+
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h b/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h
new file mode 100644
index 00000000..3d746395
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h
@@ -0,0 +1,37 @@
+#ifndef XPRINTF_H
+#define XPRINTF_H
+//____________________________________________________________________________
+//
+// File: xprintf.h
+//
+// Description: Display a printf style message on the current video frame
+//
+// Author: Keith Looney
+//
+//____________________________________________________________________________
+// Revision History
+//
+
+//____________________________________________________________________________
+// Includes
+
+#include "pbdll.h"
+
+//____________________________________________________________________________
+// Defines
+
+//____________________________________________________________________________
+// Declarations
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+// printf-style message call (format plus variadic arguments); given C linkage when compiled as C++.
+extern int vp5_xprintf(const PB_INSTANCE* ppbi, long pixel, const char* format, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // XPRINTF_H
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj
new file mode 100644
index 00000000..c9f9a51e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj
@@ -0,0 +1,326 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{FC9F7C6D-C0BF-4265-B7BD-C184573C3C8A}</ProjectGuid>
+ <RootNamespace>vp5d</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>16.0.32002.118</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vp5d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vp5d\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+ <CodeAnalysisRules />
+ <CodeAnalysisRuleAssemblies />
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Vcpkg">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Vcpkg">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;NDEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ <Bscmake>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>.\..\..\Lib\Win32\Release/vp5d.bsc</OutputFile>
+ </Bscmake>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;NDEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ <Bscmake>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>.\..\..\Lib\Win32\Release/vp5d.bsc</OutputFile>
+ </Bscmake>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;_DEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <MinimalRebuild>false</MinimalRebuild>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ <Bscmake>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>.\..\..\Lib\Win32\Debug/vp5d.bsc</OutputFile>
+ </Bscmake>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;_DEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <MinimalRebuild>false</MinimalRebuild>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ <Bscmake>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <OutputFile>.\..\..\Lib\Win32\Debug/vp5d.bsc</OutputFile>
+ </Bscmake>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="dx\Generic\boolhuff.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\debug.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodembs.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemode.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemv.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DFrameR.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DSystemDependant.c">
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\FrameIni.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\Huffman.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\pb_globals.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\quantize.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\recon.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\TokenEntropy.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\vp50dxv.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\dsystemdependant.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\quantindexmmx.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="xprintf\xprintf.cpp">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters
new file mode 100644
index 00000000..cfe106b5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Decompress">
+ <UniqueIdentifier>{2c04083d-6bcf-4b0c-94ce-55f89142c8dc}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Win32">
+ <UniqueIdentifier>{aad98e01-f672-4f5a-8d90-0d8c9eeab331}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="dx\Generic\boolhuff.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\debug.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodembs.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemode.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\decodemv.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DFrameR.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\DSystemDependant.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\FrameIni.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\Huffman.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\pb_globals.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\quantize.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\recon.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\TokenEntropy.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Generic\vp50dxv.c">
+ <Filter>Decompress</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\dsystemdependant.c">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="dx\Win32\quantindexmmx.c">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ <ClCompile Include="xprintf\xprintf.cpp">
+ <Filter>Win32</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp b/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp
new file mode 100644
index 00000000..29826461
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp
@@ -0,0 +1,169 @@
+//____________________________________________________________________________
+//
+// File: xprintf.cpp
+//
+// Description: Display a printf style message on the current video frame
+//
+// Author: Keith Looney
+//
+//____________________________________________________________________________
+// Revision History
+//
+
+//____________________________________________________________________________
+// Includes
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <windows.h>
+
+#include "xprintf.h"
+
+//____________________________________________________________________________
+// Defines
+
+//____________________________________________________________________________
+// Declarations
+
+//____________________________________________________________________________
+// Definitions
+
+/****************************************************************************
+ *
+ *  Function    : vp5_xprintf
+ *
+ *  Description : Display a printf style message on the current video frame
+ *
+ *  INPUTS      : const PB_INSTANCE* ppbi : Instance whose PostProcessBuffer
+ *                                          receives the rendered text.
+ *                long nPixel             : Offset into PostProcessBuffer of
+ *                                          the first pixel to write.
+ *                const char* format, ... : printf style format string and
+ *                                          its arguments.
+ *
+ *  OUTPUTS     : Bytes of ppbi->PostProcessBuffer covered by set pixels of
+ *                the rendered text are written with 255.
+ *
+ *  RETURNS     : int: number of characters in the formatted message, or 0
+ *                if the message is empty or could not be drawn.
+ *
+ *  Notes       : The formatted message is truncated to 255 characters.
+ *                The text is drawn into a temporary 1 bit-per-pixel bitmap
+ *                (8 rows, 8 columns per character) and then copied into the
+ *                frame one row per Configuration.YStride bytes. No check is
+ *                made that the text fits inside the frame; that is left to
+ *                the caller.
+ *
+ *  ERRORS      : Any GDI failure abandons the draw, releases whatever was
+ *                acquired so far and returns 0.
+ *
+ ****************************************************************************/
+
+int vp5_xprintf(const PB_INSTANCE* ppbi, long nPixel, const char* format, ...)
+{
+    // All GDI handles start out NULL so that the Exit path can tell exactly
+    // which ones were acquired, whichever step fails.
+    HDC     hdcMemory = NULL;
+    HBITMAP hbmTemp   = NULL;
+    HBITMAP hbmOrig   = NULL;
+    HFONT   hfont     = NULL;
+    HFONT   hfonto    = NULL;
+
+    va_list arglist;
+    char    szFormatted[256] = "";
+    UINT8*  pDest   = &ppbi->PostProcessBuffer[nPixel];
+    long    nStride = ppbi->Configuration.YStride;
+    RECT    rect;
+    BOOL    bRC;
+    int     nChars;
+    int     rc = 0;
+    long    x;
+    long    y;
+
+    // Format text
+    // _vsnprintf does not write a terminator when the output is truncated,
+    // so leave room for one and put it there explicitly.
+
+    va_start(arglist, format);
+    _vsnprintf(szFormatted, sizeof(szFormatted) - 1, format, arglist);
+    va_end(arglist);
+    szFormatted[sizeof(szFormatted) - 1] = '\0';
+
+    nChars = static_cast<int>(strlen(szFormatted));
+    if (nChars == 0)
+    {
+        // Nothing to draw
+        return 0;
+    }
+
+    // Set up temporary bitmap: 8 rows, 8 columns per character
+
+    rect.left   = 0;
+    rect.top    = 0;
+    rect.right  = 8 * nChars;
+    rect.bottom = 8;
+
+    hdcMemory = CreateCompatibleDC(NULL);
+    if (hdcMemory == NULL)
+    {
+        goto Exit;
+    }
+
+    hbmTemp = CreateBitmap(rect.right, rect.bottom, 1, 1, NULL);
+    if (hbmTemp == NULL)
+    {
+        goto Exit;
+    }
+    hbmOrig = static_cast<HBITMAP>(SelectObject(hdcMemory, hbmTemp));
+    if (!hbmOrig)
+    {
+        goto Exit;
+    }
+
+    // Write text into bitmap
+
+    hfont = CreateFont(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,VARIABLE_PITCH | FF_SWISS, "");
+    if (hfont == NULL)
+    {
+        goto Exit;
+    }
+
+    // Select the new font and remember the one it replaces so that it can
+    // be put back before the new font is deleted.
+    hfonto = static_cast<HFONT>(SelectObject(hdcMemory, hfont));
+    if (!hfonto)
+    {
+        goto Exit;
+    }
+
+    SetTextColor(hdcMemory, 1);
+    SetBkColor(hdcMemory, 0);
+    SetBkMode(hdcMemory, TRANSPARENT);
+
+    // Clear the bitmap before drawing
+    bRC = BitBlt(hdcMemory, rect.left, rect.top, rect.right, rect.bottom, hdcMemory, rect.left, rect.top, BLACKNESS);
+    if (!bRC)
+    {
+        goto Exit;
+    }
+
+    bRC = ExtTextOut(hdcMemory, 0, 0, ETO_CLIPPED, &rect, szFormatted, nChars, NULL);
+    if (!bRC)
+    {
+        goto Exit;
+    }
+
+    // Copy bitmap to video frame
+    // Bitmap rows are read from the bottom up: frame row y takes bitmap row
+    // (rect.bottom - 1 - y). Only set pixels are written, so the frame shows
+    // through everywhere else.
+
+    for (y = rect.top; y < rect.bottom; ++y)
+    {
+        for (x = rect.left; x < rect.right; ++x)
+        {
+            if (GetPixel(hdcMemory, x, rect.bottom - 1 - y))
+            {
+                pDest[x] = 255;
+            }
+        }
+        pDest += nStride;
+    }
+
+    rc = nChars;
+
+Exit:
+    // Put the DC's original objects back before deleting ours
+    if (hbmTemp != NULL)
+    {
+        if (hbmOrig != NULL)
+        {
+            SelectObject(hdcMemory, hbmOrig);
+        }
+        DeleteObject(hbmTemp);
+    }
+    if (hfont != NULL)
+    {
+        if (hfonto != NULL)
+        {
+            SelectObject(hdcMemory, hfonto);
+        }
+        DeleteObject(hfont);
+    }
+
+    if (hdcMemory != NULL)
+    {
+        DeleteDC(hdcMemory);
+    }
+
+    return rc;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/Makefile b/Src/libvpShared/corelibs/cdxv/vppp/Makefile
new file mode 100644
index 00000000..f8a75930
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/Makefile
@@ -0,0 +1,64 @@
+## Target to build
+
+TARGET =libvppp
+
+## TOOLS
+CC = ecc
+LD = ecc
+AR = ar
+OBJDUMP = objdump
+RM = rm -f
+
+## Directories
+TOPDIR =C:\DuckSoft
+PRIVATEINCLUDE =${TOPDIR}\private\include
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
+VPPPINCLUDE =${TOPDIR}\private\corelibs\cdxv\vppp\include
+
+CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\vppp
+LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
+
+## Compile Flags
+ALLINCLUDES =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
+VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES =-DMAPCA
+ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
+
+DEBUG =-O2
+CFLAGS =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+ -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+ -magen_interroutine_padding
+ALLFLAGS =$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS = bsp\borders.o \
+ generic\clamp.o \
+ generic\deblock.o \
+ generic\DeInterlace.o \
+ generic\Dering.o \
+ generic\loopfilter.o \
+ generic\postproc.o \
+ generic\scale.o \
+ generic\simpledeblocker.o \
+ generic\doptsystemdependant.o
+
+
+SRCS = $(OBJS:.o=.c)
+
+ARTARGET = ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+ ${AR} -cr ${ARTARGET} ${OBJS}
+ mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+ $(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+ ${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c
new file mode 100644
index 00000000..8609121d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c
@@ -0,0 +1,76 @@
+/****************************************************************************
+ *
+ * Module Title : DeInterlace.c
+ *
+ * Description : De-Interlace routines.
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <memory.h>
+#include "type_aliases.h"
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr  : Pointer to input image.
+ *                  UINT8 *DstPtr  : Pointer to output image.
+ *                  INT32 Width    : Image width.
+ *                  INT32 Height   : Image height.
+ *                  INT32 Stride   : Image stride.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3-tap filter vertically to remove interlacing
+ *                  artifacts.
+ *
+ *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
+ *                  vertically in an interlaced frame. The first and last
+ *                  lines are copied unfiltered. Nothing is written when
+ *                  Width or Height is not positive, and a one-line image
+ *                  is simply copied. This function assumes:
+ *                  1) SrcPtr & DstPtr buffers have the same geometry.
+ *                  2) SrcPtr != DstPtr.
+ *
+ ****************************************************************************/
+void CFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32 i, j;
+    UINT32 x0, x1, x2;
+    UINT8 *PrevSrcPtr, *NextSrcPtr;
+    UINT8 *CurrentSrcPtr = SrcPtr;
+    UINT8 *CurrentDstPtr = DstPtr;
+
+    // An empty image has no line to copy or filter
+    if ( Width <= 0 || Height <= 0 )
+        return;
+
+    // Always copy the first line
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    // With a single line there is no separate last line: stepping on by
+    // Stride would read and write a row that does not exist.
+    if ( Height == 1 )
+        return;
+
+    for ( i=1; i<Height-1; i++ )
+    {
+        PrevSrcPtr = CurrentSrcPtr;
+        CurrentSrcPtr += Stride;
+        NextSrcPtr = CurrentSrcPtr + Stride;
+        CurrentDstPtr += Stride;
+
+        for ( j=0; j<Width; j++ )
+        {
+            x0 = PrevSrcPtr[j];
+            x1 = (CurrentSrcPtr[j]<<1);
+            x2 = NextSrcPtr[j];
+
+            // (above + 2*current + below) / 4, rounded to nearest
+            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2 + 2)>>2 );
+        }
+    }
+
+    // Copy the last line
+    CurrentSrcPtr += Stride;
+    CurrentDstPtr += Stride;
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c
new file mode 100644
index 00000000..2cf04ad0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c
@@ -0,0 +1,303 @@
+/****************************************************************************
+*
+* Module Title : borders.c
+*
+* Description :
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+#ifdef MAPCA
+#include "eti/mm.h"
+#include "eti_loopdir.h"
+#endif
+
+#ifdef MAPCA
+/* MAPCA-only helper used by UpdateUMVBorder for the Y plane.
+ * For each of PlaneHeight rows it reads one byte from SrcPtr1 and one from
+ * SrcPtr2, expands each through hmpv_bcopyrev_64_32, and stores the result
+ * into four consecutive n64 words at DestPtr1 / DestPtr2 respectively.
+ * NOTE(review): presumably hmpv_bcopyrev_64_32 replicates the byte across a
+ * 64-bit word and n64 is 8 bytes wide, giving a 32-byte fill per side -
+ * confirm against the MAPCA intrinsic documentation.
+ * NOTE(review): the destinations are accessed as n64, so they presumably
+ * must be suitably aligned and PlaneStride a multiple of 8 - confirm. */
+void CopyYLeftRightBorder
+(
+ UINT8 *restrict SrcPtr1,
+ UINT8 *restrict SrcPtr2,
+ UINT8 *restrict DestPtr1,
+ UINT8 *restrict DestPtr2,
+ UINT32 PlaneHeight,
+ UINT32 PlaneStride
+)
+{
+ n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
+ n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
+ // Row pitch expressed in n64 elements (byte stride divided by 8).
+ n32 PlaneStride64 = (PlaneStride>>3);
+ n32 Left, Right;
+ n64 Left64, Right64;
+ int i;
+
+ // Compiler hint for the loop that follows; keep it directly above the loop.
+ loop_directives ( ELD_SWP_IVDEP );
+ for ( i=0; i<PlaneHeight; i++ )
+ {
+ // Edge pixels of this row.
+ Left = SrcPtr1[0];
+ Right = SrcPtr2[0];
+
+ Left64 = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
+ Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
+
+ // Four n64 stores per side for this row.
+ DstPtr64_1[0] = Left64;
+ DstPtr64_2[0] = Right64;
+
+ DstPtr64_1[1] = Left64;
+ DstPtr64_2[1] = Right64;
+
+ DstPtr64_1[2] = Left64;
+ DstPtr64_2[2] = Right64;
+
+ DstPtr64_1[3] = Left64;
+ DstPtr64_2[3] = Right64;
+
+ // Step all four pointers down one row.
+ SrcPtr1 += PlaneStride;
+ SrcPtr2 += PlaneStride;
+ DstPtr64_1 += PlaneStride64;
+ DstPtr64_2 += PlaneStride64;
+ }
+}
+
+/* MAPCA-only helper used by UpdateUMVBorder for the U and V planes.
+ * Same scheme as CopyYLeftRightBorder, but only two n64 words are stored
+ * per side per row (half as many as for the Y plane).
+ * NOTE(review): presumably hmpv_bcopyrev_64_32 replicates the byte across a
+ * 64-bit word and n64 is 8 bytes wide, giving a 16-byte fill per side -
+ * confirm against the MAPCA intrinsic documentation. */
+void CopyUVLeftRightBorder
+(
+ UINT8 *restrict SrcPtr1,
+ UINT8 *restrict SrcPtr2,
+ UINT8 *restrict DestPtr1,
+ UINT8 *restrict DestPtr2,
+ UINT32 PlaneHeight,
+ UINT32 PlaneStride
+)
+{
+ n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
+ n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
+ // Row pitch expressed in n64 elements (byte stride divided by 8).
+ n32 PlaneStride64 = (PlaneStride>>3);
+ n32 Left, Right;
+ n64 Left64, Right64;
+ int i;
+
+ // Compiler hint for the loop that follows; keep it directly above the loop.
+ loop_directives ( ELD_SWP_IVDEP );
+ for ( i=0; i<PlaneHeight; i++ )
+ {
+ // Edge pixels of this row.
+ Left = SrcPtr1[0];
+ Right = SrcPtr2[0];
+
+ Left64 = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
+ Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
+
+ // Two n64 stores per side for this row.
+ DstPtr64_1[0] = Left64;
+ DstPtr64_2[0] = Right64;
+
+ DstPtr64_1[1] = Left64;
+ DstPtr64_2[1] = Right64;
+
+ // Step all four pointers down one row.
+ SrcPtr1 += PlaneStride;
+ SrcPtr2 += PlaneStride;
+ DstPtr64_1 += PlaneStride64;
+ DstPtr64_2 += PlaneStride64;
+ }
+}
+#endif
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateUMVBorder
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *DestReconPtr : Pointer to reconstructed image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Copies pixel values in first/last rows/columns of the
+ * image into the UMV border in the specified reconstructed
+ * image.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void UpdateUMVBorder ( POSTPROC_INSTANCE *pbi, UINT8 *DestReconPtr )
+{
+    INT32 Row;
+    INT32 Rows;                         // number of image rows in the current plane
+    INT32 Pitch;                        // stride of the current plane
+    UINT8 *LeftEdge, *RightEdge;        // first / last image pixel of a row
+    UINT8 *LeftFill, *RightFill;        // start of the left / right border of a row
+    UINT8 *TopRow, *BottomRow;          // first / last image row, including side borders
+    UINT8 *TopFill, *BottomFill;        // border row currently being written
+
+    UINT32 Border = pbi->MVBorder;
+    UINT32 ChromaBorder = Border/2;     // U and V use half the luma border
+
+    /***********/
+    /* Y Plane */
+    /***********/
+    Pitch = pbi->YStride;
+    Rows  = pbi->VFragments * 8;
+
+    // Replicate the first and last pixel of every row into the side borders.
+    LeftEdge  = DestReconPtr + pbi->ReconYDataOffset;
+    RightEdge = LeftEdge + 8 * pbi->HFragments - 1;
+    LeftFill  = LeftEdge - Border;
+    RightFill = RightEdge + 1;
+
+#ifdef MAPCA
+    CopyYLeftRightBorder ( LeftEdge, RightEdge, LeftFill, RightFill, Rows, Pitch );
+#else
+    for ( Row=Rows; Row>0; Row-- )
+    {
+        memset ( LeftFill, *LeftEdge, Border );
+        memset ( RightFill, *RightEdge, Border );
+        LeftEdge  += Pitch;
+        RightEdge += Pitch;
+        LeftFill  += Pitch;
+        RightFill += Pitch;
+    }
+#endif
+
+    // Replicate the (already side-extended) top and bottom rows into the
+    // Border rows above and below the image.
+    TopRow     = DestReconPtr + Border * Pitch;
+    BottomRow  = TopRow + (pbi->VFragments * 8 * Pitch)- Pitch;
+    TopFill    = DestReconPtr;
+    BottomFill = BottomRow + Pitch;
+    for ( Row=(INT32)Border; Row>0; Row-- )
+    {
+        memcpy ( TopFill, TopRow, Pitch );
+        memcpy ( BottomFill, BottomRow, Pitch );
+        TopFill    += Pitch;
+        BottomFill += Pitch;
+    }
+
+    // Both chroma planes share the same geometry.
+    Pitch = pbi->UVStride;
+    Rows  = pbi->VFragments * 4;
+
+    /***********/
+    /* U Plane */
+    /***********/
+
+    // Side borders.
+    LeftEdge  = DestReconPtr + pbi->ReconUDataOffset;
+    RightEdge = LeftEdge + 4 * pbi->HFragments - 1;
+    LeftFill  = LeftEdge - ChromaBorder;
+    RightFill = RightEdge + 1;
+
+#ifdef MAPCA
+    CopyUVLeftRightBorder ( LeftEdge, RightEdge, LeftFill, RightFill, Rows, Pitch );
+#else
+    for ( Row=Rows; Row>0; Row-- )
+    {
+        memset ( LeftFill, *LeftEdge, ChromaBorder );
+        memset ( RightFill, *RightEdge, ChromaBorder );
+        LeftEdge  += Pitch;
+        RightEdge += Pitch;
+        LeftFill  += Pitch;
+        RightFill += Pitch;
+    }
+#endif
+
+    // Top and bottom borders.
+    TopRow     = DestReconPtr + pbi->ReconUDataOffset - ChromaBorder;
+    BottomRow  = TopRow + (pbi->VFragments * 4 * Pitch)- Pitch;
+    TopFill    = TopRow - ChromaBorder*Pitch;
+    BottomFill = BottomRow + Pitch;
+    for ( Row=(INT32)ChromaBorder; Row>0; Row-- )
+    {
+        memcpy ( TopFill, TopRow, Pitch );
+        memcpy ( BottomFill, BottomRow, Pitch );
+        TopFill    += Pitch;
+        BottomFill += Pitch;
+    }
+
+    /***********/
+    /* V Plane */
+    /***********/
+
+    // Side borders.
+    LeftEdge  = DestReconPtr + pbi->ReconVDataOffset;
+    RightEdge = LeftEdge + 4 * pbi->HFragments - 1;
+    LeftFill  = LeftEdge - ChromaBorder;
+    RightFill = RightEdge + 1;
+
+#ifdef MAPCA
+    CopyUVLeftRightBorder ( LeftEdge, RightEdge, LeftFill, RightFill, Rows, Pitch );
+#else
+    for ( Row=Rows; Row>0; Row-- )
+    {
+        memset ( LeftFill, *LeftEdge, ChromaBorder );
+        memset ( RightFill, *RightEdge, ChromaBorder );
+        LeftEdge  += Pitch;
+        RightEdge += Pitch;
+        LeftFill  += Pitch;
+        RightFill += Pitch;
+    }
+#endif
+
+    // Top and bottom borders.
+    TopRow     = DestReconPtr + pbi->ReconVDataOffset - ChromaBorder;
+    BottomRow  = TopRow + (pbi->VFragments * 4 * Pitch)- Pitch;
+    TopFill    = TopRow - ChromaBorder*Pitch;
+    BottomFill = BottomRow + Pitch;
+    for ( Row=(INT32)ChromaBorder; Row>0; Row-- )
+    {
+        memcpy ( TopFill, TopRow, Pitch );
+        memcpy ( BottomFill, BottomRow, Pitch );
+        TopFill    += Pitch;
+        BottomFill += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CopyFrame
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * YUV_BUFFER_CONFIG *b : Pointer to source image.
+ * UINT8 *DestReconPtr : Pointer to destination image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Copies the source image into the destination image and
+ * updates the destination's UMV borders.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void CopyFrame ( POSTPROC_INSTANCE *pbi, YUV_BUFFER_CONFIG *b, UINT8 *DestReconPtr )
+{
+    int Line;
+    unsigned char *From, *To;
+
+    // Y plane: copy YWidth bytes per row; source and destination each
+    // advance by their own stride.
+    From = (unsigned char *) b->YBuffer;
+    To   = DestReconPtr + pbi->ReconYDataOffset;
+    for ( Line=0; Line<b->YHeight; Line++, From+=b->YStride, To+=pbi->YStride )
+        memcpy ( To, From, b->YWidth );
+
+    // U plane.
+    From = (unsigned char *) b->UBuffer;
+    To   = DestReconPtr + pbi->ReconUDataOffset;
+    for ( Line=0; Line<b->UVHeight; Line++, From+=b->UVStride, To+=pbi->UVStride )
+        memcpy ( To, From, b->UVWidth );
+
+    // V plane.
+    From = (unsigned char *) b->VBuffer;
+    To   = DestReconPtr + pbi->ReconVDataOffset;
+    for ( Line=0; Line<b->UVHeight; Line++, From+=b->UVStride, To+=pbi->UVStride )
+        memcpy ( To, From, b->UVWidth );
+
+    // Extend the freshly copied picture into the destination's UMV borders.
+    UpdateUMVBorder ( pbi, DestReconPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c
new file mode 100644
index 00000000..8f863382
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c
@@ -0,0 +1,75 @@
+/****************************************************************************
+ *
+ * Module Title : clamp.c
+ *
+ * Description : Image pixel value clamping routines.
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+ *
+ * ROUTINE : ClampLevels_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * INT32 BlackClamp, : Number of levels to clamp up from 0.
+ * INT32 WhiteClamp, : Number of levels to clamp down from 255.
+ * UINT8 *Src, : Pointer to input image to be clamped.
+ * UINT8 *Dst : Pointer to clamped image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Clamps the pixel values in the input image at each
+ * end of the 8-bit range.
+ *
+ * SPECIAL NOTES : BlackClamp/WhiteClamp are the number of levels to
+ * clamp at either end of the range. In particular, it
+ * should be noted that WhiteClamp is _not_ the level
+ * to clamp to at the high end of the range.
+ *
+ ****************************************************************************/
+void ClampLevels_C
+(
+    POSTPROC_INSTANCE *pbi,
+    INT32 BlackClamp,
+    INT32 WhiteClamp,
+    UINT8 *Src,
+    UINT8 *Dst
+)
+{
+    int i;
+    int row,col;
+    unsigned char clamped[256];
+
+    int width = pbi->HFragments*8;
+    int height = pbi->VFragments*8;
+    UINT8 *SrcPtr = Src + pbi->ReconYDataOffset;
+    UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
+    UINT32 LineLength = pbi->YStride;
+
+    // Set up a clamping table so the per-pixel loop needs no branches.
+    // The white clamp is applied after the black clamp, so it wins for any
+    // level that falls under both.
+    for ( i=0; i<256; i++ )
+    {
+        clamped[i] = i;
+        if ( i<BlackClamp )
+            clamped[i] = BlackClamp;
+
+        if ( i>(255-WhiteClamp) )
+            clamped[i] = 255-WhiteClamp;
+    }
+
+    // Clamping is for Y only!
+    // Read from Src and write to Dst, as the interface documents. The loop
+    // previously did the reverse (read Dst, wrote Src), which is only
+    // correct when both pointers refer to the same buffer.
+    for ( row=0 ; row<height; row++ )
+    {
+        for ( col=0; col<width; col++ )
+            DestPtr[col] = clamped[SrcPtr[col]];
+        SrcPtr += LineLength;
+        DestPtr += LineLength;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c
new file mode 100644
index 00000000..e9604510
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c
@@ -0,0 +1,1491 @@
+/****************************************************************************
+ *
+ * Module Title : deblock.c
+ *
+ * Description : Post-processing deblocker functions.
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+ * Header Files
+ ***************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+/* Absolute value. The argument is fully parenthesised so that operators of
+ * lower precedence than '>' (e.g. ?:, &, |) in the argument are grouped as
+ * the caller intended. The argument is evaluated more than once, so it must
+ * not have side effects. */
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )
+#endif
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+/* Per-quantizer-index limit table (Q_TABLE_SIZE entries); values fall from
+ * 30 at index 0 to 1 at the last index.
+ * NOTE(review): by its name this is the VP4 table; nothing in this part of
+ * the file selects it - confirm where it is installed. */
+UINT32 DeblockLimitValuesVp4[Q_TABLE_SIZE] =
+{
+ 30, 25, 20, 20, 15, 15, 14, 14,
+ 13, 13, 12, 12, 11, 11, 10, 10,
+ 9, 9, 8, 8, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 4, 4, 4, 4, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+/* Per-quantizer-index limit table; values fall from 15 to 0. The contents
+ * are currently identical to DeblockLimitValuesVp6. */
+UINT32 DeblockLimitValuesVp5[Q_TABLE_SIZE] =
+{
+ 15, 15, 15, 15, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 9, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 7, 7, 7, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 5, 4, 4, 4, 4, 4, 4, 3,
+ 3, 3, 3, 3, 3, 2, 2, 2,
+ 2, 2, 1, 1, 1, 0, 0, 0
+};
+
+/* Per-quantizer-index limit table; values fall from 15 to 0. The contents
+ * are currently identical to DeblockLimitValuesVp5. */
+UINT32 DeblockLimitValuesVp6[Q_TABLE_SIZE] =
+{
+ 15, 15, 15, 15, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 9, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 7, 7, 7, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 5, 4, 4, 4, 4, 4, 4, 3,
+ 3, 3, 3, 3, 3, 2, 2, 2,
+ 2, 2, 1, 1, 1, 0, 0, 0
+};
+
+/* Table pointers with no initializer, so they start out NULL.
+ * SetupDeblocker indexes DeblockLimitValuesV2 with pbi->FrameQIndex.
+ * NOTE(review): presumably all four are pointed at version-specific tables
+ * elsewhere before any deblocking routine runs - confirm. */
+UINT32 *DCQuantScaleV2;
+UINT32 *DCQuantScaleUV;
+UINT32 *DCQuantScaleV1;
+UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+ *
+ * ROUTINE : SetupDeblockValueArray_Generic
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * INT32 FLimit : Deblocking limit value.
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : INT32 *: Pointer to deblocker LUT.
+ *
+ * FUNCTION : Sets up the bounding value array.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+INT32 *SetupDeblockValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit )
+{
+    INT32 Step;
+    // The table is addressed with signed offsets around its 256th entry.
+    INT32 *Centre = pbi->DeblockBoundingValue + 256;
+
+    // Every offset not written below maps to zero.
+    memset ( pbi->DeblockBoundingValue, 0, (512*sizeof(*pbi->DeblockBoundingValue)) );
+
+    // Build a response that rises linearly from 0 to +/-(FLimit-1), then
+    // falls back from +/-FLimit towards +/-1 at offset +/-(2*FLimit-1).
+    for ( Step=0; Step<FLimit; Step++ )
+    {
+        // Rising part: offsets -(FLimit-1) .. +(FLimit-1) map to themselves.
+        Centre[ Step] =  Step;
+        Centre[-Step] = -Step;
+
+        // Falling part: offsets +/-FLimit .. +/-(2*FLimit-1).
+        Centre[ FLimit+Step] = FLimit-Step;
+        Centre[-FLimit-Step] = Step-FLimit;
+    }
+    return Centre;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetupDeblocker
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Prepares LUT ready to apply a loop filter.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SetupDeblocker ( POSTPROC_INSTANCE *pbi )
+{
+    // The limit is looked up the same way for every bitstream version.
+    INT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+
+    // Only the routine that builds the bounding-value table depends on the version.
+    if ( pbi->Vp3VersionNo < 2 )
+        pbi->DeblockValuePtr = SetupDeblockValueArray ( pbi, FLimit );
+    else
+        pbi->DeblockValuePtr = SetupDeblockValueArray_Generic ( pbi, FLimit );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockVerticalEdgesInLoopFilteredBand
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters the vertical edges in a band.
+ *
+ * SPECIAL NOTES : Variance values for each block are stored in
+ * pbi->FragmentVariances for later use.
+ *
+ ****************************************************************************/
+void DeblockVerticalEdgesInLoopFilteredBand
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j, k;
+    INT32 QStep;
+    INT32 FLimit;
+    INT32 p1,p2;
+    INT32 psum;
+    INT32 v[10];
+    INT32 Sum1, Sum2;
+    INT32 Variance1, Variance2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+
+    // With no blocks there are no edges. This also prevents the unsigned
+    // loop bound below (StartFrag+FragsAcross-1) from wrapping around.
+    if ( FragsAcross == 0 )
+        return;
+
+    // One vertical edge between each pair of horizontally adjacent blocks.
+    while ( CurrentFrag < (StartFrag+FragsAcross-1) )
+    {
+        // Src/Des point at the first pixel to the right of the edge.
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        // Thresholds come from the quantizer of the block right of the edge.
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+
+        // Process the 8 rows of the block.
+        for( j=0; j<8 ; j++)
+        {
+            // v[1..4] are the pixels left of the edge, v[5..8] those right of it.
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            // Sum of squares minus roughly (sum*sum)/4: an activity measure
+            // for the four pixels on each side (about 4x their variance).
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;
+
+            // Filter only when both sides are flat and the step across the
+            // edge is smaller than QStep. Otherwise Des is left untouched.
+            if( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                // Padding values for the filter ends: use the next pixel
+                // outwards when it is close to the outermost tap, else
+                // repeat the outermost tap.
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4];
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3];
+
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                // psum is a running window sum updated incrementally, so the
+                // statement order below must be preserved. Results are cast
+                // to UINT8 (the destination type) rather than INT8, whose
+                // conversion is implementation-defined above 127.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8) ((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1;
+                Des[-3] = (UINT8) ((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1;
+                Des[-2] = (UINT8) ((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1;
+                Des[-1] = (UINT8) ((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8) ((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2];
+                Des[+1] = (UINT8) ((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3];
+                Des[+2] = (UINT8) ((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4];
+                Des[+3] = (UINT8) ((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;
+        }
+
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockLoopFilteredBand_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters both horizontal and vertical edge in a band.
+ *
+ * SPECIAL NOTES : Variance values for each block are stored in
+ * pbi->FragmentVariances for later use.
+ *
+ ****************************************************************************/
+void DeblockLoopFilteredBand_C
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    UINT32 CurrentFrag=StartFrag;
+    INT32 QStep;
+    INT32 FLimit;
+    UINT8 *Src, *Des;
+    INT32 psum;
+    INT32 v[10];
+    INT32 p1,p2;
+    INT32 w1, w2, w3, w4, w5;
+    INT32 Variance1, Variance2;
+    INT32 Sum1, Sum2;
+
+    // With no blocks there are no edges. This also prevents the unsigned
+    // bound of the second loop (StartFrag+FragsAcross-1) from wrapping.
+    if ( FragsAcross == 0 )
+        return;
+
+    // Row offsets used to address pixels above and below the edge.
+    w1 = PlaneLineStep;
+    w2 = PlaneLineStep * 2;
+    w3 = PlaneLineStep * 3;
+    w4 = PlaneLineStep * 4;
+    w5 = PlaneLineStep * 5;
+
+    // Pass 1: the horizontal edge between each block of this row and the
+    // block FragsAcross further on (SrcPtr/DesPtr point at the first line
+    // below the edge). All 8 lines around the edge are written to Des,
+    // filtered or not.
+    while ( CurrentFrag < StartFrag+FragsAcross )
+    {
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag);
+
+        // Thresholds come from the quantizer of the block below the edge.
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+
+        // Process the 8 columns of the block.
+        for ( j=0; j<8; j++ )
+        {
+            // v[1..4] are the pixels above the edge, v[5..8] those below it.
+            v[1] = Src[-w4];
+            v[2] = Src[-w3];
+            v[3] = Src[-w2];
+            v[4] = Src[-w1];
+            v[5] = Src[0];
+            v[6] = Src[+w1];
+            v[7] = Src[+w2];
+            v[8] = Src[+w3];
+
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            // Sum of squares minus roughly (sum*sum)/4: an activity measure
+            // for the four pixels on each side (about 4x their variance).
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + FragsAcross] += Variance2;
+
+            // Filter only when both sides are flat and the step across the
+            // edge is smaller than QStep.
+            if( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                // Padding values for the filter ends: use the next pixel
+                // outwards when it is close to the outermost tap, else
+                // repeat the outermost tap.
+                p1 = (abs(Src[-w4] - Src[-w5]) < QStep ) ? Src[-w5] : Src[-w4];
+                p2 = (abs(Src[+w3] - Src[+w4]) < QStep ) ? Src[+w4] : Src[+w3];
+
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                // psum is a running window sum updated incrementally, so the
+                // statement order below must be preserved. Results are cast
+                // to UINT8 (the destination type) rather than INT8, whose
+                // conversion is implementation-defined above 127.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-w4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1;
+                Des[-w3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1;
+                Des[-w2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1;
+                Des[-w1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2];
+                Des[+w1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3];
+                Des[+w2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4];
+                Des[+w3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            else
+            {
+                // Not filtered: pass the eight pixels through unchanged.
+                Des[-w4] = Src[-w4];
+                Des[-w3] = Src[-w3];
+                Des[-w2] = Src[-w2];
+                Des[-w1] = Src[-w1];
+                Des[0] = Src[0];
+                Des[+w1] = Src[+w1];
+                Des[+w2] = Src[+w2];
+                Des[+w3] = Src[+w3];
+            }
+            Src++;
+            Des++;
+        }
+        CurrentFrag++;
+    }
+
+    CurrentFrag = StartFrag;
+
+    // Pass 2: the vertical edges between horizontally adjacent blocks in the
+    // 8 lines above DesPtr. This pass works in place on the destination
+    // (Src == Des), so it sees the output of pass 1 for the lines that
+    // pass 1 wrote.
+    while ( CurrentFrag < (StartFrag+FragsAcross-1) )
+    {
+        Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag+1);
+        Src = Des;
+
+        // Thresholds come from the quantizer of the block right of the edge.
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+
+        // Process the 8 rows of the block.
+        for ( j=0; j<8 ; j++ )
+        {
+            // v[1..4] are the pixels left of the edge, v[5..8] those right
+            // of it. They are captured before any pixel is overwritten.
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;
+
+            // Same decision rule as pass 1; unfiltered pixels are already in
+            // place, so no else branch is needed.
+            if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4];
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3];
+
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1;
+                Des[-3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1;
+                Des[-2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1;
+                Des[-1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2];
+                Des[+1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3];
+                Des[+2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4];
+                Des[+3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;
+        }
+
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockVerticalEdgesInNonFilteredBand
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filter the vertical edges in a band.
+ *
+ * SPECIAL NOTES : Variance values for each block are stored in
+ * pbi->FragmentVariances for later use.
+ *
+ ****************************************************************************/
+void DeblockVerticalEdgesInNonFilteredBand
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32 QStep;
+    INT32 FLimit;
+    INT32 psum;
+    INT32 v[10];
+    INT32 p1,p2;
+    INT32 Sum1, Sum2;
+    INT32 Variance1, Variance2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+
+    // With no blocks there are no edges. This also prevents the unsigned
+    // loop bound below (StartFrag+FragsAcross-1) from wrapping around.
+    if ( FragsAcross == 0 )
+        return;
+
+    // One vertical edge between each pair of horizontally adjacent blocks.
+    while ( CurrentFrag < (StartFrag + FragsAcross-1) )
+    {
+        // Src/Des point at the first pixel to the right of the edge.
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        // Thresholds come from the quantizer of the block right of the edge.
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+
+        // Process the 8 rows of the block.
+        for ( j=0; j<8 ; j++ )
+        {
+            // v[1..4] are the pixels left of the edge, v[5..8] those right of it.
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            // Sum of squares minus roughly (sum*sum)/4: an activity measure
+            // for the four pixels on each side (about 4x their variance).
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;
+
+            // Flat on both sides with a small step across the edge: apply
+            // the long smoothing filter.
+            if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                // Padding values for the filter ends: use the next pixel
+                // outwards when it is close to the outermost tap, else
+                // repeat the outermost tap.
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4];
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3];
+
+                // low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1)
+                // psum is a running window sum updated incrementally, so the
+                // statement order below must be preserved. Results are cast
+                // to UINT8 (the destination type) rather than INT8, whose
+                // conversion is implementation-defined above 127.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1;
+                Des[-3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1;
+                Des[-2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1;
+                Des[-1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2];
+                Des[+1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3];
+                Des[+2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4];
+                Des[+3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            else
+            {
+                // Old loop filter: adjust only the two pixels adjoining the
+                // edge. The 4-tap response (1 -3 3 -1) is mapped through the
+                // bounding-value table and the result is range-limited via
+                // the LimitVal_VP31 lookup.
+                INT32 FiltVal;
+                UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+                FiltVal = v[3] - v[4] * 3 + v[5] * 3 - v[6] ;
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+                Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+            }
+
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;
+        }
+
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockVerticalEdgesInNonFilteredBandNewFilter
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filter the vertical edges in a band.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeblockVerticalEdgesInNonFilteredBandNewFilter
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32 QStep;
+    INT32 FLimit;
+    INT32 psum;
+    INT32 v[10];
+    INT32 p1,p2;
+    INT32 Sum1, Sum2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag;
+
+    // This variant uses the frame-level quantizer for every edge, so both
+    // thresholds are constant for the whole band.
+    QStep = QuantScale[pbi->FrameQIndex];
+    FLimit = (QStep * QStep * 3)>>5;
+
+    // NOTE(review): unlike the sibling routines, whose loops stop at
+    // StartFrag+FragsAcross-1, this loop also processes the edge at column
+    // 8*FragsAcross. Left unchanged because callers are not visible here -
+    // confirm whether the extra edge is intended.
+    for (CurrentFrag = StartFrag; CurrentFrag < (StartFrag + FragsAcross); CurrentFrag++)
+    {
+        // Src/Des point at the first pixel to the right of the edge.
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        // Process the 8 rows of the block.
+        for ( j=0; j<8; j++ )
+        {
+            // v[1..4] / v[5..8] are the pixels left / right of the edge;
+            // v[0] and v[9] are one pixel further out on each side.
+            v[0] = Src[-5];
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+            v[9] = Src[+4];
+
+            // Activity measure: sum of absolute neighbour differences on
+            // each side of the edge.
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+                Sum1 += abs ( v[k]-v[k-1] );
+
+            for ( k=5; k<=8; k++ )
+                Sum2 += abs ( v[k]-v[k+1] );
+
+            // Flat on both sides with a small step across the edge: apply
+            // the long smoothing filter.
+            if ( (Sum1 < FLimit) && (Sum2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                // The outer neighbours serve as padding for the filter ends.
+                p1 = v[0];
+                p2 = v[9];
+
+                // low pass filtering (LPF7: 1 1 1 2 1 1 1)
+                // psum is a running window sum updated incrementally, so the
+                // statement order below must be preserved. Results are cast
+                // to UINT8 (the destination type) rather than INT8, whose
+                // conversion is implementation-defined above 127.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8)((psum + v[1]) >> 3);
+                psum += v[5] - p1;
+                Des[-3] = (UINT8)((psum + v[2]) >> 3);
+                psum += v[6] - p1;
+                Des[-2] = (UINT8)((psum + v[3]) >> 3);
+                psum += v[7] - p1;
+                Des[-1] = (UINT8)((psum + v[4]) >> 3);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8)((psum + v[5]) >> 3);
+                psum += p2 - v[2];
+                Des[+1] = (UINT8)((psum + v[6]) >> 3);
+                psum += p2 - v[3];
+                Des[+2] = (UINT8)((psum + v[7]) >> 3);
+                psum += p2 - v[4];
+                Des[+3] = (UINT8)((psum + v[8]) >> 3);
+            }
+            else
+            {
+                // Old loop filter: adjust only the two pixels adjoining the
+                // edge. The 4-tap response (1 -3 3 -1) is mapped through the
+                // bounding-value table and the result is range-limited via
+                // the LimitVal_VP31 lookup.
+                INT32 FiltVal;
+                UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+                FiltVal = v[3] - v[4] * 3 + v[5] * 3 - v[6] ;
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+                Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+            }
+
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockNonFilteredBand_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filter both horizontal and vertical edge in a band.
+ *
+ * SPECIAL NOTES : Variance values for each block are stored in
+ * pbi->FragmentVariances for later use.
+ *
+ ****************************************************************************/
+void DeblockNonFilteredBand_C
+(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragsAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+)
+{
+ UINT32 j,k;
+ INT32 QStep;
+ INT32 FLimit;
+ INT32 psum;
+ INT32 v[10];
+ INT32 p1,p2;
+ INT32 w1, w2, w3, w4, w5;
+ INT32 Variance1, Variance2;
+ INT32 Sum1, Sum2;
+ UINT8 *Src, *Des;
+ UINT32 CurrentFrag = StartFrag;
+
+ w1 = PlaneLineStep;
+ w2 = PlaneLineStep * 2;
+ w3 = PlaneLineStep * 3;
+ w4 = PlaneLineStep * 4;
+ w5 = PlaneLineStep * 5;
+
+ while ( CurrentFrag < StartFrag+FragsAcross )
+ {
+ Src = SrcPtr + 8*(CurrentFrag-StartFrag);
+ Des = DesPtr + 8*(CurrentFrag-StartFrag);
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
+ FLimit = (QStep * QStep * 3)>>5;
+
+ for ( j=0; j<8; j++ )
+ {
+ v[1] = Src[-w4];
+ v[2] = Src[-w3];
+ v[3] = Src[-w2];
+ v[4] = Src[-w1];
+ v[5] = Src[ 0];
+ v[6] = Src[+w1];
+ v[7] = Src[+w2];
+ v[8] = Src[+w3];
+
+ Variance1 = Variance2 = 0;
+ Sum1 = Sum2 = 0;
+
+ for ( k=1; k<=4; k++ )
+ {
+ Sum1 += v[k];
+ Variance1 += v[k]*v[k];
+ }
+ for ( k=5; k<=8; k++ )
+ {
+ Sum2 += v[k];
+ Variance2 += v[k]*v[k];
+ }
+ Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+ Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+ pbi->FragmentVariances[CurrentFrag] += Variance1;
+ pbi->FragmentVariances[CurrentFrag + FragsAcross] += Variance2;
+
+ if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+ ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+ {
+ p1 = (abs(Src[-w4] - Src[-w5]) < QStep ) ? Src[-w5] : Src[-w4];
+ p2 = (abs(Src[+w3] - Src[+w4]) < QStep ) ? Src[+w4] : Src[+w3];
+
+ // low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1)
+ psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+ Des[-w4] = (INT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+ psum += v[5] - p1;
+ Des[-w3] = (INT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+ psum += v[6] - p1;
+ Des[-w2] = (INT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+ psum += v[7] - p1;
+ Des[-w1] = (INT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+ psum += v[8] - v[1];
+ Des[0] = (INT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+ psum += p2 - v[2];
+ Des[+w1] = (INT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+ psum += p2 - v[3];
+ Des[+w2] = (INT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+ psum += p2 - v[4];
+ Des[+w3] = (INT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+ }
+ else
+ {
+ // Old loopfilter
+ INT32 FiltVal;
+ UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+ FiltVal = v[3] - v[4] * 3 + v[5] * 3 - v[6] ;
+ FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+ Des[-w1] = LimitTable[(INT32)v[4] + FiltVal];
+ Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+ Des[-w4]=Src[-w4];
+ Des[-w3]=Src[-w3];
+ Des[-w2]=Src[-w2];
+ Des[+w1]=Src[+w1];
+ Des[+w2]=Src[+w2];
+ Des[+w3]=Src[+w3];
+ }
+
+ Src++;
+ Des++;
+ }
+
+ // Finished filtering horizontal edge, vertical edge next...
+
+ // skip the first one
+ if ( CurrentFrag==StartFrag )
+ CurrentFrag++;
+ else
+ {
+ Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag);
+ Src = Des;
+
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
+ FLimit = (QStep * QStep * 3)>>5 ;
+
+ for ( j=0; j<8; j++ )
+ {
+ v[1] = Src[-4];
+ v[2] = Src[-3];
+ v[3] = Src[-2];
+ v[4] = Src[-1];
+ v[5] = Src[0];
+ v[6] = Src[+1];
+ v[7] = Src[+2];
+ v[8] = Src[+3];
+
+ Variance1 = Variance2 = 0;
+ Sum1 = Sum2 = 0;
+
+ for ( k=1; k<=4; k++ )
+ {
+ Sum1 += v[k];
+ Variance1 += v[k]*v[k];
+ }
+ for ( k=5; k<=8; k++ )
+ {
+ Sum2 += v[k];
+ Variance2 += v[k]*v[k];
+ }
+ Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+ Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+ pbi->FragmentVariances[CurrentFrag-1] += Variance1;
+ pbi->FragmentVariances[CurrentFrag] += Variance2;
+
+ if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+ ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+ {
+ p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4];
+ p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3];
+
+ // lo pass filtering (LPF9: 1 1 2 2 4 2 2 1 1)
+ psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+ Des[-4] = (INT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+ psum += v[5] - p1;
+ Des[-3] = (INT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+ psum += v[6] - p1;
+ Des[-2] = (INT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+ psum += v[7] - p1;
+ Des[-1] = (INT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+ psum += v[8] - v[1];
+ Des[0] = (INT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+ psum += p2 - v[2];
+ Des[+1] = (INT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+ psum += p2 - v[3];
+ Des[+2] =(INT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+ psum += p2 - v[4];
+ Des[+3] = (INT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+ }
+ else
+ {
+ // Old loop-filter
+ INT32 FiltVal;
+ UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+ FiltVal = v[3] - v[4] * 3 + v[5] * 3 - v[6] ;
+ FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+ Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+ Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+ }
+
+ Src += PlaneLineStep;
+ Des += PlaneLineStep;
+ }
+ }
+
+ CurrentFrag++;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockNonFilteredBandNewFilter_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 PlaneLineStep : Stride of SrcPtr & DesPtr.
+ * UINT32 FragsAcross : Number of blocks across.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filter both horizontal and vertical edge in a band.
+ *
+ * SPECIAL NOTES : Variance values for each block are stored in
+ * pbi->FragmentVariances for later use.
+ * Uses SAD to determine where to apply the new
+ * 7 tap filter.
+ *
+ ****************************************************************************/
+// Filters one line of ten samples that straddles a block edge.
+//
+//   Src, Des   : point at the first sample after the edge in the input and
+//                output images; they may be the same pointer.
+//   Step       : distance between consecutive samples across the edge (the
+//                line stride for a horizontal edge, 1 for a vertical edge).
+//   QStep      : threshold for the step across the edge itself.
+//   FLimit     : threshold for the activity (SAD) on each side of the edge.
+//   VarBefore,
+//   VarAfter   : accumulators that receive the SAD (capped at 255) of the
+//                samples before / after the edge.
+static void DeblockEdgeLineNewFilter_C
+(
+    POSTPROC_INSTANCE *pbi,
+    const UINT8 *Src,
+    UINT8 *Des,
+    INT32 Step,
+    INT32 QStep,
+    INT32 FLimit,
+    INT32 *VarBefore,
+    INT32 *VarAfter
+)
+{
+    INT32 Tap[10];
+    INT32 SadBefore = 0;
+    INT32 SadAfter = 0;
+    INT32 n;
+
+    // Tap[0..4] lie before the edge, Tap[5..9] after it. Everything is read
+    // up front so that the filter also works when Src and Des coincide.
+    for ( n=0; n<10; n++ )
+        Tap[n] = Src[(n-5)*Step];
+
+    // Sum of absolute differences between neighbouring samples on each side
+    for ( n=1; n<=4; n++ )
+    {
+        SadBefore += abs ( Tap[n]-Tap[n-1] );
+        SadAfter += abs ( Tap[n+4]-Tap[n+5] );
+    }
+
+    *VarBefore += ( SadBefore>255 ) ? 255 : SadBefore;
+    *VarAfter += ( SadAfter>255 ) ? 255 : SadAfter;
+
+    if ( (SadBefore < FLimit) && (SadAfter < FLimit) && (abs ( Tap[5]-Tap[4] ) < QStep) )
+    {
+        // low pass filtering (LPF7: 1 1 1 2 1 1 1)
+        // Window is the sum of the seven samples centred on the output
+        // position plus the rounding term; positions that fall off either
+        // end of the eight filtered samples reuse Tap[0] / Tap[9].
+        INT32 Window = 3*Tap[0] + Tap[1] + Tap[2] + Tap[3] + Tap[4] + 4;
+
+        for ( n=1; n<=8; n++ )
+        {
+            INT32 Entering = ( n+4 > 9 ) ? 9 : n+4;
+            INT32 Leaving = ( n-3 < 0 ) ? 0 : n-3;
+
+            Des[(n-5)*Step] = (INT8)((Window + Tap[n]) >> 3);
+
+            // Slide the window one sample towards the far side of the edge
+            Window += Tap[Entering] - Tap[Leaving];
+        }
+    }
+    else
+    {
+        // Old loopfilter: only the two samples next to the edge are modified
+        UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+        INT32 FiltVal = Tap[3] - Tap[4] * 3 + Tap[5] * 3 - Tap[6];
+
+        FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+        Des[-Step] = LimitTable[Tap[4] + FiltVal];
+        Des[0] = LimitTable[Tap[5] - FiltVal];
+
+        // When writing to a separate image the untouched samples still have
+        // to be carried over; when filtering in place they are already there.
+        if ( Des != Src )
+        {
+            Des[-4*Step] = Src[-4*Step];
+            Des[-3*Step] = Src[-3*Step];
+            Des[-2*Step] = Src[-2*Step];
+            Des[+1*Step] = Src[+1*Step];
+            Des[+2*Step] = Src[+2*Step];
+            Des[+3*Step] = Src[+3*Step];
+        }
+    }
+}
+
+void DeblockNonFilteredBandNewFilter_C
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    // One quantizer step, taken from the frame level index, serves the whole band
+    const INT32 RowStep = PlaneLineStep;
+    const INT32 QStep = QuantScale[pbi->FrameQIndex];
+    const INT32 FLimit = ( QStep * 3 ) >> 2;
+    UINT32 CurrentFrag;
+    UINT32 Line;
+    UINT8 *Src, *Des;
+
+    for ( CurrentFrag=StartFrag; CurrentFrag<(StartFrag + FragsAcross); CurrentFrag++ )
+    {
+        // Horizontal edge at SrcPtr/DesPtr, one column at a time. The activity
+        // on the two sides is credited to fragment CurrentFrag and to the
+        // fragment FragsAcross further on.
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag);
+
+        for ( Line=0; Line<8; Line++ )
+            DeblockEdgeLineNewFilter_C ( pbi,
+                                         Src+Line,
+                                         Des+Line,
+                                         RowStep,
+                                         QStep,
+                                         FLimit,
+                                         &pbi->FragmentVariances[CurrentFrag],
+                                         &pbi->FragmentVariances[CurrentFrag + FragsAcross] );
+
+        // Finished filtering horizontal edge, vertical edge next...
+
+        // skip the first one
+        if ( CurrentFrag == StartFrag )
+            continue;
+
+        // Vertical edge between fragments CurrentFrag-1 and CurrentFrag. It is
+        // filtered in place in the output image, over the eight lines that
+        // precede DesPtr.
+        Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag);
+
+        for ( Line=0; Line<8; Line++ )
+        {
+            DeblockEdgeLineNewFilter_C ( pbi,
+                                         Des,
+                                         Des,
+                                         1,
+                                         QStep,
+                                         FLimit,
+                                         &pbi->FragmentVariances[CurrentFrag-1],
+                                         &pbi->FragmentVariances[CurrentFrag] );
+            Des += PlaneLineStep;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockPlane
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SourceBuffer : Pointer to input image.
+ * UINT8 *DestinationBuffer : Pointer to output image.
+ * UINT32 Channel : Whether the Y, U or V plane.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies de-blocking filters to an image plane Y, U or V.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeblockPlane
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT8 *SourceBuffer,
+    UINT8 *DestinationBuffer,
+    UINT32 Channel
+)
+{
+    UINT32 i, j, k;
+    UINT32 PixelIndex;
+
+    UINT32 FragsDown = 0;
+    UINT32 FragsAcross = 0;
+    UINT32 StartFrag = 0;
+    UINT32 PlaneLineStep = 0;
+    UINT8 *SrcPtr = 0, *DesPtr = 0;
+    UINT32 *QuantScale = 0;
+
+    typedef void (*ApplyFilterToBand) (xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+
+    ApplyFilterToBand DeblockBand;
+    ApplyFilterToBand DeblockVerticalEdgesInBand;
+
+    // Plane geometry: Channel 0 is Y, 1 is U, anything else is treated as V
+    switch( Channel )
+    {
+    case 0:
+        PlaneLineStep = pbi->YStride;
+        FragsAcross = pbi->HFragments;
+        FragsDown = pbi->VFragments;
+        StartFrag = 0;
+        PixelIndex = pbi->ReconYDataOffset;
+        break;
+
+    case 1:
+        PlaneLineStep = pbi->UVStride;
+        FragsAcross = pbi->HFragments / 2;
+        FragsDown = pbi->VFragments / 2;
+        StartFrag = pbi->YPlaneFragments;
+        PixelIndex = pbi->ReconUDataOffset;
+        break;
+
+    default:
+        PlaneLineStep = pbi->UVStride;
+        FragsAcross = pbi->HFragments / 2;
+        FragsDown = pbi->VFragments / 2;
+        StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
+        PixelIndex = pbi->ReconVDataOffset;
+        break;
+    }
+
+    SrcPtr = &SourceBuffer[PixelIndex];
+    DesPtr = &DestinationBuffer[PixelIndex];
+
+    // Band filters and quantizer table depend on the bitstream version.
+    // Every non-zero Channel takes the chroma table, matching the geometry
+    // switch above; previously a Channel above 2 left QuantScale NULL.
+    if ( pbi->Vp3VersionNo >= 2 )
+    {
+        DeblockBand = DeblockNonFilteredBand;
+        DeblockVerticalEdgesInBand = DeblockVerticalEdgesInNonFilteredBand;
+        QuantScale = ( Channel == 0 ) ? DCQuantScaleV2 : DCQuantScaleUV;
+    }
+    else
+    {
+        DeblockBand = DeblockLoopFilteredBand;
+        DeblockVerticalEdgesInBand = DeblockVerticalEdgesInLoopFilteredBand;
+        QuantScale = DCQuantScaleV1;
+    }
+
+    // The first four lines of the plane are copied across unfiltered
+    for ( i=0; i<4; i++ )
+        for ( j=0; j<PlaneLineStep; j++ )
+            DesPtr[i*PlaneLineStep + j] = SrcPtr[i*PlaneLineStep + j];
+
+    // loop to last band
+    for ( k=1; k<FragsDown; k++ )
+    {
+        SrcPtr += 8*PlaneLineStep;
+        DesPtr += 8*PlaneLineStep;
+
+        // Filter both the horizontal and vertical block edges inside the band
+        DeblockBand ( pbi,
+                      SrcPtr,
+                      DesPtr,
+                      PlaneLineStep,
+                      FragsAcross,
+                      StartFrag,
+                      QuantScale );
+
+        // Move on...
+        StartFrag += FragsAcross;
+    }
+
+    // The Last band: its final four lines are copied across unfiltered and
+    // only the vertical edges are filtered.
+    for ( i=0; i<4; i++ )
+        for ( j=0; j<PlaneLineStep; j++ )
+            DesPtr[(i+4)*PlaneLineStep + j] = SrcPtr[(i+4)*PlaneLineStep + j];
+
+    DeblockVerticalEdgesInBand ( pbi,
+                                 SrcPtr,
+                                 DesPtr,
+                                 PlaneLineStep,
+                                 FragsAcross,
+                                 StartFrag,
+                                 QuantScale );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockPlaneNew
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT32 PlaneLineStep : Stride for the plane.
+ * UINT32 StartFrag : Number of first block.
+ * UINT32 FragsAcross : Number of blocks horizontally.
+ * UINT32 FragsDown : Number of blocks vertically.
+ * UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DesPtr : Pointer to output image.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies new de-blocking filters to an image plane Y, U or V.
+ *
+ * SPECIAL NOTES : Uses the new de-blocking filter.
+ *
+ ****************************************************************************/
+void DeblockPlaneNew
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT32 PlaneLineStep,
+    UINT32 StartFrag,
+    UINT32 FragsAcross,
+    UINT32 FragsDown,
+    UINT8 *SrcPtr,
+    UINT8 *DesPtr,
+    UINT32 *QuantScale
+)
+{
+    typedef void (*BandFilterFn) (xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+
+    // This plane routine always uses the "new filter" band routines
+    BandFilterFn FilterBand = DeblockNonFilteredBandNewFilter;
+    BandFilterFn FilterLastBand = DeblockVerticalEdgesInNonFilteredBandNewFilter;
+    UINT32 Row;
+    UINT32 Band;
+
+    // The first four lines of the plane are copied across unfiltered
+    for ( Row=0; Row<4; Row++ )
+        memcpy ( DesPtr+Row*PlaneLineStep, SrcPtr+Row*PlaneLineStep, PlaneLineStep );
+
+    // Every band except the first has its horizontal and vertical block
+    // edges filtered; StartFrag moves on by one row of fragments per band.
+    for ( Band=1; Band<FragsDown; Band++ )
+    {
+        SrcPtr += 8*PlaneLineStep;
+        DesPtr += 8*PlaneLineStep;
+
+        FilterBand ( pbi, SrcPtr, DesPtr, PlaneLineStep, FragsAcross, StartFrag, QuantScale );
+
+        StartFrag += FragsAcross;
+    }
+
+    // The last band: lines 4..7 are copied across unfiltered, then only the
+    // vertical edges are filtered.
+    for ( Row=4; Row<8; Row++ )
+        memcpy ( DesPtr+Row*PlaneLineStep, SrcPtr+Row*PlaneLineStep, PlaneLineStep );
+
+    FilterLastBand ( pbi, SrcPtr, DesPtr, PlaneLineStep, FragsAcross, StartFrag, QuantScale );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockFrame
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SourceBuffer : Pointer to input frame.
+ * UINT8 *DestinationBuffer : Pointer to output deblocked frame.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies loop filter to the edge pixels of coded blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeblockFrame ( POSTPROC_INSTANCE *pbi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer )
+{
+    // Initialize the fragment variance accumulators
+    memset ( pbi->FragmentVariances, 0 , pbi->UnitFragments*sizeof(INT32) );
+
+    SetupDeblocker(pbi);
+
+#if defined(_WIN32)
+    if ( pbi->Vp3VersionNo >= 5 )
+    {
+        // Each plane accumulates into its own range of FragmentVariances:
+        // Y starts at fragment 0, U after the Y fragments and V after the
+        // Y and U fragments, as in DeblockPlane and DeblockFrameInterlaced.
+        // U and V used to start at 0 as well, which added the chroma
+        // activity to the luma entries and left the chroma entries at zero.
+
+        // Y
+        DeblockPlaneNew ( pbi,
+                          pbi->YStride,
+                          0,
+                          pbi->HFragments,
+                          pbi->VFragments,
+                          &SourceBuffer[pbi->ReconYDataOffset],
+                          &DestinationBuffer[pbi->ReconYDataOffset],
+                          DCQuantScaleV2 );
+        // U
+        DeblockPlaneNew ( pbi,
+                          pbi->UVStride,
+                          pbi->YPlaneFragments,
+                          pbi->HFragments / 2,
+                          pbi->VFragments / 2,
+                          &SourceBuffer[pbi->ReconUDataOffset],
+                          &DestinationBuffer[pbi->ReconUDataOffset],
+                          DCQuantScaleUV );
+        // V
+        DeblockPlaneNew ( pbi,
+                          pbi->UVStride,
+                          pbi->YPlaneFragments + pbi->UVPlaneFragments,
+                          pbi->HFragments / 2,
+                          pbi->VFragments / 2,
+                          &SourceBuffer[pbi->ReconVDataOffset],
+                          &DestinationBuffer[pbi->ReconVDataOffset],
+                          DCQuantScaleUV );
+    }
+    else
+#endif
+    {
+        // Older bitstream versions (and every version when _WIN32 is not
+        // defined) go through DeblockPlane, which picks its own filters.
+        DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 0 ); // Y
+        DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 1 ); // U
+        DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 2 ); // V
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockFrameInterlaced
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * UINT8 *SourceBuffer : Pointer to input frame.
+ * UINT8 *DestinationBuffer : Pointer to output deblocked frame.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the edge pixels of coded blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeblockFrameInterlaced ( POSTPROC_INSTANCE *pbi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer )
+{
+    INT32 *SavedVariances;
+    UINT32 FieldStride;
+    UINT32 FieldFragsDown;
+    UINT32 ChromaFragsAcross;
+    UINT32 ChromaFragsDown;
+
+    SetupDeblocker ( pbi );
+
+    // Clear the variance accumulators and remember where they start
+    SavedVariances = pbi->FragmentVariances;
+    memset ( SavedVariances, 0, pbi->UnitFragments*sizeof(INT32) );
+
+    // Y plane: each field is deblocked as a plane of its own, with twice
+    // the line stride and half the number of fragment rows.
+    FieldStride = pbi->YStride*2;
+    FieldFragsDown = pbi->VFragments/2;
+
+    // First field starts on line 0
+    DeblockPlaneNew ( pbi, FieldStride, 0, pbi->HFragments, FieldFragsDown,
+                      &SourceBuffer[pbi->ReconYDataOffset],
+                      &DestinationBuffer[pbi->ReconYDataOffset],
+                      DCQuantScaleV2 );
+
+    // Second field starts on line 1; its variances go into the second half
+    // of the Y entries, so the table pointer is advanced for this call.
+    pbi->FragmentVariances += pbi->HFragments*pbi->VFragments/2;
+
+    DeblockPlaneNew ( pbi, FieldStride, 0, pbi->HFragments, FieldFragsDown,
+                      &SourceBuffer[pbi->ReconYDataOffset+pbi->YStride],
+                      &DestinationBuffer[pbi->ReconYDataOffset+pbi->YStride],
+                      DCQuantScaleV2 );
+
+    // Restore the FragmentVariances pointer in PBI
+    pbi->FragmentVariances = SavedVariances;
+
+    // U and V planes are deblocked as whole planes
+    ChromaFragsAcross = pbi->HFragments / 2;
+    ChromaFragsDown = pbi->VFragments / 2;
+
+    DeblockPlaneNew ( pbi, pbi->UVStride, pbi->YPlaneFragments,
+                      ChromaFragsAcross, ChromaFragsDown,
+                      &SourceBuffer[pbi->ReconUDataOffset],
+                      &DestinationBuffer[pbi->ReconUDataOffset],
+                      DCQuantScaleUV );
+
+    DeblockPlaneNew ( pbi, pbi->UVStride, pbi->YPlaneFragments + pbi->UVPlaneFragments,
+                      ChromaFragsAcross, ChromaFragsDown,
+                      &SourceBuffer[pbi->ReconVDataOffset],
+                      &DestinationBuffer[pbi->ReconVDataOffset],
+                      DCQuantScaleUV );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c
new file mode 100644
index 00000000..7c970d1a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c
@@ -0,0 +1,1166 @@
+/****************************************************************************
+ *
+ * Module Title : Dering.c
+ *
+ * Description : Post-processing de-ringing filter routines.
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+#include "stdlib.h" /* to get abs() */
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+// Macro replacement for abs(). The argument is parenthesised everywhere so
+// that any expression can be passed; it is evaluated twice, so it must not
+// have side effects.
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )
+#endif
+
+// Saturates val to the range 0..255. val is evaluated more than once.
+#define Clamp(val) ( (val)<0 ? 0 : ((val)>255 ? 255 : (val)) )
+
+/****************************************************************************
+* Exported Global Variables
+****************************************************************************/
+// De-ringing modifier tables. DeringFrame picks one of them as its QuantScale
+// according to pbi->Vp3VersionNo: V3 for versions >= 5, V2 for versions >= 2,
+// V1 otherwise. The block filters in this file read QuantScale[FragQIndex].
+
+// No initialiser is given here for the V1 table.
+// NOTE(review): presumably filled in elsewhere before use - confirm.
+UINT32 DeringModifierV1[Q_TABLE_SIZE];
+
+// 64 entries, falling from 9 at index 0 to 2 at index 63
+/*const*/ UINT32 DeringModifierV2[Q_TABLE_SIZE] =
+{
+ 9, 9, 8, 8, 7, 7, 7, 7,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 2, 2, 2, 2
+};
+
+// 64 entries, falling from 9 at index 0 to 0 for the last five indices
+/*const*/ UINT32 DeringModifierV3[Q_TABLE_SIZE] =
+{
+ 9, 9, 9, 9, 8, 8, 8, 8,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 5, 5, 5, 5, 5, 5,
+ 4, 4, 4, 4, 3, 3, 3, 3,
+ 2, 2, 2, 0, 0, 0, 0, 0
+};
+
+// Indexed by FragQIndex in the block filters below. The value replaces a
+// neighbour weight whose computed modifier falls below -64; entries are
+// negative or zero.
+/*const*/ INT32 SharpenModifier[Q_TABLE_SIZE] =
+{
+ -12, -11, -10, -10, -9, -9, -9, -9,
+ -6, -6, -6, -6, -6, -6, -6, -6,
+ -4, -4, -4, -4, -4, -4, -4, -4,
+ -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockStrong_C
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Stride of SrcPtr & DstPtr.
+ * UINT32 FragQIndex : Quantizer index to use.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a strong de-ringing filter to a block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockStrong_C
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+    // Row and column indices are signed on purpose. Expressions such as
+    // Src[j-Pitch] and Src[j-1] must yield negative offsets; with unsigned
+    // indices they wrap to values near 2^32, which only lands on the
+    // intended pixel when pointers are 32 bits wide.
+    int j, k;
+    int al, ar, au, ad;
+    int atot;
+    int B;
+    int High;
+    int TmpMod;
+    int newVal;
+    int newPixel[8];
+    short UDMod[72];    // 9 rows x 8 columns: weight between a pixel and the one above it
+    short LRMod[72];    // 8 rows x 9 columns: weight between a pixel and the one to its left
+
+    const int round = (1<<6);
+    const unsigned int QValue = QuantScale[FragQIndex];
+    const int Sharpen = SharpenModifier[FragQIndex];
+    const unsigned char *Src = SrcPtr;
+    unsigned char *Dst = DstPtr;
+
+    (void) pbi;
+
+    // Weights are limited to the range 0..High, with High = min(3*QValue, 32)
+    High = 3 * QValue;
+
+    if ( High>32 )
+        High = 32;
+
+    /* Initialize the Mod Data */
+
+    // Vertical pairs: rows 0..8, each against the row above it
+    for ( k=0; k<9; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            TmpMod = 32 + QValue - (abs(Src[j]-Src[j-Pitch]));
+
+            // A very large difference selects the sharpening weight instead
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < 0 )
+                TmpMod = 0;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            UDMod[k*8+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    // Horizontal pairs: columns 0..8, each against the column to its left
+    Src = SrcPtr;
+
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<9; j++ )
+        {
+            TmpMod = 32 + QValue - (abs(Src[j]-Src[j-1]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < 0 )
+                TmpMod = 0;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            LRMod[k*9+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    // Each output pixel is a weighted average (weights sum to 128) of the
+    // pixel and its left, upper, lower and right neighbours.
+    Src = SrcPtr;
+
+    for ( k=0; k<8; k++ )
+    {
+        // The function may be called with the same buffer for source and
+        // destination. To keep the C and MMX versions consistent, the eight
+        // results of a row are held back and written only once the whole
+        // row has been computed.
+        for ( j=0; j<8; j++ )
+        {
+            al = LRMod[k*9+j];
+            ar = LRMod[k*9+j+1];
+            au = UDMod[k*8+j];
+            ad = UDMod[(k+1)*8+j];
+
+            // The centre pixel gets whatever weight the neighbours leave
+            atot = 128 - al - au - ad - ar;
+
+            B = round;
+            B += al * Src[j-1];
+            B += au * Src[j-Pitch];
+            B += ad * Src[j+Pitch];
+            B += ar * Src[j+1];
+
+            newVal = ( atot * Src[j] + B) >> 7;
+
+            newPixel[j] = Clamp( newVal );
+        }
+
+        for ( j=0; j<8; j++ )
+            Dst[j] = (UINT8)newPixel[j];
+
+        Src += Pitch;
+        Dst += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockWeak_C
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Stride of SrcPtr & DstPtr.
+ * UINT32 FragQIndex : Quantizer index to use.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a weak de-ringing filter to a block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockWeak_C
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+    // Row and column indices are signed on purpose. Expressions such as
+    // Src[j-Pitch] and Src[j-1] must yield negative offsets; with unsigned
+    // indices they wrap to values near 2^32, which only lands on the
+    // intended pixel when pointers are 32 bits wide.
+    int j, k;
+    int al, ar, au, ad;
+    int atot;
+    int B;
+    int High;
+    int TmpMod;
+    int newVal;
+    short UDMod[72];    // 9 rows x 8 columns: weight between a pixel and the one above it
+    short LRMod[72];    // 8 rows x 9 columns: weight between a pixel and the one to its left
+
+    const int round = (1<<6);
+    const unsigned int QValue = QuantScale[FragQIndex];
+    const int Sharpen = SharpenModifier[FragQIndex];
+    const unsigned char *Src = SrcPtr;
+    unsigned char *Dst = DstPtr;
+
+    (void) pbi;
+
+    // Weights are limited to the range 0..High, with High = min(3*QValue, 24)
+    High = 3 * QValue;
+
+    if ( High>24 )
+        High = 24;
+
+    /* Initialize the Mod Data */
+
+    // Vertical pairs: rows 0..8, each against the row above it. The pixel
+    // difference counts double compared with the strong filter.
+    for ( k=0; k<9; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            TmpMod = 32 + QValue - 2*(abs(Src[j]-Src[j-Pitch]));
+
+            // A very large difference selects the sharpening weight instead
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < 0 )
+                TmpMod = 0;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            UDMod[k*8+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    // Horizontal pairs: columns 0..8, each against the column to its left
+    Src = SrcPtr;
+
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<9; j++ )
+        {
+            TmpMod = 32 + QValue - 2*(abs(Src[j]-Src[j-1]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < 0 )
+                TmpMod = 0;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            LRMod[k*9+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    // Each output pixel is a weighted average (weights sum to 128) of the
+    // pixel and its left, upper, lower and right neighbours. Unlike the
+    // strong filter, every result is written as soon as it is computed.
+    Src = SrcPtr;
+
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            al = LRMod[k*9+j];
+            ar = LRMod[k*9+j+1];
+            au = UDMod[k*8+j];
+            ad = UDMod[(k+1)*8+j];
+
+            // The centre pixel gets whatever weight the neighbours leave
+            atot = 128 - al - au - ad - ar;
+
+            B = round;
+            B += al * Src[j-1];
+            B += au * Src[j-Pitch];
+            B += ad * Src[j+Pitch];
+            B += ar * Src[j+1];
+
+            newVal = ( atot * Src[j] + B) >> 7;
+
+            Dst[j] = (UINT8) Clamp( newVal );
+        }
+
+        Src += Pitch;
+        Dst += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlock
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Stride of SrcPtr & DstPtr.
+ * UINT32 FragQIndex : Quantizer index to use.
+ * UINT32 *QuantScale :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a de-ringing filter to a block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlock
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    const UINT32 *QuantScale,
+    UINT32 Variance
+)
+{
+    // Indices are signed on purpose. Expressions such as srcRow[j-Pitch-1]
+    // and srcRow[j-1] must yield negative offsets; with unsigned indices
+    // they wrap to values near 2^32, which only lands on the intended pixel
+    // when pointers are 32 bits wide.
+    int j, k, l;
+    int B;
+    int atot;
+    int newVal;
+    int High;
+    int TmpMod;
+    int N[8];           // neighbors
+    int Slope = 4;
+
+    const int round = (1<<7);
+    const int Sharpen = SharpenModifier[FragQIndex];
+    unsigned int QValue = QuantScale[FragQIndex];
+    const unsigned char *srcRow = SrcPtr;
+    unsigned char *dstRow = DstPtr;
+
+    // A post-processing level above 100 overrides the table value
+    if ( pbi->PostProcessingLevel > 100 )
+        QValue = pbi->PostProcessingLevel - 100;
+
+    // The pixel difference counts double for variances in (2048, 32768];
+    // below and above that range the slope stays at 4 (a factor of one
+    // after the >> 2 below).
+    if ( (Variance > 2048) && (Variance <= 32768) )
+        Slope = 8;
+
+    // Weights are limited to the range 0..High, with High = min(3*QValue, 32)
+    High = 3 * QValue;
+
+    if ( High > 32 )
+        High = 32;
+
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            // set up 8 neighbors of pixel srcRow[j]
+            N[0] = srcRow[j-Pitch-1];
+            N[1] = srcRow[j-Pitch  ];
+            N[2] = srcRow[j-Pitch+1];
+            N[3] = srcRow[j      -1];
+            N[4] = srcRow[j      +1];
+            N[5] = srcRow[j+Pitch-1];
+            N[6] = srcRow[j+Pitch  ];
+            N[7] = srcRow[j+Pitch+1];
+
+            // Weighted average with weights summing to 256; the centre pixel
+            // gets whatever weight the neighbours leave.
+            atot = 256;
+            B = round;
+
+            for ( l=0; l<8; l++ )
+            {
+                TmpMod = 32 + QValue - ((Slope * (abs(srcRow[j]-N[l]))) >> 2);
+
+                // A very large difference selects the sharpening weight instead
+                if ( TmpMod < -64 )
+                    TmpMod = Sharpen;
+                else if ( TmpMod < 0 )
+                    TmpMod = 0;
+                else if ( TmpMod > High )
+                    TmpMod = High;
+
+                atot -= TmpMod;
+                B += TmpMod * N[l];
+            }
+
+            newVal = ( atot * srcRow[j] + B) >> 8;
+
+            dstRow[j] = (UINT8) Clamp( newVal );
+        }
+
+        dstRow += Pitch;
+        srcRow += Pitch;
+    }
+}
+
+/***************************************************************************
+ *
+ * ROUTINE : DiagonalBlur
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (NOT USED).
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Stride of SrcPtr & DstPtr.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a de-ringing filter to a block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ***************************************************************************/
+void DiagonalBlur
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch
+)
+{
+    // Indices are signed on purpose. Expressions such as srcRow[j-2*Pitch-2]
+    // and srcRow[j-1] must yield negative offsets; with unsigned indices
+    // they wrap to values near 2^32, which only lands on the intended pixel
+    // when pointers are 32 bits wide.
+    int j, k;
+    int sum;
+    unsigned char *dstRow = DstPtr;
+    const unsigned char *srcRow = SrcPtr;
+
+    (void) pbi;
+
+    // Pass 1, rows 0..7: blur along the diagonals. Weights are 8 for the
+    // centre, 4 for each diagonal neighbour one step away and 2 for each
+    // diagonal neighbour two steps away (32 in total, 16 added for rounding).
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            sum = 16;
+            sum += 8*srcRow[j];
+            sum += 2*srcRow[j-2*Pitch-2];
+            sum += 2*srcRow[j-2*Pitch+2];
+            sum += 4*srcRow[j-Pitch  -1];
+            sum += 4*srcRow[j-Pitch  +1];
+            sum += 4*srcRow[j+Pitch  -1];
+            sum += 4*srcRow[j+Pitch  +1];
+            sum += 2*srcRow[j+2*Pitch-2];
+            sum += 2*srcRow[j+2*Pitch+2];
+
+            sum >>= 5;
+
+            dstRow[j] = (UINT8)sum;
+        }
+
+        dstRow += Pitch;
+        srcRow += Pitch;
+    }
+
+    // Pass 2: sharpen (6 x centre minus the four direct neighbours, halved
+    // and clamped to 0..255).
+    // NOTE(review): srcRow and dstRow are not reset after pass 1, so this
+    // pass reads and writes rows 8..15, i.e. the eight rows below the block.
+    // That behaviour is kept as found; confirm whether the pass was meant
+    // to run on rows 0..7 instead.
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            sum = 1;
+            sum += 6*srcRow[j];
+            sum -= srcRow[j-Pitch];
+            sum -= srcRow[j+Pitch];
+            sum -= srcRow[j-1];
+            sum -= srcRow[j+1];
+
+            sum >>= 1;
+
+            if ( sum<0 )
+                sum = 0;
+
+            if ( sum>255 )
+                sum = 255;
+
+            dstRow[j] = (UINT8)sum;
+        }
+
+        dstRow += Pitch;
+        srcRow += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringFrame
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *Src             : Pointer to input image.
+ *                  UINT8 *Dst             : Pointer to output image.
+ *
+ *  OUTPUTS       : Dst : every 8x8 block of the Y, U and V planes is written,
+ *                  either filtered or copied unchanged from Src.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a de-ringing filter to a frame. Each block's entry
+ *                  in pbi->FragmentVariances selects the treatment:
+ *                  copy (<= Thresh1), weak filter (<= Thresh2) or strong filter.
+ *
+ *  SPECIAL NOTES : Fragment data is walked with one running index: all Y
+ *                  blocks, then all U blocks, then all V blocks. The chroma
+ *                  planes use half the Y block counts and half the Y stride.
+ *
+ ****************************************************************************/
+void DeringFrame ( POSTPROC_INSTANCE *pbi, UINT8 *Src, UINT8 *Dst )
+{
+    UINT32  Block = 0;
+    UINT32  col, row, plane;
+    UINT32  BlocksAcross = pbi->HFragments;
+    UINT32  BlocksDown   = pbi->VFragments;
+    UINT32  LineLength   = pbi->YStride;
+    UINT32 *QuantScale;
+    INT32   Thresh1, Thresh2, Thresh3, Thresh4;
+    UINT8  *SrcPtr;                 // start of the current row of blocks in the source
+    UINT8  *DestPtr;                // start of the current row of blocks in the destination
+    INT32   Quality = pbi->FrameQIndex;
+
+    // Variance thresholds and modifier table depend on the bitstream version.
+    if ( pbi->Vp3VersionNo >= 5 )
+    {
+        QuantScale = DeringModifierV3;
+
+        Thresh1 = 384;
+        Thresh2 = 6 * Thresh1;
+        Thresh3 = (5 * Thresh2) / 4;
+        Thresh4 = (5 * Thresh2) / 2;
+    }
+    else
+    {
+        if ( pbi->Vp3VersionNo >= 2 )
+            QuantScale = DeringModifierV2;
+        else
+            QuantScale = DeringModifierV1;
+
+        Thresh1 = 2048;
+        Thresh2 = 15 * Thresh1;
+        Thresh3 = 3 * Thresh2;
+        Thresh4 = 4 * Thresh2;
+    }
+
+    // De-ring Y plane
+    SrcPtr  = Src + pbi->ReconYDataOffset;
+    DestPtr = Dst + pbi->ReconYDataOffset;
+
+    for ( row=0; row<BlocksDown; row++ )
+    {
+        for ( col=0; col<BlocksAcross; col++, Block++ )
+        {
+            INT32  Variance = pbi->FragmentVariances[Block];
+            UINT8 *SrcBlk   = SrcPtr  + 8*col;
+            UINT8 *DstBlk   = DestPtr + 8*col;
+
+            if ( Variance <= Thresh1 )
+            {
+                CopyBlock ( SrcBlk, DstBlk, LineLength );
+            }
+            else if ( Variance <= Thresh2 )
+            {
+                DeringBlockWeak ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+            }
+            else
+            {
+                DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+
+                // At high post-processing levels a very busy block that also has a
+                // very busy left/right/lower/upper neighbour gets two further calls.
+                if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh3) )
+                {
+                    if ( (col > 0                && pbi->FragmentVariances[Block-1] > Thresh4 ) ||
+                         (col + 1 < BlocksAcross && pbi->FragmentVariances[Block+1] > Thresh4 ) ||
+                         (row + 1 < BlocksDown   && pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
+                         (row > 0                && pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) )
+                    {
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                    }
+                }
+            }
+        }
+
+        SrcPtr  += 8 * LineLength;
+        DestPtr += 8 * LineLength;
+    }
+
+    // De-ring the chroma planes: plane 0 is U, plane 1 is V
+    BlocksAcross /= 2;
+    BlocksDown   /= 2;
+    LineLength   /= 2;
+
+    for ( plane=0; plane<2; plane++ )
+    {
+        if ( plane == 0 )
+        {
+            SrcPtr  = Src + pbi->ReconUDataOffset;
+            DestPtr = Dst + pbi->ReconUDataOffset;
+        }
+        else
+        {
+            SrcPtr  = Src + pbi->ReconVDataOffset;
+            DestPtr = Dst + pbi->ReconVDataOffset;
+        }
+
+        for ( row=0; row<BlocksDown; row++ )
+        {
+            for ( col=0; col<BlocksAcross; col++, Block++ )
+            {
+                INT32  Variance = pbi->FragmentVariances[Block];
+                UINT8 *SrcBlk   = SrcPtr  + 8*col;
+                UINT8 *DstBlk   = DestPtr + 8*col;
+
+                // Older bitstreams use the per-fragment quantizer index for chroma.
+                if ( pbi->Vp3VersionNo < 5 )
+                    Quality = pbi->FragQIndex[Block];
+
+                if ( Variance <= Thresh1 )
+                {
+                    CopyBlock ( SrcBlk, DstBlk, LineLength );
+                }
+                else if ( Variance <= Thresh2 )
+                {
+                    DeringBlockWeak ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                }
+                else
+                {
+                    DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+
+                    if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh4) )
+                    {
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                    }
+                }
+            }
+
+            SrcPtr  += 8 * LineLength;
+            DestPtr += 8 * LineLength;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringFrameInterlaced
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *Src             : Pointer to input image.
+ *                  UINT8 *Dst             : Pointer to output image.
+ *
+ *  OUTPUTS       : Dst : every 8x8 block of the Y, U and V planes is written,
+ *                  either filtered or copied unchanged from Src.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a de-ringing filter to an INTERLACED frame.
+ *
+ *  SPECIAL NOTES : The Y plane is handled as two fields: each field has
+ *                  VFragments/2 rows of blocks and is addressed with twice
+ *                  the Y stride; the second field starts one line lower.
+ *                  The chroma planes are handled once each, with half the Y
+ *                  stride and the same number of block rows as one Y field.
+ *                  The frame-level quantizer index is used throughout.
+ *
+ ****************************************************************************/
+void DeringFrameInterlaced ( POSTPROC_INSTANCE *pbi, UINT8 *Src, UINT8 *Dst )
+{
+    UINT32  Block = 0;
+    UINT32  col, row, pass;
+    UINT32  BlocksAcross = pbi->HFragments;
+    UINT32  BlocksDown   = pbi->VFragments/2;       // Y plane will be done in two passes
+    UINT32  LineLength   = pbi->YStride * 2;        // pitch is doubled for interlacing
+    UINT32 *QuantScale;
+    INT32   Thresh1, Thresh2, Thresh3, Thresh4;
+    UINT8  *SrcPtr;                 // start of the current row of blocks in the source
+    UINT8  *DestPtr;                // start of the current row of blocks in the destination
+    INT32   Quality = pbi->FrameQIndex;
+
+    // Variance thresholds and modifier table depend on the bitstream version.
+    if ( pbi->Vp3VersionNo >= 5 )
+    {
+        QuantScale = DeringModifierV3;
+
+        Thresh1 = 384;
+        Thresh2 = 6 * Thresh1;
+        Thresh3 = (5 * Thresh2) / 4;
+        Thresh4 = (5 * Thresh2) / 2;
+    }
+    else
+    {
+        if ( pbi->Vp3VersionNo >= 2 )
+            QuantScale = DeringModifierV2;
+        else
+            QuantScale = DeringModifierV1;
+
+        Thresh1 = 2048;
+        Thresh2 = 15 * Thresh1;
+        Thresh3 = 3 * Thresh2;
+        Thresh4 = 4 * Thresh2;
+    }
+
+    // De-ring Y Plane: pass 0 is the top field, pass 1 the bottom field
+    for ( pass=0; pass<2; pass++ )
+    {
+        SrcPtr  = Src + pbi->ReconYDataOffset;
+        DestPtr = Dst + pbi->ReconYDataOffset;
+
+        if ( pass == 1 )
+        {
+            // The bottom field starts one line below the top field.
+            SrcPtr  += pbi->YStride;
+            DestPtr += pbi->YStride;
+        }
+
+        for ( row=0; row<BlocksDown; row++ )
+        {
+            for ( col=0; col<BlocksAcross; col++, Block++ )
+            {
+                INT32  Variance = pbi->FragmentVariances[Block];
+                UINT8 *SrcBlk   = SrcPtr  + 8*col;
+                UINT8 *DstBlk   = DestPtr + 8*col;
+
+                if ( Variance <= Thresh1 )
+                {
+                    CopyBlock ( SrcBlk, DstBlk, LineLength );
+                }
+                else if ( Variance <= Thresh2 )
+                {
+                    DeringBlockWeak ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                }
+                else
+                {
+                    DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+
+                    // At high post-processing levels a very busy block that also has a
+                    // very busy left/right/lower/upper neighbour gets two further calls.
+                    if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh3) )
+                    {
+                        if ( (col > 0                && pbi->FragmentVariances[Block-1] > Thresh4 ) ||
+                             (col + 1 < BlocksAcross && pbi->FragmentVariances[Block+1] > Thresh4 ) ||
+                             (row + 1 < BlocksDown   && pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
+                             (row > 0                && pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) )
+                        {
+                            DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                            DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                        }
+                    }
+                }
+            }
+
+            SrcPtr  += 8 * LineLength;
+            DestPtr += 8 * LineLength;
+        }
+    }
+
+    // NOTE: BlocksDown for UV Planes is same as in Y for interlaced frame.
+
+    // De-ring the chroma planes: pass 0 is U, pass 1 is V
+    BlocksAcross /= 2;
+    LineLength   /= 4;              // doubled Y stride / 4 == half the Y stride
+
+    for ( pass=0; pass<2; pass++ )
+    {
+        if ( pass == 0 )
+        {
+            SrcPtr  = Src + pbi->ReconUDataOffset;
+            DestPtr = Dst + pbi->ReconUDataOffset;
+        }
+        else
+        {
+            SrcPtr  = Src + pbi->ReconVDataOffset;
+            DestPtr = Dst + pbi->ReconVDataOffset;
+        }
+
+        for ( row=0; row<BlocksDown; row++ )
+        {
+            for ( col=0; col<BlocksAcross; col++, Block++ )
+            {
+                INT32  Variance = pbi->FragmentVariances[Block];
+                UINT8 *SrcBlk   = SrcPtr  + 8*col;
+                UINT8 *DstBlk   = DestPtr + 8*col;
+
+                if ( Variance <= Thresh1 )
+                {
+                    CopyBlock ( SrcBlk, DstBlk, LineLength );
+                }
+                else if ( Variance <= Thresh2 )
+                {
+                    DeringBlockWeak ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                }
+                else
+                {
+                    DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+
+                    if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh4) )
+                    {
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                        DeringBlockStrong ( pbi, SrcBlk, DstBlk, LineLength, Quality, QuantScale );
+                    }
+                }
+            }
+
+            SrcPtr  += 8 * LineLength;
+            DestPtr += 8 * LineLength;
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c
new file mode 100644
index 00000000..260354fb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c
@@ -0,0 +1,92 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void FilteringVert_12_C ( UINT32 QValue,UINT8 *Src, INT32 Pitch);
+extern void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeblockLoopFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ *
+ *  ROUTINE       : PostProcMachineSpecificConfig
+ *
+ *  INPUTS        : UINT32 Version : Codec version number (UNUSED)
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Points every post-processing function pointer at its
+ *                  plain C / generic implementation.
+ *
+ *  SPECIAL NOTES : No processor feature detection is performed here.
+ *
+ ****************************************************************************/
+void PostProcMachineSpecificConfig ( UINT32 Version )
+{
+    (void)Version;
+
+    // Loop filter
+    FilterHoriz                     = FilterHoriz_Generic;
+    FilterVert                      = FilterVert_Generic;
+    FilterHoriz_Simple              = FilterHoriz_Simple_C;
+    FilterVert_Simple               = FilterVert_Simple_C;
+    FilteringHoriz_8                = FilteringHoriz_8_C;
+    FilteringVert_8                 = FilteringVert_8_C;
+    FilteringHoriz_12               = FilteringHoriz_12_C;
+    FilteringVert_12                = FilteringVert_12_C;
+    SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
+
+    // De-blocking
+    SetupDeblockValueArray          = SetupDeblockValueArray_Generic;
+    DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
+    DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
+    DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;
+
+    // De-ringing
+    DeringBlockWeak                 = DeringBlockWeak_C;
+    DeringBlockStrong               = DeringBlockStrong_C;
+
+    // Scaling: 1:2
+    HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
+    VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
+    LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;
+
+    // Scaling: 3:5
+    HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
+    VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
+    LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;
+
+    // Scaling: 4:5
+    HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
+    VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
+    LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;
+
+    // De-interlacing, level clamping and noise
+    FastDeInterlace                 = CFastDeInterlace;
+    ClampLevels                     = ClampLevels_C;
+    PlaneAddNoise                   = PlaneAddNoise_C;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c
new file mode 100644
index 00000000..9980533d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c
@@ -0,0 +1,976 @@
+/****************************************************************************
+*
+* Module Title : loopfilter.c
+*
+* Description : Loop filter functions.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define Mod8(x) ( (x) & 7 )
+
+/****************************************************************************
+* Exports
+*
+* Loop-filter limit tables, one entry per quantizer index (Q_TABLE_SIZE
+* entries each).
+****************************************************************************/
+
+// Limits read by ApplyReconLoopFilter, indexed by the frame Q index.
+// A zero entry (the last 16 here) makes ApplyReconLoopFilter return
+// without filtering.
+UINT32 LoopFilterLimitValuesV1[Q_TABLE_SIZE] =
+{
+ 30, 25, 20, 20, 15, 15, 14, 14,
+ 13, 13, 12, 12, 11, 11, 10, 10,
+ 9, 9, 8, 8, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 4, 4, 4, 4, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+// Table in use by SetupLoopFilter and the Filtering*_C routines below.
+// It is not assigned anywhere in this part of the file -- presumably it is
+// pointed at one of the version-specific tables during setup; TODO confirm.
+UINT32 *LoopFilterLimitValuesV2;
+
+// Same as LoopFilterLimitValuesV1 except for the last 16 entries, which are
+// 2 and 1 instead of 0.
+UINT32 LoopFilterLimitValuesVp4[Q_TABLE_SIZE] =
+{
+ 30, 25, 20, 20, 15, 15, 14, 14,
+ 13, 13, 12, 12, 11, 11, 10, 10,
+ 9, 9, 8, 8, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 4, 4, 4, 4, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+// The Vp5 and Vp6 tables currently hold identical values.
+UINT32 LoopFilterLimitValuesVp5[Q_TABLE_SIZE] =
+{
+ 14, 14, 13, 13, 12, 12, 10, 10,
+ 10, 10, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 7, 7, 7, 7,
+ 7, 7, 6, 6, 6, 6, 6, 6,
+ 5, 5, 5, 5, 4, 4, 4, 4,
+ 4, 4, 4, 3, 3, 3, 3, 2
+};
+
+UINT32 LoopFilterLimitValuesVp6[Q_TABLE_SIZE] =
+{
+ 14, 14, 13, 13, 12, 12, 10, 10,
+ 10, 10, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 7, 7, 7, 7,
+ 7, 7, 6, 6, 6, 6, 6, 6,
+ 5, 5, 5, 5, 4, 4, 4, 4,
+ 4, 4, 4, 3, 3, 3, 3, 2
+};
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SetupBoundingValueArray_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                  INT32 FLimit           : Value to use as limit.
+ *
+ *  OUTPUTS       : ppi->FiltBoundingValue : 512-entry table, rebuilt.
+ *
+ *  RETURNS       : INT32 *: Pointer to the centre of the table (entry 256),
+ *                  so that it can be indexed with signed filter values.
+ *
+ *  FUNCTION      : Set up the bounding value array. Relative to the centre,
+ *                  entry x holds x for |x| < FLimit, then ramps linearly back
+ *                  towards 0 for FLimit <= |x| < 2*FLimit, and is 0 elsewhere.
+ *
+ *  SPECIAL NOTES : Entries from -(2*FLimit-1) to +(2*FLimit-1) around the
+ *                  centre are written, so 2*FLimit must not exceed 256.
+ *
+ ****************************************************************************/
+INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *ppi, INT32 FLimit )
+{
+    INT32 *Centre = &ppi->FiltBoundingValue[256];
+    INT32  k;
+
+    // Everything outside the two ramps stays zero.
+    memset ( ppi->FiltBoundingValue, 0, (512*sizeof(*ppi->FiltBoundingValue)) );
+
+    for ( k=0; k<FLimit; k++ )
+    {
+        // Rising part: the value passes through unchanged.
+        Centre[ k] =  k;
+        Centre[-k] = -k;
+
+        // Falling part: the value is folded back towards zero.
+        Centre[ FLimit+k] = FLimit - k;
+        Centre[-FLimit-k] = k - FLimit;
+    }
+
+    return Centre;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SetupLoopFilter
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ *  OUTPUTS       : ppi->BoundingValuePtr : Set to the freshly built table.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Looks up the filter limit for the current frame Q index
+ *                  and builds the bounding value table for it.
+ *
+ *  SPECIAL NOTES : Versions 2 and above always use the generic C table
+ *                  builder; older versions go through the
+ *                  SetupBoundingValueArray function pointer.
+ *
+ ****************************************************************************/
+void SetupLoopFilter ( POSTPROC_INSTANCE *ppi )
+{
+    INT32 FLimit = LoopFilterLimitValuesV2[ppi->FrameQIndex];
+
+    if ( ppi->Vp3VersionNo >= 2 )
+    {
+        ppi->BoundingValuePtr = SetupBoundingValueArray_Generic ( ppi, FLimit );
+    }
+    else
+    {
+        ppi->BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterHoriz_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi   : Pointer to post-processor instance (NOT USED).
+ *                  UINT8 *PixelPtr          : Pointer to the first of four horizontally
+ *                                             adjacent pixels on the first row.
+ *                  INT32 LineLength         : Stride of input data.
+ *                  INT32 *BoundingValuePtr  : Pointer to array of bounding values.
+ *
+ *  OUTPUTS       : PixelPtr : the middle two pixels of each of 8 rows are
+ *                  modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies the 4-tap loop-filter across vertical edge,
+ *                  i.e. filter is applied horizontally.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
+ *
+ ****************************************************************************/
+void FilterHoriz_Generic
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    UINT8 *Clip = &LimitVal_VP31[VAL_RANGE];    // clipping table, indexed from its centre
+    UINT8 *Row  = PixelPtr;
+    INT32  RowsLeft;
+    INT32  Adjust;
+    (void)ppi;
+
+    for ( RowsLeft=8; RowsLeft>0; RowsLeft-- )
+    {
+        // Raw filter response, rounded and scaled down by 8, then bounded via the table.
+        Adjust = Row[0] - (Row[1]*3) + (Row[2]*3) - Row[3];
+        Adjust = BoundingValuePtr[(Adjust + 4) >> 3];
+
+        // Move the two pixels next to the edge by the same amount in opposite directions.
+        Row[1] = Clip[(INT32)Row[1] + Adjust];
+        Row[2] = Clip[(INT32)Row[2] - Adjust];
+
+        Row += LineLength;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterVert_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi   : Pointer to post-processor instance (NOT USED).
+ *                  UINT8 *PixelPtr          : Pointer to the first pixel of the row
+ *                                             just below the edge.
+ *                  INT32 LineLength         : Stride of input data.
+ *                  INT32 *BoundingValuePtr  : Pointer to array of bounding values.
+ *
+ *  OUTPUTS       : PixelPtr : 8 pixels on the row above the edge and 8 on the
+ *                  row below it are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies the 4-tap loop-filter across horizontal edge,
+ *                  i.e. filter is applied vertically.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The taps are the two
+ *                  rows above PixelPtr, the PixelPtr row and the row below it.
+ *
+ ****************************************************************************/
+void FilterVert_Generic
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    UINT8 *Clip = &LimitVal_VP31[VAL_RANGE];    // clipping table, indexed from its centre
+    UINT8 *Px;
+    INT32  Col;
+    INT32  Adjust;
+    (void)ppi;
+
+    for ( Col=0; Col<8; Col++ )
+    {
+        Px = PixelPtr + Col;
+
+        // Raw filter response down the column.
+        Adjust = (INT32)Px[-(2 * LineLength)]
+               - ((INT32)Px[-LineLength] * 3)
+               + ((INT32)Px[0] * 3)
+               - (INT32)Px[LineLength];
+
+        // Round, scale down by 8 and bound via the table.
+        Adjust = BoundingValuePtr[(Adjust + 4) >> 3];
+
+        // Move the two pixels next to the edge by the same amount in opposite directions.
+        Px[-LineLength] = Clip[(INT32)Px[-LineLength] + Adjust];
+        Px[0]           = Clip[(INT32)Px[0] - Adjust];
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Bound
+ *
+ * INPUTS : UINT32 FLimit : Limit to use in computing bounding value.
+ * INT32 FiltVal : Value to have bounds applied to.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: The bounded value, carrying the sign of the input FiltVal.
+ *
+ * FUNCTION : Computes a bounded Filtval based on specified Flimit.
+ * With a = abs(FiltVal): the magnitude returned is
+ * FLimit - abs(a - FLimit) when a < 2*FLimit, and 0 otherwise,
+ * i.e. it rises from 0 to FLimit and falls back to 0 at 2*FLimit.
+ *
+ * SPECIAL NOTES : Branch-free implementation. It relies on >> 31 of a
+ * negative INT32 sign-extending (implementation-defined in C)
+ * and on INT32 being 32 bits wide.
+ *
+ ****************************************************************************/
+
+// The Windows CE build omits the INLINE qualifier; the function body is shared.
+#if defined (_WIN32_WCE)
+INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
+#else
+INLINE INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
+#endif
+{
+ INT32 Clamp;
+ INT32 FiltSign;
+ INT32 NewSign;
+
+ // Magnitudes at or beyond twice the limit are forced to zero below.
+ Clamp = 2 * FLimit;
+
+ // Next 3 lines are fast way to find abs...
+ FiltSign = (FiltVal >> 31); // Sign extension makes FiltSign all 0's or all 1's
+ FiltVal ^= FiltSign; // FiltVal is then 1's complement of value if -ve
+ FiltVal -= FiltSign; // Filtval = abs Filtval
+
+ FiltVal *= (FiltVal < Clamp); // comparison yields 1 or 0: keep the value below 2 * limit, else zero it
+
+ FiltVal -= FLimit; // subtract limit value
+
+ // Next 3 lines are fast way to find abs...
+ NewSign = (FiltVal >> 31); // Sign extension makes NewSign all 0's or all 1's
+ FiltVal ^= NewSign; // FiltVal is then 1's complement of value if -ve
+ FiltVal -= NewSign; // FiltVal = abs FiltVal
+
+ FiltVal = FLimit - FiltVal; // flimit - abs (filtVal - flimit)
+
+ // Re-apply the original sign: (x + s) ^ s is x when s is 0 and -x when s is all 1's.
+ FiltVal += FiltSign; // convert back to signed value
+ FiltVal ^= FiltSign;
+
+ return FiltVal;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilteringHoriz_8_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the pixel just right of the edge
+ *                                  on the first row.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src : the pixels either side of the edge are modified in
+ *                  place on 8 rows.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies horizontal filter across vertical edge inside
+ *                  block with Q-dependent limits.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1), applied to
+ *                  Src[-2] .. Src[1].
+ *
+ ****************************************************************************/
+void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{
+    UINT8  *Clip  = &LimitVal_VP31[VAL_RANGE];          // clipping table, indexed from its centre
+    UINT32  Limit = LoopFilterLimitValuesV2[QValue];
+    UINT8  *Row   = Src;
+    INT32   RowsLeft;
+    INT32   Adjust;
+
+    for ( RowsLeft=8; RowsLeft>0; RowsLeft-- )
+    {
+        // 4-tap response with rounding, scaled down by 8...
+        Adjust = ( Row[-2] - (Row[-1] * 3) + (Row[0] * 3) - Row[1] + 4 ) >> 3;
+
+        // ...then limited according to the quantizer.
+        Adjust = Bound ( Limit, Adjust );
+
+        Row[-1] = Clip[(INT32)Row[-1] + Adjust];
+        Row[ 0] = Clip[(INT32)Row[ 0] - Adjust];
+
+        Row += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilteringVert_8_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel of the row just
+ *                                  below the edge.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src : the pixels either side of the edge are modified in
+ *                  place in 8 columns.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies vertical filter across horizontal edge inside
+ *                  block with Q-dependent limits.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1), applied to the two
+ *                  rows above Src, the Src row and the row below it.
+ *
+ ****************************************************************************/
+void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{
+    UINT8  *Clip  = &LimitVal_VP31[VAL_RANGE];          // clipping table, indexed from its centre
+    UINT32  Limit = LoopFilterLimitValuesV2[QValue];
+    UINT8  *Px;
+    INT32   Col;
+    INT32   Adjust;
+
+    for ( Col=0; Col<8; Col++ )
+    {
+        Px = Src + Col;
+
+        // 4-tap response with rounding, scaled down by 8...
+        Adjust = ( (INT32)Px[-(2 * Pitch)] -
+                  ((INT32)Px[-Pitch] * 3) +
+                  ((INT32)Px[0] * 3) -
+                   (INT32)Px[Pitch] + 4 ) >> 3;
+
+        // ...then limited according to the quantizer.
+        Adjust = Bound ( Limit, Adjust );
+
+        Px[-Pitch] = Clip[(INT32)Px[-Pitch] + Adjust];
+        Px[ 0]     = Clip[(INT32)Px[ 0] - Adjust];
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilteringHoriz_12_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the pixel just right of the edge
+ *                                  on the first row.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src : the pixels either side of the edge are modified in
+ *                  place on 12 rows.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies horizontal filter across vertical edge with
+ *                  Q-dependent limits, over 12 rows instead of 8.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1), applied to
+ *                  Src[-2] .. Src[1].
+ *
+ ****************************************************************************/
+void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{
+    UINT8  *Clip  = &LimitVal_VP31[VAL_RANGE];          // clipping table, indexed from its centre
+    UINT32  Limit = LoopFilterLimitValuesV2[QValue];
+    UINT8  *Row   = Src;
+    INT32   RowsLeft;
+    INT32   Adjust;
+
+    for ( RowsLeft=12; RowsLeft>0; RowsLeft-- )
+    {
+        // 4-tap response with rounding, scaled down by 8...
+        Adjust = ( Row[-2] - (Row[-1] * 3) + (Row[0] * 3) - Row[1] + 4 ) >> 3;
+
+        // ...then limited according to the quantizer.
+        Adjust = Bound ( Limit, Adjust );
+
+        Row[-1] = Clip[(INT32)Row[-1] + Adjust];
+        Row[ 0] = Clip[(INT32)Row[ 0] - Adjust];
+
+        Row += Pitch;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilteringVert_12_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel of the row just
+ *                                  below the edge.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src : the pixels either side of the edge are modified in
+ *                  place in 12 columns.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies vertical filter across horizontal edge with
+ *                  Q-dependent limits, over 12 columns instead of 8.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1), applied to the two
+ *                  rows above Src, the Src row and the row below it.
+ *
+ ****************************************************************************/
+void FilteringVert_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{
+    UINT8  *Clip  = &LimitVal_VP31[VAL_RANGE];          // clipping table, indexed from its centre
+    UINT32  Limit = LoopFilterLimitValuesV2[QValue];
+    UINT8  *Px;
+    INT32   Col;
+    INT32   Adjust;
+
+    for ( Col=0; Col<12; Col++ )
+    {
+        Px = Src + Col;
+
+        // 4-tap response with rounding, scaled down by 8...
+        Adjust = ( (INT32)Px[-(2 * Pitch)] -
+                  ((INT32)Px[-Pitch] * 3) +
+                  ((INT32)Px[0] * 3) -
+                   (INT32)Px[Pitch] + 4 ) >> 3;
+
+        // ...then limited according to the quantizer.
+        Adjust = Bound ( Limit, Adjust );
+
+        Px[-Pitch] = Clip[(INT32)Px[-Pitch] + Adjust];
+        Px[ 0]     = Clip[(INT32)Px[ 0] - Adjust];
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ApplyReconLoopFilter
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * INT32 FrameQIndex : Q index for the frame.
+ * UINT8 *LastFrameRecon : Pointer to last frame reconstruction buffer.
+ * UINT8 *PostProcessBuffer : Pointer to last post-processing buffer.
+ * UINT8 *FragInfo : Pointer to list of coded blocks.
+ * UINT32 FragInfoElementSize : Size of each element.
+ * UINT32 FragInfoCodedMask : Mask to get at whether fragment is coded.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the edge pixels of coded blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void ApplyReconLoopFilter
+(
+ POSTPROC_INSTANCE *ppi,
+ INT32 FrameQIndex,
+ UINT8 *LastFrameRecon,
+ UINT8 *PostProcessBuffer,
+ UINT8 *FragInfo,
+ UINT32 FragInfoElementSize,
+ UINT32 FragInfoCodedMask
+)
+{
+ int j, m, n;
+ UINT32 nextRow;
+ UINT8 *rowStart;
+ INT32 *BoundingValuePtr;
+
+ INT32 i = 0;
+ INT32 FLimit = 0;
+ int FromFragment = 0;
+ INT32 LineLength = 0;
+ INT32 LineFragments = 0;
+ int FragsAcross = ppi->HFragments;
+ int FragsDown = ppi->VFragments;
+
+ // Store the per-frame arguments in the instance (blockCoded() is defined elsewhere; presumably it reads ppi->FragInfo* -- TODO confirm)
+ ppi->FrameQIndex = FrameQIndex;
+ ppi->LastFrameRecon = LastFrameRecon;
+ ppi->PostProcessBuffer = PostProcessBuffer;
+ ppi->FragInfo = FragInfo;
+ ppi->FragInfoElementSize = FragInfoElementSize;
+ ppi->FragInfoCodedMask = FragInfoCodedMask;
+
+ FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
+ if ( FLimit == 0 )
+ return; // a zero limit for this Q index means no filtering is performed at all
+
+ BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );
+
+ for ( j=0; j<3; j++ ) // one pass per plane: Y, U, V
+ {
+ switch ( j )
+ {
+ case 0: // Y
+ FromFragment = 0;
+ FragsAcross = ppi->HFragments;
+ FragsDown = ppi->VFragments;
+ LineLength = ppi->YStride;
+ LineFragments = ppi->HFragments;
+ rowStart = ppi->LastFrameRecon + ppi->ReconYDataOffset;
+ break;
+ case 1: // U
+ FromFragment = ppi->YPlaneFragments;
+ FragsAcross = ppi->HFragments >> 1;
+ FragsDown = ppi->VFragments >> 1;
+ LineLength = ppi->UVStride;
+ LineFragments = ppi->HFragments / 2;
+ rowStart = ppi->LastFrameRecon + ppi->ReconUDataOffset;
+ break;
+ case 2: // V
+ FromFragment = ppi->YPlaneFragments + ppi->UVPlaneFragments;
+ FragsAcross = ppi->HFragments >> 1;
+ FragsDown = ppi->VFragments >> 1;
+ LineLength = ppi->UVStride;
+ LineFragments = ppi->HFragments / 2;
+ rowStart = ppi->LastFrameRecon + ppi->ReconVDataOffset;
+ break;
+ }
+
+ nextRow = 8*LineLength; // byte distance between successive rows of 8-line fragments
+ i = FromFragment; // fragment index of the plane's first block
+ n = 0;
+
+ /*************/
+ /* First Row */
+ /*************/
+
+ /* First column (NOTE(review): the unrolled first/middle/last handling assumes the plane is at least 2 fragments wide and 2 high -- confirm callers guarantee this) */
+
+ // Only coded blocks are filtered; the top-left block has no left or top edge to filter
+ if ( blockCoded ( i ) )
+ {
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if ( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ /* Middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded( i ))
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded( i + LineFragments) )
+ FilterVert(ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr);
+ }
+ }
+
+ // Last Column (n == FragsAcross-1 here; no right-hand edge to filter)
+ if ( blockCoded( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert(ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr);
+ }
+
+ i++;
+ rowStart += nextRow;
+ n = 0;
+
+ /***************/
+ /* Middle Rows */
+ /***************/
+ for ( m=1; m<FragsDown-1; m++ )
+ {
+ /* First column */
+ n=0;
+
+ // Only coded blocks are filtered; the first column has no left edge to filter
+ if( blockCoded( i ) )
+ {
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if ( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ /* Middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart + 8*n + 6 , LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if ( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+ }
+
+ /* Last Column */
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if ( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+ rowStart += nextRow;
+ }
+ }
+
+ //***********/
+ // Last Row. NOTE(review): the for(j) loop over planes is closed by the brace just above, so this section runs only once, after the loop, using the i and rowStart left over from the final (V) plane; LoopFilter() has the same section inside its plane loop. Confirm whether this is intentional before changing it.
+ //***********/
+
+ /* First Column */
+ n = 0;
+
+ // Only coded blocks are filtered; the last row has no bottom edge to filter
+ if ( blockCoded ( i ) )
+ {
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ /* middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+ }
+ }
+
+ /* Last Column */
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+ }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : LoopFilter
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * INT32 FrameQIndex : Q index for the frame.
+ * UINT8 *LastFrameRecon : Pointer to last frame reconstruction buffer.
+ * UINT8 *PostProcessBuffer : Pointer to last post-processing buffer.
+ * UINT8 *FragInfo : Pointer to list of coded blocks.
+ * UINT32 FragInfoElementSize : Size of each element.
+ * UINT32 FragInfoCodedMask : Mask to get at whether fragment is coded.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the edge pixels of coded blocks.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void LoopFilter
+(
+ POSTPROC_INSTANCE *ppi,
+ INT32 FrameQIndex,
+ UINT8 *LastFrameRecon,
+ UINT8 *PostProcessBuffer,
+ UINT8 *FragInfo,
+ UINT32 FragInfoElementSize,
+ UINT32 FragInfoCodedMask
+)
+{
+ int j, m, n;
+ UINT32 nextRow;
+ UINT8 *rowStart;
+ INT32 *BoundingValuePtr;
+
+ INT32 i = 0;
+ INT32 FLimit = 0;
+ int FromFragment = 0;
+ INT32 LineLength = 0;
+ INT32 LineFragments = 0;
+ int FragsDown = ppi->VFragments;
+ int FragsAcross = ppi->HFragments;
+
+ // Store the per-frame arguments in the instance (blockCoded() is defined elsewhere; presumably it reads ppi->FragInfo* -- TODO confirm)
+ ppi->FrameQIndex = FrameQIndex;
+ ppi->LastFrameRecon = LastFrameRecon;
+ ppi->PostProcessBuffer = PostProcessBuffer;
+ ppi->FragInfo = FragInfo;
+ ppi->FragInfoElementSize = FragInfoElementSize;
+ ppi->FragInfoCodedMask = FragInfoCodedMask;
+
+ FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
+ if ( FLimit == 0 )
+ return; // a zero limit for this Q index means no filtering is performed at all
+
+ BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );
+
+ for ( j=0; j<3; j++ ) // one pass per plane: Y, U, V
+ {
+ switch ( j )
+ {
+ case 0: // Y
+ FromFragment = 0;
+ FragsAcross = ppi->HFragments;
+ FragsDown = ppi->VFragments;
+ LineLength = ppi->YStride;
+ LineFragments = ppi->HFragments;
+ rowStart = ppi->LastFrameRecon + ppi->ReconYDataOffset;
+ break;
+ case 1: // U
+ FromFragment = ppi->YPlaneFragments;
+ FragsAcross = ppi->HFragments >> 1;
+ FragsDown = ppi->VFragments >> 1;
+ LineLength = ppi->UVStride;
+ LineFragments = ppi->HFragments / 2;
+ rowStart = ppi->LastFrameRecon + ppi->ReconUDataOffset;
+ break;
+ case 2: // V
+ FromFragment = ppi->YPlaneFragments + ppi->UVPlaneFragments;
+ FragsAcross = ppi->HFragments >> 1;
+ FragsDown = ppi->VFragments >> 1;
+ LineLength = ppi->UVStride;
+ LineFragments = ppi->HFragments / 2;
+ rowStart = ppi->LastFrameRecon + ppi->ReconVDataOffset;
+ break;
+ }
+
+ nextRow = 8*LineLength; // byte distance between successive rows of 8-line fragments
+ i = FromFragment; // fragment index of the plane's first block
+ n = 0;
+
+ //************/
+ // First Row */
+ //************/
+
+ /* First Column (NOTE(review): the unrolled first/middle/last handling assumes the plane is at least 2 fragments wide and 2 high -- confirm callers guarantee this) */
+
+ // Only coded blocks are filtered; the top-left block has no left or top edge to filter
+ if ( blockCoded ( i ) )
+ {
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ /* Middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz(ppi, rowStart + 8*n +6 , LineLength, BoundingValuePtr);
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+
+ }
+
+ /* Last Column (n == FragsAcross-1 here; no right-hand edge to filter) */
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+ rowStart += nextRow;
+ n = 0;
+
+ //**************/
+ // Middle Rows */
+ //**************/
+ for ( m=1; m<FragsDown-1; m++ )
+ {
+ /* First column */
+ n = 0;
+
+ // Only coded blocks are filtered; the first column has no left edge to filter
+ if ( blockCoded ( i ) )
+ {
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert(ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr);
+ }
+
+ i++;
+
+ /* Middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+ }
+
+ /* Last Column */
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter bottom edge only if the block below is not coded
+ if( !blockCoded (i + LineFragments) )
+ FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+ rowStart += nextRow;
+ }
+
+ //***********/
+ // Last Row (still inside the per-plane loop) */
+ //***********/
+
+ /* First column */
+ n = 0;
+
+ // Only coded blocks are filtered; the last row has no bottom edge to filter
+ if ( blockCoded ( i ) )
+ {
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ /* Middle columns */
+ for ( n=1; n<FragsAcross-1; n++, i++ )
+ {
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+ // Filter right hand border only if the block to the right is not coded
+ if ( !blockCoded ( i + 1 ) )
+ FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+ }
+
+ }
+
+ /* Last Column */
+ if ( blockCoded ( i ) )
+ {
+ // Filter Left edge always
+ FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+ // TopRow is always done
+ FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+ }
+
+ i++;
+
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c
new file mode 100644
index 00000000..d39af5ef
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c
@@ -0,0 +1,796 @@
+/***************************************************************************
+ *
+ * Module Title : PostProc.c
+ *
+ * Description : Post Processing
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+#include "duck_mem.h"
+#include "stdlib.h"
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+/****************************************************************************
+* Macros
+****************************************************************************/
+// Saturate an integer expression to the range 0..255 and narrow it to
+// unsigned char. The argument is evaluated up to three times, so it must
+// not have side effects.
+#define Clamp255(x) ( (unsigned char) ( (x) < 0 ? 0 : ( (x) <= 255 ? (x) : 255 ) ) )
+
+// Round an address up to the next multiple of 32. The arithmetic is done in
+// uintptr_t on every target, so no per-compiler pointer-width check is
+// needed and the address is never truncated. The result is an integer; the
+// caller casts it back to the pointer type it needs.
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 ) & ~( (uintptr_t) 31 ) )
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void SimpleDeblockFrame(POSTPROC_INSTANCE *ppi, UINT8* SrcBuffer, UINT8* DestBuffer);
+extern void UpdateUMVBorder( POSTPROC_INSTANCE *ppi, UINT8 * DestReconPtr);
+extern void PostProcMachineSpecificConfig(UINT32 );
+
+extern void DeringFrame(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
+extern void DeringFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
+extern void DeblockFrame(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+extern void DeblockFrameUsing7TapFilter(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+extern void DeblockFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+
+extern UINT32 DeringModifierV1[ Q_TABLE_SIZE ];
+extern UINT32 DeringModifierV2[ Q_TABLE_SIZE ];
+
+extern UINT32 *DCQuantScaleV2;
+extern UINT32 *DCQuantScaleUV;
+extern UINT32 *DCQuantScaleV1;
+
+extern UINT32 LoopFilterLimitValuesVp4[Q_TABLE_SIZE];
+extern UINT32 LoopFilterLimitValuesVp5[Q_TABLE_SIZE];
+extern UINT32 LoopFilterLimitValuesVp6[Q_TABLE_SIZE];
+
+extern UINT32 DeblockLimitValuesVp4[Q_TABLE_SIZE];
+extern UINT32 DeblockLimitValuesVp5[Q_TABLE_SIZE];
+extern UINT32 DeblockLimitValuesVp6[Q_TABLE_SIZE];
+
+extern UINT32 *LoopFilterLimitValuesV2;
+
+extern UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+* Exports: the saturation table filled by InitPostProcessing, plus function-pointer hooks defined here but not assigned in the visible code (presumably set by PostProcMachineSpecificConfig -- TODO confirm)
+****************************************************************************/
+UINT8 LimitVal_VP31[VAL_RANGE * 3];
+void (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+void (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+void (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+void (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+void (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void (*FilterHoriz_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void (*FilterVert_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+void (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+void (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+void (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+void (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+INT32*(*SetupBoundingValueArray)(xPB_INST ppi, INT32 FLimit);
+INT32*(*SetupDeblockValueArray)(xPB_INST ppi, INT32 FLimit);
+void (*FilterHoriz)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void (*FilterVert)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void (*ClampLevels)( POSTPROC_INSTANCE *ppi,INT32 BlackClamp, INT32 WhiteClamp, UINT8 *Src, UINT8 *Dst);
+void (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);
+void (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ *
+ * ROUTINE : InitPostProcessing
+ *
+ * INPUTS : UINT32 *DCQuantScaleV2p :
+ * UINT32 *DCQuantScaleUVp :
+ * UINT32 *DCQuantScaleV1p :
+ * UINT32 Version : Codec version number.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initialise pointers to version specific data tables &
+ * set-up LUTs.
+ *
+ * SPECIAL NOTES : None
+ *
+ ****************************************************************************/
+void InitPostProcessing
+(
+    UINT32 *DCQuantScaleV2p,
+    UINT32 *DCQuantScaleUVp,
+    UINT32 *DCQuantScaleV1p,
+    UINT32 Version
+)
+{
+    int Entry;
+    UINT32 *LoopLimits;
+    UINT32 *DeblockLimits;
+
+    // Saturation table: entry k holds (k - VAL_RANGE) clamped to 0..255
+    for ( Entry = 0; Entry < VAL_RANGE*3; ++Entry )
+    {
+        int Shifted = Entry - VAL_RANGE;
+        LimitVal_VP31[Entry] = Clamp255 ( Shifted );
+    }
+
+    // Keep hold of the caller's DC quantizer scale tables
+    DCQuantScaleV2 = DCQuantScaleV2p;
+    DCQuantScaleUV = DCQuantScaleUVp;
+    DCQuantScaleV1 = DCQuantScaleV1p;
+
+    // The V1 de-ringing modifiers are a straight copy of the V1 DC scale table
+    for ( Entry = 0; Entry < Q_TABLE_SIZE; ++Entry )
+        DeringModifierV1[Entry] = DCQuantScaleV1[Entry];
+
+    // Choose the limit tables for the codec generation: 6 and later, 5, or older
+    if ( Version < 5 )
+    {
+        LoopLimits = LoopFilterLimitValuesVp4;
+        DeblockLimits = DeblockLimitValuesVp4;
+    }
+    else if ( Version < 6 )
+    {
+        LoopLimits = LoopFilterLimitValuesVp5;
+        DeblockLimits = DeblockLimitValuesVp5;
+    }
+    else
+    {
+        LoopLimits = LoopFilterLimitValuesVp6;
+        DeblockLimits = DeblockLimitValuesVp6;
+    }
+    LoopFilterLimitValuesV2 = LoopLimits;
+    DeblockLimitValuesV2 = DeblockLimits;
+
+    // Platform-specific set-up is always the last step
+    PostProcMachineSpecificConfig ( Version );
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DeInitPostProcessing
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-initializes post-processing module.
+ *
+ * SPECIAL NOTES : Currently this function does nothing.
+ *
+ ****************************************************************************/
+void DeInitPostProcessing ( void )
+{
+    // Intentionally empty: the module holds nothing that needs tearing down here.
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeletePostProcBuffers
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : De-allocates buffers used by the post-processing module.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeletePostProcBuffers ( POSTPROC_INSTANCE *ppi )
+{
+    // Each buffer is tracked by two pointers: the raw allocation (xxxAlloc),
+    // which is what gets freed, and the pointer derived from it that the
+    // rest of the module uses. Both are zeroed so that calling this function
+    // again is harmless.
+
+    // Intermediate frame buffer
+    if ( ppi->IntermediateBufferAlloc )
+        duck_free ( ppi->IntermediateBufferAlloc );
+    ppi->IntermediateBufferAlloc = 0;
+    ppi->IntermediateBuffer = 0;
+
+    // Loop-filter bounding values
+    if ( ppi->FiltBoundingValueAlloc )
+        duck_free ( ppi->FiltBoundingValueAlloc );
+    ppi->FiltBoundingValueAlloc = 0;
+    ppi->FiltBoundingValue = 0;
+
+    // De-blocking bounding values
+    if ( ppi->DeblockBoundingValueAlloc )
+        duck_free ( ppi->DeblockBoundingValueAlloc );
+    ppi->DeblockBoundingValueAlloc = 0;
+    ppi->DeblockBoundingValue = 0;
+
+    // Per-fragment Q index
+    if ( ppi->FragQIndexAlloc )
+        duck_free ( ppi->FragQIndexAlloc );
+    ppi->FragQIndexAlloc = 0;
+    ppi->FragQIndex = 0;
+
+    // Per-fragment variances
+    if ( ppi->FragmentVariancesAlloc )
+        duck_free ( ppi->FragmentVariancesAlloc );
+    ppi->FragmentVariancesAlloc = 0;
+    ppi->FragmentVariances = 0;
+
+    // Per-fragment de-blocking flags
+    if ( ppi->FragDeblockingFlagAlloc )
+        duck_free ( ppi->FragDeblockingFlagAlloc );
+    ppi->FragDeblockingFlagAlloc = 0;
+    ppi->FragDeblockingFlag = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AllocatePostProcBuffers
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32: TRUE: Success, FALSE: Failure (Change to BOOL!!)
+ *
+ * FUNCTION : Allocates buffers used by the post-processing module.
+ *
+ * SPECIAL NOTES : Uses ROUNDUP32 to align allocated buffers to improve
+ * cache performance.
+ *
+ ****************************************************************************/
+INT32 AllocatePostProcBuffers ( POSTPROC_INSTANCE *ppi )
+{
+    // Release anything left from an earlier configuration first.
+    DeletePostProcBuffers ( ppi );
+
+    // Every request is padded by 32 bytes so that the pointer actually used
+    // can be rounded up to a 32-byte boundary inside the allocation.
+
+    // Intermediate frame: Y plane plus borders, times 3/2 for the chroma planes
+    ppi->IntermediateBufferAlloc = (UINT8*)duck_malloc ( 32 + ppi->YStride *
+        (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2) * 3/2 * sizeof(UINT8), DMEM_GENERAL);
+    if ( !ppi->IntermediateBufferAlloc )
+        goto AllocFailed;
+    ppi->IntermediateBuffer = (UINT8 *)ROUNDUP32 ( ppi->IntermediateBufferAlloc );
+
+    // Loop-filter bounding values (512 entries)
+    ppi->FiltBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FiltBoundingValueAlloc )
+        goto AllocFailed;
+    ppi->FiltBoundingValue = (INT32 *)ROUNDUP32 ( ppi->FiltBoundingValueAlloc );
+
+    // De-blocking bounding values (512 entries)
+    ppi->DeblockBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->DeblockBoundingValueAlloc )
+        goto AllocFailed;
+    ppi->DeblockBoundingValue = (INT32 *)ROUNDUP32 ( ppi->DeblockBoundingValueAlloc );
+
+    // One Q index per fragment
+    ppi->FragQIndexAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FragQIndexAlloc )
+        goto AllocFailed;
+    ppi->FragQIndex = (INT32 *)ROUNDUP32 ( ppi->FragQIndexAlloc );
+
+    // One variance per fragment
+    ppi->FragmentVariancesAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FragmentVariancesAlloc )
+        goto AllocFailed;
+    ppi->FragmentVariances = (INT32 *)ROUNDUP32 ( ppi->FragmentVariancesAlloc );
+
+    // One de-blocking flag per fragment
+    ppi->FragDeblockingFlagAlloc = (UINT8 *)duck_malloc(32+ppi->UnitFragments*sizeof(UINT8), DMEM_GENERAL);
+    if ( !ppi->FragDeblockingFlagAlloc )
+        goto AllocFailed;
+    ppi->FragDeblockingFlag = (UINT8 *)ROUNDUP32 ( ppi->FragDeblockingFlagAlloc );
+
+    return TRUE;
+
+AllocFailed:
+    // Any failure releases whatever was obtained so far and leaves all buffer pointers zero.
+    DeletePostProcBuffers ( ppi );
+    return FALSE;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ChangePostProcConfiguration
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * CONFIG_TYPE *ConfigurationInit : Pointer to the configuration that is
+ * copied into the instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Initializes the post-processor with the settings passed in:
+ * copies the configuration, derives the fragment counts, border
+ * size and plane offsets from it, then (re)allocates the
+ * working buffers.
+ *
+ * SPECIAL NOTES : The result of AllocatePostProcBuffers is not reported to
+ * the caller. On failure that function frees everything, so
+ * the instance's buffer pointers are left zero.
+ *
+ ****************************************************************************/
+void ChangePostProcConfiguration ( POSTPROC_INSTANCE *ppi, CONFIG_TYPE *ConfigurationInit )
+{
+    memcpy ((void *)&ppi->Configuration, (void *)ConfigurationInit, sizeof(CONFIG_TYPE) );
+
+    // Frame geometry in 8x8 fragments
+    ppi->HFragments = (ppi->Configuration.VideoFrameWidth >> 3);
+    ppi->VFragments = (ppi->Configuration.VideoFrameHeight>> 3);
+    ppi->YStride = ppi->Configuration.YStride;
+    ppi->UVStride = ppi->Configuration.UVStride;
+    ppi->YPlaneFragments = ppi->HFragments * ppi->VFragments;
+    ppi->UVPlaneFragments = ppi->YPlaneFragments / 4;
+    ppi->UnitFragments = ppi->YPlaneFragments + 2 * ppi->UVPlaneFragments;
+
+    // Border width: half of whatever the Y stride leaves beyond the fragment columns
+    ppi->MVBorder = (ppi->YStride - 8*ppi->HFragments)/2;
+
+    // Offset of the first Y pixel: skip the top border lines, then the left border
+    ppi->ReconYDataOffset = ppi->MVBorder * ppi->YStride + ppi->MVBorder;
+
+    // U data follows the bordered Y plane; the chroma border is half the luma border
+    ppi->ReconUDataOffset =
+        (ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2))
+        + ppi->MVBorder / 2 * ppi->UVStride + ppi->MVBorder/2;
+
+    // V data follows the bordered Y plane and the bordered U plane
+    ppi->ReconVDataOffset =
+        (ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2))
+        + (ppi->UVStride * (ppi->Configuration.VideoFrameHeight/2 + ppi->MVBorder))
+        + ppi->MVBorder/2 * ppi->UVStride +ppi->MVBorder/2;
+
+    // Buffer sizes depend on the values computed above; the result is ignored (see SPECIAL NOTES)
+    AllocatePostProcBuffers ( ppi );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CreatePostProcInstance
+ *
+ * INPUTS : CONFIG_TYPE *ConfigurationInit : Pointer to configuration.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : POSTPROC_INSTANCE *: Pointer to allocated & configured
+ * post-processor instance.
+ *
+ * FUNCTION : Allocates space for and initializes a post-processor
+ * instance.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+POSTPROC_INSTANCE *CreatePostProcInstance ( CONFIG_TYPE *ConfigurationInit )
+{
+    int InstanceBytes = sizeof ( POSTPROC_INSTANCE );
+    POSTPROC_INSTANCE *Instance = (POSTPROC_INSTANCE *) duck_malloc ( InstanceBytes, DMEM_GENERAL );
+
+    if ( Instance )
+    {
+        // Start from an all-zero instance, then apply the caller's configuration
+        memset ( (unsigned char *)Instance, 0, InstanceBytes );
+        ChangePostProcConfiguration ( Instance, ConfigurationInit );
+
+        // A new instance starts with AddNoiseMode set to 1
+        Instance->AddNoiseMode = 1;
+    }
+
+    // Null when the instance itself could not be allocated
+    return Instance;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeletePostProcInstance
+ *
+ * INPUTS : POSTPROC_INSTANCE **ppi : Pointer-to-pointer to post-processor instance.
+ *
+ * OUTPUTS : POSTPROC_INSTANCE **ppi : Pointer-to-pointer to post-processor instance.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Deletes post-processor instance & de-allocates memory.
+ *
+ * SPECIAL NOTES : Pointer to post-processor instance is set to NULL
+ * on exit.
+ *
+ ****************************************************************************/
+void DeletePostProcInstance ( POSTPROC_INSTANCE **ppi )
+{
+    POSTPROC_INSTANCE *Instance = *ppi;
+
+    // Nothing to release if the caller's pointer is already null
+    if ( !Instance )
+        return;
+
+    // Free the working buffers first, then the instance, then clear the caller's pointer
+    DeletePostProcBuffers ( Instance );
+    duck_free ( Instance );
+    *ppi = 0;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetPPInterlacedMode
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * int Interlaced : 0=Non-interlaced, 1=Interlaced.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Set post-processor's Interlaced Mode to specified value.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SetPPInterlacedMode ( POSTPROC_INSTANCE *ppi, int Interlaced )
+{
+ ppi->Configuration.Interlaced = Interlaced; // stored in the instance's own copy of the configuration
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetDeInterlaceMode
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * int DeInterlaceMode : Mode to use for de-interlacing.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Set post-processor's De-Interlace Mode to specified value.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SetDeInterlaceMode ( POSTPROC_INSTANCE *ppi, int DeInterlaceMode )
+{
+ ppi->DeInterlaceMode = DeInterlaceMode; // PostProcess tests this together with Configuration.Interlaced
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SetAddNoiseMode
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * int AddNoiseMode : Value to store as the add-noise mode.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Set post-processor's Add-Noise Mode to specified value.
+ *
+ * SPECIAL NOTES : CreatePostProcInstance sets this mode to 1 on new instances.
+ *
+ ****************************************************************************/
+void SetAddNoiseMode(POSTPROC_INSTANCE *ppi, int AddNoiseMode)
+{
+ ppi->AddNoiseMode = AddNoiseMode;
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : UpdateFragQIndex
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Update the QIndex for each updated block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void UpdateFragQIndex ( POSTPROC_INSTANCE *ppi )
+{
+    // Q index of the frame being processed, read once up front
+    UINT32 QIndexNow = ppi->FrameQIndex;
+    UINT32 Frag = 0;
+
+    // Stamp every coded fragment with this frame's Q index; the entries of
+    // uncoded fragments are left as they were.
+    while ( Frag < ppi->UnitFragments )
+    {
+        if ( blockCoded ( Frag ) )
+            ppi->FragQIndex[Frag] = QIndexNow;
+        ++Frag;
+    }
+}
+
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : gaussian
+ *
+ * INPUTS : double sigma : Standard deviation of the distribution.
+ * double mu : Mean of the distribution.
+ * double x : Position at which to evaluate the curve.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : double: Height of the curve at x.
+ *
+ * FUNCTION : Evaluates the gaussian (normal) density with deviation
+ * sigma and mean mu at position x.
+ *
+ * SPECIAL NOTES : Pi is taken as 3.14159265.
+ *
+ ****************************************************************************/
+double gaussian(double sigma, double mu, double x)
+{
+    double Offset = x - mu;
+    double Norm = sigma * sqrt(2.0*3.14159265);
+    double Exponent = -Offset*Offset/(2*sigma*sigma);
+
+    return 1 / Norm * exp(Exponent);
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : PlaneAddNoise_C
+ *
+ * INPUTS : UINT8 *Start : First pixel of the plane to add noise to.
+ * UINT32 Width : Width of the plane in pixels.
+ * UINT32 Height : Height of the plane in lines.
+ * INT32 Pitch : Distance between successive lines of the plane.
+ * int q : Quantizer; sets the spread of the noise
+ * (sigma = 1 + 0.8*(63-q)/63).
+ *
+ * OUTPUTS : None (the plane is modified in place).
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Adds gaussian noise to a plane of pixels.
+ *
+ * SPECIAL NOTES : Each pixel is first clamped to [M, 255-M], where M is the
+ * magnitude of the most negative noise value, so that adding
+ * the noise cannot wrap around 0 or 255. The noise comes
+ * from one 2048-entry line that is read cyclically from a
+ * random start offset per row, so any Width is safe.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+    enum { DIST_ENTRIES = 256, DIST_CAPACITY = 300, NOISE_ENTRIES = 2048 };
+
+    unsigned int i, j;
+    int Level;
+    int Next = 0;
+    int MaxNoise;
+
+    // Explicitly signed: the table holds negative noise values, and plain
+    // char is unsigned on some platforms.
+    signed char CharDist[DIST_CAPACITY];
+    signed char Rand[NOISE_ENTRIES];
+
+    double sigma = 1 + .8*(63-q) / 63.0;
+
+    // Build a lookup table of 256 entries that follows a gaussian
+    // distribution with the sigma chosen above: each level -32..31 occupies a
+    // run of entries proportional to the height of the curve at that level.
+    for ( Level = -32; Level < 32; Level++ )
+    {
+        int k;
+        int Count = (int) (.5+256*gaussian(sigma,0,Level));
+
+        // Defensive: never write past the end of CharDist
+        if ( Count > DIST_CAPACITY - Next )
+            Count = DIST_CAPACITY - Next;
+
+        for ( k = 0; k < Count; k++ )
+            CharDist[Next+k] = (signed char) Level;
+
+        if ( Count > 0 )
+            Next += Count;
+    }
+
+    // Rounding may leave the table short of 256 entries; pad with "no noise"
+    for ( ; Next < DIST_ENTRIES; Next++ )
+        CharDist[Next] = 0;
+
+    // Generate one line of noise samples drawn from that distribution
+    for ( i = 0; i < NOISE_ENTRIES; i++ )
+        Rand[i] = CharDist[rand() & 0xff];
+
+    // The first table entry is the most negative level that occurs; its
+    // magnitude bounds the noise in both directions.
+    MaxNoise = -CharDist[0];
+
+    for ( i = 0; i < Height; i++ )
+    {
+        // Signed row offset, so a negative Pitch also addresses correctly
+        UINT8 *Pos = Start + (ptrdiff_t) i * Pitch;
+
+        // Each row starts reading the noise line at a random offset
+        unsigned int Offset = rand() & 0xff;
+
+        for ( j = 0; j < Width; j++ )
+        {
+            // Leave headroom at both ends so the sum below stays in 0..255
+            if ( Pos[j] < MaxNoise )
+                Pos[j] = (UINT8) MaxNoise;
+
+            if ( Pos[j] > 255 - MaxNoise )
+                Pos[j] = (UINT8) (255 - MaxNoise);
+
+            // Wrap the index so planes wider than the noise line stay in bounds
+            Pos[j] = (UINT8) (Pos[j] + Rand[(Offset + j) & (NOISE_ENTRIES - 1)]);
+        }
+    }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : PostProcess
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ * INT32 Vp3VersionNo : Encoder version used to code frame.
+ * INT32 FrameType : Encoding method: Keyframe or non-Keyframe.
+ * INT32 PostProcessingLevel : Level of post-processing to perform.
+ * INT32 FrameQIndex : Q-index used to code frame.
+ * UINT8 *LastFrameRecon : Pointer to last frame reconstruction buffer.
+ * UINT8 *PostProcessBuffer : Pointer to last post-processing buffer.
+ * UINT8 *FragInfo : Pointer to list of coded blocks.
+ * UINT32 FragInfoElementSize : Size of each element.
+ * UINT32 FragInfoCodedMask : Mask to get at whether fragment is coded.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Applies de-blocking and de-ringing filters to the frame.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void PostProcess
+(
+ POSTPROC_INSTANCE *ppi,
+ INT32 Vp3VersionNo,
+ INT32 FrameType,
+ INT32 PostProcessingLevel,
+ INT32 FrameQIndex,
+ UINT8 *LastFrameRecon,
+ UINT8 *PostProcessBuffer,
+ UINT8 *FragInfo,
+ UINT32 FragInfoElementSize,
+ UINT32 FragInfoCodedMask
+)
+{
+ // Byte count copied for each of the U and V regions when the chroma data
+ // is moved from the intermediate (or source) buffer to the output buffer.
+ int ReconUVPlaneSize;
+
+ // variables passed in per frame
+ ppi->Vp3VersionNo = Vp3VersionNo;
+ ppi->FrameType = FrameType;
+ ppi->PostProcessingLevel = PostProcessingLevel;
+ ppi->FrameQIndex = FrameQIndex;
+ ppi->LastFrameRecon = LastFrameRecon;
+ ppi->PostProcessBuffer = PostProcessBuffer;
+ ppi->FragInfo = FragInfo;
+ ppi->FragInfoElementSize = FragInfoElementSize;
+ ppi->FragInfoCodedMask = FragInfoCodedMask;
+
+ // Select the filter chain from the requested level.  Note that several
+ // cases contain a 'break' nested inside an 'if': that break leaves the
+ // whole switch, so the statements after the 'if' run only on the
+ // remaining (non-interlaced or older-version) paths.
+ switch ( ppi->PostProcessingLevel )
+ {
+ case 8:
+ // On a slow machine, use a simpler and faster deblocking filter
+ UpdateFragQIndex ( ppi );
+ if(ppi->Vp3VersionNo < 2)
+ {
+ DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ }
+ else
+ {
+ if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
+ {
+ // Deblock into the intermediate buffer, copy the U and V regions
+ // across unchanged, then de-interlace the Y region into the output.
+ SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+ ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+ FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset,
+ ppi->PostProcessBuffer+ppi->ReconYDataOffset,
+ ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+ }
+ else
+ SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ }
+ break;
+
+
+ // Levels 5 and 6: deblock + dering, optionally followed by added noise.
+ case 6:
+ case 5:
+ if ( ppi->Vp3VersionNo < 5 )
+ {
+ UpdateFragQIndex ( ppi );
+ }
+ else
+ {
+ if ( ppi->Configuration.Interlaced )
+ {
+ if ( !ppi->DeInterlaceMode )
+ {
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+ DeringFrameInterlaced ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+ }
+ else
+ {
+ // Filter in the intermediate buffer, then copy chroma and
+ // de-interlace luma into the output buffer.
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+ UpdateUMVBorder ( ppi, ppi->IntermediateBuffer );
+ DeringFrameInterlaced ( ppi, ppi->IntermediateBuffer, ppi->IntermediateBuffer );
+
+ ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+ FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset,
+ ppi->PostProcessBuffer+ppi->ReconYDataOffset,
+ ppi->HFragments*8, ppi->VFragments*8, ppi->YStride);
+ }
+ // Leaves the switch: the interlaced path skips the progressive
+ // filters and the noise step below.
+ break;
+ }
+ }
+ // Progressive content (or versions below 5).
+ DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+ DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+
+ // Noise is added to the Y region only, and only when AddNoiseMode is set.
+ // NOTE(review): PlaneAddNoise is compared against 0 before the call, so it
+ // is presumably a function pointer that may be unset - confirm at its declaration.
+ if(ppi->AddNoiseMode&&PlaneAddNoise!=0)
+ PlaneAddNoise(ppi->PostProcessBuffer + ppi->ReconYDataOffset,ppi->HFragments*8, ppi->VFragments*8,ppi->YStride,FrameQIndex);
+
+ break;
+ // Level 7: interlaced streams (version >= 5) are deblocked only;
+ // everything else gets deblock + dering, without noise.
+ case 7:
+ if ( ppi->Vp3VersionNo >= 5 )
+ {
+ if ( ppi->Configuration.Interlaced )
+ {
+ if ( !ppi->DeInterlaceMode )
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ else
+ {
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+ ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+ FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset,
+ ppi->PostProcessBuffer+ppi->ReconYDataOffset,
+ ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+ }
+ // Leaves the switch; the progressive filters below are skipped.
+ break;
+ }
+ }
+ else
+ {
+ UpdateFragQIndex ( ppi );
+ }
+ DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+ DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+
+
+ break;
+
+
+ // Level 4: deblocking only (no dering, no noise).
+ case 4:
+ if ( ppi->Vp3VersionNo >= 5 )
+ {
+ if ( ppi->Configuration.Interlaced )
+ {
+ if ( !ppi->DeInterlaceMode )
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ else
+ {
+ DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+ ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+ FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset,
+ ppi->PostProcessBuffer+ppi->ReconYDataOffset,
+ ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+ }
+ // Leaves the switch; DeblockFrame below is skipped.
+ break;
+ }
+ }
+ else
+ {
+ UpdateFragQIndex ( ppi );
+ }
+ DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ //PlaneAddNoise(ppi->PostProcessBuffer + ppi->ReconYDataOffset,ppi->HFragments*8, ppi->VFragments*8,ppi->YStride,FrameQIndex);
+ break;
+
+ // Level 1: only the per-fragment Q index is refreshed; no pixels are
+ // written to PostProcessBuffer here.
+ case 1:
+ UpdateFragQIndex ( ppi );
+ break;
+
+ // Level 0: no filtering.  The output buffer is written only when an
+ // interlaced stream is being de-interlaced (chroma copied, luma
+ // de-interlaced straight from LastFrameRecon).
+ case 0:
+ if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
+ {
+ ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->LastFrameRecon+ppi->ReconUDataOffset, ReconUVPlaneSize );
+ memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->LastFrameRecon+ppi->ReconVDataOffset, ReconUVPlaneSize );
+ FastDeInterlace ( ppi->LastFrameRecon+ppi->ReconYDataOffset,
+ ppi->PostProcessBuffer+ppi->ReconYDataOffset,
+ ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+ }
+ break;
+
+ // Any other level (including 2 and 3): progressive deblock + dering,
+ // without UpdateFragQIndex and without interlace handling.
+ default:
+ DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+ UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+ DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+ break;
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c
new file mode 100644
index 00000000..ee7a26fb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c
@@ -0,0 +1,1496 @@
+/****************************************************************************
+*
+* Module Title : scale.c
+*
+* Description : Image scaling functions.
+*
+***************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern void UpdateUMVBorder ( POSTPROC_INSTANCE *ppi, UINT8 * DestReconPtr );
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_Copy
+*
+*  INPUTS        : const unsigned char *source : Line of pixels to copy from.
+*                  unsigned int sourceWidth    : Number of bytes to copy.
+*                  unsigned char *dest         : Line of pixels to copy to.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Unscaled copy of one horizontal line: sourceWidth bytes
+*                  are transferred from source to dest with memcpy.
+*
+*  SPECIAL NOTES : source and dest must not overlap (memcpy is used).
+*
+****************************************************************************/
+void HorizontalLine_Copy
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    memcpy ( dest, source, sourceWidth );
+
+    /* Present only so the signature matches the scaling line functions. */
+    (void) destWidth;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : NullScale
+*
+*  INPUTS        : unsigned char *dest    : Destination data (NOT USED).
+*                  unsigned int destPitch : Stride of destination (NOT USED).
+*                  unsigned int destWidth : Width of destination (NOT USED).
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Placeholder with the same signature as the vertical band
+*                  scalers; it touches nothing and returns immediately.
+*
+*  SPECIAL NOTES : None.
+*
+****************************************************************************/
+void NullScale ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    /* All three arguments are deliberately ignored. */
+    (void) dest;
+    (void) destPitch;
+    (void) destWidth;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_4_5_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Number of source pixels.
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 4 to 5: every group of
+*                  4 source pixels produces 5 destination pixels.
+*
+*  SPECIAL NOTES : Groups other than the last interpolate their fifth pixel
+*                  with the first pixel of the following group; the last
+*                  group repeats its own final pixel instead.
+*                  Lines shorter than 4 pixels are left untouched.  Widths
+*                  that are not a multiple of 4 make the final group read
+*                  past sourceWidth.
+*
+****************************************************************************/
+void HorizontalLine_4_5_Scale_C
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    unsigned int i;
+    unsigned int a, b, c;
+    unsigned char *des = dest;
+    const unsigned char *src = source;
+
+    (void) destWidth;
+
+    // sourceWidth is unsigned, so "sourceWidth - 4" would wrap to a huge
+    // value for short lines; with fewer than 4 pixels there is no group to scale.
+    if ( sourceWidth < 4 )
+        return;
+
+    // All groups except the last.
+    for ( i = 0; i < sourceWidth - 4; i += 4 )
+    {
+        a = src[0];
+        b = src[1];
+        des[0] = (unsigned char) a;
+        des[1] = (unsigned char) (( a * 51 + 205 * b + 128 ) >> 8);
+        c = src[2] * 154;
+        a = src[3];
+        des[2] = (unsigned char) (( b * 102 + c + 128 ) >> 8);
+        des[3] = (unsigned char) (( c + 102 * a + 128 ) >> 8);
+        b = src[4];                     // first pixel of the next group
+        des[4] = (unsigned char) (( a * 205 + 51 * b + 128 ) >> 8);
+
+        src += 4;
+        des += 5;
+    }
+
+    // Last group: no following pixel is available, so the fifth output
+    // repeats the fourth source pixel.
+    a = src[0];
+    b = src[1];
+    des[0] = (unsigned char) a;
+    des[1] = (unsigned char) (( a * 51 + 205 * b + 128 ) >> 8);
+    c = src[2] * 154;
+    a = src[3];
+    des[2] = (unsigned char) (( b * 102 + c + 128 ) >> 8);
+    des[3] = (unsigned char) (( c + 102 * a + 128 ) >> 8);
+    des[4] = (unsigned char) a;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_4_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 4 to 5 scale of one band.  For every
+*                  column rows 1..4 are rewritten from the values held in
+*                  rows 0..3 and row 5.
+*
+*  SPECIAL NOTES : Row 5 (the first line of the band below) is read but not
+*                  modified, so it must already hold valid data.
+*
+****************************************************************************/
+void VerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col;
+
+    for ( col = 0; col < destWidth; col++ )
+    {
+        unsigned char *pix = dest + col;
+        unsigned int row0 = pix[0];
+        unsigned int row1 = pix[destPitch];
+        unsigned int row2x154, row3, below;
+
+        pix[destPitch] = (UINT8) (( row0 * 51 + 205 * row1 + 128 ) >> 8);
+
+        // Rows 2 and 3 are fetched before either of them is overwritten.
+        row2x154 = pix[destPitch*2] * 154;
+        row3     = pix[destPitch*3];
+
+        pix[destPitch*2] = (UINT8) (( row1 * 102 + row2x154 + 128 ) >> 8);
+        pix[destPitch*3] = (UINT8) (( row2x154 + 102 * row3 + 128 ) >> 8);
+
+        // Row 4 blends row 3 with the first line of the next band.
+        below = pix[destPitch*5];
+        pix[destPitch*4] = (UINT8) (( row3 * 205 + 51 * below + 128 ) >> 8);
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_4_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 4 to 5 scale of the final band.  For
+*                  every column rows 1..4 are rewritten from the values
+*                  held in rows 0..3.
+*
+*  SPECIAL NOTES : There is no band below the last one, so row 4 is a
+*                  plain copy of the original row 3.
+*
+****************************************************************************/
+void LastVerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col;
+
+    for ( col = 0; col < destWidth; col++ )
+    {
+        unsigned char *pix = dest + col;
+        unsigned int row0 = pix[0];
+        unsigned int row1 = pix[destPitch];
+        unsigned int row2x154, row3;
+
+        pix[destPitch] = (UINT8) (( row0 * 51 + 205 * row1 + 128 ) >> 8);
+
+        // Rows 2 and 3 are fetched before either of them is overwritten.
+        row2x154 = pix[destPitch*2] * 154;
+        row3     = pix[destPitch*3];
+
+        pix[destPitch*2] = (UINT8) (( row1 * 102 + row2x154 + 128 ) >> 8);
+        pix[destPitch*3] = (UINT8) (( row2x154 + 102 * row3 + 128 ) >> 8);
+
+        // No further line to interpolate with: repeat the original row 3.
+        pix[destPitch*4] = (UINT8) row3;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale_4_5_2D
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi      : Post-processor instance (NOT USED).
+*                  const unsigned char *source : Pointer to source image.
+*                  unsigned int sourcePitch    : Stride of source image.
+*                  unsigned int sourceWidth    : Width of source image.
+*                  unsigned int sourceHeight   : Height of source image (NOT USED).
+*                  unsigned char *dest         : Pointer to destination image.
+*                  unsigned int destPitch      : Stride of destination image.
+*                  unsigned int destWidth      : Width of destination image.
+*                  unsigned int destHeight     : Height of destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Two-dimensional 4 to 5 scaling up of an image, one band
+*                  (4 source rows -> 5 destination rows) at a time.  Rows
+*                  are first scaled horizontally into dest, then each band
+*                  is scaled vertically in place.
+*
+*  SPECIAL NOTES : The number of bands is destHeight / 5.  Nothing is
+*                  written when destHeight is smaller than one band.
+*
+****************************************************************************/
+void Scale_4_5_2D
+(
+    POSTPROC_INSTANCE *ppi,
+    const unsigned char *source,
+    unsigned int sourcePitch,
+    unsigned int sourceWidth,
+    unsigned int sourceHeight,
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth,
+    unsigned int destHeight
+)
+{
+    unsigned int i, k;
+    const unsigned int srcBandHeight  = 4;
+    const unsigned int destBandHeight = 5;
+    const unsigned int bands = destHeight / destBandHeight;
+
+    (void) sourceHeight;
+    (void) ppi;
+
+    // "bands - 1" is unsigned and would wrap to a huge count for bands == 0;
+    // a destination shorter than one band has no room for any output.
+    if ( bands == 0 )
+        return;
+
+    // First row of the first band.
+    HorizontalLine_4_5_Scale ( source, sourceWidth, dest, destWidth );
+
+    // Every band except the last.
+    for ( k = 0; k < bands - 1; k++ )
+    {
+        // Scale the remaining rows of this band horizontally.
+        for ( i = 1; i < srcBandHeight; i++ )
+        {
+            HorizontalLine_4_5_Scale ( source + i * sourcePitch,
+                                       sourceWidth,
+                                       dest + i * destPitch,
+                                       destWidth );
+        }
+
+        // First row of the next band; the vertical pass needs it for
+        // interpolation and the next iteration reuses it as its row 0.
+        HorizontalLine_4_5_Scale ( source + srcBandHeight * sourcePitch,
+                                   sourceWidth,
+                                   dest + destBandHeight * destPitch,
+                                   destWidth );
+
+        // Vertical scaling is in place.
+        VerticalBand_4_5_Scale ( dest, destPitch, destWidth );
+
+        // Move to the next band.
+        source += srcBandHeight * sourcePitch;
+        dest   += destBandHeight * destPitch;
+    }
+
+    // Last band: horizontal pass for its remaining rows ...
+    for ( i = 1; i < srcBandHeight; i++ )
+    {
+        HorizontalLine_4_5_Scale ( source + i * sourcePitch,
+                                   sourceWidth,
+                                   dest + i * destPitch,
+                                   destWidth );
+    }
+
+    // ... then the vertical pass that does not look at a following band.
+    LastVerticalBand_4_5_Scale ( dest, destPitch, destWidth );
+}
+
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_3_5_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Number of source pixels.
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 3 to 5: every group of
+*                  3 source pixels produces 5 destination pixels.
+*
+*  SPECIAL NOTES : Groups other than the last interpolate their fifth pixel
+*                  with the first pixel of the following group; the last
+*                  group repeats its own final pixel instead.
+*                  Lines shorter than 3 pixels are left untouched.  Widths
+*                  that are not a multiple of 3 make the final group read
+*                  past sourceWidth.
+*
+****************************************************************************/
+void HorizontalLine_3_5_Scale_C
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    unsigned int i;
+    unsigned int a, b, c;
+    unsigned char *des = dest;
+    const unsigned char *src = source;
+
+    (void) destWidth;
+
+    // sourceWidth is unsigned, so "sourceWidth - 3" would wrap to a huge
+    // value for short lines; with fewer than 3 pixels there is no group to scale.
+    if ( sourceWidth < 3 )
+        return;
+
+    // All groups except the last.
+    for ( i = 0; i < sourceWidth - 3; i += 3 )
+    {
+        a = src[0];
+        b = src[1];
+        des[0] = (unsigned char) a;
+        des[1] = (unsigned char) (( a * 102 + 154 * b + 128 ) >> 8);
+
+        c = src[2];
+        des[2] = (unsigned char) (( b * 205 + c * 51 + 128 ) >> 8);
+        des[3] = (unsigned char) (( b * 51 + c * 205 + 128 ) >> 8);
+
+        a = src[3];                     // first pixel of the next group
+        des[4] = (unsigned char) (( c * 154 + a * 102 + 128 ) >> 8);
+
+        src += 3;
+        des += 5;
+    }
+
+    // Last group: no following pixel is available, so the fifth output
+    // repeats the third source pixel.
+    a = src[0];
+    b = src[1];
+    des[0] = (unsigned char) a;
+    des[1] = (unsigned char) (( a * 102 + 154 * b + 128 ) >> 8);
+
+    c = src[2];
+    des[2] = (unsigned char) (( b * 205 + c * 51 + 128 ) >> 8);
+    des[3] = (unsigned char) (( b * 51 + c * 205 + 128 ) >> 8);
+
+    des[4] = (unsigned char) c;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_3_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 3 to 5 scale of one band.  For every
+*                  column rows 1..4 are rewritten from the values held in
+*                  rows 0..2 and row 5.
+*
+*  SPECIAL NOTES : Row 5 (the first line of the band below) is read but not
+*                  modified, so it must already hold valid data.
+*
+****************************************************************************/
+void VerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col;
+
+    for ( col = 0; col < destWidth; col++ )
+    {
+        unsigned char *pix = dest + col;
+        unsigned int row0 = pix[0];
+        unsigned int row1 = pix[destPitch];
+        unsigned int row2, below;
+
+        pix[destPitch] = (UINT8) (( row0 * 102 + 154 * row1 + 128 ) >> 8);
+
+        // Row 2 is fetched before it is overwritten; rows 2 and 3 are both
+        // blends of the original rows 1 and 2.
+        row2 = pix[destPitch*2];
+        pix[destPitch*2] = (UINT8) (( row1 * 205 + row2 * 51 + 128 ) >> 8);
+        pix[destPitch*3] = (UINT8) (( row1 * 51 + row2 * 205 + 128 ) >> 8);
+
+        // Row 4 blends the original row 2 with the first line of the next band.
+        below = pix[destPitch*5];
+        pix[destPitch*4] = (UINT8) (( row2 * 154 + below * 102 + 128 ) >> 8);
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_3_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 3 to 5 scale of the final band.  For
+*                  every column rows 1..4 are rewritten from the values
+*                  held in rows 0..2.
+*
+*  SPECIAL NOTES : There is no band below the last one, so row 4 is a
+*                  plain copy of the original row 2.
+*
+****************************************************************************/
+void LastVerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col;
+
+    for ( col = 0; col < destWidth; col++ )
+    {
+        unsigned char *pix = dest + col;
+        unsigned int row0 = pix[0];
+        unsigned int row1 = pix[destPitch];
+        unsigned int row2;
+
+        pix[destPitch] = (UINT8) (( row0 * 102 + 154 * row1 + 128 ) >> 8);
+
+        // Row 2 is fetched before it is overwritten.
+        row2 = pix[destPitch*2];
+        pix[destPitch*2] = (UINT8) (( row1 * 205 + row2 * 51 + 128 ) >> 8);
+        pix[destPitch*3] = (UINT8) (( row1 * 51 + row2 * 205 + 128 ) >> 8);
+
+        // No further line to interpolate with: repeat the original row 2.
+        pix[destPitch*4] = (UINT8) row2;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale_3_5_2D
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi      : Post-processor instance (NOT USED).
+*                  const unsigned char *source : Pointer to source image.
+*                  unsigned int sourcePitch    : Stride of source image.
+*                  unsigned int sourceWidth    : Width of source image.
+*                  unsigned int sourceHeight   : Height of source image (NOT USED).
+*                  unsigned char *dest         : Pointer to destination image.
+*                  unsigned int destPitch      : Stride of destination image.
+*                  unsigned int destWidth      : Width of destination image.
+*                  unsigned int destHeight     : Height of destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Two-dimensional 3 to 5 scaling up of an image, one band
+*                  (3 source rows -> 5 destination rows) at a time.  Rows
+*                  are first scaled horizontally into dest, then each band
+*                  is scaled vertically in place.
+*
+*  SPECIAL NOTES : The number of bands is destHeight / 5.  Nothing is
+*                  written when destHeight is smaller than one band.
+*
+****************************************************************************/
+void Scale_3_5_2D
+(
+    POSTPROC_INSTANCE *ppi,
+    const unsigned char *source,
+    unsigned int sourcePitch,
+    unsigned int sourceWidth,
+    unsigned int sourceHeight,
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth,
+    unsigned int destHeight
+)
+{
+    // Constants for a 3->5 scale up.
+    const unsigned int srcBandHeight  = 3;
+    const unsigned int destBandHeight = 5;
+    const unsigned int bands = destHeight / destBandHeight;
+    unsigned int i, k;
+
+    (void) ppi;
+    (void) sourceHeight;
+
+    // "bands - 1" is unsigned and would wrap to a huge count for bands == 0;
+    // a destination shorter than one band has no room for any output.
+    if ( bands == 0 )
+        return;
+
+    // First row of the first band.
+    HorizontalLine_3_5_Scale ( source, sourceWidth, dest, destWidth );
+
+    // Every band except the last.
+    for ( k = 0; k < bands - 1; k++ )
+    {
+        // Scale the remaining rows of this band horizontally.
+        for ( i = 1; i < srcBandHeight; i++ )
+        {
+            HorizontalLine_3_5_Scale ( source + i * sourcePitch,
+                                       sourceWidth,
+                                       dest + i * destPitch,
+                                       destWidth );
+        }
+
+        // First row of the next band; the vertical pass needs it for
+        // interpolation and the next iteration reuses it as its row 0.
+        HorizontalLine_3_5_Scale ( source + srcBandHeight * sourcePitch,
+                                   sourceWidth,
+                                   dest + destBandHeight * destPitch,
+                                   destWidth );
+
+        // Vertical scaling is in place.
+        VerticalBand_3_5_Scale ( dest, destPitch, destWidth );
+
+        // Move to the next band.
+        source += srcBandHeight * sourcePitch;
+        dest   += destBandHeight * destPitch;
+    }
+
+    // Last band: horizontal pass for its remaining rows ...
+    for ( i = 1; i < srcBandHeight; i++ )
+    {
+        HorizontalLine_3_5_Scale ( source + i * sourcePitch,
+                                   sourceWidth,
+                                   dest + i * destPitch,
+                                   destWidth );
+    }
+
+    // ... then the vertical pass that does not look at a following band.
+    LastVerticalBand_3_5_Scale ( dest, destPitch, destWidth );
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_1_2_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Number of source pixels.
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 1 to 2: each source
+*                  pixel is followed by the rounded average of itself and
+*                  its right-hand neighbour.
+*
+*  SPECIAL NOTES : The last source pixel has no neighbour and is simply
+*                  written twice.  An empty line (sourceWidth == 0) is left
+*                  untouched.
+*
+****************************************************************************/
+void HorizontalLine_1_2_Scale_C
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    unsigned int i;
+    unsigned int a, b;
+    unsigned char *des = dest;
+    const unsigned char *src = source;
+
+    (void) destWidth;
+
+    // sourceWidth is unsigned, so "sourceWidth - 1" would wrap to a huge
+    // value for an empty line.
+    if ( sourceWidth == 0 )
+        return;
+
+    // Every pixel except the last.
+    for ( i = 0; i < sourceWidth - 1; i++ )
+    {
+        a = src[0];
+        b = src[1];
+        des[0] = (unsigned char) a;
+        des[1] = (unsigned char) (( a + b + 1 ) >> 1);
+        src += 1;
+        des += 2;
+    }
+
+    // Last pixel: duplicated, there is nothing to average with.
+    a = src[0];
+    des[0] = (unsigned char) a;
+    des[1] = (unsigned char) a;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_1_2_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 1 to 2 scale of one band: row 1 of
+*                  every column becomes the rounded average of row 0 and
+*                  row 2.
+*
+*  SPECIAL NOTES : Row 2 (the first line of the band below) is read but not
+*                  modified, so it must already hold valid data.
+*
+****************************************************************************/
+void VerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col;
+
+    for ( col = 0; col < destWidth; col++ )
+    {
+        unsigned char *pix = dest + col;
+        unsigned int above = pix[0];
+        unsigned int below = pix[destPitch * 2];
+
+        pix[destPitch] = (UINT8) (( above + below + 1 ) >> 1);
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_1_2_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : First row of the band.
+*                  unsigned int destPitch : Stride between rows.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : In-place vertical 1 to 2 scale of the final band: row 1
+*                  of every column becomes a copy of row 0.
+*
+*  SPECIAL NOTES : There is no band below the last one, so nothing is
+*                  averaged.  Columns are copied one at a time from left
+*                  to right.
+*
+****************************************************************************/
+void LastVerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+    unsigned int col = 0;
+
+    while ( col < destWidth )
+    {
+        unsigned char *pix = dest + col;
+
+        pix[destPitch] = pix[0];
+        col++;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale1D_c
+*
+*  INPUTS        : const unsigned char *source : Pointer to data to be scaled.
+*                  int sourceStep              : Distance between successive source pixels.
+*                  unsigned int sourceScale    : Scale for source.
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : Pointer to output data array.
+*                  int destStep                : Distance between successive output pixels.
+*                  unsigned int destScale      : Scale for destination.
+*                  unsigned int destLength     : Number of output pixels.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Performs linear interpolation in one dimension.  Each
+*                  output pixel is a weighted blend of the current source
+*                  pixel and the one sourceStep further on; the weights
+*                  always add up to destScale.
+*
+*  SPECIAL NOTES : The pixel at source + sourceStep is always fetched
+*                  alongside the current one, so one element beyond the
+*                  last source position used must be readable.
+*                  destScale must be non-zero (it is used as a divisor).
+*
+****************************************************************************/
+void Scale1D_c
+(
+    const unsigned char *source,
+    int sourceStep,
+    unsigned int sourceScale,
+    unsigned int sourceLength,
+    unsigned char *dest,
+    int destStep,
+    unsigned int destScale,
+    unsigned int destLength
+)
+{
+    unsigned int i;
+    unsigned int roundValue    = destScale / 2;
+    unsigned int leftModifier  = destScale;
+    unsigned int rightModifier = 0;
+    unsigned char leftPixel    = *source;
+    unsigned char rightPixel   = *( source + sourceStep );
+
+    (void) sourceLength;
+
+    // These asserts are needed if there are boundary issues...
+    //assert ( destScale > sourceScale );
+    //assert ( (sourceLength-1) * destScale >= (destLength-1) * sourceScale );
+
+    for ( i = 0; i < destLength*destStep; i += destStep )
+    {
+        // The blend is an unsigned pixel value, so it is narrowed with an
+        // unsigned cast; going through a signed 8-bit type would be
+        // implementation-defined for results above 127.
+        dest[i] = (unsigned char) ((leftModifier*leftPixel + rightModifier*rightPixel + roundValue) / destScale);
+
+        // Advance the sampling position by one output step.
+        rightModifier += sourceScale;
+
+        // Crossed into the next source interval: step the pixel pair along.
+        while ( rightModifier > destScale )
+        {
+            rightModifier -= destScale;
+            source += sourceStep;
+            leftPixel  = *source;
+            rightPixel = *( source + sourceStep );
+        }
+
+        leftModifier = destScale - rightModifier;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale1D_2t1_i
+*
+*  INPUTS        : const unsigned char *source : Pointer to data to be scaled.
+*                  int sourceStep              : Distance between successive source pixels.
+*                  unsigned int sourceScale    : Scale for source (UNUSED).
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : Pointer to output data array.
+*                  int destStep                : Distance between successive output pixels.
+*                  unsigned int destScale      : Scale for destination (UNUSED).
+*                  unsigned int destLength     : Number of output pixels.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Performs 2-to-1 interpolated scaling.  Only every second
+*                  source pixel is visited: the first output is a straight
+*                  copy, every later output is a 3-tap filter with weights
+*                  3/16, 10/16, 3/16 over the current visited pixel and the
+*                  visited pixels on either side of it.
+*
+*  SPECIAL NOTES : dest[0] is written even when destLength is 0.
+*                  The filter reads the source position two steps beyond
+*                  the pixel that corresponds to the last output.
+*
+****************************************************************************/
+void Scale1D_2t1_i
+(
+    const unsigned char *source,
+    int sourceStep,
+    unsigned int sourceScale,
+    unsigned int sourceLength,
+    unsigned char *dest,
+    int destStep,
+    unsigned int destScale,
+    unsigned int destLength
+)
+{
+    unsigned int i, j;
+    unsigned int temp;
+
+    (void) sourceLength;
+    (void) sourceScale;
+    (void) destScale;
+
+    // Visit every second source pixel.
+    sourceStep *= 2;
+
+    // The first output has no left-hand neighbour: copy it unfiltered.
+    dest[0] = source[0];
+
+    for ( i = destStep, j = sourceStep; i < destLength*destStep; i += destStep, j += sourceStep )
+    {
+        temp  = 8;                          // rounding term for the >> 4
+        temp += 3  * source[j-sourceStep];
+        temp += 10 * source[j];
+        temp += 3  * source[j+sourceStep];
+        temp >>= 4;
+
+        // The result is an unsigned pixel value (0..255), so narrow it
+        // with an unsigned cast; a signed 8-bit cast would be
+        // implementation-defined for results above 127.
+        dest[i] = (unsigned char) temp;
+    }
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale1D_2t1_ps
+*
+*  INPUTS        : const unsigned char *source : Pointer to data to be scaled.
+*                  int sourceStep              : Distance between successive source pixels.
+*                  unsigned int sourceScale    : Scale for source (UNUSED).
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : Pointer to output data array.
+*                  int destStep                : Distance between successive output pixels.
+*                  unsigned int destScale      : Scale for destination (UNUSED).
+*                  unsigned int destLength     : Number of output pixels.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Performs 2-to-1 point subsampled scaling: every second
+*                  source pixel is copied to the output and the pixels in
+*                  between are dropped.
+*
+*  SPECIAL NOTES : No filtering is applied.
+*
+****************************************************************************/
+void Scale1D_2t1_ps
+(
+    const unsigned char *source,
+    int sourceStep,
+    unsigned int sourceScale,
+    unsigned int sourceLength,
+    unsigned char *dest,
+    int destStep,
+    unsigned int destScale,
+    unsigned int destLength
+)
+{
+    const unsigned int outEnd = destLength*destStep;
+    unsigned int out = 0;
+    unsigned int in = 0;
+
+    (void) destScale;
+    (void) sourceScale;
+    (void) sourceLength;
+
+    // Take every second source pixel.
+    sourceStep *= 2;
+
+    while ( out < outEnd )
+    {
+        dest[out] = source[in];
+        out += destStep;
+        in  += sourceStep;
+    }
+}
+
+/****************************************************************************
+*
+* ROUTINE : Scale2D
+*
+* INPUTS : const unsigned char *source : Pointer to data to be scaled.
+* int sourcePitch : Stride of source image.
+* unsigned int sourceWidth : Width of input image.
+* unsigned int sourceHeight : Height of input image.
+* unsigned char *dest : Pointer to output data array.
+* int destPitch : Stride of destination image.
+* unsigned int destWidth : Width of destination image.
+* unsigned int destHeight : Height of destination image.
+* unsigned char *tempArea : Pointer to temp work area.
+* unsigned char tempAreaHeight : Height of temp work area.
+* unsigned int hscale : Horizontal scale factor numerator.
+* unsigned int hratio : Horizontal scale factor denominator.
+* unsigned int vscale : Vertical scale factor numerator.
+* unsigned int vratio : Vertical scale factor denominator.
+* unsigned int interlaced : Interlace flag.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Performs 2-tap linear interpolation in two dimensions.
+*
+* SPECIAL NOTES : Expansion is performed one band at a time to help with
+* caching.
+*
+****************************************************************************/
+void Scale2D
+(
+ const unsigned char *source,
+ int sourcePitch,
+ unsigned int sourceWidth,
+ unsigned int sourceHeight,
+ unsigned char *dest,
+ int destPitch,
+ unsigned int destWidth,
+ unsigned int destHeight,
+ unsigned char *tempArea,
+ unsigned char tempAreaHeight,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+ )
+{
+ // Scales a plane in two passes: each source row is scaled horizontally
+ // into tempArea, then every column of tempArea is scaled vertically into
+ // dest.  The work is done in bands whose size is bounded by tempAreaHeight.
+ unsigned int i, j, k;
+ unsigned int bands;
+ unsigned int destBandHeight;
+ unsigned int sourceBandHeight;
+
+ typedef void (*Scale1D)( const unsigned char *source,int sourceStep,unsigned int sourceScale,unsigned int sourceLength,
+ unsigned char *dest,int destStep,unsigned int destScale,unsigned int destLength);
+
+ // General linear interpolation unless an exact 2:1 factor allows one of
+ // the specialised routines below.
+ Scale1D Scale1Dv = Scale1D_c;
+ Scale1D Scale1Dh = Scale1D_c;
+
+ if ( hscale==2 && hratio==1 )
+ Scale1Dh = Scale1D_2t1_ps;
+
+ if ( vscale==2 && vratio==1 )
+ {
+ // Interlaced content is point-sampled vertically; progressive content
+ // uses the interpolating 2:1 routine.
+ if ( interlaced )
+ Scale1Dv = Scale1D_2t1_ps;
+ else
+ Scale1Dv = Scale1D_2t1_i;
+ }
+
+ // Same height: horizontal scaling only, written straight into dest
+ // (tempArea is not used on this path).
+ if ( sourceHeight == destHeight )
+ {
+ // for each band of the image
+ for ( k=0; k<destHeight; k++ )
+ {
+ Scale1Dh ( source, 1, hscale, sourceWidth+1, dest, 1, hratio, destWidth );
+ source += sourcePitch;
+ dest += destPitch;
+ }
+ return;
+ }
+
+ // One row of tempArea is reserved for the look-ahead row, so the larger
+ // of the two band heights is tempAreaHeight - 1; the other is derived
+ // from it using the vertical ratio.
+ // NOTE(review): if either band height comes out as 0 (e.g. tempAreaHeight
+ // of 1), the band count below divides by zero - confirm callers rule this out.
+ if ( destHeight > sourceHeight )
+ {
+ destBandHeight = tempAreaHeight - 1;
+ sourceBandHeight = destBandHeight * sourceHeight / destHeight;
+ }
+ else
+ {
+ sourceBandHeight = tempAreaHeight - 1;
+ destBandHeight = sourceBandHeight * vratio / vscale;
+ }
+
+ // first row needs to be done so that we can stay one row ahead for vertical zoom
+ Scale1Dh ( source, 1, hscale, sourceWidth+1, tempArea, 1, hratio, destWidth );
+
+ // for each band of the image
+ bands = (destHeight + destBandHeight - 1)/ destBandHeight;
+ for ( k=0; k<bands; k++ )
+ {
+ // scale one band horizontally
+ // (rows of tempArea are laid out with a stride of destPitch)
+ for ( i=1; i<sourceBandHeight+1; i++ )
+ {
+ if ( k*sourceBandHeight+i < sourceHeight )
+ {
+ Scale1Dh ( source+i*sourcePitch, 1, hscale, sourceWidth+1,
+ tempArea+i*destPitch, 1, hratio, destWidth );
+ }
+ else // Duplicate the last row
+ {
+ // source rows are exhausted: repeat the previous tempArea row
+ memcpy ( tempArea+i*destPitch, tempArea+(i-1)*destPitch, destPitch );
+ }
+ }
+
+ // scale one band vertically
+ // (one call per column; both tempArea and dest are stepped by destPitch)
+ for ( j=0; j<destWidth; j++ )
+ {
+ Scale1Dv ( &tempArea[j], destPitch, vscale, sourceBandHeight+1,
+ &dest[j], destPitch, vratio, destBandHeight );
+ }
+
+ // the last tempArea row of this band becomes row 0 of the next band
+ memcpy ( tempArea, tempArea+sourceBandHeight*destPitch, destPitch );
+
+ // move to the next band
+ source += sourceBandHeight * sourcePitch;
+ dest += destBandHeight * destPitch;
+ }
+}
+
+/****************************************************************************
+*
+* ROUTINE : ScaleFrame
+*
+* INPUTS : YUV_BUFFER_CONFIG *src : Pointer to frame to be scaled.
+* YUV_BUFFER_CONFIG *dst : Pointer to buffer to hold scaled frame.
+* unsigned char *tempArea : Pointer to temp work area (handed to Scale2D).
+* unsigned char tempHeight : Height of temp work area (handed to Scale2D).
+* unsigned int hscale : Horizontal divisor; must be non-zero.
+* unsigned int hratio : Horizontal multiplier.
+* unsigned int vscale : Vertical divisor; must be non-zero.
+* unsigned int vratio : Vertical multiplier.
+* unsigned int interlaced : Interlace flag (handed to Scale2D).
+*
+* OUTPUTS : The Y, U and V planes of dst are written.
+*
+* RETURNS : void
+*
+* FUNCTION : Scales the three planes with Scale2D and pads the rest of
+* dst by repeating the edge pixels of the scaled image.
+*
+* SPECIAL NOTES : Scaled luma size is dw = ceil(YWidth*hratio/hscale) by
+* dh = ceil(YHeight*vratio/vscale); chroma is dw/2 by dh/2.
+****************************************************************************/
+void ScaleFrame
+(
+    YUV_BUFFER_CONFIG *src,
+    YUV_BUFFER_CONFIG *dst,
+    unsigned char *tempArea,
+    unsigned char tempHeight,
+    unsigned int hscale,
+    unsigned int hratio,
+    unsigned int vscale,
+    unsigned int vratio,
+    unsigned int interlaced
+)
+{
+    int i;
+    int dw = (hscale - 1 + src->YWidth * hratio) / hscale;
+    int dh = (vscale - 1 + src->YHeight * vratio) / vscale;
+
+    // Y plane: scale, then pad right and bottom
+    Scale2D ( (unsigned char *) src->YBuffer, src->YStride, src->YWidth, src->YHeight,
+              (unsigned char *) dst->YBuffer, dst->YStride, dw, dh,
+              tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+    if ( dw < (int)dst->YWidth )    // right padding restarts at column dw-1 and repeats pixel dw-2
+        for ( i=0; i<dh; i++ )
+            memset ( dst->YBuffer+i*dst->YStride+dw-1, dst->YBuffer[i*dst->YStride+dw-2], dst->YWidth-dw+1 );
+
+    if ( dh < (int)dst->YHeight )   // bottom padding: exactly one visible row, never a byte past YWidth
+        for ( i=dh-1; i<(int)dst->YHeight; i++ )
+            memcpy ( dst->YBuffer + i*dst->YStride, dst->YBuffer + (dh-2) * dst->YStride, dst->YWidth );
+
+    Scale2D ( (unsigned char *) src->UBuffer, src->UVStride, src->UVWidth, src->UVHeight,
+              (unsigned char *) dst->UBuffer, dst->UVStride, dw/2, dh/2,
+              tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+    if ( dw/2 < (int)dst->UVWidth )
+        for ( i=0; i<dst->UVHeight; i++ )
+            memset ( dst->UBuffer+i*dst->UVStride+dw/2-1, dst->UBuffer[i*dst->UVStride+dw/2-2], dst->UVWidth-dw/2+1 );
+
+    if ( dh/2 < (int)dst->UVHeight )    // bounded by the chroma plane's own height, matching the guard
+        for ( i=dh/2-1; i<(int)dst->UVHeight; i++ )
+            memcpy ( dst->UBuffer+i*dst->UVStride, dst->UBuffer+(dh/2-2)*dst->UVStride, dst->UVWidth );
+
+    Scale2D ( (unsigned char *) src->VBuffer, src->UVStride, src->UVWidth, src->UVHeight,
+              (unsigned char *) dst->VBuffer, dst->UVStride, dw/2, dh/2,
+              tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+    if ( dw/2 < (int)dst->UVWidth )
+        for ( i=0; i<dst->UVHeight; i++ )
+            memset ( dst->VBuffer+i*dst->UVStride+dw/2-1, dst->VBuffer[i*dst->UVStride+dw/2-2], dst->UVWidth-dw/2+1 );
+
+    if ( dh/2 < (int)dst->UVHeight )    // bounded by the chroma plane's own height, matching the guard
+        for ( i=dh/2-1; i<(int)dst->UVHeight; i++ )
+            memcpy ( dst->VBuffer+i*dst->UVStride, dst->VBuffer+(dh/2-2)*dst->UVStride, dst->UVWidth );
+}
+
+/****************************************************************************
+*
+* ROUTINE : Fast_4_5_Scale
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance; supplies the
+* frame size and the plane offsets into FrameBuffer.
+* UINT8 *FrameBuffer : Pointer to source image.
+* YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Hands the Y, U and V planes in turn to Scale_4_5_2D
+* (5 output samples for every 4 input samples
+* horizontally & vertically).
+*
+* SPECIAL NOTES : Source pitch is width+32 for Y and width/2+16 for U/V;
+* the destination pitch passed on is the destination width.
+****************************************************************************/
+void Fast_4_5_Scale ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+    // NOTE: sizes are taken as given - nothing in here validates them.
+    int srcW = ppi->Configuration.VideoFrameWidth;
+    int srcH = ppi->Configuration.VideoFrameHeight;
+    int dstW = YuvConfig->YWidth;
+    int dstH = YuvConfig->YHeight;
+    int srcCW = srcW >> 1, srcCH = srcH >> 1;    // chroma planes: half of each dimension
+    int dstCW = dstW >> 1, dstCH = dstH >> 1;
+
+    // Y
+    Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconYDataOffset, srcW+32, srcW, srcH,
+                   (UINT8 *)YuvConfig->YBuffer, dstW, dstW, dstH );
+    // U
+    Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconUDataOffset, srcCW+16, srcCW, srcCH,
+                   (UINT8 *)YuvConfig->UBuffer, dstCW, dstCW, dstCH );
+    // V
+    Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconVDataOffset, srcCW+16, srcCW, srcCH,
+                   (UINT8 *)YuvConfig->VBuffer, dstCW, dstCW, dstCH );
+}
+
+/****************************************************************************
+*
+* ROUTINE : Fast_3_5_Scale
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance; supplies the
+* frame size and the plane offsets into FrameBuffer.
+* UINT8 *FrameBuffer : Pointer to source image.
+* YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Hands the Y, U and V planes in turn to Scale_3_5_2D
+* (5 output samples for every 3 input samples
+* horizontally & vertically).
+*
+* SPECIAL NOTES : Source pitch is width+32 for Y and width/2+16 for U/V;
+* the destination pitch passed on is the destination width.
+****************************************************************************/
+void Fast_3_5_Scale ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+    // NOTE: sizes are taken as given - nothing in here validates them.
+    int srcW = ppi->Configuration.VideoFrameWidth;
+    int srcH = ppi->Configuration.VideoFrameHeight;
+    int dstW = YuvConfig->YWidth;
+    int dstH = YuvConfig->YHeight;
+    int srcCW = srcW >> 1, srcCH = srcH >> 1;    // chroma planes: half of each dimension
+    int dstCW = dstW >> 1, dstCH = dstH >> 1;
+
+    // Y
+    Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconYDataOffset, srcW+32, srcW, srcH,
+                   (UINT8 *)YuvConfig->YBuffer, dstW, dstW, dstH );
+    // U
+    Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconUDataOffset, srcCW+16, srcCW, srcCH,
+                   (UINT8 *)YuvConfig->UBuffer, dstCW, dstCW, dstCH );
+    // V
+    Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconVDataOffset, srcCW+16, srcCW, srcCH,
+                   (UINT8 *)YuvConfig->VBuffer, dstCW, dstCW, dstCH );
+}
+
+/****************************************************************************
+*
+* ROUTINE : AnyRatio_2D_Scale
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance; its Configuration
+* supplies HScale, HRatio, VScale and VRatio.
+* const unsigned char *source : Pointer to source image.
+* unsigned int sourcePitch : Stride of source image.
+* unsigned int sourceWidth : Width of source image.
+* unsigned int sourceHeight : Height of source image (NOT USED).
+* unsigned char *dest : Pointer to destination image.
+* unsigned int destPitch : Stride of destination image.
+* unsigned int destWidth : Width of destination image.
+* unsigned int destHeight : Height of destination image.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: 1 if image scaled, 0 if image could not be scaled
+* (ratio not supported, or HScale/VScale is zero).
+*
+* FUNCTION : Scale the image with changing aspect ratio.
+*
+* SPECIAL NOTES : This scaling is a bi-linear scaling. Need to re-work the
+* whole function for new scaling algorithm.
+* The ratio is picked from the integer value Ratio*10/Scale:
+* 8 = 4:5, 6 = 3:5, 5 = 1:2, 10 = no scaling.
+*
+****************************************************************************/
+int AnyRatio_2D_Scale
+(
+    POSTPROC_INSTANCE *ppi,
+    const unsigned char *source,
+    unsigned int sourcePitch,
+    unsigned int sourceWidth,
+    unsigned int sourceHeight,
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth,
+    unsigned int destHeight
+)
+{
+    unsigned int i, k, max_k;
+    unsigned int srcBandHeight = 0;
+    unsigned int destBandHeight = 0;
+
+    // suggested scale factors
+    int hs = ppi->Configuration.HScale;
+    int hr = ppi->Configuration.HRatio;
+    int vs = ppi->Configuration.VScale;
+    int vr = ppi->Configuration.VRatio;
+
+    // assume the ratios are scalable instead of should be centered
+    int RatioScalable = 1;
+
+    void (*HorizLineScale) ( const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
+    void (*VertBandScale) ( unsigned char *, unsigned int, unsigned int) = NULL;
+    void (*LastVertBandScale) ( unsigned char *, unsigned int, unsigned int) = NULL;
+
+    (void) sourceHeight;
+
+    // hs and vs are the divisors of the ratio tests below: zero cannot be scaled
+    if ( hs == 0 || vs == 0 )
+        return 0;
+
+    // find out the ratio for each direction
+    switch ( hr*10/hs )
+    {
+    case 8:
+        // 4-5 Scale in Width direction
+        HorizLineScale = HorizontalLine_4_5_Scale;
+        break;
+    case 6:
+        // 3-5 Scale in Width direction
+        HorizLineScale = HorizontalLine_3_5_Scale;
+        break;
+    case 5:
+        // 1-2 Scale in Width direction
+        HorizLineScale = HorizontalLine_1_2_Scale;
+        break;
+    case 10:
+        // no scale in Width direction
+        HorizLineScale = HorizontalLine_Copy;
+        break;
+    default:
+        // The ratio is not acceptable now
+        RatioScalable = 0;
+        break;
+    }
+
+    switch ( vr*10/vs )
+    {
+    case 8:
+        // 4-5 Scale in vertical direction
+        VertBandScale = VerticalBand_4_5_Scale;
+        LastVertBandScale = LastVerticalBand_4_5_Scale;
+        srcBandHeight = 4;
+        destBandHeight = 5;
+        break;
+    case 6:
+        // 3-5 Scale in vertical direction
+        VertBandScale = VerticalBand_3_5_Scale;
+        LastVertBandScale = LastVerticalBand_3_5_Scale;
+        srcBandHeight = 3;
+        destBandHeight = 5;
+        break;
+    case 5:
+        // 1-2 Scale in vertical direction
+        VertBandScale = VerticalBand_1_2_Scale;
+        LastVertBandScale = LastVerticalBand_1_2_Scale;
+        srcBandHeight = 1;
+        destBandHeight = 2;
+        break;
+    case 10:
+        // no scale in vertical direction; rows are still handled in bands of 4
+        VertBandScale = NullScale;
+        LastVertBandScale = NullScale;
+        srcBandHeight = 4;
+        destBandHeight = 4;
+        break;
+    default:
+        // The ratio is not acceptable now
+        RatioScalable = 0;
+        break;
+    }
+
+    if ( RatioScalable == 0 )
+        return RatioScalable;
+
+    HorizLineScale ( source, sourceWidth, dest, destWidth );
+
+    // except last band
+    max_k = (destHeight+destBandHeight-1)/destBandHeight;
+    if (max_k)
+    {
+        for ( k=0; k<max_k-1; k++ )
+        {
+            // scale one band horizontally
+            for ( i=1; i<srcBandHeight; i++ )
+            {
+                HorizLineScale ( source+i*sourcePitch, sourceWidth,
+                                 dest+i*destPitch, destWidth );
+            }
+
+            // first line of next band
+            HorizLineScale ( source+srcBandHeight*sourcePitch, sourceWidth,
+                             dest+destBandHeight*destPitch, destWidth );
+
+            // Vertical scaling is in place
+            VertBandScale ( dest, destPitch, destWidth );
+
+            // Next band...
+            source += srcBandHeight * sourcePitch;
+            dest += destBandHeight * destPitch;
+        }
+
+        // last band: scale its rows horizontally; no following band is read
+        for ( i=1; i<srcBandHeight; i++ )
+        {
+            HorizLineScale ( source+i*sourcePitch, sourceWidth,
+                             dest+i*destPitch, destWidth );
+        }
+
+        // Vertical scaling is in place
+        LastVertBandScale ( dest, destPitch, destWidth );
+    }
+    return RatioScalable;
+}
+
+/****************************************************************************
+*
+* ROUTINE : AnyRatioFrameScale
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance (scale factors, frame sizes, plane offsets).
+* UINT8 *FrameBuffer : Pointer to source image.
+* YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+* INT32 YOffset : Offset added to YuvConfig->YBuffer before writing Y samples.
+* INT32 UVOffset : Offset added to YuvConfig->UBuffer / VBuffer before writing UV samples.
+*
+* OUTPUTS : None.
+*
+* RETURNS : int: 1 if image scaled, 0 if image could not be scaled.
+*
+* FUNCTION : Scale the image with changing aspect ratio.
+*
+* SPECIAL NOTES : Returns 0 without writing anything when HScale, HRatio, VScale or VRatio is zero.
+*
+****************************************************************************/
+int AnyRatioFrameScale
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *FrameBuffer,
+    YUV_BUFFER_CONFIG *YuvConfig,
+    INT32 YOffset,
+    INT32 UVOffset
+)
+{
+    int i;
+    int ew;
+    int eh;
+    int sw, sh, dw, dh;
+
+    // suggested scale factors
+    int hs = ppi->Configuration.HScale;
+    int hr = ppi->Configuration.HRatio;
+    int vs = ppi->Configuration.VScale;
+    int vr = ppi->Configuration.VRatio;
+
+    int RatioScalable = 1;
+
+    // every one of the four factors is used as a divisor below
+    if ( hs == 0 || hr == 0 || vs == 0 || vr == 0 )
+        return 0;
+
+    // source size (rounded up) that scales to the expanded output size
+    sw = (ppi->Configuration.ExpandedFrameWidth * hr + hs - 1)/hs;
+    sh = (ppi->Configuration.ExpandedFrameHeight * vr + vs - 1)/vs;
+    dw = ppi->Configuration.ExpandedFrameWidth;
+    dh = ppi->Configuration.ExpandedFrameHeight;
+
+    // output size once the source is rounded up to a multiple of 3 (ratio 3) or 8
+    ew = ((hr == 3) ? (sw+2)/3 * 3 : (sw+7)/8 * 8) * hs / hr;
+    eh = ((vr == 3) ? (sh+2)/3 * 3 : (sh+7)/8 * 8) * vs / vr;
+
+    RatioScalable = AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconYDataOffset],
+                                        ppi->Configuration.VideoFrameWidth +ppi->MVBorder*2, sw, sh,
+                                        (UINT8 *) YuvConfig->YBuffer + YOffset, YuvConfig->YStride, dw, dh);
+
+    for ( i=0; i<eh; i++ )      // zero the columns from dw up to ew
+        memset ( YuvConfig->YBuffer+YOffset+i*YuvConfig->YStride+dw, 0, ew-dw );
+
+    for ( i=dh; i<eh; i++ )     // zero the rows from dh up to eh
+        memset ( YuvConfig->YBuffer+YOffset+i*YuvConfig->YStride, 0, ew );
+
+    if ( RatioScalable==0 )
+        return RatioScalable;
+
+    sw = (sw+1)>>1;
+    sh = (sh+1)>>1;
+    dw = (dw+1)>>1;
+    dh = (dh+1)>>1;
+
+    AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconUDataOffset], ppi->Configuration.VideoFrameWidth/2+ppi->MVBorder, sw, sh,
+                        (UINT8 *)YuvConfig->UBuffer+UVOffset, YuvConfig->UVStride, dw, dh );
+
+    AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconVDataOffset], ppi->Configuration.VideoFrameWidth/2+ppi->MVBorder, sw, sh,
+                        (UINT8 *)YuvConfig->VBuffer+UVOffset, YuvConfig->UVStride, dw, dh );
+
+    return RatioScalable;
+}
+
+/****************************************************************************
+*
+* ROUTINE : CenterImage
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+* UINT8 *FrameBuffer : Pointer to source image.
+* YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Centers the image without scaling in the output buffer.
+*
+* SPECIAL NOTES : - Margins are (YHeight - VideoFrameHeight)/2 rows and
+* (YWidth - VideoFrameWidth)/2 columns; the U and V planes
+* use half of each margin.
+* - Destination rows advance by YWidth / UVWidth (not by
+* YStride / UVStride); source rows advance by
+* ppi->YStride / ppi->UVStride.
+* - Y copies VideoFrameHeight rows of VideoFrameWidth
+* bytes; U and V copy VideoFrameHeight/2 rows of
+* VideoFrameWidth/2 bytes.
+* - Only the image area is written; the margins keep
+* whatever the buffer already held.
+* - No check is made that the frame fits in the buffer.
+*
+****************************************************************************/
+void CCONV CenterImage ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+    UINT32 line;
+    INT32 topPad;
+    INT32 leftPad;
+    UINT8 *from;
+    UINT8 *to;
+
+    // margins above and to the left of the centred image
+    topPad = (YuvConfig->YHeight - ppi->Configuration.VideoFrameHeight)/2;
+    leftPad = (YuvConfig->YWidth - ppi->Configuration.VideoFrameWidth)/2;
+
+    // Y plane
+    from = FrameBuffer + ppi->ReconYDataOffset;
+    to = (UINT8 *)YuvConfig->YBuffer + topPad*YuvConfig->YWidth + leftPad;
+    for ( line=0; line<ppi->Configuration.VideoFrameHeight; line++, to+=YuvConfig->YWidth, from+=ppi->YStride )
+        memcpy ( to, from, ppi->Configuration.VideoFrameWidth );
+
+    // U plane
+    from = FrameBuffer + ppi->ReconUDataOffset;
+    to = (UINT8 *)YuvConfig->UBuffer + topPad/2*YuvConfig->UVWidth + leftPad/2;
+    for ( line=0; line<ppi->Configuration.VideoFrameHeight/2; line++, to+=YuvConfig->UVWidth, from+=ppi->UVStride )
+        memcpy ( to, from, ppi->Configuration.VideoFrameWidth/2 );
+
+    // V plane
+    from = FrameBuffer + ppi->ReconVDataOffset;
+    to = (UINT8 *)YuvConfig->VBuffer + topPad/2*YuvConfig->UVWidth + leftPad/2;
+    for ( line=0; line<ppi->Configuration.VideoFrameHeight/2; line++, to+=YuvConfig->UVWidth, from+=ppi->UVStride )
+        memcpy ( to, from, ppi->Configuration.VideoFrameWidth/2 );
+}
+
+/****************************************************************************
+*
+* ROUTINE : ScaleOrCenter
+*
+* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+* UINT8 *FrameBuffer : Pointer to source image.
+* YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+* OUTPUTS : None.
+*
+* RETURNS : void
+*
+* FUNCTION : Decides to scale or center image in scale buffer for blit
+*
+* SPECIAL NOTES : SCALE_TO_FIT and MAINTAIN_ASPECT_RATIO both scale with
+* AnyRatioFrameScale, centred on ExpandedFrameWidth x
+* ExpandedFrameHeight; CENTER copies with CenterImage;
+* any other mode does nothing further.
+* UpdateUMVBorder is called first whenever
+* PostProcessingLevel is non-zero.
+* The result of AnyRatioFrameScale (0 = not scalable) is not acted upon.
+*
+****************************************************************************/
+void CCONV ScaleOrCenter
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *FrameBuffer,
+    YUV_BUFFER_CONFIG *YuvConfig
+)
+{
+    if ( ppi->PostProcessingLevel )
+        UpdateUMVBorder ( ppi, FrameBuffer );
+
+    switch ( ppi->Configuration.ScalingMode )
+    {
+    case SCALE_TO_FIT:
+    case MAINTAIN_ASPECT_RATIO:
+    {
+        // center values
+        int row = (YuvConfig->YHeight - (int)ppi->Configuration.ExpandedFrameHeight ) / 2;
+        int col = (YuvConfig->YWidth - (int)ppi->Configuration.ExpandedFrameWidth ) / 2;
+
+        // Skipping "row" rows means stepping by the buffer pitch: AnyRatioFrameScale
+        // writes its rows YStride / UVStride apart, so the offsets must use the same
+        // pitch rather than the visible width.
+        int YOffset = row * YuvConfig->YStride + col;
+        int UVOffset = (row>>1) * YuvConfig->UVStride + (col>>1);
+
+        // perform center and scale
+        AnyRatioFrameScale ( ppi, FrameBuffer, YuvConfig, YOffset, UVOffset );
+
+        break;
+    }
+    case CENTER:
+        CenterImage ( ppi, FrameBuffer, YuvConfig );
+        break;
+
+    default:
+        break;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c
new file mode 100644
index 00000000..66153da2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c
@@ -0,0 +1,392 @@
+/****************************************************************************
+ *
+ * Module Title : simpledeblocker.c
+ *
+ * Description : Simple deblocking filter.
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )   /* x fully parenthesised; still evaluated twice, so pass no side effects */
+#endif
+
+/****************************************************************************
+* Imports : limit table read by SimpleDeblockFrame when ppi->Vp3VersionNo >= 2
+****************************************************************************/
+extern UINT32 *DeblockLimitValuesV2; // defined outside this file; indexed by ppi->FrameQIndex
+
+/****************************************************************************
+* Module Statics : limit table read by SimpleDeblockFrame when ppi->Vp3VersionNo < 2
+****************************************************************************/
+static const UINT32 DeblockLimitValuesV1[Q_TABLE_SIZE] = // indexed by ppi->FrameQIndex; the value is passed to SetupDeblockValueArray as FLimit
+{
+ 30, 25, 20, 20, 15, 15, 14, 14,
+ 13, 13, 12, 12, 11, 11, 10, 10,
+ 9, 9, 8, 8, 7, 7, 7, 7,
+ 6, 6, 6, 6, 5, 5, 5, 5,
+ 4, 4, 4, 4, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 // 64 initialisers in total; assumes Q_TABLE_SIZE is 64 - TODO confirm
+};
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterHoriz_Simple2_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance (NOT USED).
+ * UINT8 *PixelPtr : Pointer to four pixels that straddle the edge.
+ * INT32 LineLength : Stride of the image being filtered.
+ * INT32 *BoundingValuePtr : Pointer to array of bounding values; indexed with a signed value.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the vertical edge by applying
+ * the filter horizontally to each of the 8-rows of the
+ * block edge.
+ *
+ * SPECIAL NOTES : Rows with no step across the edge (PixelPtr[2]==PixelPtr[1]) are left untouched.
+ ****************************************************************************/
+void FilterHoriz_Simple2_C
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    INT32 j;
+    INT32 x,y,z;
+    INT32 FiltVal;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+    (void) ppi;
+
+    // The row advance lives in the loop header so that the "continue" below
+    // still moves on to the next row instead of re-testing the same one.
+    for ( j=0; j<8; j++, PixelPtr += LineLength )
+    {
+        y = PixelPtr[2]-PixelPtr[1];    // step across the edge
+
+        if ( !y ) continue;             // nothing to smooth on this row
+
+        x = PixelPtr[1]-PixelPtr[0];
+        z = PixelPtr[3]-PixelPtr[2];
+
+        FiltVal = 2 * y + z - x;
+        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+
+        PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
+        PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];
+
+        // outer pixels get half the correction, and only when both sides are flat (x == 0 and z == 0)
+        FiltVal >>= 1;
+        FiltVal *= ((x|z)==0);
+
+        PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
+        PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterVert_Simple2_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance (NOT USED).
+ * UINT8 *PixelPtr : First of 8 columns; rows at -2, -1, 0 and +1 line lengths are filtered.
+ * INT32 LineLength : Stride of the image being filtered.
+ * INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the horizontal edge by applying
+ * the filter vertically to each of the 8-columns of the
+ * block edge.
+ *
+ * SPECIAL NOTES : The two outer rows are only adjusted when neither side of the edge differs by more than 1.
+ ****************************************************************************/
+void FilterVert_Simple2_C
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    INT32 col;
+    INT32 delta;
+    UINT8 *clamp = &LimitVal_VP31[VAL_RANGE];
+
+    (void) ppi;
+
+    for ( col=0; col<8; col++ )
+    {
+        UINT8 *p0 = PixelPtr + col;         // row of PixelPtr
+        UINT8 *m1 = p0 - LineLength;        // one row up
+        UINT8 *m2 = p0 - (2*LineLength);    // two rows up
+        UINT8 *p1 = p0 + LineLength;        // one row down
+        INT32 steep;
+
+        delta = ( ((INT32)*p0*3) - ((INT32)*m1*3) );
+
+        steep = abs ( *m2 - *m1 ) > 1 || abs ( *p0 - *p1 ) > 1;
+
+        if ( steep )
+            delta += ((INT32)*m2) - ((INT32)*p1);
+
+        delta = BoundingValuePtr[(delta + 4) >> 3];
+
+        *m1 = clamp[(INT32)*m1 + delta];
+        *p0 = clamp[(INT32)*p0 - delta];
+
+        if ( steep )
+            continue;
+
+        delta >>= 1;
+        *m2 = clamp[(INT32)*m2 + delta];
+        *p1 = clamp[(INT32)*p1 - delta];
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterHoriz_Simple_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance (NOT USED).
+ * UINT8 *PixelPtr : Pointer to four pixels that straddle the edge.
+ * INT32 LineLength : Stride of the image being filtered.
+ * INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the vertical edge by applying
+ * the filter horizontally to each of the 8-rows of the
+ * block edge.
+ *
+ * SPECIAL NOTES : The two outer pixels of a row are only adjusted when
+ * neither side of the edge differs by more than 1.
+ *
+ ****************************************************************************/
+void FilterHoriz_Simple_C
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    INT32 row;
+    UINT8 *px = PixelPtr;
+    UINT8 *clamp = &LimitVal_VP31[VAL_RANGE];
+
+    (void) ppi;
+
+    for ( row=0; row<8; row++, px += LineLength )
+    {
+        // the four pixels of this row, read once before any of them is rewritten
+        INT32 a = px[0];
+        INT32 b = px[1];
+        INT32 c = px[2];
+        INT32 d = px[3];
+        INT32 rough = abs(a - b) > 1 || abs(c - d) > 1;
+        INT32 adj = (c*3) - (b*3);
+
+        if ( rough )
+            adj += a - d;
+
+        adj = BoundingValuePtr[(adj + 4) >> 3];
+
+        px[1] = clamp[b + adj];
+        px[2] = clamp[c - adj];
+
+        if ( !rough )
+        {
+            adj >>= 1;
+            px[0] = clamp[a + adj];
+            px[3] = clamp[d - adj];
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterVert_Simple_C
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance (NOT USED).
+ * UINT8 *PixelPtr : First of 8 columns; rows at -2, -1, 0 and +1 line lengths are filtered.
+ * INT32 LineLength : Stride of the image being filtered.
+ * INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the horizontal edge by applying
+ * the filter vertically to each of the 8-columns of the
+ * block edge.
+ *
+ * SPECIAL NOTES : The two outer rows are only adjusted when neither side of the edge differs by more than 1.
+ ****************************************************************************/
+void FilterVert_Simple_C
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    INT32 j;
+    INT32 FiltVal;
+    INT32 UseHighVariance;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+    UINT8 *Pix;
+
+    (void) ppi;
+
+    for ( j=0, Pix=PixelPtr; j<8; j++, Pix++ )
+    {
+        FiltVal = 3*(INT32)Pix[0] - 3*(INT32)Pix[-LineLength];
+
+        UseHighVariance = 0;
+        if ( abs(Pix[-(2*LineLength)] - Pix[-LineLength]) > 1 )
+            UseHighVariance = 1;
+        else if ( abs(Pix[0] - Pix[LineLength]) > 1 )
+            UseHighVariance = 1;
+
+        if ( UseHighVariance )
+            FiltVal += (INT32)Pix[-(2*LineLength)] - (INT32)Pix[LineLength];
+
+        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+
+        Pix[-LineLength] = LimitTable[(INT32)Pix[-LineLength] + FiltVal];
+        Pix[0] = LimitTable[(INT32)Pix[0] - FiltVal];
+
+        if ( UseHighVariance == 0 )
+        {
+            FiltVal >>= 1;
+            Pix[-2*LineLength] = LimitTable[(INT32)Pix[-2*LineLength] + FiltVal];
+            Pix[LineLength] = LimitTable[(INT32)Pix[LineLength] - FiltVal];
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SimpleDeblockFrame
+ *
+ * INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance.
+ * UINT8 *SrcBuffer : Pointer to image to be deblocked.
+ * UINT8 *DestBuffer : Pointer to image to hold deblocked image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Simple deblocker: copies each plane from SrcBuffer to
+ * DestBuffer one row of 8-line fragments at a time and
+ * filters the fragment edges in DestBuffer.
+ *
+ * SPECIAL NOTES : The filter limit is looked up with ppi->FrameQIndex (V2 table when Vp3VersionNo >= 2, else V1).
+ *
+ ****************************************************************************/
+void SimpleDeblockFrame ( POSTPROC_INSTANCE *ppi, UINT8 *SrcBuffer, UINT8 *DestBuffer )
+{
+    INT32 Plane;
+    INT32 FragRow, FragCol;
+    INT32 RowStart;
+    INT32 RowBytes;
+    INT32 FLimit;
+    INT32 LineLength;
+    INT32 FragsAcross;
+    INT32 FragsDown;
+    INT32 *BoundingValuePtr;
+    INT32 QIndex = ppi->FrameQIndex;
+
+    // Encoder version specific clause
+    FLimit = ( ppi->Vp3VersionNo >= 2 ) ? DeblockLimitValuesV2[QIndex]
+                                        : DeblockLimitValuesV1[QIndex];
+
+    BoundingValuePtr = SetupDeblockValueArray ( ppi, FLimit );
+
+    for ( Plane=0; Plane<3; Plane++ )
+    {
+        /*********************************/
+        /* Geometry of the current plane */
+        /*********************************/
+        if ( Plane == 0 )
+        {
+            // Y
+            FragsAcross = ppi->HFragments;
+            FragsDown = ppi->VFragments;
+            LineLength = ppi->YStride;
+            RowStart = ppi->ReconYDataOffset;
+        }
+        else if ( Plane == 1 )
+        {
+            // U
+            FragsAcross = ppi->HFragments >> 1;
+            FragsDown = ppi->VFragments >> 1;
+            LineLength = ppi->UVStride;
+            RowStart = ppi->ReconUDataOffset;
+        }
+        else
+        {
+            // V
+            FragsAcross = ppi->HFragments >> 1;
+            FragsDown = ppi->VFragments >> 1;
+            LineLength = ppi->UVStride;
+            RowStart = ppi->ReconVDataOffset;
+        }
+
+        // Bytes spanned by one row of 8-line fragments.
+        RowBytes = LineLength * 8;
+
+        /*************/
+        /* First Row */
+        /*************/
+
+        // Copy the row, then filter it in place in DestBuffer.
+        memcpy ( &DestBuffer[RowStart], &SrcBuffer[RowStart], RowBytes );
+
+        // First column is skipped; every other column has its left edge filtered.
+        for ( FragCol=1; FragCol<FragsAcross; FragCol++ )
+            FilterHoriz_Simple ( ppi, &DestBuffer[RowStart+FragCol*8-2], LineLength, BoundingValuePtr );
+
+        /******************/
+        /* Remaining Rows */
+        /******************/
+        for ( FragRow=1; FragRow<FragsDown; FragRow++ )
+        {
+            RowStart += RowBytes;
+
+            memcpy ( &DestBuffer[RowStart], &SrcBuffer[RowStart], RowBytes );
+
+            // First column: only the top edge is filtered.
+            FilterVert_Simple ( ppi, &DestBuffer[RowStart], LineLength, BoundingValuePtr );
+
+            // Other columns: left edge, then top edge.
+            for ( FragCol=1; FragCol<FragsAcross; FragCol++ )
+            {
+                UINT8 *Frag = &DestBuffer[RowStart+FragCol*8];
+
+                FilterHoriz_Simple ( ppi, Frag-2, LineLength, BoundingValuePtr );
+                FilterVert_Simple ( ppi, Frag, LineLength, BoundingValuePtr );
+            }
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h b/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h
new file mode 100644
index 00000000..08f0ef7e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h
@@ -0,0 +1,136 @@
+/****************************************************************************
+*
+* Module Title : postp.h
+*
+* Description : Post processor interface
+*
+****************************************************************************/
+#ifndef POSTP_H
+#define POSTP_H
+
+#include "codec_common.h"
+
+// YUV buffer configuration structure: sizes, pitches and plane pointers of one Y/U/V image
+typedef struct
+{
+ int YWidth; // width of the Y plane
+ int YHeight; // height of the Y plane
+ int YStride; // pitch of the Y plane (ScaleFrame passes it to Scale2D as the row pitch)
+
+ int UVWidth; // width of each of the U and V planes
+ int UVHeight; // height of each of the U and V planes
+ int UVStride; // pitch shared by the U and V planes
+
+ char * YBuffer; // Y samples; users cast this to unsigned char * / UINT8 *
+ char * UBuffer; // U samples
+ char * VBuffer; // V samples
+
+} YUV_BUFFER_CONFIG;
+
+typedef enum
+{
+ MAINTAIN_ASPECT_RATIO = 0x0, // ScaleOrCenter: scaled by AnyRatioFrameScale (same case as SCALE_TO_FIT)
+ SCALE_TO_FIT = 0x1, // ScaleOrCenter: scaled by AnyRatioFrameScale
+ CENTER = 0x2, // ScaleOrCenter: copied unscaled by CenterImage
+ OTHER = 0x3 // ScaleOrCenter: falls to the default case, which does nothing
+} SCALE_MODE;
+
+// blockCoded(i): non-zero when element i of ppi->FragInfo has a FragInfoCodedMask bit set.
+// Uses a variable named ppi from the calling scope; the fraginfo layout was expected to change.
+#define blockCoded(i) (ppi->FragInfo[(i)*ppi->FragInfoElementSize]&ppi->FragInfoCodedMask)
+
+
+typedef struct
+{
+ // Post-processor instance: the state handed to every routine that takes a POSTPROC_INSTANCE *.
+ // per frame information passed in
+ INT32 Vp3VersionNo; // version of frame
+ INT32 FrameType; // key or non key
+ INT32 PostProcessingLevel; // level of post processing to perform
+ INT32 FrameQIndex; // q index value used on passed in frame
+ UINT8 *LastFrameRecon; // reconstruction buffer : passed in
+ UINT8 *PostProcessBuffer; // postprocessing buffer : passed in
+
+ // per block information passed in
+ UINT8 *FragInfo; // blocks coded : passed in
+ UINT32 FragInfoElementSize; // size of each element
+ UINT32 FragInfoCodedMask; // mask to get at whether fragment is coded
+
+ // per block info maintained by postprocessor
+ INT32 *FragQIndex; // block's q index : allocated and filled
+ INT32 *FragmentVariances; // block's pseudo variance : allocated and filled
+ UINT8 *FragDeblockingFlag; // whether to deblock block : allocated and filled
+
+ // filter specific vars
+ INT32 *BoundingValuePtr; // pointer to a filter
+ INT32 *FiltBoundingValue; // allocated (512 big)
+
+ // deblocker specific vars
+ INT32 *DeblockValuePtr; // pointer to a filter
+ INT32 *DeblockBoundingValue; // allocated (512 big)
+
+
+ // frame configuration
+ CONFIG_TYPE Configuration; // frame sizes, scale factors and scaling mode are read from here
+ UINT32 ReconYDataOffset; // position within buffer of first y fragment
+ UINT32 ReconUDataOffset; // position within buffer of first u fragment
+ UINT32 ReconVDataOffset; // position within buffer of first v fragment
+ UINT32 YPlaneFragments; // number of y fragments
+ UINT32 UVPlaneFragments; // number of u and v fragments
+ UINT32 UnitFragments; // number of total fragments y+u+v
+ UINT32 HFragments; // number of horizontal fragments in y
+ UINT32 VFragments; // number of vertical fragments in y
+ INT32 YStride; // pitch of y in bytes
+ INT32 UVStride; // pitch of uv in bytes
+
+ // allocs so we can align our ptrs
+ INT32 *FiltBoundingValueAlloc; // presumably the raw allocation behind FiltBoundingValue - confirm
+ INT32 *DeblockBoundingValueAlloc; // presumably the raw allocation behind DeblockBoundingValue - confirm
+ INT32 *FragQIndexAlloc; // presumably the raw allocation behind FragQIndex - confirm
+ INT32 *FragmentVariancesAlloc; // presumably the raw allocation behind FragmentVariances - confirm
+ UINT8 *FragDeblockingFlagAlloc; // presumably the raw allocation behind FragDeblockingFlag - confirm
+ UINT32 MVBorder; // border size; AnyRatioFrameScale uses VideoFrameWidth + 2*MVBorder as the source Y pitch
+ UINT8 *IntermediateBufferAlloc; // presumably the raw allocation behind IntermediateBuffer - confirm
+ UINT8 *IntermediateBuffer; // NOTE(review): purpose not visible in this header
+ UINT32 DeInterlaceMode; // NOTE(review): values not visible in this header
+ UINT32 AddNoiseMode; // NOTE(review): values not visible in this header
+
+} POSTPROC_INSTANCE;
+
+#define VAL_RANGE 256 /* LimitVal_VP31 holds 3 * VAL_RANGE entries */
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3]; // the deblocking filters index it from &LimitVal_VP31[VAL_RANGE], so negative offsets are in range
+typedef POSTPROC_INSTANCE * xPB_INST ; // pointer alias used in the declarations below
+// Everything down to PlaneAddNoise is a function pointer declared here and defined elsewhere.
+extern void (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+// Scaling helpers: the band and line scalers below are the ones selected by AnyRatio_2D_Scale.
+extern void (*CopyBlock) (unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void (*FilterHoriz_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*); // called by SimpleDeblockFrame on left fragment edges
+extern void (*FilterVert_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*); // called by SimpleDeblockFrame on top fragment edges
+extern void (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+extern void (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+extern void (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern void (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern void (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern INT32*(*SetupBoundingValueArray)(xPB_INST pbi, INT32 FLimit);
+extern INT32*(*SetupDeblockValueArray)(xPB_INST pbi, INT32 FLimit); // SimpleDeblockFrame passes the returned pointer to the *_Simple filters
+extern void (*FilterHoriz)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void (*FilterVert)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void (*ClampLevels)( POSTPROC_INSTANCE *pbi,INT32 BlackClamp, INT32 WhiteClamp, UINT8 *Src, UINT8 *Dst);
+extern void (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);
+extern void (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+// NOTE(review): presumably the routine that assigns the function pointers above - confirm against its definition
+extern void DMachineSpecificConfig(INT32 MmxEnabled, INT32 XmmEnabled, INT32 WmtEnabled);
+
+#endif \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj
new file mode 100644
index 00000000..b78564bc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj
@@ -0,0 +1,441 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{8F2BF92C-C4E1-45AE-BA45-2617B03B32AC}</ProjectGuid>
+ <RootNamespace>vppp</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet> <!-- x64 platform must resolve packages from the x64 triplet; the x86 triplet would supply 32-bit libraries -->
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet> <!-- x64 platform must resolve packages from the x64 triplet; the x86 triplet would supply 32-bit libraries -->
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>
+ </PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\borders.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\clamp.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\deblock.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\DeInterlace.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\dering.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\doptsystemdependant.c"> <!-- NOTE(review): excluded from the build in all four configurations below; presumably because win32\doptsystemdependant.c (same base name, same $(IntDir) object file) replaces it on Windows - confirm -->
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\loopfilter.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\postproc.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\scale.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\simpledeblocker.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\clamp_asm.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\deblockopt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\deblockwmtopt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\DeInterlaceMmx.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\DeInterlaceWmt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\deringopt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\deringwmtopt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\doptsystemdependant.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\loopf_asm.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\newlooptest_asm.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\scaleopt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\simpledeblock_asm.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters
new file mode 100644
index 00000000..d5946f3d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="generic">
+ <UniqueIdentifier>{adcd4975-46d4-4f20-8422-a898d3456999}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="win32">
+ <UniqueIdentifier>{4fbef4da-8fe3-440e-858e-2fbabea42066}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\borders.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\clamp.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\deblock.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\DeInterlace.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\dering.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\doptsystemdependant.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\loopfilter.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\postproc.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\scale.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\simpledeblocker.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\clamp_asm.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\deblockopt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\deblockwmtopt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\DeInterlaceMmx.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\DeInterlaceWmt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\deringopt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\deringwmtopt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\doptsystemdependant.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\loopf_asm.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\newlooptest_asm.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\scaleopt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\simpledeblock_asm.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..c7b0b41c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj
@@ -0,0 +1,233 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238B0BB8155500FDDAB7 /* postproc.c */; };
+ 0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238D0BB8155F00FDDAB7 /* loopfilter.c */; };
+ 0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A2C0BB78F6700DD0AFD /* scale.c */; };
+ 0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */; };
+ 0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */; };
+ 0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A350BB78F8600DD0AFD /* deblock.c */; };
+ 0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A360BB78F8600DD0AFD /* DeInterlace.c */; };
+ 0CF73A400BB78F8600DD0AFD /* dering.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A370BB78F8600DD0AFD /* dering.c */; };
+ 0CF73A410BB78F8600DD0AFD /* borders.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A380BB78F8600DD0AFD /* borders.c */; };
+ 0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A390BB78F8600DD0AFD /* clamp.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 0C14238B0BB8155500FDDAB7 /* postproc.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = postproc.c; path = generic/postproc.c; sourceTree = "<group>"; };
+ 0C14238D0BB8155F00FDDAB7 /* loopfilter.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = loopfilter.c; path = generic/loopfilter.c; sourceTree = "<group>"; };
+ 0CF73A2C0BB78F6700DD0AFD /* scale.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = scale.c; path = generic/scale.c; sourceTree = "<group>"; };
+ 0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = simpledeblocker.c; path = generic/simpledeblocker.c; sourceTree = "<group>"; };
+ 0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = doptsystemdependant.c; path = generic/doptsystemdependant.c; sourceTree = "<group>"; };
+ 0CF73A350BB78F8600DD0AFD /* deblock.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = deblock.c; path = generic/deblock.c; sourceTree = "<group>"; };
+ 0CF73A360BB78F8600DD0AFD /* DeInterlace.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DeInterlace.c; path = generic/DeInterlace.c; sourceTree = "<group>"; };
+ 0CF73A370BB78F8600DD0AFD /* dering.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = dering.c; path = generic/dering.c; sourceTree = "<group>"; };
+ 0CF73A380BB78F8600DD0AFD /* borders.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = borders.c; path = generic/borders.c; sourceTree = "<group>"; };
+ 0CF73A390BB78F8600DD0AFD /* clamp.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = clamp.c; path = generic/clamp.c; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libvppp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvppp.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* vppp */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = vppp;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 0C14238B0BB8155500FDDAB7 /* postproc.c */,
+ 0CF73A2C0BB78F6700DD0AFD /* scale.c */,
+ 0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */,
+ 0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */,
+ 0CF73A350BB78F8600DD0AFD /* deblock.c */,
+ 0CF73A360BB78F8600DD0AFD /* DeInterlace.c */,
+ 0CF73A370BB78F8600DD0AFD /* dering.c */,
+ 0C14238D0BB8155F00FDDAB7 /* loopfilter.c */,
+ 0CF73A380BB78F8600DD0AFD /* borders.c */,
+ 0CF73A390BB78F8600DD0AFD /* clamp.c */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libvppp.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* vppp */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = vppp;
+ productName = vppp;
+ productReference = D2AAC046055464E500DB518D /* libvppp.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */;
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* vppp */;
+ projectDirPath = "";
+ targets = (
+ D2AAC045055464E500DB518D /* vppp */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */,
+ 0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */,
+ 0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */,
+ 0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */,
+ 0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */,
+ 0CF73A400BB78F8600DD0AFD /* dering.c in Sources */,
+ 0CF73A410BB78F8600DD0AFD /* borders.c in Sources */,
+ 0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */,
+ 0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */,
+ 0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1DEB91EC08733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vppp;
+ ZERO_LINK = YES;
+ };
+ name = Debug;
+ };
+ 1DEB91ED08733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ARCHS = (
+ ppc,
+ i386,
+ );
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vppp;
+ };
+ name = Release;
+ };
+ 1DEB91F008733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+ };
+ name = Debug;
+ };
+ 1DEB91F108733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91EC08733DB70010E9CD /* Debug */,
+ 1DEB91ED08733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91F008733DB70010E9CD /* Debug */,
+ 1DEB91F108733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c
new file mode 100644
index 00000000..cf62c887
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c
@@ -0,0 +1,143 @@
+/****************************************************************************
+ *
+ * Module Title : DeInterlaceMmx.c
+ *
+ * Description : DeInterlace Routines
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Module constants.
+*
+* four2s : four 16-bit lanes each holding 2. MmxFastDeInterlace adds it to
+*          the widened "line -1" pixels so that the final >> 2 of the
+*          [1, 2, 1] sum rounds instead of truncating.
+*
+* NOTE(review): the _WIN32_WCE branch relies on #pragma pack(16), which
+* governs structure member packing; it presumably does not give this array
+* the 16-byte alignment that __declspec(align(16)) requests - confirm.
+****************************************************************************/
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short four2s[] = { 2, 2, 2, 2 };
+#pragma pack()
+#else
+__declspec(align(16)) short four2s[] = { 2, 2, 2, 2 };
+#endif
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr : Pointer to input frame.
+ *                  UINT8 *DstPtr : Pointer to output frame.
+ *                  INT32 Width   : Width of frame in pixels.
+ *                  INT32 Height  : Height of frame in pixels.
+ *                  INT32 Stride  : Stride of images (used for both buffers).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3 tap filter vertically to remove interlacing
+ *                  artifacts (MMX implementation).
+ *
+ *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
+ *                  vertically in an interlaced frame:
+ *                      Dst[y][x] = (Src[y-1][x] + 2*Src[y][x] + Src[y+1][x] + 2) >> 2
+ *                  for rows 1 .. Height-2. The first and last rows are
+ *                  copied unchanged. This function assumes:
+ *                  1) Buffers SrcPtr and DstPtr have the same geometry,
+ *                  2) SrcPtr and DstPtr can _not_ be the same.
+ *
+ *                  Each filtered row is processed in groups of 8 pixels
+ *                  until the column offset reaches Width, so when Width is
+ *                  not a multiple of 8 up to 7 bytes past Width are read
+ *                  and written on every row.
+ *                  NOTE(review): presumably the stride padding covers this
+ *                  overrun - confirm against the callers.
+ *
+ *                  Frames with Width <= 0 or Height <= 0 are ignored, and a
+ *                  one-row frame is simply copied.
+ *
+ *                  NOTE(review): no EMMS is executed here; presumably the
+ *                  caller clears the MMX state before using x87 code -
+ *                  confirm.
+ *
+ ****************************************************************************/
+void MmxFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32 i;
+    UINT8 *CurrentSrcPtr = SrcPtr;
+    UINT8 *CurrentDstPtr = DstPtr;
+
+    // Nothing to do for an empty frame; this also keeps a negative Width
+    // from being converted to a huge size by memcpy.
+    if ( Width <= 0 || Height <= 0 )
+        return;
+
+    // Always copy the first line
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    // With a single row the "last line" copy below would address row 1,
+    // which lies outside the frame.
+    if ( Height < 2 )
+        return;
+
+    for ( i=1; i<Height-1; i++ )
+    {
+        // CurrentDstPtr -> output row i, CurrentSrcPtr -> input row i-1
+        CurrentDstPtr += Stride;
+
+        __asm
+        {
+            mov         esi, [CurrentSrcPtr]            // esi = input row i-1
+            mov         edi, [CurrentDstPtr]            // edi = output row i
+
+            xor         ecx, ecx                        // ecx = column offset
+            mov         edx, [Stride]
+
+            lea         eax, [esi + edx]                // eax = input row i
+            lea         edx, [eax + edx]                // edx = input row i+1
+
+            mov         ebx, [Width]
+            pxor        mm7, mm7                        // mm7 = 0, used to widen bytes to words
+
+MmxDeInterlaceLoop:
+            movq        mm0, QWORD ptr [esi + ecx]      // line -1
+            movq        mm1, QWORD ptr [eax + ecx]      // line 0
+
+            movq        mm3, mm0                        // line -1
+            punpcklbw   mm0, mm7                        // line -1 low
+
+            movq        mm2, QWORD ptr [edx + ecx]      // line 1
+            punpckhbw   mm3, mm7                        // line -1 high
+
+
+            movq        mm4, mm1                        // line 0
+            punpcklbw   mm1, mm7                        // line 0 low
+
+            paddw       mm0, four2s                     // line -1 low + 2s
+            paddw       mm3, four2s                     // line -1 high + 2s
+
+            punpckhbw   mm4, mm7                        // line 0 high
+            psllw       mm1, 1                          // line 0 * 2
+
+            psllw       mm4, 1                          // line 0 * 2
+            movq        mm5, mm2                        // line 1
+
+            punpcklbw   mm2, mm7                        // line 1 low
+            paddw       mm0, mm1                        // line -1 + line 0 * 2
+
+            paddw       mm3, mm4                        // line -1 + line 0 * 2
+            punpckhbw   mm5, mm7                        // line 1 high
+
+            paddw       mm0, mm2                        // -1 + 0 * 2 + 1
+            paddw       mm3, mm5                        // -1 + 0 * 2 + 1
+
+            psraw       mm0, 2                          // >> 2
+            psraw       mm3, 2                          // >> 2
+
+            packuswb    mm0, mm3                        // back to 8 unsigned bytes
+
+            movq        QWORD ptr [edi+ecx], mm0
+            add         ecx, 8
+
+            cmp         ecx, ebx
+            jl          MmxDeInterlaceLoop
+
+        }
+        CurrentSrcPtr += Stride;
+    }
+
+    // copy the last line
+    CurrentSrcPtr += Stride;
+    CurrentDstPtr += Stride;
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c
new file mode 100644
index 00000000..d324d9e1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c
@@ -0,0 +1,129 @@
+/****************************************************************************
+ *
+ * Module Title : DeInterlaceWmt.c
+ *
+ * Description : DeInterlace
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Module constants.
+*
+* Eight2s : eight 16-bit lanes each holding 2. WmtFastDeInterlace uses it
+*           as the memory operand of "paddw xmm0, Eight2s" so that the
+*           final >> 2 of the [1, 2, 1] sum rounds instead of truncating.
+*           A 128-bit memory operand of that instruction has to be 16-byte
+*           aligned, hence __declspec(align(16)).
+*
+* NOTE(review): #pragma pack(16) in the _WIN32_WCE branch governs structure
+* member packing and presumably does not align this array; the WCE build
+* of WmtFastDeInterlace returns before touching it.
+****************************************************************************/
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };
+#pragma pack()
+#else
+__declspec(align(16)) short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };
+#endif
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr : Pointer to input frame.
+ *                  UINT8 *DstPtr : Pointer to output frame.
+ *                  INT32 Width   : Width of frame in pixels.
+ *                  INT32 Height  : Height of frame in pixels.
+ *                  INT32 Stride  : Stride of images (used for both buffers).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3 tap filter vertically to remove interlacing
+ *                  artifacts (SSE2 / xmm register implementation).
+ *
+ *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
+ *                  vertically in an interlaced frame:
+ *                      Dst[y][x] = (Src[y-1][x] + 2*Src[y][x] + Src[y+1][x] + 2) >> 2
+ *                  for rows 1 .. Height-2. The first and last rows are
+ *                  copied unchanged. This function assumes:
+ *                  1) Buffers SrcPtr and DstPtr have the same geometry,
+ *                  2) SrcPtr and DstPtr can _not_ be the same.
+ *
+ *                  Each filtered row is processed in groups of 8 pixels
+ *                  until the column offset reaches Width, so when Width is
+ *                  not a multiple of 8 up to 7 bytes past Width are read
+ *                  and written on every row.
+ *                  NOTE(review): presumably the stride padding covers this
+ *                  overrun - confirm against the callers.
+ *
+ *                  Frames with Width <= 0 or Height <= 0 are ignored, and a
+ *                  one-row frame is simply copied.
+ *
+ *                  When built with _WIN32_WCE defined the function returns
+ *                  immediately and DstPtr is left untouched.
+ *
+ ****************************************************************************/
+void WmtFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32 i;
+    UINT8 *CurrentSrcPtr = SrcPtr;
+    UINT8 *CurrentDstPtr = DstPtr;
+#if defined(_WIN32_WCE)
+    return;
+#else
+
+    // Nothing to do for an empty frame; this also keeps a negative Width
+    // from being converted to a huge size by memcpy.
+    if ( Width <= 0 || Height <= 0 )
+        return;
+
+    // Always copy the first line
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    // With a single row the "last line" copy below would address row 1,
+    // which lies outside the frame.
+    if ( Height < 2 )
+        return;
+
+    for ( i=1; i<Height-1; i++ )
+    {
+        // CurrentDstPtr -> output row i, CurrentSrcPtr -> input row i-1
+        CurrentDstPtr += Stride;
+
+        __asm
+        {
+            mov         esi, [CurrentSrcPtr]            // esi = input row i-1
+            mov         edi, [CurrentDstPtr]            // edi = output row i
+
+            xor         ecx, ecx                        // ecx = column offset
+            mov         edx, [Stride]
+
+            lea         eax, [esi + edx]                // eax = input row i
+            lea         edx, [eax + edx]                // edx = input row i+1
+
+            mov         ebx, [Width]
+            pxor        xmm7, xmm7                      // xmm7 = 0, used to widen bytes to words
+
+WmtDeInterlaceLoop:
+            movq        xmm0, QWORD ptr [esi + ecx]     // 8 pixels of line -1
+            movq        xmm1, QWORD ptr [eax + ecx]     // 8 pixels of line 0
+
+            punpcklbw   xmm0, xmm7                      // line -1 as words
+            movq        xmm2, QWORD ptr [edx + ecx]     // 8 pixels of line 1
+
+            punpcklbw   xmm1, xmm7                      // line 0 as words
+            paddw       xmm0, Eight2s                   // line -1 + 2s (rounding)
+
+            psllw       xmm1, 1                         // line 0 * 2
+            punpcklbw   xmm2, xmm7                      // line 1 as words
+
+            paddw       xmm0, xmm1                      // line -1 + line 0 * 2
+            paddw       xmm0, xmm2                      // -1 + 0 * 2 + 1
+
+            psraw       xmm0, 2                         // >> 2
+            packuswb    xmm0, xmm7                      // back to 8 unsigned bytes in the low half
+
+            movq        QWORD ptr [edi+ecx], xmm0
+            add         ecx, 8
+
+            cmp         ecx, ebx
+            jl          WmtDeInterlaceLoop
+
+        }
+        CurrentSrcPtr += Stride;
+    }
+
+    //copy the last line
+    CurrentSrcPtr += Stride;
+    CurrentDstPtr += Stride;
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+#endif
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c
new file mode 100644
index 00000000..27e43065
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c
@@ -0,0 +1,170 @@
+/****************************************************************************
+ *
+ * Module Title : clamp_asm.c
+ *
+ * Description : MMX and SSE2 versions of the Y plane level clamp (ClampLevels)
+ *
+ * AUTHOR : Jim Bankoski
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.09 YWX 26-Sep-01 Changed the default bandHeight from 5 to 4
+ * 1.08 YWX 23-Jul-00 Changed horizontal scaling function names
+ * 1.07 JBB 04 Dec 00 Added new Center vs Scale Bits
+ * 1.06 YWX 01-Dec-00 Removed bi-cubic scale functions
+ * 1.05 YWX 18-Oct-00 Added 1-2 scale functions
+ * 1.04 YWX 11-Oct-00 Added ratio check to determine scaling or centering
+ * 1.03 YWX 09-Oct-00 Added functions that do differen scaling in horizontal
+ * and vertical directions
+ * 1.02 YWX 04-Oct-00 Added 3-5 scaling functions
+ * 1.01 YWX 03-Oct-00 Added a set of 4-5 scaling functions
+ * 1.00 JBB 15 Sep 00 New Configuration baseline.
+ *
+ *****************************************************************************
+ */
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#include "postp.h"
+#include <stdio.h>
+
+/****************************************************************************
+ * Imported
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+* Module Static Variables
+*****************************************************************************
+*/
+void ClampLevels_wmt(
+ POSTPROC_INSTANCE *pbi,
+ INT32 BlackClamp, // number of values to clamp from 0
+ INT32 WhiteClamp, // number of values to clamp from 255
+ UINT8 *Src, // reconstruction buffer : passed in
+ UINT8 *Dst // postprocessing buffer : passed in
+ )
+{
+#if defined(_WIN32_WCE)
+ return; // Windows CE build: no SSE2 path, the function does nothing
+#else
+/*
+ * SSE2 (xmm register) level clamp of the Y plane.
+ *
+ * For each of the pbi->VFragments*8 rows, bytes are read from Src and the
+ * clamped result is written to Dst; both start at pbi->ReconYDataOffset
+ * and advance by pbi->YStride per row.
+ *
+ * Per byte the loop does: saturating subtract of BlackClamp, saturating
+ * add of (BlackClamp + WhiteClamp), saturating subtract of WhiteClamp.
+ * As long as BlackClamp + WhiteClamp <= 255 this limits every value to the
+ * range [BlackClamp, 255 - WhiteClamp].
+ *
+ * A row is handled 16 bytes at a time until the offset reaches
+ * pbi->HFragments*8; the loop body always runs at least once per row.
+ *
+ * movdqa needs 16-byte aligned addresses, so the row start addresses in
+ * both buffers are assumed to be 16-byte aligned - TODO confirm.
+ */
+ __declspec(align(16)) unsigned char blackclamp[16];
+ __declspec(align(16)) unsigned char whiteclamp[16];
+ __declspec(align(16)) unsigned char bothclamp[16];
+
+ int i;
+ int width = pbi->HFragments *8;
+ int height = pbi->VFragments *8;
+ UINT8 *SrcPtr = Src + pbi->ReconYDataOffset;
+ UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
+ UINT32 LineLength = pbi->YStride ; // per-row step; NOTE(review): original remark said "pitch is doubled for interlacing", but no doubling happens here
+ int row;
+
+ for(i=0;i<16;i++) // replicate each clamp value into all 16 byte lanes
+ {
+ blackclamp[i]=(unsigned char )BlackClamp;
+ whiteclamp[i]=(unsigned char )WhiteClamp;
+ bothclamp[i]=BlackClamp+WhiteClamp; // sum is truncated to 8 bits when stored
+ }
+
+ // clamping is for y only!
+ for ( row = 0 ; row < height ; row ++)
+ {
+ __asm
+ {
+ mov ecx, [width]
+ mov esi, SrcPtr
+ mov edi, DestPtr
+ xor eax,eax // eax = byte offset within the row
+ nextset:
+ movdqa xmm1,[esi+eax] // load 16 source pixels
+ psubusb xmm1,blackclamp // values below BlackClamp become 0
+ paddusb xmm1,bothclamp // values above 255 - WhiteClamp saturate at 255
+ psubusb xmm1,whiteclamp // result now lies in [BlackClamp, 255 - WhiteClamp]
+ movdqa [edi+eax],xmm1 ;write first 4 pixels // actually stores all 16 bytes of xmm1
+ add eax,16
+ cmp eax, ecx
+ jl nextset
+ }
+ SrcPtr += LineLength;
+ DestPtr += LineLength;
+ }
+#endif
+}
+
+
+/*
+ * ClampLevels_mmx : MMX level clamp of the Y plane.
+ *
+ * For each of the pbi->VFragments*8 rows, bytes are read from Src and the
+ * clamped result is written to Dst; both start at pbi->ReconYDataOffset
+ * and advance by pbi->YStride per row.
+ *
+ * Per byte the loop does: saturating subtract of BlackClamp, saturating
+ * add of (BlackClamp + WhiteClamp), saturating subtract of WhiteClamp.
+ * As long as BlackClamp + WhiteClamp <= 255 this limits every value to the
+ * range [BlackClamp, 255 - WhiteClamp].
+ *
+ * A row is handled 8 bytes at a time until the offset reaches
+ * pbi->HFragments*8; the loop body always runs at least once per row.
+ *
+ * NOTE(review): no EMMS is executed here; presumably the caller clears the
+ * MMX state before using x87 code - confirm.
+ */
+void ClampLevels_mmx(
+ POSTPROC_INSTANCE *pbi,
+ INT32 BlackClamp, // number of values to clamp from 0
+ INT32 WhiteClamp, // number of values to clamp from 255
+ UINT8 *Src, // reconstruction buffer : passed in
+ UINT8 *Dst // postprocessing buffer : passed in
+ )
+{
+
+#if defined(_WIN32_WCE)
+ #pragma pack(8) // NOTE(review): pack governs structure member packing; presumably it does not align these locals - confirm
+ unsigned char blackclamp[16];
+ unsigned char whiteclamp[16];
+ unsigned char bothclamp[16];
+ #pragma pack()
+#else
+ __declspec(align(8)) unsigned char blackclamp[16];
+ __declspec(align(8)) unsigned char whiteclamp[16];
+ __declspec(align(8)) unsigned char bothclamp[16];
+#endif
+ int i;
+ int width = pbi->HFragments *8;
+ int height = pbi->VFragments *8;
+ UINT8 *SrcPtr = Src + pbi->ReconYDataOffset;
+ UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
+ UINT32 LineLength = pbi->YStride ; // per-row step; NOTE(review): original remark said "pitch is doubled for interlacing", but no doubling happens here
+ int row;
+
+ for(i=0;i<8;i++) // only the first 8 bytes of each 16-byte array are filled; the 64-bit MMX operands below read just those
+ {
+ blackclamp[i]=(unsigned char )BlackClamp;
+ whiteclamp[i]=(unsigned char )WhiteClamp;
+ bothclamp[i]=BlackClamp+WhiteClamp; // sum is truncated to 8 bits when stored
+ }
+
+ // clamping is for y only!
+ for ( row = 0 ; row < height ; row ++)
+ {
+ __asm
+ {
+ mov ecx, [width]
+ mov esi, SrcPtr
+ mov edi, DestPtr
+ xor eax,eax // eax = byte offset within the row
+ nextset:
+ movq mm1,[esi+eax] // load 8 source pixels
+ psubusb mm1,blackclamp // values below BlackClamp become 0
+ paddusb mm1,bothclamp // values above 255 - WhiteClamp saturate at 255
+ psubusb mm1,whiteclamp // result now lies in [BlackClamp, 255 - WhiteClamp]
+ movq [edi+eax],mm1 ;write first 4 pixels // actually stores all 8 bytes of mm1
+ add eax,8
+ cmp eax, ecx
+ jl nextset
+ }
+ SrcPtr += LineLength;
+ DestPtr += LineLength;
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c
new file mode 100644
index 00000000..67285fab
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c
@@ -0,0 +1,6692 @@
+/****************************************************************************
+ *
+ * Module Title : DeblockOpt.c
+ *
+ * Description : Optimized functions for deblocking
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.04 YWX 21-Mar-02 bug fixed in functions using abs diff criteria
+ * 1.03 YWX 15-Jun-01 Added new 7 tap filter in deblocking
+ * 1.02 YWX 02-May-01 Changed to use sum of abs diff to replace variance
+ * 1.01 YWX 17-Nov-00 Re-arranged loop inside deblockNonFilteredBand()
+ * 1.00 YWX 02-Nov-00 Configuration baseline from old PPoptfunctions.c
+ *
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+#ifdef _MSC_VER
+#pragma warning(disable:4799)
+#pragma warning(disable:4731)
+#endif
+
+
+#define STRICT /* Strict type checking. */
+
+#include "postp.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/****************************************************************************
+ * Module constants.
+ *
+ * Tables of one value replicated across SIMD lanes. The Four* tables hold
+ * four 16-bit lanes (64 bits); the Eight* tables hold eight lanes
+ * (16-bit for the short tables, 8-bit for Eight128c).
+ * In the part of this file reviewed here only FourThrees is referenced
+ * (FLimit computation in DeblockLoopFilteredBand_MMX); the other tables are
+ * presumably used by routines further down - confirm before removing any.
+ *
+ * NOTE(review): the _WIN32_WCE branch relies on #pragma pack(16), which
+ * governs structure member packing; it presumably does not give these
+ * arrays the 16-byte alignment that __declspec(align(16)) requests -
+ * confirm.
+ *****************************************************************************
+ */
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 };
+static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64 };
+static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3};
+static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4};
+static short Four128s[] = {128, 128, 128, 128};
+static short Four64s[] = {64, 64, 64, 64 };
+static short FourThrees[]= {3, 3, 3, 3};
+static short FourFours[]= {4, 4, 4, 4};
+static short FourOnes[]= { 1, 1, 1, 1};
+static unsigned char Eight128c[] = {128, 128, 128, 128,128, 128, 128, 128 };
+#pragma pack()
+#else
+__declspec(align(16)) static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 };
+__declspec(align(16)) static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64 };
+__declspec(align(16)) static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3};
+__declspec(align(16)) static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4};
+__declspec(align(16)) static short Four128s[] = {128, 128, 128, 128};
+__declspec(align(16)) static short Four64s[] = {64, 64, 64, 64 };
+__declspec(align(16)) static short FourThrees[]= {3, 3, 3, 3};
+__declspec(align(16)) static short FourFours[]= {4, 4, 4, 4};
+__declspec(align(16)) static short FourOnes[]= { 1, 1, 1, 1};
+__declspec(align(16)) static unsigned char Eight128c[] = {128, 128, 128, 128,128, 128, 128, 128 };
+#endif
+
+/****************************************************************************
+ * Explicit Imports
+ *****************************************************************************
+ */
+
+extern UINT32 *DeblockLimitValuesV2;
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+extern double gaussian(double sigma, double mu, double x);
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+/****************************************************************************
+ *
+ *  ROUTINE       : SetupDeblockValueArray_ForMMX
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Post-processor instance whose
+ *                                           DeblockBoundingValue array
+ *                                           supplies the storage.
+ *                  INT32 FLimit           : Filter limit to replicate into
+ *                                           16-bit lanes.
+ *
+ *  OUTPUTS       : Writes six INT32 slots inside pbi->DeblockBoundingValue.
+ *
+ *  RETURNS       : Pointer to the 32-byte aligned constant table.
+ *
+ *  FUNCTION      : Builds the table of packed 16-bit constants used by the
+ *                  MMX deblocking code.
+ *
+ *  SPECIAL NOTES : Table layout, in INT32 units:
+ *                      [0..1]  FLimit in each of four 16-bit lanes
+ *                              (exact for 0 <= FLimit < 0x8000)
+ *                      [2..3]  1 in each of four 16-bit lanes
+ *                      [4..5]  4 in each of four 16-bit lanes
+ *                  The table starts at the address of
+ *                  DeblockBoundingValue[256] rounded DOWN to a 32-byte
+ *                  boundary, i.e. up to 31 bytes before that element.
+ *
+ *  ERRORS        : None.
+ *
+ ****************************************************************************/
+INT32 *SetupDeblockValueArray_ForMMX(POSTPROC_INSTANCE *pbi, INT32 FLimit)
+{
+    INT32 *BoundingValuePtr;
+
+    /*
+        Original author's note: since the FiltBoundingValue array is currently
+        only used in the generic version, this memory is reused here - two
+        longs each for the three constants set below, with further longs left
+        for temporary work storage.
+
+        The address is masked as a size_t rather than a UINT32 so that no
+        pointer bits are lost where pointers are wider than 32 bits; on a
+        32-bit target the result is identical to the previous
+        "& 0xffffffe0" form.
+    */
+    BoundingValuePtr = (INT32 *)((size_t)(&pbi->DeblockBoundingValue[256]) & ~(size_t)0x1f);
+
+    // expand for mmx code: each pair of INT32s forms one 64-bit MMX operand
+    BoundingValuePtr[0] = BoundingValuePtr[1] = FLimit * 0x00010001;
+    BoundingValuePtr[2] = BoundingValuePtr[3] = 0x00010001;
+    BoundingValuePtr[4] = BoundingValuePtr[5] = 0x00040004;
+
+    return BoundingValuePtr;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockLoopFilteredBand_MMX
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Filter both horizontal and vertical edge in a band
+ *
+ * SPECIAL NOTES :
+ *
+ * REFERENCE :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void DeblockLoopFilteredBand_MMX(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+ )
+{
+ UINT32 j;
+ UINT32 CurrentFrag=StartFrag;
+ UINT32 QStep;
+ UINT8 *Src, *Des;
+ UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short QStepMmx[4];
+short FLimitMmx[4];
+short Rows[80];
+short NewRows[64];
+
+unsigned short Variance11[4];
+unsigned short Variance12[4];
+unsigned short Variance21[4];
+unsigned short Variance22[4];
+#pragma pack()
+#else
+__declspec(align(16)) short QStepMmx[4];
+__declspec(align(16)) short FLimitMmx[4];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short NewRows[64];
+
+__declspec(align(16)) unsigned short Variance11[4];
+__declspec(align(16)) unsigned short Variance12[4];
+__declspec(align(16)) unsigned short Variance21[4];
+__declspec(align(16)) unsigned short Variance22[4];
+#endif
+
+ Src=SrcPtr;
+ Des=DesPtr;
+
+ while(CurrentFrag < StartFrag + FragAcross )
+ {
+
+ QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+ if( QStep > 3 )
+ {
+ QStepMmx[0] = (INT16)QStep;
+ QStepMmx[1] = (INT16)QStep;
+ QStepMmx[2] = (INT16)QStep;
+ QStepMmx[3] = (INT16)QStep;
+ __asm
+ {
+
+ /* Save the registers */
+ push eax
+ push ebp
+ push ecx
+ push edx
+ push esi
+ push edi
+
+
+ /* Calculate the FLimit and store FLimit and QStep */
+
+ movq mm0, QStepMmx /* mm0 = QStep */
+ movq mm1, FourThrees /* mm1 = 03030303 */
+
+ pmullw mm1, mm0 /* mm1 = QStep * 3 */
+ pmullw mm1, mm0 /* mm1 = QStep * QStep * 3 */
+
+ psrlw mm1, 5 /* mm1 = FLimit */
+ movq [FLimitMmx], mm1 /* Save FLimit */
+
+ /* Copy the data to the intermediate buffer */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ lea esi, NewRows /* esi = NewRows */
+
+ lea edi, Rows /* edi = Rows */
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+
+ pxor mm7, mm7 /* Clear mm7 */
+ sub edx, ecx /* edx = -Pitch */
+
+ lea eax, [eax + edx * 4 ] /* eax = Src - 4*Pitch */
+ movq mm0, [eax + edx] /* mm0 = Src[-5*Pitch] */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ punpcklbw mm0, mm7 /* Lower Four -5 */
+
+ movq mm2, [eax] /* mm2 = Src[-4*Pitch] */
+ movq mm3, mm2 /* mm3 = mm2 */
+
+ punpckhbw mm1, mm7 /* Higher Four -5 */
+ movq [edi], mm0 /* Write Lower Four of -5 */
+
+ punpcklbw mm2, mm7 /* Lower Four -4 */
+ punpckhbw mm3, mm7 /* higher Four -4 */
+
+ movq [edi+8], mm1 /* Write Higher Four of -5 */
+ movq mm4, [eax + ecx] /* mm4 = Src[-3*Pitch] */
+
+ movq [edi+16], mm2 /* Write Lower -4 */
+ movq [edi+24], mm3 /* write hight -4 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ punpcklbw mm4, mm7 /* lower four -3 */
+
+ movq mm0, [eax + ecx *2] /* mm0 = Src[-2*Pitch] */
+ punpckhbw mm5, mm7 /* higher four -3 */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ movq [edi+32], mm4 /* write Lower -3 */
+
+ punpcklbw mm0, mm7 /* lower four -2 */
+ lea eax, [eax + ecx *4] /* eax = Src */
+
+ movq [edi+40], mm5 /* write Higher -3 */
+ punpckhbw mm1, mm7 /* higher four -2 */
+
+ movq mm2, [eax + edx] /* mm2 = Src[-Pitch] */
+ movq [edi+48], mm0 /* lower -2 */
+
+ movq mm3, mm2 /* mm3 = mm2 */
+ punpcklbw mm2, mm7 /* lower -1 */
+
+ movq [edi+56], mm1 /* higher -2 */
+ punpckhbw mm3, mm7 /* Higher -1 */
+
+ movq mm4, [eax] /* mm4 = Src[0] */
+ movq [edi+64], mm2 /* Lower -1 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ movq [edi+72], mm3 /* Higher -1 */
+
+ punpcklbw mm4, mm7 /* lower 0 */
+ punpckhbw mm5, mm7 /* higher 0 */
+
+ movq mm0, [eax + ecx] /* mm0 = Src[Pitch] */
+ movq [edi+80], mm4 /* write lower 0 */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ movq [edi+88], mm5 /* write higher 0 */
+
+ punpcklbw mm0, mm7 /* lower 1 */
+ punpckhbw mm1, mm7 /* higher 1 */
+
+ movq mm2, [eax + ecx *2 ] /* mm2 = Src[2*Pitch] */
+ lea eax, [eax + ecx *4] /* eax = Src + 4 * Pitch */
+
+ movq mm3, mm2 /* mm3 = mm2 */
+ movq [edi+96], mm0 /* write lower 1 */
+
+ punpcklbw mm2, mm7 /* lower 2 */
+ punpckhbw mm3, mm7 /* higher 2 */
+
+ movq mm4, [eax + edx ] /* mm4 = Src[3*pitch] */
+ movq [edi+104], mm1 /* wirte higher 1 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ punpcklbw mm4, mm7 /* Low 3 */
+
+ movq [edi+112], mm2 /* write lower 2 */
+ movq [edi+120], mm3 /* write higher 2 */
+
+ movq mm0, [eax] /* mm0 = Src[4*pitch] */
+ punpckhbw mm5, mm7 /* high 3 */
+
+ movq mm1, mm0 /* mm1=mm0 */
+ movq [edi+128], mm4 /* low 3 */
+
+ punpcklbw mm0, mm7 /* low 4 */
+ punpckhbw mm1, mm7 /* high 4 */
+
+ movq [edi+136], mm5 /* high 3 */
+ movq [edi+144], mm0 /* low 4 */
+
+ movq [edi+152], mm1 /* high 4 */
+
+ /* done with copying everything to intermediate buffer */
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */
+ /* mm7 = 0, mm3 = {128, 128, 128, 128} */
+
+ pcmpeqw mm3, mm3 /* mm3 = FFFFFFFFFFFFFFFF */
+ psllw mm3, 15 /* mm3 = 8000800080008000 */
+ psrlw mm3, 8 /* mm3 = 0080008000800080 */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 (sum; each x is pixel - 128) */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 (sum of squares) */
+ /* mm4 = x5 + x6 + x7 + x8 (sum; each x is pixel - 128) */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 (sum of squares) */
+
+ movq mm7, mm3 /* mm7 = mm3 */
+ psrlw mm7, 7 /* mm7 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm7 /* (sum1 + 1) */
+ paddw mm4, mm7 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq mm7, FLimitMmx /* mm7 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ movq [Variance11], mm1 /* Save Variance1 */
+
+ movq [Variance21], mm5 /* Save Variance2 */
+ psubw mm1, mm7 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm7 /* Variance 2 < Flimit? */
+ psraw mm2, 15 /* Variance 1 > 32768? */
+
+ psraw mm6, 15 /* Vaiance 2 > 32768? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm7, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movq mm2, mm7 /* make copy of Pixel4 */
+
+ psubusw mm7, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm7, mm4 /* abs(4 - 5) */
+ psubw mm7, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm7, 15 /* FFFF/0000 for True/Flase */
+ pand mm7, mm6
+
+ /* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* now let's look at the right four columns */
+
+ add edi, 8 /* offset 8 to right 4 cols */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 (sum; each x is pixel - 128) */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 (sum of squares) */
+ /* mm4 = x5 + x6 + x7 + x8 (sum; each x is pixel - 128) */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 (sum of squares) */
+
+ psrlw mm3, 7 /* mm3 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm3 /* (sum1 + 1) */
+ paddw mm4, mm3 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance12], mm1 /* Save Variance1 */
+ movq [Variance22], mm5 /* Save Variance2 */
+
+ movq mm3, FLimitMmx /* mm3 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ psubw mm1, mm3 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm3 /* Variance 2 < Flimit? */
+ psraw mm2, 15 /* Variance 1 > 32768? */
+
+ psraw mm6, 15 /* Vaiance 2 > 32768? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm0, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm0 /* make copy of Pixel4 */
+
+ psubusw mm0, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm0, mm4 /* abs(4 - 5) */
+ psubw mm0, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm0, 15 /* FFFF/0000 for True/False */
+ pand mm0, mm6
+
+ sub edi, 8 /* offset edi back */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 and mm7 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */
+ /* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */
+ /* Des[-w4]=Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with right four column */
+ add edi, 8 /* shift edi to point x1 */
+ sub esi, 8 /* shift esi back to x1 */
+
+ mov ebp, Des /* the destination */
+ lea ebp, [ebp + edx *4] /* point to des[-w4] */
+
+ movq mm0, [esi]
+ packuswb mm0, [esi + 8]
+
+ movq [ebp], mm0 /* write des[-w4] */
+
+ movq mm1, [esi + 16]
+ packuswb mm1, [esi + 24]
+
+ movq [ebp+ecx ], mm1 /* write des[-w3] */
+
+ movq mm2, [esi + 32]
+ packuswb mm2, [esi + 40]
+
+ movq [ebp+ecx*2 ], mm2 /* write des[-w2] */
+
+ movq mm3, [esi + 48]
+ packuswb mm3, [esi + 56]
+
+ lea ebp, [ebp+ecx*4] /* point to des[0] */
+ movq [ebp+edx], mm3 /* write des[-w1] */
+
+ movq mm0, [esi + 64]
+ packuswb mm0, [esi + 72]
+
+ movq [ebp ], mm0 /* write des[0] */
+
+ movq mm1, [esi + 80]
+ packuswb mm1, [esi + 88]
+
+ movq [ebp+ecx], mm1 /* write des[w1] */
+
+ movq mm2, [esi + 96]
+ packuswb mm2, [esi + 104]
+
+ movq [ebp+ecx*2], mm2 /* write des[w2] */
+
+ movq mm3, [esi + 112]
+ packuswb mm3, [esi + 120]
+
+ lea ebp, [ebp+ecx*2] /* point to des[w4] */
+ movq [ebp+ecx], mm3 /* write des[w3] */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+
+
+ } /* end of the macro */
+
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+ pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+ }
+ else
+ {
+
+ /* copy from src to des */
+ __asm
+ {
+ push esi
+ push edi
+ push ecx
+
+ mov esi, Src /* esi = Src */
+ mov edi, Des /* edi = Des */
+
+ push edx
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ xor edx, edx /* clear edx */
+
+ sub edx, ecx /* edx = -Pitch */
+ lea esi, [esi+edx*4] /* esi=Src-4*Pitch*/
+
+ movq mm0, [esi] /* first row */
+ movq [edi+edx*4], mm0 /* write first row */
+
+ lea edi, [edi+edx*4] /* edi=Des-4*Pitch*/
+ movq mm1, [esi+ecx] /* Src-3*Pitch */
+
+ movq [edi+ecx], mm1 /* write second row */
+ movq mm2, [esi+ecx*2] /* Src-2*Pitch */
+
+ lea esi, [esi+ecx*4] /* Src */
+ movq [edi+ecx*2], mm2 /* write third row */
+
+ lea edi, [edi+ecx*4] /* Des */
+ movq mm3, [esi+edx] /* Src-Pitch */
+
+ movq [edi+edx], mm3 /* write fourth row */
+ movq mm4, [esi] /* Src */
+
+ movq mm5, [esi+ecx] /* Src+Pitch */
+ movq [edi], mm4 /* write fifth rwo */
+
+ movq mm6, [esi+ecx*2]
+ lea esi, [esi+ecx*4] /* Src+pitch*4 */
+
+ movq [edi+ecx], mm5 /* write the sixth rwo */
+ movq [edi+ecx*2], mm6 /* write the seventh row */
+
+ movq mm7, [esi+edx]
+ lea edi, [edi+ecx*4] /* Des+Pitch*4 */
+
+ movq [edi+edx], mm7 /* write the last row */
+
+ pop edx
+ pop ecx
+ pop edi
+ pop esi
+ }
+
+ }
+
+ Src += 8;
+ Des += 8;
+ CurrentFrag ++;
+ }
+
+ Des -= ((PlaneLineStep + FragAcross)<<3);
+ Des += 8;
+ Src = Des;
+
+ CurrentFrag = StartFrag ;
+
+ while(CurrentFrag < StartFrag + FragAcross - 1)
+ {
+
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+
+ if( QStep > 3 )
+ {
+ QStepMmx[0] = (INT16)QStep;
+ QStepMmx[1] = (INT16)QStep;
+ QStepMmx[2] = (INT16)QStep;
+ QStepMmx[3] = (INT16)QStep;
+
+ for( j=0; j<8;j++)
+ {
+ Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+ Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);
+ }
+
+ __asm
+ {
+ /* Save the registers */
+ push eax
+ push ebp
+ push ecx
+ push edx
+ push esi
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+
+ movq mm0, QStepMmx /* mm0 = QStep */
+ movq mm1, FourThrees /* mm1 = 03030303 */
+
+ pmullw mm1, mm0 /* mm1 = QStep * 3 */
+ pmullw mm1, mm0 /* mm1 = QStep * QStep * 3 */
+
+ psrlw mm1, 5 /* mm1 = FLimit */
+ movq [FLimitMmx], mm1 /* Save FLimit */
+
+ /* setup the pointers to data */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ sub eax, 4 /* eax = Src-4 */
+ lea esi, NewRows /* esi = NewRows */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ sub edx, ecx /* edx = -Pitch */
+
+ /* Get the data to the intermediate buffer */
+
+ movq mm0, [eax] /* mm0 = 07 06 05 04 03 02 01 00 */
+ movq mm1, [eax+ecx] /* mm1 = 17 16 15 14 13 12 11 10 */
+
+ movq mm2, [eax+ecx*2] /* mm2 = 27 26 25 24 23 22 21 20 */
+ lea eax, [eax+ecx*4] /* Go down four Rows */
+
+ movq mm3, [eax+edx] /* mm3 = 37 36 35 34 33 32 31 30 */
+ movq mm4, mm0 /* mm4 = 07 06 05 04 03 02 01 00 */
+
+ punpcklbw mm0, mm1 /* mm0 = 13 03 12 02 11 01 10 00 */
+ punpckhbw mm4, mm1 /* mm4 = 17 07 16 06 15 05 14 04 */
+
+ movq mm5, mm2 /* mm5 = 27 26 25 24 23 22 21 20 */
+ punpcklbw mm2, mm3 /* mm2 = 33 23 32 22 31 21 30 20 */
+
+ punpckhbw mm5, mm3 /* mm5 = 37 27 36 26 35 25 34 24 */
+ movq mm1, mm0 /* mm1 = 13 03 12 02 11 01 10 00 */
+
+ punpcklwd mm0, mm2 /* mm0 = 31 21 11 01 30 20 10 00 */
+ punpckhwd mm1, mm2 /* mm1 = 33 23 13 03 32 22 12 02 */
+
+ movq mm2, mm4 /* mm2 = 17 07 16 06 15 05 14 04 */
+ punpckhwd mm4, mm5 /* mm4 = 37 27 17 07 36 26 16 06 */
+
+ punpcklwd mm2, mm5 /* mm2 = 35 25 15 05 34 24 14 04 */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 30 20 10 00 */
+
+ movq [edi+16], mm0 /* write 00 10 20 30 */
+ punpckhbw mm5, mm7 /* mm5 = 31 21 11 01 */
+
+ movq mm0, mm1 /* mm0 =33 23 13 03 32 22 12 02 */
+ movq [edi+32], mm5 /* write 01 11 21 31 */
+
+ punpcklbw mm1, mm7 /* mm1 = 32 22 12 02 */
+ punpckhbw mm0, mm7 /* mm0 = 33 23 12 03 */
+
+ movq [edi+48], mm1 /* write 02 12 22 32 */
+ movq mm3, mm2 /* mm3 = 35 25 15 05 34 24 14 04 */
+
+ movq mm5, mm4 /* mm5 = 37 27 17 07 36 26 16 06 */
+ movq [edi+64], mm0 /* write 03 13 23 33 */
+
+ punpcklbw mm2, mm7 /* mm2 = 34 24 14 04 */
+ punpckhbw mm3, mm7 /* mm3 = 35 25 15 05 */
+
+ movq [edi+80], mm2 /* write 04 14 24 34 */
+ punpcklbw mm4, mm7 /* mm4 = 36 26 16 06 */
+
+ punpckhbw mm5, mm7 /* mm5 = 37 27 17 07 */
+ movq [edi+96], mm3 /* write 05 15 25 35 */
+
+ movq mm0, [eax] /* mm0 = 47 46 45 44 43 42 41 40 */
+ movq mm1, [eax + ecx ] /* mm1 = 57 56 55 54 53 52 51 50 */
+
+ movq [edi+112], mm4 /* write 06 16 26 37 */
+ movq mm2, [eax+ecx*2] /* mm2 = 67 66 65 64 63 62 61 60 */
+
+ lea eax, [eax+ ecx*4] /* Go down four rows */
+ movq [edi+128], mm5 /* write 07 17 27 37 */
+
+ movq mm4, mm0 /* mm4 = 47 46 45 44 43 42 41 40 */
+ movq mm3, [eax+edx] /* mm3 = 77 76 75 74 73 72 71 70 */
+
+ punpcklbw mm0, mm1 /* mm0 = 53 43 52 42 51 41 50 40 */
+ punpckhbw mm4, mm1 /* mm4 = 57 57 56 46 55 45 54 44 */
+
+ movq mm5, mm2 /* mm5 = 67 66 65 64 63 62 61 60 */
+ punpcklbw mm2, mm3 /* mm2 = 73 63 72 62 71 61 70 60 */
+
+ punpckhbw mm5, mm3 /* mm5 = 77 67 76 66 75 65 74 64 */
+ movq mm1, mm0 /* mm1 = 53 43 52 42 51 41 50 40 */
+
+ punpcklwd mm0, mm2 /* mm0 = 71 61 51 41 70 60 50 40 */
+ punpckhwd mm1, mm2 /* mm1 = 73 63 53 43 72 62 52 42 */
+
+ movq mm2, mm4 /* mm2 = 57 57 56 46 55 45 54 44 */
+ punpckhwd mm4, mm5 /* mm4 = 77 67 57 47 76 66 56 46 */
+
+ punpcklwd mm2, mm5 /* mm2 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 70 60 50 40 */
+
+ movq [edi+24], mm0 /* write 40 50 60 70 */
+ punpckhbw mm5, mm7 /* mm5 = 71 61 51 41 */
+
+ movq mm0, mm1 /* mm0 = 73 63 53 43 72 62 52 42 */
+ movq [edi+40], mm5 /* write 41 51 61 71 */
+
+ punpcklbw mm1, mm7 /* mm1 = 72 62 52 42 */
+ punpckhbw mm0, mm7 /* mm0 = 73 63 53 43 */
+
+ movq [edi+56], mm1 /* write 42 52 62 72 */
+ movq mm3, mm2 /* mm3 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm4 /* mm5 = 77 67 57 47 76 66 56 46 */
+ movq [edi+72], mm0 /* write 43 53 63 73 */
+
+ punpcklbw mm2, mm7 /* mm2 = 74 64 54 44 */
+ punpckhbw mm3, mm7 /* mm3 = 75 65 55 45 */
+
+ movq [edi+88], mm2 /* write 44 54 64 74 */
+ punpcklbw mm4, mm7 /* mm4 = 76 66 56 46 */
+
+ punpckhbw mm5, mm7 /* mm5 = 77 67 57 47 */
+ movq [edi+104], mm3 /* write 45 55 65 75 */
+
+ movq [edi+120], mm4 /* write 46 56 66 76 */
+ movq [edi+136], mm5 /* write 47 57 67 77 */
+
+
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */
+ /* mm7 = 0, mm3 = {128, 128, 128, 128} */
+
+ pcmpeqw mm3, mm3 /* mm3 = FFFFFFFFFFFFFFFF */
+ psllw mm3, 15 /* mm3 = 8000800080008000 */
+ psrlw mm3, 8 /* mm3 = 0080008000800080 */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 (sum; each x is pixel - 128) */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 (sum of squares) */
+ /* mm4 = x5 + x6 + x7 + x8 (sum; each x is pixel - 128) */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 (sum of squares) */
+
+ movq mm7, mm3 /* mm7 = mm3 */
+ psrlw mm7, 7 /* mm7 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm7 /* (sum1 + 1) */
+ paddw mm4, mm7 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance11], mm1 /* Save Variance1 */
+ movq [Variance21], mm5 /* Save Variance2 */
+
+ movq mm7, FLimitMmx /* mm7 = FLimit */
+ movq mm2, mm1 /* copy of Variance 1*/
+
+ movq mm6, mm5 /* copy of Variance 2*/
+ psubw mm1, mm7 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm7 /* Variance 2 < Flimit? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ psraw mm2, 15 /* Variance 1 > 32768 ? */
+
+ psraw mm6, 15 /* Variance 2 > 32768 ? */
+ movq mm7, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance 1 < Flimit &&
+ Variance 1 < 32768 */
+ pandn mm6, mm5 /* Variance 2 < Flimit &&
+ Variance 2 < 32768 */
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm1 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm7 /* make copy of Pixel4 */
+
+ psubusw mm7, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm7, mm4 /* abs(4 - 5) */
+ psubw mm7, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm7, 15 /* FFFF/0000 for True/Flase */
+ pand mm7, mm6
+
+ /* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* now let's look at the right four columns */
+
+ add edi, 8 /* offset 8 to right 4 cols */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 (sum; each x is pixel - 128) */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 (sum of squares) */
+ /* mm4 = x5 + x6 + x7 + x8 (sum; each x is pixel - 128) */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 (sum of squares) */
+
+ psrlw mm3, 7 /* mm3 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm3 /* (sum1 + 1) */
+ paddw mm4, mm3 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance12], mm1 /* Save Variance1 */
+ movq [Variance22], mm5 /* Save Variance2 */
+
+ movq mm3, FLimitMmx /* mm3 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ psubw mm1, mm3 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm3 /* Variance 2 < Flimit? */
+ psraw mm6, 15 /* Variance 1 > 32768 */
+
+ psraw mm2, 15 /* Variance 2 > 32768 */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm0, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm1 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm0 /* make copy of Pixel4 */
+
+ psubusw mm0, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm0, mm4 /* abs(4 - 5) */
+ psubw mm0, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm0, 15 /* FFFF/0000 for True/False */
+ pand mm0, mm6
+
+ sub edi, 8 /* offset edi back */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 and mm7 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */
+ /* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */
+ /* Des[-w4]=Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with right four column */
+ /* transpose */
+ mov eax, Des /* the destination */
+ add edi, 8 /* shift edi to point x1 */
+
+ sub esi, 8 /* shift esi back to left x1 */
+ sub eax, 4
+
+ movq mm0, [esi] /* mm0 = 30 20 10 00 */
+ movq mm1, [esi+16] /* mm1 = 31 21 11 01 */
+
+ movq mm4, mm0 /* mm4 = 30 20 10 00 */
+ punpcklwd mm0, mm1 /* mm0 = 11 10 01 00 */
+
+ punpckhwd mm4, mm1 /* mm4 = 31 30 21 20 */
+ movq mm2, [esi+32] /* mm2 = 32 22 12 02 */
+
+ movq mm3, [esi+48] /* mm3 = 33 23 13 03 */
+ movq mm5, mm2 /* mm5 = 32 22 12 02 */
+
+ punpcklwd mm2, mm3 /* mm2 = 13 12 03 02 */
+ punpckhwd mm5, mm3 /* mm5 = 33 32 23 22 */
+
+ movq mm1, mm0 /* mm1 = 11 10 01 00 */
+ punpckldq mm0, mm2 /* mm0 = 03 02 01 00 */
+
+ movq [edi], mm0 /* write 00 01 02 03 */
+ punpckhdq mm1, mm2 /* mm1 = 13 12 11 10 */
+
+ movq mm0, mm4 /* mm0 = 31 30 21 20 */
+ movq [edi+16], mm1 /* write 10 11 12 13 */
+
+ punpckldq mm0, mm5 /* mm0 = 23 22 21 20 */
+ punpckhdq mm4, mm5 /* mm4 = 33 32 31 30 */
+
+ movq mm1, [esi+64] /* mm1 = 34 24 14 04 */
+ movq mm2, [esi+80] /* mm2 = 35 25 15 05 */
+
+ movq mm5, [esi+96] /* mm5 = 36 26 16 06 */
+ movq mm6, [esi+112] /* mm6 = 37 27 17 07 */
+
+ movq mm3, mm1 /* mm3 = 34 24 14 04 */
+ movq mm7, mm5 /* mm7 = 36 26 16 06 */
+
+ punpcklwd mm1, mm2 /* mm1 = 15 14 05 04 */
+ punpckhwd mm3, mm2 /* mm3 = 35 34 25 24 */
+
+ punpcklwd mm5, mm6 /* mm5 = 17 16 07 06 */
+ punpckhwd mm7, mm6 /* mm7 = 37 36 27 26 */
+
+ movq mm2, mm1 /* mm2 = 15 14 05 04 */
+ movq mm6, mm3 /* mm6 = 35 34 25 24 */
+
+ punpckldq mm1, mm5 /* mm1 = 07 06 05 04 */
+ punpckhdq mm2, mm5 /* mm2 = 17 16 15 14 */
+
+ punpckldq mm3, mm7 /* mm3 = 27 26 25 24 */
+ punpckhdq mm6, mm7 /* mm6 = 37 36 35 34 */
+
+ movq mm5, [edi] /* mm5 = 03 02 01 00 */
+ packuswb mm5, mm1 /* mm5 = 07 06 05 04 03 02 01 00 */
+
+ movq [eax], mm5 /* write 00 01 02 03 04 05 06 07 */
+ movq mm7, [edi+16] /* mm7 = 13 12 11 10 */
+
+ packuswb mm7, mm2 /* mm7 = 17 16 15 14 13 12 11 10 */
+ movq [eax+ecx], mm7 /* write 10 11 12 13 14 15 16 17 */
+
+ packuswb mm0, mm3 /* mm0 = 27 26 25 24 23 22 21 20 */
+ packuswb mm4, mm6 /* mm4 = 37 36 35 34 33 32 31 30 */
+
+ movq [eax+ecx*2], mm0 /* write 20 21 22 23 24 25 26 27 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 30 31 32 33 34 35 36 37 */
+ add edi, 8 /* move to right four column */
+ add esi, 8 /* move to right x1 */
+
+ movq mm0, [esi] /* mm0 = 70 60 50 40 */
+ movq mm1, [esi+16] /* mm1 = 71 61 51 41 */
+
+ movq mm4, mm0 /* mm4 = 70 60 50 40 */
+ punpcklwd mm0, mm1 /* mm0 = 51 50 41 40 */
+
+ punpckhwd mm4, mm1 /* mm4 = 71 70 61 60 */
+ movq mm2, [esi+32] /* mm2 = 72 62 52 42 */
+
+ movq mm3, [esi+48] /* mm3 = 73 63 53 43 */
+ movq mm5, mm2 /* mm5 = 72 62 52 42 */
+
+ punpcklwd mm2, mm3 /* mm2 = 53 52 43 42 */
+ punpckhwd mm5, mm3 /* mm5 = 73 72 63 62 */
+
+ movq mm1, mm0 /* mm1 = 51 50 41 40 */
+ punpckldq mm0, mm2 /* mm0 = 43 42 41 40 */
+
+ movq [edi], mm0 /* write 40 41 42 43 */
+ punpckhdq mm1, mm2 /* mm1 = 53 52 51 50 */
+
+ movq mm0, mm4 /* mm0 = 71 70 61 60 */
+ movq [edi+16], mm1 /* write 50 51 52 53 */
+
+ punpckldq mm0, mm5 /* mm0 = 63 62 61 60 */
+ punpckhdq mm4, mm5 /* mm4 = 73 72 71 70 */
+
+ movq mm1, [esi+64] /* mm1 = 74 64 54 44 */
+ movq mm2, [esi+80] /* mm2 = 75 65 55 45 */
+
+ movq mm5, [esi+96] /* mm5 = 76 66 56 46 */
+ movq mm6, [esi+112] /* mm6 = 77 67 57 47 */
+
+ movq mm3, mm1 /* mm3 = 74 64 54 44 */
+ movq mm7, mm5 /* mm7 = 76 66 56 46 */
+
+ punpcklwd mm1, mm2 /* mm1 = 55 54 45 44 */
+ punpckhwd mm3, mm2 /* mm3 = 75 74 65 64 */
+
+ punpcklwd mm5, mm6 /* mm5 = 57 56 47 46 */
+ punpckhwd mm7, mm6 /* mm7 = 77 76 67 66 */
+
+ movq mm2, mm1 /* mm2 = 55 54 45 44 */
+ movq mm6, mm3 /* mm6 = 75 74 65 64 */
+
+ punpckldq mm1, mm5 /* mm1 = 47 46 45 44 */
+ punpckhdq mm2, mm5 /* mm2 = 57 56 55 54 */
+
+ punpckldq mm3, mm7 /* mm3 = 67 66 65 64 */
+ punpckhdq mm6, mm7 /* mm6 = 77 76 75 74 */
+
+ movq mm5, [edi] /* mm5 = 43 42 41 40 */
+ packuswb mm5, mm1 /* mm5 = 47 46 45 44 43 42 41 40 */
+
+ movq [eax], mm5 /* write 40 41 42 43 44 45 46 47 */
+ movq mm7, [edi+16] /* mm7 = 53 52 51 50 */
+
+ packuswb mm7, mm2 /* mm7 = 57 56 55 54 53 52 51 50 */
+ movq [eax+ecx], mm7 /* write 50 51 52 53 54 55 56 57 */
+
+ packuswb mm0, mm3 /* mm0 = 67 66 65 64 63 62 61 60 */
+ packuswb mm4, mm6 /* mm4 = 77 76 75 74 73 72 71 70 */
+
+ movq [eax+ecx*2], mm0 /* write 60 61 62 63 64 65 66 67 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 70 71 72 73 74 75 76 77 */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+ }
+
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+ pbi->FragmentVariances[CurrentFrag + 1] += Var2;
+ }
+ CurrentFrag ++;
+ Src += 8;
+ Des += 8;
+ }
+
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockNonFilteredBand_MMX
+ *
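+ * INPUTS : pbi - postprocessor instance; FrameQIndex and FragQIndex are read
+ * SrcPtr - source pixels at the position of fragment StartFrag
+ * DesPtr - destination pixels at the position of fragment StartFrag
+ * PlaneLineStep - line pitch of the plane, in bytes
+ * FragAcross - fragment count; bounds the loop and offsets the FragQIndex lookup
+ * StartFrag - index of the first fragment to process
+ * QuantScale - table indexed by a fragment's Q index to obtain QStep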
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Filter both horizontal and vertical edges in a band
+ *
+ * SPECIAL NOTES :
+ *
+ * REFERENCE :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void DeblockNonFilteredBand_MMX(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+ )
+{
+ UINT32 j;
+ UINT32 CurrentFrag=StartFrag;
+ UINT32 QStep;
+ UINT32 LoopFLimit;
+ UINT8 *Src, *Des;
+ UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short QStepMmx[4];
+short FLimitMmx[4];
+short LoopFLimitMmx[4];
+short Rows[80];
+short NewRows[64];
+short LoopFilteredValuesUp[4];
+short LoopFilteredValuesDown[4];
+
+unsigned short Variance11[4];
+unsigned short Variance12[4];
+unsigned short Variance21[4];
+unsigned short Variance22[4];
+#pragma pack()
+#else
+__declspec(align(16)) short QStepMmx[4];
+__declspec(align(16)) short FLimitMmx[4];
+__declspec(align(16)) short LoopFLimitMmx[4];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short NewRows[64];
+__declspec(align(16)) short LoopFilteredValuesUp[4];
+__declspec(align(16)) short LoopFilteredValuesDown[4];
+
+__declspec(align(16)) unsigned short Variance11[4];
+__declspec(align(16)) unsigned short Variance12[4];
+__declspec(align(16)) unsigned short Variance21[4];
+__declspec(align(16)) unsigned short Variance22[4];
+#endif
+
+ LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+ LoopFLimitMmx[0] = (INT16)LoopFLimit;
+ LoopFLimitMmx[1] = (INT16)LoopFLimit;
+ LoopFLimitMmx[2] = (INT16)LoopFLimit;
+ LoopFLimitMmx[3] = (INT16)LoopFLimit;
+
+ while(CurrentFrag < StartFrag + FragAcross )
+ {
+
+ Src=SrcPtr+8*(CurrentFrag-StartFrag);
+ Des=DesPtr+8*(CurrentFrag-StartFrag);
+
+ QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+
+
+ __asm
+ {
+
+ push eax
+
+ push ebp
+
+ push ecx
+
+ push edx
+
+ push esi
+
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+ /* Copy the data to the intermediate buffer */
+ mov eax, QStep
+ xor edx, edx /* clear edx */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ pcmpeqw mm6, mm6
+
+
+ movd mm5, eax
+ mov eax, Src /* eax = Src */
+
+ psrlw mm6, 14 /* mm6 = 3, 3, 3, 3*/
+ punpcklwd mm5, mm5
+
+ lea esi, NewRows /* esi = NewRows */
+ punpckldq mm5, mm5
+
+ sub edx, ecx /* edx = - Pitch */
+ pmullw mm6, mm5 /* Qstep * 3 */
+
+ movq QStepMmx, mm5
+ lea edi, Rows /* edi = Rows */
+
+ pxor mm7, mm7 /* Clear mm7 */
+ pmullw mm6, mm5
+
+ lea eax, [eax + edx * 4 ] /* eax = Src - 4*Pitch */
+ movq mm0, [eax + edx] /* mm0 = Src[-5*Pitch] */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ punpcklbw mm0, mm7 /* Lower Four -5 */
+
+ psrlw mm6, 5
+ movq [FLimitMmx], mm6
+
+ movq mm2, [eax] /* mm2 = Src[-4*Pitch] */
+ punpckhbw mm1, mm7 /* Higher Four -5 */
+
+ movq mm3, mm2 /* mm3 = mm2 */
+ punpcklbw mm2, mm7 /* Lower Four -4 */
+
+ movq [edi], mm0 /* Write Lower Four of -5 */
+ punpckhbw mm3, mm7 /* higher Four -4 */
+
+ movq [edi+8], mm1 /* Write Higher Four of -5 */
+ movq mm4, [eax + ecx] /* mm4 = Src[-3*Pitch] */
+
+ movq [edi+16], mm2 /* Write Lower -4 */
+ movq [edi+24], mm3 /* write hight -4 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ punpcklbw mm4, mm7 /* lower four -3 */
+
+ movq mm0, [eax + ecx *2] /* mm0 = Src[-2*Pitch] */
+ punpckhbw mm5, mm7 /* higher four -3 */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ movq [edi+32], mm4 /* write Lower -3 */
+
+ punpcklbw mm0, mm7 /* lower four -2 */
+ lea eax, [eax + ecx *4] /* eax = Src */
+
+ movq [edi+40], mm5 /* write Higher -3 */
+ punpckhbw mm1, mm7 /* higher four -2 */
+
+ movq mm2, [eax + edx] /* mm2 = Src[-Pitch] */
+ movq [edi+48], mm0 /* lower -2 */
+
+ movq mm3, mm2 /* mm3 = mm2 */
+ punpcklbw mm2, mm7 /* lower -1 */
+
+ movq [edi+56], mm1 /* higher -2 */
+ punpckhbw mm3, mm7 /* Higher -1 */
+
+ movq mm4, [eax] /* mm4 = Src[0] */
+ movq [edi+64], mm2 /* Lower -1 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ movq [edi+72], mm3 /* Higher -1 */
+
+ punpcklbw mm4, mm7 /* lower 0 */
+ punpckhbw mm5, mm7 /* higher 0 */
+
+ movq mm0, [eax + ecx] /* mm0 = Src[Pitch] */
+ movq [edi+80], mm4 /* write lower 0 */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ movq [edi+88], mm5 /* write higher 0 */
+
+ punpcklbw mm0, mm7 /* lower 1 */
+ punpckhbw mm1, mm7 /* higher 1 */
+
+ movq mm2, [eax + ecx *2 ] /* mm2 = Src[2*Pitch] */
+ lea eax, [eax + ecx *4] /* eax = Src + 4 * Pitch */
+
+ movq mm3, mm2 /* mm3 = mm2 */
+ movq [edi+96], mm0 /* write lower 1 */
+
+ punpcklbw mm2, mm7 /* lower 2 */
+ punpckhbw mm3, mm7 /* higher 2 */
+
+ movq mm4, [eax + edx ] /* mm4 = Src[3*pitch] */
+ movq [edi+104], mm1 /* wirte higher 1 */
+
+ movq mm5, mm4 /* mm5 = mm4 */
+ punpcklbw mm4, mm7 /* Low 3 */
+
+ movq [edi+112], mm2 /* write lower 2 */
+ movq [edi+120], mm3 /* write higher 2 */
+
+ movq mm0, [eax] /* mm0 = Src[4*pitch] */
+ punpckhbw mm5, mm7 /* high 3 */
+
+ movq mm1, mm0 /* mm1=mm0 */
+ movq [edi+128], mm4 /* low 3 */
+
+ punpcklbw mm0, mm7 /* low 4 */
+ punpckhbw mm1, mm7 /* high 4 */
+
+ movq [edi+136], mm5 /* high 3 */
+ movq [edi+144], mm0 /* low 4 */
+
+ movq [edi+152], mm1 /* high 4 */
+
+/*
+ mov eax, Des
+ lea eax, [eax+edx*4]
+ movq mm2, [eax]
+ movq mm2, [eax+ecx]
+ movq mm2, [eax+ecx*2]
+ lea eax, [eax+ecx*4]
+ movq mm2, [eax+edx]
+ movq mm2, [eax]
+ movq mm2, [eax+ecx]
+ movq mm2, [eax+ecx*2]
+ lea eax, [eax+ecx*4]
+ movq mm2, [eax+edx]
+ movq mm2, [eax]
+
+*/
+
+ /* done with copying everything to intermediate buffer */
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */
+ /* mm7 = 0, mm3 = {128, 128, 128, 128} */
+
+
+ pcmpeqw mm3, mm3 /* mm3 = FFFFFFFFFFFFFFFF */
+ psllw mm3, 15 /* mm3 = 8000800080008000 */
+ psrlw mm3, 8 /* mm3 = 0080008000800080 */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+
+ /* mm0 = x1 + x2 + x3 + x4 */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* mm4 = x5 + x6 + x7 + x8 */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+
+ movq mm7, mm3 /* mm7 = mm3 */
+ psrlw mm7, 7 /* mm7 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm7 /* (sum1 + 1) */
+ paddw mm4, mm7 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq mm7, FLimitMmx /* mm7 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ movq [Variance11], mm1 /* Save Variance1 */
+
+ movq [Variance21], mm5 /* Save Variance2 */
+ psubw mm1, mm7 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm7 /* Variance 2 < Flimit? */
+ psraw mm2, 15 /* Variance 1 > 32768? */
+
+ psraw mm6, 15 /* Vaiance 2 > 32768? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm7, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movq mm2, mm7 /* make copy of Pixel4 */
+
+ psubusw mm7, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm7, mm4 /* abs(4 - 5) */
+ psubw mm7, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm7, 15 /* FFFF/0000 for True/Flase */
+ pand mm7, mm6
+
+ /* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* now let's look at the right four columns */
+
+ add edi, 8 /* offset 8 to right 4 cols */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* mm4 = x5 + x6 + x7 + x8 */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+
+ psrlw mm3, 7 /* mm3 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm3 /* (sum1 + 1) */
+ paddw mm4, mm3 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance12], mm1 /* Save Variance1 */
+ movq [Variance22], mm5 /* Save Variance2 */
+
+ movq mm3, FLimitMmx /* mm3 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ psubw mm1, mm3 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm3 /* Variance 2 < Flimit? */
+ psraw mm2, 15 /* Variance 1 > 32768? */
+
+ psraw mm6, 15 /* Vaiance 2 > 32768? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm0, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm0 /* make copy of Pixel4 */
+
+ psubusw mm0, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm0, mm4 /* abs(4 - 5) */
+ psubw mm0, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm0, 15 /* FFFF/0000 for True/False */
+ pand mm0, mm6
+
+ sub edi, 8 /* offset edi back */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 and mm7 now are in use */
+
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x4 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x4 */
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x5 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x5 */
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 is now in use */
+
+ /* find the loop filtered values for the pixels on block boundary */
+
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm3, FourFours /* mm3 + 4 */
+ paddw mm4, mm4 /* 2 * ( p[0] - p[-1] ) */
+
+ paddw mm3, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */
+ /* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */
+ /* Des[-w4]=Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x4 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x4 */
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x5 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x5 */
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with right four column */
+ add edi, 8 /* shift edi to point x1 */
+ sub esi, 8 /* shift esi back to x1 */
+
+ mov ebp, Des /* the destination */
+ lea ebp, [ebp + edx *4] /* point to des[-w4] */
+
+ movq mm0, [esi]
+ packuswb mm0, [esi + 8]
+
+ movq [ebp], mm0 /* write des[-w4] */
+
+ movq mm1, [esi + 16]
+ packuswb mm1, [esi + 24]
+
+ movq [ebp+ecx ], mm1 /* write des[-w3] */
+
+ movq mm2, [esi + 32]
+ packuswb mm2, [esi + 40]
+
+ movq [ebp+ecx*2 ], mm2 /* write des[-w2] */
+
+ movq mm3, [esi + 48]
+ packuswb mm3, [esi + 56]
+
+ lea ebp, [ebp+ecx*4] /* point to des[0] */
+ movq [ebp+edx], mm3 /* write des[-w1] */
+
+ movq mm0, [esi + 64]
+ packuswb mm0, [esi + 72]
+
+ movq [ebp ], mm0 /* write des[0] */
+
+ movq mm1, [esi + 80]
+ packuswb mm1, [esi + 88]
+
+ movq [ebp+ecx], mm1 /* write des[w1] */
+
+ movq mm2, [esi + 96]
+ packuswb mm2, [esi + 104]
+
+ movq [ebp+ecx*2], mm2 /* write des[w2] */
+
+ movq mm3, [esi + 112]
+ packuswb mm3, [esi + 120]
+
+ lea ebp, [ebp+ecx*2] /* point to des[w4] */
+ movq [ebp+ecx], mm3 /* write des[w3] */
+
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+
+ } /* end of the macro */
+
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+ pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+
+
+ if(CurrentFrag==StartFrag)
+ CurrentFrag++;
+ else
+ {
+
+ Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+ Src=Des;
+
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
+ for( j=0; j<8;j++)
+ {
+ Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+ Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);
+ }
+
+ __asm
+ {
+ /* Save the registers */
+ push eax
+ push ebp
+ /* Calculate the FLimit and store FLimit and QStep */
+ mov eax, QStep /* get QStep */
+ movd mm0, eax /* mm0 = 0, 0, 0, Q */
+
+ push ecx
+
+ punpcklwd mm0, mm0 /* mm0 = 0, 0, Q, Q */
+ movq mm1, FourThrees /* mm1 = 03 03 03 03 */
+
+ push edx
+
+ punpckldq mm0, mm0 /* mm0 = Q, Q, Q, Q */
+ movq QStepMmx, mm0 /* write the Q step */
+
+ push esi
+
+ pmullw mm1, mm0 /* mm1 = QStep * 3 */
+ pmullw mm1, mm0 /* mm1 = QStep * QStep * 3 */
+
+ push edi
+
+
+ psrlw mm1, 5 /* mm1 = FLimit */
+ movq [FLimitMmx], mm1 /* Save FLimit */
+
+ /* setup the pointers to data */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ sub eax, 4 /* eax = Src-4 */
+ lea esi, NewRows /* esi = NewRows */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ sub edx, ecx /* edx = -Pitch */
+
+ /* Get the data to the intermediate buffer */
+
+ movq mm0, [eax] /* mm0 = 07 06 05 04 03 02 01 00 */
+ movq mm1, [eax+ecx] /* mm1 = 17 16 15 14 13 12 11 10 */
+
+ movq mm2, [eax+ecx*2] /* mm2 = 27 26 25 24 23 22 21 20 */
+ lea eax, [eax+ecx*4] /* Go down four Rows */
+
+ movq mm3, [eax+edx] /* mm3 = 37 36 35 34 33 32 31 30 */
+ movq mm4, mm0 /* mm4 = 07 06 05 04 03 02 01 00 */
+
+ punpcklbw mm0, mm1 /* mm0 = 13 03 12 02 11 01 10 00 */
+ punpckhbw mm4, mm1 /* mm4 = 17 07 16 06 15 05 14 04 */
+
+ movq mm5, mm2 /* mm5 = 27 26 25 24 23 22 21 20 */
+ punpcklbw mm2, mm3 /* mm2 = 33 23 32 22 31 21 30 20 */
+
+ punpckhbw mm5, mm3 /* mm5 = 37 27 36 26 35 25 34 24 */
+ movq mm1, mm0 /* mm1 = 13 03 12 02 11 01 10 00 */
+
+ punpcklwd mm0, mm2 /* mm0 = 31 21 11 01 30 20 10 00 */
+ punpckhwd mm1, mm2 /* mm1 = 33 23 13 03 32 22 12 02 */
+
+ movq mm2, mm4 /* mm2 = 17 07 16 06 15 05 14 04 */
+ punpckhwd mm4, mm5 /* mm4 = 37 27 17 07 36 26 16 06 */
+
+ punpcklwd mm2, mm5 /* mm2 = 35 25 15 05 34 24 14 04 */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 30 20 10 00 */
+
+ movq [edi+16], mm0 /* write 00 10 20 30 */
+ punpckhbw mm5, mm7 /* mm5 = 31 21 11 01 */
+
+ movq mm0, mm1 /* mm0 =33 23 13 03 32 22 12 02 */
+ movq [edi+32], mm5 /* write 01 11 21 31 */
+
+ punpcklbw mm1, mm7 /* mm1 = 32 22 12 02 */
+ punpckhbw mm0, mm7 /* mm0 = 33 23 12 03 */
+
+ movq [edi+48], mm1 /* write 02 12 22 32 */
+ movq mm3, mm2 /* mm3 = 35 25 15 05 34 24 14 04 */
+
+ movq mm5, mm4 /* mm5 = 37 27 17 07 36 26 16 06 */
+ movq [edi+64], mm0 /* write 03 13 23 33 */
+
+ punpcklbw mm2, mm7 /* mm2 = 34 24 14 04 */
+ punpckhbw mm3, mm7 /* mm3 = 35 25 15 05 */
+
+ movq [edi+80], mm2 /* write 04 14 24 34 */
+ punpcklbw mm4, mm7 /* mm4 = 36 26 16 06 */
+
+ punpckhbw mm5, mm7 /* mm5 = 37 27 17 07 */
+ movq [edi+96], mm3 /* write 05 15 25 35 */
+
+ movq mm0, [eax] /* mm0 = 47 46 45 44 43 42 41 40 */
+ movq mm1, [eax + ecx ] /* mm1 = 57 56 55 54 53 52 51 50 */
+
+ movq [edi+112], mm4 /* write 06 16 26 37 */
+ movq mm2, [eax+ecx*2] /* mm2 = 67 66 65 64 63 62 61 60 */
+
+ lea eax, [eax+ ecx*4] /* Go down four rows */
+ movq [edi+128], mm5 /* write 07 17 27 37 */
+
+ movq mm4, mm0 /* mm4 = 47 46 45 44 43 42 41 40 */
+ movq mm3, [eax+edx] /* mm3 = 77 76 75 74 73 72 71 70 */
+
+ punpcklbw mm0, mm1 /* mm0 = 53 43 52 42 51 41 50 40 */
+ punpckhbw mm4, mm1 /* mm4 = 57 57 56 46 55 45 54 44 */
+
+ movq mm5, mm2 /* mm5 = 67 66 65 64 63 62 61 60 */
+ punpcklbw mm2, mm3 /* mm2 = 73 63 72 62 71 61 70 60 */
+
+ punpckhbw mm5, mm3 /* mm5 = 77 67 76 66 75 65 74 64 */
+ movq mm1, mm0 /* mm1 = 53 43 52 42 51 41 50 40 */
+
+ punpcklwd mm0, mm2 /* mm0 = 71 61 51 41 70 60 50 40 */
+ punpckhwd mm1, mm2 /* mm1 = 73 63 53 43 72 62 52 42 */
+
+ movq mm2, mm4 /* mm2 = 57 57 56 46 55 45 54 44 */
+ punpckhwd mm4, mm5 /* mm4 = 77 67 57 47 76 66 56 46 */
+
+ punpcklwd mm2, mm5 /* mm2 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 70 60 50 40 */
+
+ movq [edi+24], mm0 /* write 40 50 60 70 */
+ punpckhbw mm5, mm7 /* mm5 = 71 61 51 41 */
+
+ movq mm0, mm1 /* mm0 = 73 63 53 43 72 62 52 42 */
+ movq [edi+40], mm5 /* write 41 51 61 71 */
+
+ punpcklbw mm1, mm7 /* mm1 = 72 62 52 42 */
+ punpckhbw mm0, mm7 /* mm0 = 73 63 53 43 */
+
+ movq [edi+56], mm1 /* write 42 52 62 72 */
+ movq mm3, mm2 /* mm3 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm4 /* mm5 = 77 67 57 47 76 66 56 46 */
+ movq [edi+72], mm0 /* write 43 53 63 73 */
+
+ punpcklbw mm2, mm7 /* mm2 = 74 64 54 44 */
+ punpckhbw mm3, mm7 /* mm3 = 75 65 55 45 */
+
+ movq [edi+88], mm2 /* write 44 54 64 74 */
+ punpcklbw mm4, mm7 /* mm4 = 76 66 56 46 */
+
+ punpckhbw mm5, mm7 /* mm5 = 77 67 57 47 */
+ movq [edi+104], mm3 /* write 45 55 65 75 */
+
+ movq [edi+120], mm4 /* write 46 56 66 76 */
+ movq [edi+136], mm5 /* write 47 57 67 77 */
+
+
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */
+ /* mm7 = 0, mm3 = {128, 128, 128, 128} */
+
+ pcmpeqw mm3, mm3 /* mm3 = FFFFFFFFFFFFFFFF */
+ psllw mm3, 15 /* mm3 = 8000800080008000 */
+ psrlw mm3, 8 /* mm3 = 0080008000800080 */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* mm4 = x5 + x6 + x7 + x8 */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+
+ movq mm7, mm3 /* mm7 = mm3 */
+ psrlw mm7, 7 /* mm7 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm7 /* (sum1 + 1) */
+ paddw mm4, mm7 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance11], mm1 /* Save Variance1 */
+ movq [Variance21], mm5 /* Save Variance2 */
+
+ movq mm7, FLimitMmx /* mm7 = FLimit */
+ movq mm2, mm1 /* copy of Variance 1*/
+
+ movq mm6, mm5 /* copy of Variance 2*/
+ psubw mm1, mm7 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm7 /* Variance 2 < Flimit? */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ psraw mm2, 15 /* Variance 1 > 32768 ? */
+
+ psraw mm6, 15 /* Variance 2 > 32768 ? */
+ movq mm7, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance 1 < Flimit &&
+ Variance 1 < 32768 */
+ pandn mm6, mm5 /* Variance 2 < Flimit &&
+ Variance 2 < 32768 */
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm1 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm7 /* make copy of Pixel4 */
+
+ psubusw mm7, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm7, mm4 /* abs(4 - 5) */
+ psubw mm7, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm7, 15 /* FFFF/0000 for True/Flase */
+ pand mm7, mm6
+
+ /* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* now let's look at the right four columns */
+
+ add edi, 8 /* offset 8 to right 4 cols */
+
+ movq mm2, [edi+16] /* Pixel 1 */
+ movq mm6, [edi+80] /* Pixel 5 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ movq mm0, mm2 /* mm0 = pixel 1 */
+ movq mm4, mm6 /* mm4 = pixel 5 */
+
+ pmullw mm2, mm2 /* mm2 = pixel1 * pixel1 */
+ pmullw mm6, mm6 /* mm6 = pixel5 * pixel5 */
+
+ movq mm1, mm2 /* mm1 = pixel1^2 */
+ movq mm5, mm6 /* mm5 = pixel5^2 */
+
+ movq mm2, [edi+32] /* Pixel 2 */
+ movq mm6, [edi+96] /* Pixel 6 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 2 */
+ paddw mm4, mm6 /* mm4 += pixel 6 */
+
+ pmullw mm2, mm2 /* mm2 = pixel2^2 */
+ pmullw mm6, mm6 /* mm6 = pixel6^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel2^2 */
+ paddw mm5, mm6 /* mm5 += pixel6^2 */
+
+ movq mm2, [edi+48] /* Pixel 3 */
+ movq mm6, [edi+112] /* Pixel 7 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 3 */
+ paddw mm4, mm6 /* mm4 += pixel 7 */
+
+ pmullw mm2, mm2 /* mm2 = pixel3^2 */
+ pmullw mm6, mm6 /* mm6 = pixel7^2 */
+
+ paddw mm1, mm2 /* mm1 += pixel3^2 */
+ paddw mm5, mm6 /* mm5 += pixel7^2 */
+
+ movq mm2, [edi+64] /* Pixel 4 */
+ movq mm6, [edi+128] /* Pixel 8 */
+
+ psubw mm2, mm3 /* mm2 -=128 */
+ psubw mm6, mm3 /* mm6 -=128 */
+
+ paddw mm0, mm2 /* mm0 += pixel 4 */
+ paddw mm4, mm6 /* mm4 += pixel 8 */
+
+ pmullw mm2, mm2 /* mm2 = pixel4^2 */
+ pmullw mm6, mm6 /* mm6 = pixel8^2 */
+
+ paddw mm1, mm2 /* mm1 = pixel4^2 */
+ paddw mm5, mm6 /* mm5 = pixel8^2 */
+
+ /* mm0 = x1 + x2 + x3 + x4 */
+ /* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* mm4 = x5 + x6 + x7 + x8 */
+ /* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+
+ psrlw mm3, 7 /* mm3 = 0001000100010001 */
+
+ movq mm2, mm0 /* make copy of sum1 */
+ movq mm6, mm4 /* make copy of sum2 */
+
+ paddw mm0, mm3 /* (sum1 + 1) */
+ paddw mm4, mm3 /* (sum2 + 1) */
+
+ psraw mm2, 1 /* sum1 /2 */
+ psraw mm6, 1 /* sum2 /2 */
+
+ psraw mm0, 1 /* (sum1 + 1)/2 */
+ psraw mm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw mm2, mm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw mm6, mm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw mm1, mm2 /* Variance 1 */
+ psubw mm5, mm6 /* Variance 2 */
+
+ movq [Variance12], mm1 /* Save Variance1 */
+ movq [Variance22], mm5 /* Save Variance2 */
+
+ movq mm3, FLimitMmx /* mm3 = FLimit */
+ movq mm2, mm1 /* copy of Varinace 1*/
+
+ movq mm6, mm5 /* Variance 2 */
+ psubw mm1, mm3 /* Variance 1 < Flimit? */
+
+ psubw mm5, mm3 /* Variance 2 < Flimit? */
+ psraw mm6, 15 /* Variance 1 > 32768 */
+
+ psraw mm2, 15 /* Variance 2 > 32768 */
+ psraw mm1, 15 /* FFFF/0000 for true/false */
+
+ psraw mm5, 15 /* FFFF/0000 for true/false */
+ movq mm0, [edi+64] /* mm0 = Pixel 4 */
+
+ pandn mm2, mm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn mm6, mm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movq mm4, [edi+80] /* mm4 = Pixel 5 */
+ pand mm6, mm2 /* mm1 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+ movq mm2, mm0 /* make copy of Pixel4 */
+
+ psubusw mm0, mm4 /* 4 - 5 */
+ psubusw mm4, mm2 /* 5 - 4 */
+
+ por mm0, mm4 /* abs(4 - 5) */
+ psubw mm0, QStepMmx /* abs(4-5)<QStepMmx ? */
+
+ psraw mm0, 15 /* FFFF/0000 for True/False */
+ pand mm0, mm6
+
+ sub edi, 8 /* offset edi back */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 and mm7 now are in use */
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x4 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x4 */
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x5 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x5 */
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 now are in use */
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movq mm5, [edi] /* mm5 = -5 */
+ movq mm4, [edi + 16] /* mm4 = -4 */
+
+ movq mm3, mm4 /* copy of -4 */
+ movq mm6, mm5 /* copy of -5 */
+
+ psubusw mm4, mm6 /* mm4 = [-4] - [-5] */
+ psubusw mm5, mm3 /* mm5 = [-5] - [-4] */
+
+ por mm4, mm5 /* abs([-4]-[-5] ) */
+ psubw mm4, QStepMmx /* abs([-4]-[-5] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm1, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm1, mm3 /* */
+
+ por mm1, mm4 /* mm1 = p1 */
+
+ /* now find P2 */
+
+ movq mm4, [edi+128] /* mm4 = [3] */
+ movq mm5, [edi+144] /* mm5 = [4] */
+
+ movq mm3, mm4 /* copy of 3 */
+ movq mm6, mm5 /* copy of 4 */
+
+ psubusw mm4, mm6 /* mm4 = [3] - [4] */
+ psubusw mm5, mm3 /* mm5 = [4] - [3] */
+
+ por mm4, mm5 /* abs([3]-[4] ) */
+ psubw mm4, QStepMmx /* abs([3]-[4] )<QStep? */
+
+ psraw mm4, 15 /* FFFF/0000 for True/False */
+ movq mm2, mm4 /* copy of the mm4 */
+
+ pand mm4, mm6 /* */
+ pandn mm2, mm3 /* */
+
+ por mm2, mm4 /* mm2 = p2 */
+
+ /* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */
+ /* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */
+ /* Des[-w4]=Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm3, mm1 /* mm3 = p1 */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+ movq mm4, [edi+16] /* mm4 = x1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+ movq mm4, mm3 /* mm4 = mm3 */
+
+ movq mm5, [edi+16] /* mm5 = x1 */
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+
+ psllw mm4, 1 /* mm4 = (sum+x1)<<1 */
+ psubw mm4, [edi+64] /* mm4 = (sum+x1)<<1-x4 */
+
+ paddw mm4, [edi+80] /* mm4 = (sum+x1)<<1-x4+x5 */
+ psraw mm4, 4 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+80] /* mm4 =(sum+x2)<<1-x5 */
+ paddw mm4, [edi+96] /* mm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw mm4, 4 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ paddw mm4, mm4 /* mm4 <<= 1 */
+
+ psubw mm4, [edi+96] /* mm4 =(sum+x3)<<1-x6 */
+ paddw mm4, [edi+112] /* mm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw mm4, 4 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ paddw mm4, mm4 /* mm4 *=2 */
+
+ paddw mm4, mm1 /* += p1 */
+ psubw mm4, [edi+16] /* -= x1 */
+
+ psubw mm4, [edi+112] /* -= x7 */
+ paddw mm4, [edi+128] /* += x8 */
+
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x4 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x4 */
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ paddw mm4, mm4 /* mm4 *= 2 */
+
+ paddw mm4, [edi+16] /* += x1 */
+ psubw mm4, [edi+32] /* -= x2 */
+
+ psubw mm4, [edi+128] /* -= x8 */
+ paddw mm4, mm2 /* += p2 */
+
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x5 */
+ psraw mm4, 4 /* >>=4 */
+
+ psubw mm4, mm5 /* -=x5 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x5 */
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+32] /* +=x2 */
+ psubw mm4, [edi+48] /* -=x3 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x6 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x6 */
+
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+48] /* +=x3 */
+ psubw mm4, [edi+64] /* -=x4 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x7 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x7 */
+
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ paddw mm4, mm4 /* mm4 *= 2*/
+
+ paddw mm4, [edi+64] /* +=x4 */
+ psubw mm4, [edi+80] /* -=x5 */
+
+ psraw mm4, 4 /* >>=4 */
+ psubw mm4, mm5 /* -=x8 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x8 */
+
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with right four column */
+ /* transpose */
+ mov eax, Des /* the destination */
+ add edi, 8 /* shift edi to point x1 */
+
+ sub esi, 8 /* shift esi back to left x1 */
+ sub eax, 4
+
+ movq mm0, [esi] /* mm0 = 30 20 10 00 */
+ movq mm1, [esi+16] /* mm1 = 31 21 11 01 */
+
+ movq mm4, mm0 /* mm4 = 30 20 10 00 */
+ punpcklwd mm0, mm1 /* mm0 = 11 10 01 00 */
+
+ punpckhwd mm4, mm1 /* mm4 = 31 30 21 20 */
+ movq mm2, [esi+32] /* mm2 = 32 22 12 02 */
+
+ movq mm3, [esi+48] /* mm3 = 33 23 13 03 */
+ movq mm5, mm2 /* mm5 = 32 22 12 02 */
+
+ punpcklwd mm2, mm3 /* mm2 = 13 12 03 02 */
+ punpckhwd mm5, mm3 /* mm5 = 33 32 23 22 */
+
+ movq mm1, mm0 /* mm1 = 11 10 01 00 */
+ punpckldq mm0, mm2 /* mm0 = 03 02 01 00 */
+
+ movq [edi], mm0 /* write 00 01 02 03 */
+ punpckhdq mm1, mm2 /* mm1 = 13 12 11 10 */
+
+ movq mm0, mm4 /* mm0 = 31 30 21 20 */
+ movq [edi+16], mm1 /* write 10 11 12 13 */
+
+ punpckldq mm0, mm5 /* mm0 = 23 22 21 20 */
+ punpckhdq mm4, mm5 /* mm4 = 33 32 31 30 */
+
+ movq mm1, [esi+64] /* mm1 = 34 24 14 04 */
+ movq mm2, [esi+80] /* mm2 = 35 25 15 05 */
+
+ movq mm5, [esi+96] /* mm5 = 36 26 16 06 */
+ movq mm6, [esi+112] /* mm6 = 37 27 17 07 */
+
+ movq mm3, mm1 /* mm3 = 34 24 14 04 */
+ movq mm7, mm5 /* mm7 = 36 26 16 06 */
+
+ punpcklwd mm1, mm2 /* mm1 = 15 14 05 04 */
+ punpckhwd mm3, mm2 /* mm3 = 35 34 25 24 */
+
+ punpcklwd mm5, mm6 /* mm5 = 17 16 07 06 */
+ punpckhwd mm7, mm6 /* mm7 = 37 36 27 26 */
+
+ movq mm2, mm1 /* mm2 = 15 14 05 04 */
+ movq mm6, mm3 /* mm6 = 35 34 25 24 */
+
+ punpckldq mm1, mm5 /* mm1 = 07 06 05 04 */
+ punpckhdq mm2, mm5 /* mm2 = 17 16 15 14 */
+
+ punpckldq mm3, mm7 /* mm3 = 27 26 25 24 */
+ punpckhdq mm6, mm7 /* mm6 = 37 36 35 34 */
+
+ movq mm5, [edi] /* mm5 = 03 02 01 00 */
+ packuswb mm5, mm1 /* mm5 = 07 06 05 04 03 02 01 00 */
+
+ movq [eax], mm5 /* write 00 01 02 03 04 05 06 07 */
+ movq mm7, [edi+16] /* mm7 = 13 12 11 10 */
+
+ packuswb mm7, mm2 /* mm7 = 17 16 15 14 13 12 11 10 */
+ movq [eax+ecx], mm7 /* write 10 11 12 13 14 15 16 17 */
+
+ packuswb mm0, mm3 /* mm0 = 27 26 25 24 23 22 21 20 */
+ packuswb mm4, mm6 /* mm4 = 37 36 35 34 33 32 31 30 */
+
+ movq [eax+ecx*2], mm0 /* write 20 21 22 23 24 25 26 27 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 30 31 32 33 34 35 36 37 */
+ add edi, 8 /* move to right four column */
+ add esi, 8 /* move to right x1 */
+
+ movq mm0, [esi] /* mm0 = 70 60 50 40 */
+ movq mm1, [esi+16] /* mm1 = 71 61 51 41 */
+
+ movq mm4, mm0 /* mm4 = 70 60 50 40 */
+ punpcklwd mm0, mm1 /* mm0 = 51 50 41 40 */
+
+ punpckhwd mm4, mm1 /* mm4 = 71 70 61 60 */
+ movq mm2, [esi+32] /* mm2 = 72 62 52 42 */
+
+ movq mm3, [esi+48] /* mm3 = 73 63 53 43 */
+ movq mm5, mm2 /* mm5 = 72 62 52 42 */
+
+ punpcklwd mm2, mm3 /* mm2 = 53 52 43 42 */
+ punpckhwd mm5, mm3 /* mm5 = 73 72 63 62 */
+
+ movq mm1, mm0 /* mm1 = 51 50 41 40 */
+ punpckldq mm0, mm2 /* mm0 = 43 42 41 40 */
+
+ movq [edi], mm0 /* write 40 41 42 43 */
+ punpckhdq mm1, mm2 /* mm1 = 53 52 51 50 */
+
+ movq mm0, mm4 /* mm0 = 71 70 61 60 */
+ movq [edi+16], mm1 /* write 50 51 52 53 */
+
+ punpckldq mm0, mm5 /* mm0 = 63 62 61 60 */
+ punpckhdq mm4, mm5 /* mm4 = 73 72 71 70 */
+
+ movq mm1, [esi+64] /* mm1 = 74 64 54 44 */
+ movq mm2, [esi+80] /* mm2 = 75 65 55 45 */
+
+ movq mm5, [esi+96] /* mm5 = 76 66 56 46 */
+ movq mm6, [esi+112] /* mm6 = 77 67 57 47 */
+
+ movq mm3, mm1 /* mm3 = 74 64 54 44 */
+ movq mm7, mm5 /* mm7 = 76 66 56 46 */
+
+ punpcklwd mm1, mm2 /* mm1 = 55 54 45 44 */
+ punpckhwd mm3, mm2 /* mm3 = 75 74 65 64 */
+
+ punpcklwd mm5, mm6 /* mm5 = 57 56 47 46 */
+ punpckhwd mm7, mm6 /* mm7 = 77 76 67 66 */
+
+ movq mm2, mm1 /* mm2 = 55 54 45 44 */
+ movq mm6, mm3 /* mm6 = 75 74 65 64 */
+
+ punpckldq mm1, mm5 /* mm1 = 47 46 45 44 */
+ punpckhdq mm2, mm5 /* mm2 = 57 56 55 54 */
+
+ punpckldq mm3, mm7 /* mm3 = 67 66 65 64 */
+ punpckhdq mm6, mm7 /* mm6 = 77 76 75 74 */
+
+ movq mm5, [edi] /* mm5 = 43 42 41 40 */
+ packuswb mm5, mm1 /* mm5 = 47 46 45 44 43 42 41 40 */
+
+ movq [eax], mm5 /* write 40 41 42 43 44 45 46 47 */
+ movq mm7, [edi+16] /* mm7 = 53 52 51 50 */
+
+ packuswb mm7, mm2 /* mm7 = 57 56 55 54 53 52 51 50 */
+ movq [eax+ecx], mm7 /* write 50 51 52 53 54 55 56 57 */
+
+ packuswb mm0, mm3 /* mm0 = 67 66 65 64 63 62 61 60 */
+ packuswb mm4, mm6 /* mm4 = 77 76 75 74 73 72 71 70 */
+
+ movq [eax+ecx*2], mm0 /* write 60 61 62 63 64 65 66 67 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 70 71 72 73 74 75 76 77 */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+ }//__asm
+
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+ pbi->FragmentVariances[CurrentFrag-1] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+ pbi->FragmentVariances[CurrentFrag] += Var2;
+
+ CurrentFrag ++;
+
+ }//else
+
+ }//while
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockNonFilteredBandNewFilter_MMX
+ *
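+ * INPUTS : POSTPROC_INSTANCE *pbi : Post-processor instance; pbi->FrameQIndex
+ * selects the QStep and loop-filter limit.
+ * UINT8 *SrcPtr : Source pointer for the first fragment of
+ * the band (advanced 8 bytes per fragment).
+ * UINT8 *DesPtr : Destination pointer for the first fragment
+ * of the band (advanced 8 bytes per fragment).
+ * UINT32 PlaneLineStep : Line pitch of the plane, in bytes.
+ * UINT32 FragAcross : Number of fragments to process.
+ * UINT32 StartFrag : Index of the first fragment to process.
+ * UINT32 *QuantScale : Table indexed by pbi->FrameQIndex giving
+ * the QStep value.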
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Filter both horizontal and vertical edge in a band
+ *
+ * SPECIAL NOTES : Using Sum of abs to determine where to apply the
+ * new 7 tap filter
+ *
+ * REFERENCE :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+ void DeblockNonFilteredBandNewFilter_MMX(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+ )
+{
+ UINT32 j;
+ UINT32 CurrentFrag=StartFrag;
+ UINT32 QStep;
+ UINT32 LoopFLimit;
+ UINT8 *Src, *Des;
+
+#if defined(_WIN32_WCE)
+ #pragma pack(16)
+ short QStepMmx[4];
+ short FLimitMmx[4];
+ short LoopFLimitMmx[4];
+ short Rows[80];
+ short NewRows[64];
+ short LoopFilteredValuesUp[4];
+ short LoopFilteredValuesDown[4];
+ unsigned char Variance11[8];
+ unsigned char Variance21[8];
+ UINT32 Var1, Var2;
+ #pragma pack()
+#else
+ __declspec(align(16)) short QStepMmx[4];
+ __declspec(align(16)) short FLimitMmx[4];
+ __declspec(align(16)) short LoopFLimitMmx[4];
+ __declspec(align(16)) short Rows[80];
+ __declspec(align(16)) short NewRows[64];
+ __declspec(align(16)) short LoopFilteredValuesUp[4];
+ __declspec(align(16)) short LoopFilteredValuesDown[4];
+ __declspec(align(16)) unsigned char Variance11[8];
+ __declspec(align(16)) unsigned char Variance21[8];
+ UINT32 Var1, Var2;
+#endif
+
+
+ QStep = QuantScale[pbi->FrameQIndex];
+ QStepMmx[0] = (INT16)QStep;
+ QStepMmx[1] = (INT16)QStep;
+ QStepMmx[2] = (INT16)QStep;
+ QStepMmx[3] = (INT16)QStep;
+ LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+ LoopFLimitMmx[0] = (INT16)LoopFLimit;
+ LoopFLimitMmx[1] = (INT16)LoopFLimit;
+ LoopFLimitMmx[2] = (INT16)LoopFLimit;
+ LoopFLimitMmx[3] = (INT16)LoopFLimit;
+
+ while(CurrentFrag < StartFrag + FragAcross )
+ {
+
+ Src=SrcPtr+8*(CurrentFrag-StartFrag);
+ Des=DesPtr+8*(CurrentFrag-StartFrag);
+ __asm
+ {
+
+ push eax
+
+ push ebp
+
+ push ecx
+
+ push edx
+
+ push esi
+
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+ /* Copy the data to the intermediate buffer */
+ mov eax, QStep
+ xor edx, edx /* clear edx */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ movd mm5, eax
+
+ mov eax, Src /* eax = Src */
+ punpcklwd mm5, mm5
+
+ lea esi, NewRows /* esi = NewRows */
+ punpckldq mm5, mm5
+
+ sub edx, ecx /* edx = - Pitch */
+ movq mm6, mm5 /* Q Q Q Q */
+
+ paddw mm6, mm5
+ paddw mm6, mm5 /* 3Q3Q3Q3Q */
+
+ packuswb mm5, mm5 /* QQQQQQQQ */
+ movq QStepMmx, mm5
+
+ psraw mm6, 2 /* F F F F */
+ packuswb mm6, mm6 /* FFFFFFFF */
+
+ lea edi, Rows /* edi = Rows */
+ pxor mm7, mm7 /* Clear mm7 */
+
+ psubb mm6, Eight128c /* Eight (F-128)s */
+
+ lea eax, [eax + edx * 4 ] /* eax = Src - 4*Pitch */
+ movq mm0, [eax + edx] /* mm0 = Src[-5*Pitch] */
+
+ movq mm1, mm0 /* mm1 = mm0 */
+ punpcklbw mm0, mm7 /* Lower Four -5 */
+
+ movq mm4, mm1 /* mm4 = Src[-5*Pitch] */
+ movq [FLimitMmx], mm6 /* FFFF FFFF */
+
+ movq mm2, [eax] /* mm2 = Src[-4*Pitch] */
+ punpckhbw mm1, mm7 /* Higher Four -5 */
+
+ movq [edi], mm0 /* Write Lower Four of -5 */
+ movq mm5, mm2 /* mm5 = S_4 */
+
+ movq mm3, mm2 /* mm3 = S_4 */
+ movq [edi+8], mm1 /* Write Higher Four of -5 */
+
+ movq mm0, [eax + ecx] /* mm0 = Src[-3*Pitch] */
+ psubusb mm5, mm4 /* S_4 - S_5 */
+
+ psubusb mm4, mm2 /* S_5 - S_4 */
+ punpcklbw mm2, mm7 /* Lower Four -4 */
+
+ por mm4, mm5 /* abs(S_4-S_5) */
+ movq [edi+16], mm2 /* Write Lower -4 */
+
+ movq mm6, mm3 /* mm6 = S_4 */
+ punpckhbw mm3, mm7 /* higher Four -4 */
+
+ movq [edi+24], mm3 /* write hight -4 */
+ movq mm1, mm0 /* mm1 = S_3 */
+
+ punpcklbw mm0, mm7 /* lower four -3 */
+ movq [edi+32], mm0 /* write Lower -3 */
+
+ movq mm2, [eax + ecx *2] /* mm2 = Src[-2*Pitch] */
+ movq mm5, mm1 /* mm5 = S_3 */
+
+ psubusb mm5, mm6 /* S_3 - S_4 */
+ psubusb mm6, mm1 /* S_4 - S_3 */
+
+ por mm5, mm6 /* abs(S_4-S_3) */
+ movq mm6, mm1 /* mm6 = S_3 */
+
+ punpckhbw mm1, mm7 /* higher four -3 */
+ movq mm3, mm2 /* mm3 = S_2 */
+
+ movq [edi+40], mm1 /* write Higher -3 */
+ paddusb mm4, mm5 /* abs(S_5-S_4)+abs(S_4-S_3) */
+
+ movq mm5, mm2 /* mm5 = S_2 */
+ psubusb mm5, mm6 /* S_2 - S_3 */
+
+ psubusb mm6, mm2 /* S_3 - S_2 */
+ por mm5, mm6 /* abs(S_3 - S_2) */
+
+ movq mm6, mm2 /* mm6 = S_2 */
+
+ punpcklbw mm2, mm7 /* lower four -2 */
+ lea eax, [eax + ecx *4] /* eax = Src */
+
+ punpckhbw mm3, mm7 /* higher four -2 */
+
+ movq mm0, [eax + edx] /* mm2 = Src[-Pitch] */
+ movq [edi+48], mm2 /* lower -2 */
+
+ paddusb mm4, mm5 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2) */
+ movq mm5, mm0 /* mm5 = S_1 */
+
+ movq [edi+56], mm3 /* higher -2 */
+ movq mm1, mm0 /* mm1 = S_1 */
+
+ psubusb mm5, mm6 /* S_1 - S_2 */
+ psubusb mm6, mm1 /* S_2 - S_1 */
+
+ punpcklbw mm0, mm7 /* lower -1 */
+ por mm5, mm6 /* abs(S_2 - S_1) */
+
+ movq [edi+64], mm0 /* Lower -1 */
+ movq mm6, mm1 /* mm6 = S_1 */
+
+ punpckhbw mm1, mm7 /* Higher -1 */
+ movq [edi+72], mm1 /* Higher -1 */
+
+ movq mm0, [eax] /* mm0 = Src[0] */
+ paddusb mm4, mm5 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) */
+
+ movq [Variance11], mm4; /* save the variance */
+
+ movq mm5, FLimitMmx /* mm5 = FFFF FFFF */
+ psubb mm4, Eight128c /* abs(..) - 128 */
+
+ pcmpgtb mm5, mm4 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) < FLimit ? */
+
+ movq mm1, mm0 /* mm1 = S0 */
+ punpcklbw mm0, mm7 /* lower 0 */
+
+ movq mm4, mm1 /* mm4 = S0 */
+ movq [edi+80], mm0 /* write lower 0 */
+
+ psubusb mm4, mm6 /* S0 - S_1 */
+ psubusb mm6, mm1 /* S_1 - S0 */
+
+ movq mm0, [eax + ecx] /* mm0 = Src[Pitch] */
+ movq mm3, QStepMmx /* mm3 = QQQQQQQQQ */
+
+ por mm4, mm6 /* abs(S0 - S_1) */
+ movq mm6, mm1 /* mm6 = S0 */
+
+ psubb mm3, Eight128c /* -128 for using signed compare*/
+ psubb mm4, Eight128c /* -128 for using signed compare*/
+
+ pcmpgtb mm3, mm4 /* abs(S0-S_1) < QStep */
+ punpckhbw mm1, mm7 /* higher 0 */
+
+ movq mm4, mm0 /* mm4 = S1 */
+ pand mm5, mm3 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) < FLimit &&
+ abs(S0-S_1) < QStep */
+
+ movq [edi+88], mm1 /* write higher 0 */
+
+ movq mm1, mm0 /* mm1 = S1 */
+ psubusb mm4, mm6 /* S1 - S0 */
+
+ punpcklbw mm0, mm7 /* lower 1 */
+ psubusb mm6, mm1 /* S0 - S1 */
+
+ movq [edi+96], mm0 /* write lower 1 */
+ por mm4, mm6 /* mm4 = abs(S1-S0) */
+
+ movq mm2, [eax + ecx *2 ] /* mm2 = Src[2*Pitch] */
+ movq mm6, mm1 /* mm6 = S1 */
+
+ lea eax, [eax + ecx *4] /* eax = Src + 4 * Pitch */
+ punpckhbw mm1, mm7 /* higher 1 */
+
+
+ movq mm0, mm2 /* mm0 = S2 */
+ movq [edi+104], mm1 /* wirte higher 1 */
+
+
+ movq mm3, mm0 /* mm3 = S2 */
+ movq mm1, [eax + edx ] /* mm4 = Src[3*pitch] */
+
+ punpcklbw mm2, mm7 /* lower 2 */
+ psubusb mm3, mm6 /* S2 - S1 */
+
+ psubusb mm6, mm0 /* S1 - S2 */
+ por mm3, mm6 /* abs(S1-S2) */
+
+ movq [edi+112], mm2 /* write lower 2 */
+ movq mm6, mm0 /* mm6 = S2 */
+
+ punpckhbw mm0, mm7 /* higher 2 */
+ paddusb mm4, mm3 /* abs(S0-S1)+abs(S1-S2) */
+
+ movq mm2, mm1 /* mm2 = S3 */
+ movq mm3, mm1 /* mm3 = S3 */
+
+ movq [edi+120], mm0 /* write higher 2 */
+ punpcklbw mm1, mm7 /* Low 3 */
+
+ movq mm0, [eax] /* mm0 = Src[4*pitch] */
+ psubusb mm3, mm6 /* S3 - S2 */
+
+ psubusb mm6, mm2 /* S2 - S3 */
+ por mm3, mm6 /* abs(S2-S3) */
+
+ movq [edi+128], mm1 /* low 3 */
+ movq mm6, mm2 /* mm6 = S3 */
+
+ punpckhbw mm2, mm7 /* high 3 */
+ paddusb mm4, mm3 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3) */
+
+
+ movq mm1, mm0 /* mm1 = S4 */
+ movq mm3, mm0 /* mm3 = S4 */
+
+ movq [edi+136], mm2 /* high 3 */
+ punpcklbw mm0, mm7 /* low 4 */
+
+ psubusb mm3, mm6 /* S4 - S3 */
+ movq [edi+144], mm0 /* low 4 */
+
+ psubusb mm6, mm1 /* S3 - S4 */
+ por mm3, mm6 /* abs(S3-S4) */
+
+ punpckhbw mm1, mm7 /* high 4 */
+ paddusb mm4, mm3 /* abs((S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4) */
+
+ movq [Variance21], mm4; /* save the variance */
+
+ movq mm6, FLimitMmx /* mm6 = FFFFFFFFF */
+ psubb mm4, Eight128c /* abs(..) - 128 */
+
+ movq [edi+152], mm1 /* high 4 */
+
+ pcmpgtb mm6, mm4 /* abs((S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4)<FLimit? */
+ pand mm6, mm5 /* Flag */
+
+ /* done with copying everything to intermediate buffer */
+ /* mm7 = 0, mm6 = Flag */
+ movq mm0, mm6
+ movq mm7, mm6
+
+ punpckhbw mm0, mm6
+ punpcklbw mm7, mm6
+
+ /* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* mm0 and mm7 now are in use */
+
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = Src[-5] */
+ /* p2 = Src[+4] */
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+
+ movq mm3, [edi] /* mm3 = [-5] */
+ movq mm2, [edi+144] /* mm2 = [4] */
+
+ movq mm1, mm3 /* p1 = [-4] */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ movq mm4, [edi+16] /* mm4 = x1 */
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+
+ /* Des[-w4] = (((sum + x1) >> 3; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+ movq mm4, mm3 /* mm4 = mm3 */
+ movq mm5, [edi+16] /* mm5 = x1 */
+
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+ psraw mm4, 3 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)>>3 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ psraw mm4, 3 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])>>3 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ psraw mm4, 3 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+ psubw mm4, mm5 /* new value - old value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=(sum+x4)>>3 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)>>3 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)>>3 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x6 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x6 */
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = (sum+x7)>>3 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x7 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x7 */
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)>>3 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x8 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x8 */
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1 < Flimit && Variance 2 < Flimit && abs(4-5) < QStep */
+ /* mm0 is now in use */
+
+
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm3, FourFours /* mm3 + 4 */
+ paddw mm4, mm4 /* 2 * ( p[0] - p[-1] ) */
+
+ paddw mm3, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+
+ /* Let's do the filtering now */
+ /* p1 = Src[-5] */
+ /* p2 = Src[+4] */
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+
+ movq mm3, [edi] /* mm3 = [-5] */
+ movq mm2, [edi+144] /* mm2 = [4] */
+
+ movq mm1, mm3 /* p1 = [-4] */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ movq mm4, [edi+16] /* mm4 = x1 */
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+
+ /* newvalue = (sum + x1) >> 3; */
+ /* Des[-w4] = flag ? newvalue : Src[-w4]; */
+ /* which is computed as Src[-w4] + ( flag & ( newvalue - Src[-w4] ) ) */
+ movq mm4, mm3 /* mm4 = mm3 */
+ movq mm5, [edi+16] /* mm5 = x1 */
+
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+ psraw mm4, 3 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)>>3 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ psraw mm4, 3 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])>>3 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ psraw mm4, 3 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+ psubw mm4, mm5 /* new value - old value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1] = (sum+x4)>>3 where the flag is set, else the loop-filtered x4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)>>3 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)>>3 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x6 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x6 */
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = (sum+x7)>>3 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x7 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x7 */
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)>>3 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x8 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x8 */
+ movq [esi+112], mm4 /* write new x8 */
+
+
+ /* done with right four column */
+ add edi, 8 /* shift edi to point x1 */
+ sub esi, 8 /* shift esi back to x1 */
+
+ mov ebp, Des /* the destination */
+ lea ebp, [ebp + edx *4] /* point to des[-w4] */
+
+ movq mm0, [esi]
+ packuswb mm0, [esi + 8]
+
+ movq [ebp], mm0 /* write des[-w4] */
+
+ movq mm1, [esi + 16]
+ packuswb mm1, [esi + 24]
+
+ movq [ebp+ecx ], mm1 /* write des[-w3] */
+
+ movq mm2, [esi + 32]
+ packuswb mm2, [esi + 40]
+
+ movq [ebp+ecx*2 ], mm2 /* write des[-w2] */
+
+ movq mm3, [esi + 48]
+ packuswb mm3, [esi + 56]
+
+ lea ebp, [ebp+ecx*4] /* point to des[0] */
+ movq [ebp+edx], mm3 /* write des[-w1] */
+
+ movq mm0, [esi + 64]
+ packuswb mm0, [esi + 72]
+
+ movq [ebp ], mm0 /* write des[0] */
+
+ movq mm1, [esi + 80]
+ packuswb mm1, [esi + 88]
+
+ movq [ebp+ecx], mm1 /* write des[w1] */
+
+ movq mm2, [esi + 96]
+ packuswb mm2, [esi + 104]
+
+ movq [ebp+ecx*2], mm2 /* write des[w2] */
+
+ movq mm3, [esi + 112]
+ packuswb mm3, [esi + 120]
+
+ lea ebp, [ebp+ecx*2] /* point to des[w4] */
+ movq [ebp+ecx], mm3 /* write des[w3] */
+
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+
+ } /* end of the macro */
+
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance11[4]+ Variance11[5]+Variance11[6]+Variance11[7];
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance21[4]+ Variance21[5]+Variance21[6]+Variance21[7];
+ pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+
+ if(CurrentFrag==StartFrag)
+ CurrentFrag++;
+ else
+ {
+
+ Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+ Src=Des;
+
+ for( j=0; j<8;j++)
+ {
+ Rows[j] = (short) (Src[-5+j*PlaneLineStep]);
+ Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);
+ }
+
+ __asm
+ {
+ /* Save the registers */
+ push eax
+ push ebp
+ /* Calculate the FLimit and store FLimit and QStep */
+ mov eax, QStep /* get QStep */
+ movd mm0, eax /* mm0 = 0, 0, 0, Q */
+
+ push ecx
+
+ punpcklwd mm0, mm0 /* mm0 = 0, 0, Q, Q */
+ punpckldq mm0, mm0 /* mm0 = Q, Q, Q, Q */
+
+ push edx
+
+ movq mm1, mm0 /* mm1 = Q, Q, Q, Q */
+ paddw mm1, mm0
+
+
+ push esi
+
+ paddw mm1, mm0
+ packuswb mm0, mm0
+
+ push edi
+
+ movq QStepMmx, mm0 /* write the Q step */
+ psraw mm1, 2 /* mm1 = FLimit */
+
+ packuswb mm1, mm1 /* mm1 = FFFF FFFF */
+ psubb mm1, Eight128c /* F-128 */
+
+ movq [FLimitMmx], mm1 /* Save FLimit */
+
+ /* setup the pointers to data */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ sub eax, 4 /* eax = Src-4 */
+ lea esi, NewRows /* esi = NewRows */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ sub edx, ecx /* edx = -Pitch */
+
+ /* Get the data to the intermediate buffer */
+
+ movq mm0, [eax] /* mm0 = 07 06 05 04 03 02 01 00 */
+ movq mm1, [eax+ecx] /* mm1 = 17 16 15 14 13 12 11 10 */
+
+ movq mm2, [eax+ecx*2] /* mm2 = 27 26 25 24 23 22 21 20 */
+ lea eax, [eax+ecx*4] /* Go down four Rows */
+
+ movq mm3, [eax+edx] /* mm3 = 37 36 35 34 33 32 31 30 */
+ movq mm4, mm0 /* mm4 = 07 06 05 04 03 02 01 00 */
+
+ punpcklbw mm0, mm1 /* mm0 = 13 03 12 02 11 01 10 00 */
+ punpckhbw mm4, mm1 /* mm4 = 17 07 16 06 15 05 14 04 */
+
+ movq mm5, mm2 /* mm5 = 27 26 25 24 23 22 21 20 */
+ punpcklbw mm2, mm3 /* mm2 = 33 23 32 22 31 21 30 20 */
+
+ punpckhbw mm5, mm3 /* mm5 = 37 27 36 26 35 25 34 24 */
+ movq mm1, mm0 /* mm1 = 13 03 12 02 11 01 10 00 */
+
+ punpcklwd mm0, mm2 /* mm0 = 31 21 11 01 30 20 10 00 */
+ punpckhwd mm1, mm2 /* mm1 = 33 23 13 03 32 22 12 02 */
+
+ movq mm2, mm4 /* mm2 = 17 07 16 06 15 05 14 04 */
+ punpckhwd mm4, mm5 /* mm4 = 37 27 17 07 36 26 16 06 */
+
+ punpcklwd mm2, mm5 /* mm2 = 35 25 15 05 34 24 14 04 */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 30 20 10 00 */
+
+ movq [edi+16], mm0 /* write 00 10 20 30 */
+
+ punpckhbw mm5, mm7 /* mm5 = 31 21 11 01 */
+
+ movq mm0, mm1 /* mm0 =33 23 13 03 32 22 12 02 */
+ movq [edi+32], mm5 /* write 01 11 21 31 */
+
+ punpcklbw mm1, mm7 /* mm1 = 32 22 12 02 */
+ punpckhbw mm0, mm7 /* mm0 = 33 23 12 03 */
+
+ movq [edi+48], mm1 /* write 02 12 22 32 */
+ movq mm3, mm2 /* mm3 = 35 25 15 05 34 24 14 04 */
+
+ movq mm5, mm4 /* mm5 = 37 27 17 07 36 26 16 06 */
+ movq [edi+64], mm0 /* write 03 13 23 33 */
+
+
+ punpcklbw mm2, mm7 /* mm2 = 34 24 14 04 */
+ punpckhbw mm3, mm7 /* mm3 = 35 25 15 05 */
+
+ movq [edi+80], mm2 /* write 04 14 24 34 */
+ punpcklbw mm4, mm7 /* mm4 = 36 26 16 06 */
+
+ punpckhbw mm5, mm7 /* mm5 = 37 27 17 07 */
+ movq [edi+96], mm3 /* write 05 15 25 35 */
+
+ movq mm0, [eax] /* mm0 = 47 46 45 44 43 42 41 40 */
+ movq mm1, [eax + ecx ] /* mm1 = 57 56 55 54 53 52 51 50 */
+
+ movq [edi+112], mm4 /* write 06 16 26 37 */
+ movq mm2, [eax+ecx*2] /* mm2 = 67 66 65 64 63 62 61 60 */
+
+ lea eax, [eax+ ecx*4] /* Go down four rows */
+ movq [edi+128], mm5 /* write 07 17 27 37 */
+
+ movq mm4, mm0 /* mm4 = 47 46 45 44 43 42 41 40 */
+ movq mm3, [eax+edx] /* mm3 = 77 76 75 74 73 72 71 70 */
+
+ punpcklbw mm0, mm1 /* mm0 = 53 43 52 42 51 41 50 40 */
+ punpckhbw mm4, mm1 /* mm4 = 57 57 56 46 55 45 54 44 */
+
+ movq mm5, mm2 /* mm5 = 67 66 65 64 63 62 61 60 */
+ punpcklbw mm2, mm3 /* mm2 = 73 63 72 62 71 61 70 60 */
+
+ punpckhbw mm5, mm3 /* mm5 = 77 67 76 66 75 65 74 64 */
+ movq mm1, mm0 /* mm1 = 53 43 52 42 51 41 50 40 */
+
+ punpcklwd mm0, mm2 /* mm0 = 71 61 51 41 70 60 50 40 */
+ punpckhwd mm1, mm2 /* mm1 = 73 63 53 43 72 62 52 42 */
+
+ movq mm2, mm4 /* mm2 = 57 57 56 46 55 45 54 44 */
+ punpckhwd mm4, mm5 /* mm4 = 77 67 57 47 76 66 56 46 */
+
+ punpcklwd mm2, mm5 /* mm2 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 70 60 50 40 */
+
+ movq [edi+24], mm0 /* write 40 50 60 70 */
+ punpckhbw mm5, mm7 /* mm5 = 71 61 51 41 */
+
+ movq mm0, mm1 /* mm0 = 73 63 53 43 72 62 52 42 */
+ movq [edi+40], mm5 /* write 41 51 61 71 */
+
+ punpcklbw mm1, mm7 /* mm1 = 72 62 52 42 */
+ punpckhbw mm0, mm7 /* mm0 = 73 63 53 43 */
+
+ movq [edi+56], mm1 /* write 42 52 62 72 */
+ movq mm3, mm2 /* mm3 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm4 /* mm5 = 77 67 57 47 76 66 56 46 */
+ movq [edi+72], mm0 /* write 43 53 63 73 */
+
+ punpcklbw mm2, mm7 /* mm2 = 74 64 54 44 */
+ punpckhbw mm3, mm7 /* mm3 = 75 65 55 45 */
+
+ movq [edi+88], mm2 /* write 44 54 64 74 */
+ punpcklbw mm4, mm7 /* mm4 = 76 66 56 46 */
+
+ punpckhbw mm5, mm7 /* mm5 = 77 67 57 47 */
+ movq [edi+104], mm3 /* write 45 55 65 75 */
+
+ movq [edi+120], mm4 /* write 46 56 66 76 */
+ movq [edi+136], mm5 /* write 47 57 67 77 */
+
+
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+
+ movq mm0, [edi] /* S_5 */
+ movq mm1, [edi+16] /* S_4 */
+
+ movq mm2, [edi+32] /* S_3 */
+ packuswb mm0, [edi+8]
+
+ packuswb mm1, [edi+24]
+ packuswb mm2, [edi+40]
+
+ movq mm3, [edi+48] /* S_2 */
+ movq mm4, [edi+64] /* S_1 */
+
+ packuswb mm3, [edi+56]
+ packuswb mm4, [edi+72]
+
+ movq mm5, mm1 /* S_4 */
+ movq mm6, mm2 /* S_3 */
+
+ psubusb mm5, mm0 /* S_4 - S_5 */
+ psubusb mm0, mm1 /* S_5 - S_4 */
+
+ por mm0, mm5 /* abs(S_5-S_4) */
+ psubusb mm6, mm1 /* S_3 - S_4 */
+
+ psubusb mm1, mm2 /* S_4 - S_3 */
+ movq mm5, mm3 /* S_2 */
+
+ por mm1, mm6 /* abs(S_4-S_3) */
+ psubusb mm5, mm2 /* S_2 - S_3 */
+
+ psubusb mm2, mm3 /* S_3 - S_2 */
+ movq mm6, mm4 /* S_1 */
+
+ por mm2, mm5 /* abs(S_3-S_2) */
+ psubusb mm6, mm3 /* S_1 - S_2 */
+
+ psubusb mm3, mm4 /* S_2 - S_1 */
+ por mm3, mm6 /* abs(S_2-S_1) */
+
+ paddusb mm0, mm1 /* abs(S_5-S_4)+abs(S_4-S_3) */
+ paddusb mm2, mm3 /* abs(S_3-S_2)+abs(S_2-S_1) */
+
+ movq mm7, FLimitMmx /* FFFFF FFFF */
+ paddusb mm0, mm2 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1) */
+
+ movq [Variance11], mm0 /* Save the variance */
+
+ movq mm6, mm4 /* S_1 */
+ psubb mm0, Eight128c /* abs(..) - 128 */
+ pcmpgtb mm7, mm0 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1)<? */
+
+ movq mm5, [edi+80] /* S0 */
+ movq mm1, [edi+96] /* S1 */
+
+ movq mm2, [edi+112] /* S2 */
+ packuswb mm5, [edi+88]
+
+ packuswb mm1, [edi+104]
+ packuswb mm2, [edi+120]
+
+ movq mm3, [edi+128] /* S3 */
+ movq mm4, [edi+144] /* S4 */
+
+ packuswb mm3, [edi+136]
+ packuswb mm4, [edi+152]
+
+ movq mm0, mm5 /* S0 */
+ psubusb mm5, mm6 /* S0-S_1 */
+
+ psubusb mm6, mm0 /* S_1-S0 */
+ por mm5, mm6 /* abs(S_1-S0) */
+
+ movq mm6, QStepMmx /* QQQQ QQQQ */
+ psubb mm5, Eight128c /* -128 for using signed compare*/
+
+ psubb mm6, Eight128c /* -128 for using signed compare*/
+ pcmpgtb mm6, mm5 /* abs(S_1-S0)<QStep? */
+
+ movq mm5, mm1 /* S1 */
+ pand mm7, mm6 /* abs(S_1-S0)<QStep &&
+ abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1)<FLimit? */
+ movq mm6, mm2 /* S2 */
+ psubusb mm5, mm0 /* S1 - S0 */
+
+ psubusb mm0, mm1 /* S0 - S1*/
+
+ por mm0, mm5 /* abs(S0-S1) */
+ psubusb mm6, mm1 /* S2 - S1 */
+
+ psubusb mm1, mm2 /* S1 - S2*/
+ movq mm5, mm3 /* S3 */
+
+ por mm1, mm6 /* abs(S1-S2) */
+ psubusb mm5, mm2 /* S3 - S2 */
+
+ psubusb mm2, mm3 /* S2 - S3 */
+ movq mm6, mm4 /* S4 */
+
+ por mm2, mm5 /* abs(S2-S3) */
+ psubusb mm6, mm3 /* S4 - S3 */
+
+ psubusb mm3, mm4 /* S3 - S4 */
+ por mm3, mm6 /* abs(S3-S4) */
+
+ paddusb mm0, mm1 /* abs(S0-S1)+abs(S1-S2) */
+ paddusb mm2, mm3 /* abs(S2-S3)+abs(S3-S4) */
+
+ movq mm6, FLimitMmx /* FFFFF FFFF */
+ paddusb mm0, mm2 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4) */
+
+ movq [Variance21], mm0 /* Save the variance */
+
+ psubb mm0, Eight128c /* abs(..) - 128 */
+ pcmpgtb mm6, mm0 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4)<FLimit */
+ pand mm6, mm7 /* Flag */
+
+ movq mm0, mm6
+ movq mm7, mm6
+
+ punpckhbw mm0, mm6
+ punpcklbw mm7, mm6
+
+ /* flag = Variance 1 < Flimit && Variance 2 < Flimit && abs(4-5) < QStep */
+ /* mm7 (low four flag bytes) and mm0 (high four flag bytes), each widened to words, are now in use */
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = Src[-5] */
+ /* p2 = Src[+4] */
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+
+ movq mm3, [edi] /* mm3 = [-5] */
+ movq mm2, [edi+144] /* mm2 = [4] */
+
+ movq mm1, mm3 /* p1 = [-4] */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ movq mm4, [edi+16] /* mm4 = x1 */
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+
+ /* newvalue = (sum + x1) >> 3; */
+ /* Des[-w4] = flag ? newvalue : Src[-w4]; */
+ /* which is computed as Src[-w4] + ( flag & ( newvalue - Src[-w4] ) ) */
+
+ movq mm4, mm3 /* mm4 = mm3 */
+ movq mm5, [edi+16] /* mm5 = x1 */
+
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+ psraw mm4, 3 /* mm4 >>=3 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)>>3 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ psraw mm4, 3 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm7 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])>>3 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ psraw mm4, 3 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+ psubw mm4, mm5 /* new value - old value */
+ pand mm4, mm7 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1] = (sum+x4)>>3 where the flag is set, else the loop-filtered x4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)>>3 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm7 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)>>3 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x6 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x6 */
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = (sum+x7)>>3 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x7 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x7 */
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)>>3 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x8 */
+ pand mm4, mm7 /* and flag */
+
+ paddw mm4, mm5 /* += x8 */
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with left four columns */
+ /* now do the right four columns */
+ add edi, 8 /* shift to right four column */
+ add esi, 8 /* shift to right four column */
+
+ /* mm0 = Variance 1 < Flimit && Variance 2 < Flimit && abs(4-5) < QStep */
+ /* mm0 is now in use */
+ /* find the loop filtered values for the pixels on block boundary */
+ movq mm1, LoopFLimitMmx; /* Get the Flimit values for loop filter */
+ movq mm3, [edi + 48] /* mm3 = x3 = p[-2] */
+
+ movq mm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movq mm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movq mm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw mm5, mm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw mm3, mm6 /* mm3 = p[-2] - p[ 1] */
+ movq mm4, mm5 /* make a copy */
+
+ paddw mm4, mm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw mm3, FourFours /* mm3 + 4 */
+
+ paddw mm5, mm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw mm3, mm5 /* Filtval before shift */
+
+ psraw mm3, 3 /* FiltVal */
+ movq mm2, mm3 /* make a copy */
+
+ psraw mm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor mm2, mm3
+
+ psubsw mm2, mm3 /* mm2 = abs(FiltVal) */
+ por mm3, FourOnes /* -1 and 1 for + and - */
+
+ movq mm4, mm1 /* make a copy of Flimit */
+ psubw mm1, mm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movq mm5, mm1 /* copy Flimit - abs(FiltVal) */
+ psraw mm1, 15 /* FFFF or 0000 */
+
+ pxor mm5, mm1
+ psubsw mm5, mm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw mm4, mm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw mm4, mm3 /* get the sign back */
+
+ movq mm1, [edi+64] /* p[-1] */
+ movq mm2, [edi+80] /* p[0] */
+
+ paddw mm1, mm4 /* p[-1] + NewFiltVal */
+ psubw mm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor mm6, mm6 /* clear mm6 */
+
+ packuswb mm1, mm1 /* clamping */
+ packuswb mm2, mm2 /* clamping */
+
+ punpcklbw mm1, mm6 /* unpack to word */
+ movq LoopFilteredValuesUp, mm1 /* save the values */
+
+ punpcklbw mm2, mm6 /* unpack to word */
+ movq LoopFilteredValuesDown, mm2 /* save the values */
+
+
+ /* Let's do the filtering now */
+ /* p1 = Src[-5] */
+ /* p2 = Src[+4] */
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+
+ movq mm3, [edi] /* mm3 = [-5] */
+ movq mm2, [edi+144] /* mm2 = [4] */
+
+ movq mm1, mm3 /* p1 = [-4] */
+ paddw mm3, mm3 /* mm3 = p1 + p1 */
+
+ movq mm4, [edi+16] /* mm4 = x1 */
+ paddw mm3, mm1 /* mm3 = p1 + p1 + p1 */
+
+ paddw mm3, [edi+32] /* mm3 = p1+p1+p1+ x2 */
+ paddw mm4, [edi+48] /* mm4 = x1+x3 */
+
+ paddw mm3, [edi+64] /* mm3 += x4 */
+ paddw mm4, FourFours /* mm4 = x1 + x3 + 4 */
+
+ paddw mm3, mm4 /* mm3 = 3*p1+x1+x2+x3+x4+4 */
+
+ /* newvalue = (sum + x1) >> 3; */
+ /* Des[-w4] = flag ? newvalue : Src[-w4]; */
+ /* which is computed as Src[-w4] + ( flag & ( newvalue - Src[-w4] ) ) */
+
+ movq mm4, mm3 /* mm4 = mm3 */
+ movq mm5, [edi+16] /* mm5 = x1 */
+
+ paddw mm4, mm5 /* mm4 = sum+x1 */
+ psraw mm4, 3 /* mm4 >>=4 */
+
+ psubw mm4, mm5 /* New Value - old Value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi], mm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)>>3 */
+
+ movq mm5, [edi+32] /* mm5= x2 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+80] /* sum=sum+x5 */
+ movq mm4, mm5 /* copy sum */
+
+ paddw mm4, mm3 /* mm4=sum+x2 */
+ psraw mm4, 3 /* mm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw mm4, mm5 /* new value - old value */
+
+ pand mm4, mm0 /* And the flag */
+ paddw mm4, mm5 /* add the old value back */
+
+ movq [esi+16], mm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])>>3 */
+
+ movq mm5, [edi+48] /* mm5= x3 */
+ psubw mm3, mm1 /* sum=sum-p1 */
+
+ paddw mm3, [edi+96] /* sum=sum+x6 */
+ movq mm4, mm5 /* copy x3 */
+
+ paddw mm4, mm3 /* mm4=sum+x3 */
+ psraw mm4, 3 /* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+ psubw mm4, mm5 /* new value - old value */
+ pand mm4, mm0 /* And the flag */
+
+ paddw mm4, mm5 /* add the old value back */
+ movq [esi+32], mm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1] = (sum+x4)>>3 where the flag is set, else the loop-filtered x4 */
+
+ movq mm5, [edi+64] /* mm5 = x4 */
+ psubw mm3, mm1 /* sum = sum-p1 */
+
+ paddw mm3, [edi+112] /* sum = sum+x7 */
+ movq mm4, mm5 /* mm4 = x4 */
+
+ paddw mm4, mm3 /* mm4 = sum + x4 */
+ movq mm5, LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x4 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x4 */
+
+ movq [esi+48], mm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)>>3 */
+
+ movq mm5, [edi+80] /* mm5 = x5 */
+ psubw mm3, [edi+16] /* sum -= x1 */
+
+ paddw mm3, [edi+128] /* sub += x8 */
+ movq mm4, mm5 /* mm4 = x5 */
+
+ paddw mm4, mm3 /* mm4= sum+x5 */
+ movq mm5, LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+ psraw mm4, 3 /* >>=4 */
+ psubw mm4, mm5 /* -=x5 */
+
+ pand mm4, mm0 /* and flag */
+ paddw mm4, mm5 /* += x5 */
+
+ movq [esi+64], mm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)>>3 */
+
+ movq mm5, [edi+96] /* mm5 = x6 */
+ psubw mm3, [edi+32] /* -= x2 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x6 */
+
+ paddw mm4, mm3 /* mm4 = sum+x6 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x6 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x6 */
+ movq [esi+80], mm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = (sum+x7)>>3 */
+
+ movq mm5, [edi+112] /* mm5 = x7 */
+ psubw mm3, [edi+48] /* -= x3 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x7 */
+
+ paddw mm4, mm3 /* mm4 = sum+x7 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x7 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x7 */
+ movq [esi+96], mm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)>>3 */
+
+ movq mm5, [edi+128] /* mm5 = x8 */
+ psubw mm3, [edi+64] /* -= x4 */
+
+ paddw mm3, mm2 /* += p2 */
+ movq mm4, mm5 /* mm4 = x8 */
+
+ paddw mm4, mm3 /* mm4 = sum+x8 */
+ psraw mm4, 3 /* >>=3 */
+
+ psubw mm4, mm5 /* -=x8 */
+ pand mm4, mm0 /* and flag */
+
+ paddw mm4, mm5 /* += x8 */
+ movq [esi+112], mm4 /* write new x8 */
+
+ /* done with right four column */
+ /* transpose */
+ mov eax, Des /* the destination */
+ add edi, 8 /* shift edi to point x1 */
+
+ sub esi, 8 /* shift esi back to left x1 */
+ sub eax, 4
+
+ movq mm0, [esi] /* mm0 = 30 20 10 00 */
+ movq mm1, [esi+16] /* mm1 = 31 21 11 01 */
+
+ movq mm4, mm0 /* mm4 = 30 20 10 00 */
+ punpcklwd mm0, mm1 /* mm0 = 11 10 01 00 */
+
+ punpckhwd mm4, mm1 /* mm4 = 31 30 21 20 */
+ movq mm2, [esi+32] /* mm2 = 32 22 12 02 */
+
+ movq mm3, [esi+48] /* mm3 = 33 23 13 03 */
+ movq mm5, mm2 /* mm5 = 32 22 12 02 */
+
+ punpcklwd mm2, mm3 /* mm2 = 13 12 03 02 */
+ punpckhwd mm5, mm3 /* mm5 = 33 32 23 22 */
+
+ movq mm1, mm0 /* mm1 = 11 10 01 00 */
+ punpckldq mm0, mm2 /* mm0 = 03 02 01 00 */
+
+ movq [edi], mm0 /* write 00 01 02 03 */
+ punpckhdq mm1, mm2 /* mm1 = 13 12 11 10 */
+
+ movq mm0, mm4 /* mm0 = 31 30 21 20 */
+ movq [edi+16], mm1 /* write 10 11 12 13 */
+
+ punpckldq mm0, mm5 /* mm0 = 23 22 21 20 */
+ punpckhdq mm4, mm5 /* mm4 = 33 32 31 30 */
+
+ movq mm1, [esi+64] /* mm1 = 34 24 14 04 */
+ movq mm2, [esi+80] /* mm2 = 35 25 15 05 */
+
+ movq mm5, [esi+96] /* mm5 = 36 26 16 06 */
+ movq mm6, [esi+112] /* mm6 = 37 27 17 07 */
+
+ movq mm3, mm1 /* mm3 = 34 24 14 04 */
+ movq mm7, mm5 /* mm7 = 36 26 16 06 */
+
+ punpcklwd mm1, mm2 /* mm1 = 15 14 05 04 */
+ punpckhwd mm3, mm2 /* mm3 = 35 34 25 24 */
+
+ punpcklwd mm5, mm6 /* mm5 = 17 16 07 06 */
+ punpckhwd mm7, mm6 /* mm7 = 37 36 27 26 */
+
+ movq mm2, mm1 /* mm2 = 15 14 05 04 */
+ movq mm6, mm3 /* mm6 = 35 34 25 24 */
+
+ punpckldq mm1, mm5 /* mm1 = 07 06 05 04 */
+ punpckhdq mm2, mm5 /* mm2 = 17 16 15 14 */
+
+ punpckldq mm3, mm7 /* mm3 = 27 26 25 24 */
+ punpckhdq mm6, mm7 /* mm6 = 37 36 35 34 */
+
+ movq mm5, [edi] /* mm5 = 03 02 01 00 */
+ packuswb mm5, mm1 /* mm5 = 07 06 05 04 03 02 01 00 */
+
+ movq [eax], mm5 /* write 00 01 02 03 04 05 06 07 */
+ movq mm7, [edi+16] /* mm7 = 13 12 11 10 */
+
+ packuswb mm7, mm2 /* mm7 = 17 16 15 14 13 12 11 10 */
+ movq [eax+ecx], mm7 /* write 10 11 12 13 14 15 16 17 */
+
+ packuswb mm0, mm3 /* mm0 = 27 26 25 24 23 22 21 20 */
+ packuswb mm4, mm6 /* mm4 = 37 36 35 34 33 32 31 30 */
+
+ movq [eax+ecx*2], mm0 /* write 20 21 22 23 24 25 26 27 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 30 31 32 33 34 35 36 37 */
+ add edi, 8 /* move to right four column */
+ add esi, 8 /* move to right x1 */
+
+ movq mm0, [esi] /* mm0 = 70 60 50 40 */
+ movq mm1, [esi+16] /* mm1 = 71 61 51 41 */
+
+ movq mm4, mm0 /* mm4 = 70 60 50 40 */
+ punpcklwd mm0, mm1 /* mm0 = 51 50 41 40 */
+
+ punpckhwd mm4, mm1 /* mm4 = 71 70 61 60 */
+ movq mm2, [esi+32] /* mm2 = 72 62 52 42 */
+
+ movq mm3, [esi+48] /* mm3 = 73 63 53 43 */
+ movq mm5, mm2 /* mm5 = 72 62 52 42 */
+
+ punpcklwd mm2, mm3 /* mm2 = 53 52 43 42 */
+ punpckhwd mm5, mm3 /* mm5 = 73 72 63 62 */
+
+ movq mm1, mm0 /* mm1 = 51 50 41 40 */
+ punpckldq mm0, mm2 /* mm0 = 43 42 41 40 */
+
+ movq [edi], mm0 /* write 40 41 42 43 */
+ punpckhdq mm1, mm2 /* mm1 = 53 52 51 50 */
+
+ movq mm0, mm4 /* mm0 = 71 70 61 60 */
+ movq [edi+16], mm1 /* write 50 51 52 53 */
+
+ punpckldq mm0, mm5 /* mm0 = 63 62 61 60 */
+ punpckhdq mm4, mm5 /* mm4 = 73 72 71 70 */
+
+ movq mm1, [esi+64] /* mm1 = 74 64 54 44 */
+ movq mm2, [esi+80] /* mm2 = 75 65 55 45 */
+
+ movq mm5, [esi+96] /* mm5 = 76 66 56 46 */
+ movq mm6, [esi+112] /* mm6 = 77 67 57 47 */
+
+ movq mm3, mm1 /* mm3 = 74 64 54 44 */
+ movq mm7, mm5 /* mm7 = 76 66 56 46 */
+
+ punpcklwd mm1, mm2 /* mm1 = 55 54 45 44 */
+ punpckhwd mm3, mm2 /* mm3 = 75 74 65 64 */
+
+ punpcklwd mm5, mm6 /* mm5 = 57 56 47 46 */
+ punpckhwd mm7, mm6 /* mm7 = 77 76 67 66 */
+
+ movq mm2, mm1 /* mm2 = 55 54 45 44 */
+ movq mm6, mm3 /* mm6 = 75 74 65 64 */
+
+ punpckldq mm1, mm5 /* mm1 = 47 46 45 44 */
+ punpckhdq mm2, mm5 /* mm2 = 57 56 55 54 */
+
+ punpckldq mm3, mm7 /* mm3 = 67 66 65 64 */
+ punpckhdq mm6, mm7 /* mm6 = 77 76 75 74 */
+
+ movq mm5, [edi] /* mm5 = 43 42 41 40 */
+ packuswb mm5, mm1 /* mm5 = 47 46 45 44 43 42 41 40 */
+
+ movq [eax], mm5 /* write 40 41 42 43 44 45 46 47 */
+ movq mm7, [edi+16] /* mm7 = 53 52 51 50 */
+
+ packuswb mm7, mm2 /* mm7 = 57 56 55 54 53 52 51 50 */
+ movq [eax+ecx], mm7 /* write 50 51 52 53 54 55 56 57 */
+
+ packuswb mm0, mm3 /* mm0 = 67 66 65 64 63 62 61 60 */
+ packuswb mm4, mm6 /* mm4 = 77 76 75 74 73 72 71 70 */
+
+ movq [eax+ecx*2], mm0 /* write 60 61 62 63 64 65 66 67 */
+ lea eax, [eax+ecx*4] /* mov forward the desPtr */
+
+ movq [eax+edx], mm4 /* write 70 71 72 73 74 75 76 77 */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebp
+ pop eax
+ }//__asm
+ Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+ Var1 += Variance11[4]+ Variance11[5]+Variance11[6]+Variance11[7];
+ pbi->FragmentVariances[CurrentFrag-1] += Var1;
+
+ Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+ Var2 += Variance21[4]+ Variance21[5]+Variance21[6]+Variance21[7];
+ pbi->FragmentVariances[CurrentFrag] += Var2;
+
+
+ CurrentFrag ++;
+ }//else
+
+ }//while
+
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : PlaneAddNoise_mmx
+ *
+ *  INPUTS        : UINT8 *Start    starting address of buffer to add gaussian
+ *                                  noise to
+ *                  UINT32 Width    width of plane
+ *                  UINT32 Height   height of plane
+ *                  INT32 Pitch     distance between subsequent lines of frame
+ *                  int q           quantizer used to determine amount of noise
+ *                                  to add
+ *
+ *  OUTPUTS       : The pixels of the plane are modified in place.
+ *
+ *  RETURNS       : void.
+ *
+ *  FUNCTION      : adds gaussian noise to a plane of pixels
+ *
+ *  SPECIAL NOTES : The noise spread is sigma = 1 + 0.8 * (63 - q) / 63, i.e.
+ *                  1.8 at q = 0 falling to 1.0 at q = 63.
+ *
+ *                  Each row is processed 8 bytes at a time and the inner
+ *                  loop always executes at least once, so up to 7 bytes
+ *                  beyond Width (8 bytes when Width is 0) are read and
+ *                  rewritten on every row.
+ *
+ *                  rand() is called 2048 times per call to build the noise
+ *                  table, plus once per row to pick a starting offset.
+ *
+ *                  The noise table is read cyclically, so rows wider than
+ *                  the table no longer read past the end of it.
+ *
+ *                  MMX state is cleared with emms on entry (before the
+ *                  floating point set-up) and again on exit.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_mmx( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+    unsigned int i;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+    unsigned char blackclamp[16];
+    unsigned char whiteclamp[16];
+    unsigned char bothclamp[16];
+#pragma pack()
+#else
+    __declspec(align(16)) unsigned char blackclamp[16];
+    __declspec(align(16)) unsigned char whiteclamp[16];
+    __declspec(align(16)) unsigned char bothclamp[16];
+#endif
+    // 256-entry lookup table (only indices 0..255 are ever read) that maps
+    // a uniform random byte onto a gaussian distributed noise value.
+    char CharDist[300];
+
+    // 2048 noise samples followed by a copy of the first 8, so that an
+    // 8 byte read starting anywhere in 0..2047 stays inside the array.
+    // No initializer: every element is written below before it is read.
+    char Rand[2048 + 8];
+
+    double sigma;
+
+    // Leave MMX mode before doing any floating point arithmetic.
+    __asm emms
+    sigma = 1 + .8*(63-q) / 63.0;
+
+    // set up a lookup table of 256 entries that matches
+    // a gaussian distribution with sigma determined by q.
+    //
+    {
+        double x;
+        int next = 0;
+        int j;
+
+        for (x = -32; x < 32; x++)
+        {
+            // number of table slots given to noise value x
+            int a = (int)(.5 + 256 * gaussian(sigma, 0, x));
+
+            // Never fill past entry 255. Nothing beyond it is read, and
+            // an unexpectedly large count (e.g. q outside 0..63 giving a
+            // very small sigma) must not overrun CharDist.
+            if (a > 256 - next)
+                a = 256 - next;
+
+            if (a > 0)
+            {
+                for (j = 0; j < a; j++)
+                {
+                    CharDist[next + j] = (char) x;
+                }
+                next += a;
+            }
+        }
+
+        // pad whatever rounding left unassigned with "no noise"
+        for (; next < 256; next++)
+            CharDist[next] = 0;
+    }
+
+    // Draw the noise samples for this call.
+    for (i = 0; i < 2048; i++)
+    {
+        Rand[i] = CharDist[rand() & 0xff];
+    }
+
+    // Mirror the head of the table after its end for wrapped reads.
+    for (i = 0; i < 8; i++)
+    {
+        Rand[2048 + i] = Rand[i];
+    }
+
+    // CharDist is filled in ascending order of noise value, so CharDist[0]
+    // is the most negative value in the table. With c = -CharDist[0] the
+    // three saturating operations in the loop below limit every pixel to
+    // [c, 255 - c] before the noise is added.
+    // NOTE(review): this only prevents wrap-around on the white side if the
+    // largest positive noise value does not exceed c, i.e. if gaussian()
+    // is symmetric about 0 - confirm.
+    for (i = 0; i < 16; i++)
+    {
+        blackclamp[i] = -CharDist[0];
+        whiteclamp[i] = -CharDist[0];
+        bothclamp[i] = -2*CharDist[0];
+    }
+
+    for (i = 0; i < Height; i++)
+    {
+        UINT8 *Pos = Start + i * Pitch;
+        // random starting position in the noise table for this row
+        unsigned int NoiseOffset = rand() & 0xff;
+
+        __asm
+        {
+            mov ecx, [Width]
+            mov esi, Pos
+            lea edi, Rand               // edi = base of the noise table
+            mov edx, NoiseOffset        // edx = current index into the table
+            xor eax, eax                // eax = byte offset within the row
+
+        nextset:
+            movq mm1, [esi+eax]         // get the source
+
+            psubusb mm1, blackclamp     // clamp both sides so we don't outrange adding noise
+            paddusb mm1, bothclamp
+            psubusb mm1, whiteclamp
+
+            movq mm2, [edi+edx]         // get the noise for these 8 pixels
+            paddb mm1, mm2              // add it in
+            movq [esi+eax], mm1         // store the result
+
+            add eax, 8                  // move to the next 8 pixels
+            add edx, 8
+            and edx, 2047               // wrap the noise index inside the table
+
+            cmp eax, ecx
+            jl nextset
+        }
+    }
+
+    // The row loop used MMX registers: restore the FPU state for the caller.
+    __asm emms
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c
new file mode 100644
index 00000000..10ff9cee
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c
@@ -0,0 +1,2828 @@
+/****************************************************************************
+ *
+ * Module Title : DeblockwmtOpt.c
+ *
+ * Description : Optimized functions for deblocking
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.02 YWX 08-Dec-00 Configuration baseline from deblockopt.c
+ *
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+
+
+#include "postp.h"
+#include "stdlib.h"
+#include <math.h>
+
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+#if defined(_WIN32_WCE) /* Windows CE build: none of the constant tables below are defined. */
+#else /* All other builds: word (16-bit) constant tables, each aligned on a 16-byte boundary. */
+__declspec(align(16)) static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 }; /* eight words of 128 */
+__declspec(align(16)) static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64 }; /* eight words of 64 */
+__declspec(align(16)) static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3}; /* eight words of 3; loaded with movdqa as the multiplier in FLimit = (3 * QStep * QStep) >> 5 */
+__declspec(align(16)) static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4}; /* eight words of 4; the "+ 4" term of the filter sum, used as a paddw memory operand */
+__declspec(align(16)) static short Four128s[] = {128, 128, 128, 128}; /* four words of 128 */
+__declspec(align(16)) static short Four64s[] = {64, 64, 64, 64 }; /* four words of 64 */
+__declspec(align(16)) static short FourThrees[]= {3, 3, 3, 3}; /* four words of 3 */
+__declspec(align(16)) static short FourFours[]= {4, 4, 4, 4}; /* four words of 4 */
+__declspec(align(16)) static short EightOnes[]= { 1, 1, 1, 1, 1, 1, 1, 1}; /* eight words of 1 */
+#endif
+
+/****************************************************************************
+ * Explicit Imports
+ *****************************************************************************
+ */
+
+extern double gaussian(double sigma, double mu, double x);
+extern UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *
+ * ROUTINE : DeblockLoopFilteredBand_WMT
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Filter both horizontal and vertical edge in a band
+ *
+ * SPECIAL NOTES :
+ *
+ * REFERENCE :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void DeblockLoopFilteredBand_WMT(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+ )
+{
+ UINT32 j;
+ UINT32 CurrentFrag=StartFrag;
+ UINT32 QStep;
+ UINT8 *Src, *Des;
+ UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+ return;
+#else
+
+__declspec(align(16)) short QStepWMT[8];
+__declspec(align(16)) short FLimitWMT[8];
+__declspec(align(16)) short Rows[80];
+
+__declspec(align(16)) unsigned short Variance1[8];
+__declspec(align(16)) unsigned short Variance2[8];
+
+
+ Src=SrcPtr;
+ Des=DesPtr;
+
+ while(CurrentFrag < StartFrag + FragAcross )
+ {
+
+ QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+ if( QStep > 3 )
+ {
+ QStepWMT[0] = (INT16)QStep;
+ QStepWMT[1] = (INT16)QStep;
+ QStepWMT[2] = (INT16)QStep;
+ QStepWMT[3] = (INT16)QStep;
+ QStepWMT[4] = (INT16)QStep;
+ QStepWMT[5] = (INT16)QStep;
+ QStepWMT[6] = (INT16)QStep;
+ QStepWMT[7] = (INT16)QStep;
+
+ __asm
+ {
+
+ /* Save the registers */
+ push eax
+ push ecx
+ push edx
+ push esi
+ push edi
+
+
+ /* Calculate the FLimit and store FLimit and QStep */
+
+ movdqa xmm0, QStepWMT /* xmm0 = QStep */
+ movdqa xmm1, EightThrees /* mm1 = 03030303 */
+
+ pmullw xmm1, xmm0 /* mm1 = QStep * 3 */
+ pmullw xmm1, xmm0 /* mm1 = QStep * QStep * 3 */
+
+ psrlw xmm1, 5 /* mm1 = FLimit */
+ movdqa [FLimitWMT], xmm1 /* Save FLimit */
+
+ /* setup the pointers */
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ mov esi, Des /* esi = Des */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ pxor xmm7, xmm7 /* Clear xmm7 */
+
+ sub edx, ecx /* edx = -Pitch */
+
+ lea eax, [eax + edx * 4 ] /* eax = Src - 4*Pitch */
+ lea esi, [esi + edx * 2 ] /* esi = Des - 2 * Pitch */
+
+ /* Copy the data to the intermediate buffer */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[-5*Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[-4*Pitch */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi], xmm0 /* write 8 words */
+ movdqa [edi+16], xmm1 /* write 8 words */
+
+ movq xmm2, QWORD PTR [eax+ecx] /* xmm2 = Src[-3*Pitch] */
+ movq xmm3, QWORD PTR [eax+ecx*2]/* xmm3 = Src[-2*Pitch] */
+
+ punpcklbw xmm2, xmm7 /* expand to words */
+ punpcklbw xmm3, xmm7 /* expand to words */
+
+ movdqa [edi+32], xmm2 /* write 8 words */
+ movdqa [edi+48], xmm3 /* write 8 words */
+
+ lea eax, [eax+ecx*4] /* eax= Src */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[-Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[0] */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi+64], xmm0 /* write 8 words */
+ movdqa [edi+80], xmm1 /* write 8 words */
+
+ movq xmm2, QWORD PTR [eax+ecx] /* xmm2 = Src[Pitch] */
+ movq xmm3, QWORD PTR [eax+ecx*2]/* xmm3 = Src[2*Pitch] */
+
+ punpcklbw xmm2, xmm7 /* expand to words */
+ punpcklbw xmm3, xmm7 /* expand to words */
+
+ movdqa [edi+96], xmm2 /* write 8 words */
+ movdqa [edi+112], xmm3 /* write 8 words */
+
+ lea eax, [eax+ecx*4] /* eax= Src+4*Pitch */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[3*Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[4*Pitch] */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi+128], xmm0 /* write 8 words */
+ movdqa [edi+144], xmm1 /* write 8 words */
+
+
+ /* done with copying everything to intermediate buffer */
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */
+ /* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */
+
+ pcmpeqw xmm3, xmm3 /* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+ psllw xmm3, 15 /* xmm3 = 80008000800080008000800080008000 */
+ psrlw xmm3, 8 /* xmm3 = 00800080008000800080008000800080 */
+
+ movdqa xmm2, [edi+16] /* Pixel 1 */
+ movdqa xmm6, [edi+80] /* Pixel 5 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ movdqa xmm0, xmm2 /* xmm0 = pixel 1 */
+ movdqa xmm4, xmm6 /* xmm4 = pixel 5 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel1 * pixel1 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel5 * pixel5 */
+
+ movdqa xmm1, xmm2 /* xmm1 = pixel1^2 */
+ movdqa xmm5, xmm6 /* xmm5 = pixel5^2 */
+
+ movdqa xmm2, [edi+32] /* Pixel 2 */
+ movdqa xmm6, [edi+96] /* Pixel 6 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 2 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 6 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel2^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel6^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel2^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel6^2 */
+
+ movdqa xmm2, [edi+48] /* Pixel 3 */
+ movdqa xmm6, [edi+112] /* Pixel 7 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 3 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 7 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel3^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel7^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel3^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel7^2 */
+
+ movdqa xmm2, [edi+64] /* Pixel 4 */
+ movdqa xmm6, [edi+128] /* Pixel 8 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 4 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 8 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel4^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel8^2 */
+
+ paddw xmm1, xmm2 /* xmm1 = pixel4^2 */
+ paddw xmm5, xmm6 /* xmm5 = pixel8^2 */
+
+ /* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* xmm1 = x1 + x2 + x3 + x4 */
+ /* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */
+ /* xmm5 = x5 + x6 + x7 + x8 */
+
+ movdqa xmm7, xmm3 /* xmm7 = xmm3 */
+ psrlw xmm7, 7 /* xmm7 = 00010001000100010001000100010001 */
+
+ movdqa xmm2, xmm0 /* make copy of sum1 */
+ movdqa xmm6, xmm4 /* make copy of sum2 */
+
+ paddw xmm0, xmm7 /* (sum1 + 1) */
+ paddw xmm4, xmm7 /* (sum2 + 1) */
+
+ psraw xmm2, 1 /* sum1 /2 */
+ psraw xmm6, 1 /* sum2 /2 */
+
+ psraw xmm0, 1 /* (sum1 + 1)/2 */
+ psraw xmm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw xmm2, xmm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw xmm6, xmm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw xmm1, xmm2 /* Variance 1 */
+ psubw xmm5, xmm6 /* Variance 2 */
+
+ movdqa xmm7, FLimitWMT /* xmm7 = FLimit */
+ movdqa xmm2, xmm1 /* copy of Varinace 1*/
+
+ movdqa [Variance1], xmm1 /* save the varinace1 */
+ movdqa [Variance2], xmm5 /* save the varinace2 */
+
+ movdqa xmm6, xmm5 /* Variance 2 */
+ psubw xmm1, xmm7 /* Variance 1 < Flimit? */
+
+ psubw xmm5, xmm7 /* Variance 2 < Flimit? */
+ psraw xmm2, 15 /* Variance 1 > 32768? */
+
+ psraw xmm6, 15 /* Vaiance 2 > 32768? */
+ psraw xmm1, 15 /* FFFF/0000 for true/false */
+
+ psraw xmm5, 15 /* FFFF/0000 for true/false */
+ movdqa xmm7, [edi+64] /* xmm0 = Pixel 4 */
+
+ pandn xmm2, xmm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn xmm6, xmm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movdqa xmm4, [edi+80] /* xmm4 = Pixel 5 */
+ pand xmm6, xmm2 /* xmm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movdqa xmm2, xmm7 /* make copy of Pixel4 */
+
+ psubusw xmm7, xmm4 /* 4 - 5 */
+ psubusw xmm4, xmm2 /* 5 - 4 */
+
+ por xmm7, xmm4 /* abs(4 - 5) */
+ psubw xmm7, QStepWMT /* abs(4-5)<QStepxmmx ? */
+
+ psraw xmm7, 15 /* FFFF/0000 for True/Flase */
+ pand xmm7, xmm6
+
+ /* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* xmm7 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movdqa xmm5, [edi] /* xmm5 = -5 */
+ movdqa xmm4, [edi + 16] /* xmm4 = -4 */
+
+ movdqa xmm3, xmm4 /* copy of -4 */
+ movdqa xmm6, xmm5 /* copy of -5 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [-4] - [-5] */
+ psubusw xmm5, xmm3 /* xmm5 = [-5] - [-4] */
+
+ por xmm4, xmm5 /* abs([-4]-[-5] ) */
+ psubw xmm4, QStepWMT /* abs([-4]-[-5] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm1, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm1, xmm3 /* */
+
+ por xmm1, xmm4 /* xmm1 = p1 */
+
+ /* now find P2 */
+
+ movdqa xmm4, [edi+128] /* xmm4 = [3] */
+ movdqa xmm5, [edi+144] /* xmm5 = [4] */
+
+ movdqa xmm3, xmm4 /* copy of 3 */
+ movdqa xmm6, xmm5 /* copy of 4 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [3] - [4] */
+ psubusw xmm5, xmm3 /* xmm5 = [4] - [3] */
+
+ por xmm4, xmm5 /* abs([3]-[4] ) */
+ psubw xmm4, QStepWMT /* abs([3]-[4] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm2, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm2, xmm3 /* */
+
+ por xmm2, xmm4 /* xmm2 = p2 */
+
+ /* Data is ready, now do the filtering */
+
+ pxor xmm0, xmm0 /* clear xmm0 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+
+ movdqa xmm3, xmm1 /* xmm3 = p1 */
+ paddw xmm3, xmm3 /* xmm3 = p1 + p1 */
+
+ paddw xmm3, xmm1 /* xmm3 = p1 + p1 + p1 */
+ movdqa xmm4, [edi+16] /* xmm4 = x1 */
+
+ paddw xmm3, [edi+32] /* xmm3 = p1+p1+p1+ x2 */
+ paddw xmm4, [edi+48] /* xmm4 = x1+x3 */
+
+ paddw xmm3, [edi+64] /* xmm3 += x4 */
+ paddw xmm4, EightFours /* xmm4 = x1 + x3 + 4 */
+
+ paddw xmm3, xmm4 /* xmm3 = 3*p1+x1+x2+x3+x4+4 */
+ movdqa xmm4, xmm3 /* xmm4 = xmm3 */
+
+ movdqa xmm5, [edi+16] /* xmm5 = x1 */
+ paddw xmm4, xmm5 /* xmm4 = sum+x1 */
+
+ psllw xmm4, 1 /* xmm4 = (sum+x1)<<1 */
+ psubw xmm4, [edi+64] /* xmm4 = (sum+x1)<<1-x4 */
+
+ paddw xmm4, [edi+80] /* xmm4 = (sum+x1)<<1-x4+x5 */
+ psraw xmm4, 4 /* xmm4 >>=4 */
+
+ psubw xmm4, xmm5 /* New Value - old Value */
+ pand xmm4, xmm7 /* And the flag */
+
+ paddw xmm4, xmm5 /* add the old value back */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movq QWORD PTR [esi+edx*2], xmm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movdqa xmm5, [edi+32] /* xmm5= x2 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+80] /* sum=sum+x5 */
+ movdqa xmm4, xmm5 /* copy sum */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x2 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+80] /* xmm4 =(sum+x2)<<1-x5 */
+ paddw xmm4, [edi+96] /* xmm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movq QWORD PTR [esi+edx], xmm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movdqa xmm5, [edi+48] /* xmm5= x3 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+96] /* sum=sum+x6 */
+ movdqa xmm4, xmm5 /* copy x3 */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x3 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+96] /* xmm4 =(sum+x3)<<1-x6 */
+ paddw xmm4, [edi+112] /* xmm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movq QWORD PTR [esi],xmm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movdqa xmm5, [edi+64] /* xmm5 = x4 */
+ psubw xmm3, xmm1 /* sum = sum-p1 */
+
+ paddw xmm3, [edi+112] /* sum = sum+x7 */
+ movdqa xmm4, xmm5 /* xmm4 = x4 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum + x4 */
+ paddw xmm4, xmm4 /* xmm4 *=2 */
+
+ paddw xmm4, xmm1 /* += p1 */
+ psubw xmm4, [edi+16] /* -= x1 */
+
+ psubw xmm4, [edi+112] /* -= x7 */
+ paddw xmm4, [edi+128] /* += x8 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x4 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x4 */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movq QWORD PTR [esi+ecx], xmm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movdqa xmm5, [edi+80] /* xmm5 = x5 */
+ psubw xmm3, [edi+16] /* sum -= x1 */
+
+ paddw xmm3, [edi+128] /* sub += x8 */
+ movdqa xmm4, xmm5 /* xmm4 = x5 */
+
+ paddw xmm4, xmm3 /* xmm4= sum+x5 */
+ paddw xmm4, xmm4 /* xmm4 *= 2 */
+
+ paddw xmm4, [edi+16] /* += x1 */
+ psubw xmm4, [edi+32] /* -= x2 */
+
+ psubw xmm4, [edi+128] /* -= x8 */
+ paddw xmm4, xmm2 /* += p2 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x5 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x5 */
+
+ lea esi, [esi+ecx*4] /* esi=des + 2*pitch */
+ packuswb xmm4, xmm0 /* pack to bytes */
+
+ movq QWORD PTR [esi+edx*2], xmm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movdqa xmm5, [edi+96] /* xmm5 = x6 */
+ psubw xmm3, [edi+32] /* -= x2 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x6 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x6 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+32] /* +=x2 */
+ psubw xmm4, [edi+48] /* -=x3 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x6 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x6 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi+edx], xmm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movdqa xmm5, [edi+112] /* xmm5 = x7 */
+ psubw xmm3, [edi+48] /* -= x3 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x7 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x7 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+48] /* +=x3 */
+ psubw xmm4, [edi+64] /* -=x4 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x7 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x7 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi],xmm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movdqa xmm5, [edi+128] /* xmm5 = x8 */
+ psubw xmm3, [edi+64] /* -= x4 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x8 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x8 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+64] /* +=x4 */
+ psubw xmm4, [edi+80] /* -=x5 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x8 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x8 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi+ecx], xmm4 /* write new x8 */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop eax
+
+ } /* end of the macro */
+ Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+ Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+ pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+
+ }
+ else
+ {
+
+ /* copy from src to des */
+ __asm
+ {
+ push esi
+ push edi
+ push ecx
+
+ mov esi, Src /* esi = Src */
+ mov edi, Des /* edi = Des */
+
+ push edx
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ xor edx, edx /* clear edx */
+
+ sub edx, ecx /* edx = -Pitch */
+ lea esi, [esi+edx*4] /* esi=Src-4*Pitch*/
+
+ movq mm0, [esi] /* first row */
+ movq [edi+edx*4], mm0 /* write first row */
+
+ lea edi, [edi+edx*4] /* edi=Des-4*Pitch*/
+ movq mm1, [esi+ecx] /* Src-3*Pitch */
+
+ movq [edi+ecx], mm1 /* write second row */
+ movq mm2, [esi+ecx*2] /* Src-2*Pitch */
+
+ lea esi, [esi+ecx*4] /* Src */
+ movq [edi+ecx*2], mm2 /* write third row */
+
+ lea edi, [edi+ecx*4] /* Des */
+ movq mm3, [esi+edx] /* Src-Pitch */
+
+ movq [edi+edx], mm3 /* write fourth row */
+ movq mm4, [esi] /* Src */
+
+ movq mm5, [esi+ecx] /* Src+Pitch */
+ movq [edi], mm4 /* write fifth rwo */
+
+ movq mm6, [esi+ecx*2]
+ lea esi, [esi+ecx*4] /* Src+pitch*4 */
+
+ movq [edi+ecx], mm5 /* write the sixth rwo */
+ movq [edi+ecx*2], mm6 /* write the seventh row */
+
+ movq mm7, [esi+edx]
+ lea edi, [edi+ecx*4] /* Des+Pitch*4 */
+
+ movq [edi+edx], mm7 /* write the last row */
+
+ pop edx
+ pop ecx
+ pop edi
+ pop esi
+ }
+
+ }
+
+ Src += 8;
+ Des += 8;
+ CurrentFrag ++;
+ }
+
+ Des -= ((PlaneLineStep + FragAcross)<<3);
+ Des += 8;
+ Src = Des;
+
+ CurrentFrag = StartFrag ;
+
+ while(CurrentFrag < StartFrag + FragAcross - 1)
+ {
+
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+
+ if( QStep > 3 )
+ {
+ QStepWMT[0] = (INT16)QStep;
+ QStepWMT[1] = (INT16)QStep;
+ QStepWMT[2] = (INT16)QStep;
+ QStepWMT[3] = (INT16)QStep;
+ QStepWMT[4] = (INT16)QStep;
+ QStepWMT[5] = (INT16)QStep;
+ QStepWMT[6] = (INT16)QStep;
+ QStepWMT[7] = (INT16)QStep;
+
+ for( j=0; j<8;j++)
+ {
+ Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+ Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);
+ }
+
+ __asm
+ {
+ /* Save the registers */
+ push eax
+ push ecx
+ push edx
+ push esi
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+
+ movdqa xmm0, QStepWMT /* Get QStep */
+ movdqa xmm1, EightThrees /* mm1 = 03030303 */
+
+ pmullw xmm1, xmm0 /* mm1 = QStep * 3 */
+ pmullw xmm1, xmm0 /* mm1 = QStep * QStep * 3 */
+
+ psrlw xmm1, 5 /* mm1 = FLimit */
+ movdqa [FLimitWMT], xmm1 /* Save FLimit */
+
+ /* setup the pointers to data */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ mov esi, Des /* esi = Des */
+ sub eax, 4 /* eax = Src-4 */
+
+ sub esi, 4 /* esi = Des-4 */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ sub edx, ecx /* edx = -Pitch */
+
+ lea esi, [esi+ecx*2] /* esi = Des-4 + 2 * Pitch */
+
+ /* Get the data to the intermediate buffer */
+
+ movq mm0, [eax] /* mm0 = 07 06 05 04 03 02 01 00 */
+ movq mm1, [eax+ecx] /* mm1 = 17 16 15 14 13 12 11 10 */
+
+ movq mm2, [eax+ecx*2] /* mm2 = 27 26 25 24 23 22 21 20 */
+ lea eax, [eax+ecx*4] /* Go down four Rows */
+
+ movq mm3, [eax+edx] /* mm3 = 37 36 35 34 33 32 31 30 */
+ movq mm4, mm0 /* mm4 = 07 06 05 04 03 02 01 00 */
+
+ punpcklbw mm0, mm1 /* mm0 = 13 03 12 02 11 01 10 00 */
+ punpckhbw mm4, mm1 /* mm4 = 17 07 16 06 15 05 14 04 */
+
+ movq mm5, mm2 /* mm5 = 27 26 25 24 23 22 21 20 */
+ punpcklbw mm2, mm3 /* mm2 = 33 23 32 22 31 21 30 20 */
+
+ punpckhbw mm5, mm3 /* mm5 = 37 27 36 26 35 25 34 24 */
+ movq mm1, mm0 /* mm1 = 13 03 12 02 11 01 10 00 */
+
+ punpcklwd mm0, mm2 /* mm0 = 31 21 11 01 30 20 10 00 */
+ punpckhwd mm1, mm2 /* mm1 = 33 23 13 03 32 22 12 02 */
+
+ movq mm2, mm4 /* mm2 = 17 07 16 06 15 05 14 04 */
+ punpckhwd mm4, mm5 /* mm4 = 37 27 17 07 36 26 16 06 */
+
+ punpcklwd mm2, mm5 /* mm2 = 35 25 15 05 34 24 14 04 */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 30 20 10 00 */
+
+ movq [edi+16], mm0 /* write 00 10 20 30 */
+ punpckhbw mm5, mm7 /* mm5 = 31 21 11 01 */
+
+ movq mm0, mm1 /* mm0 =33 23 13 03 32 22 12 02 */
+ movq [edi+32], mm5 /* write 01 11 21 31 */
+
+ punpcklbw mm1, mm7 /* mm1 = 32 22 12 02 */
+ punpckhbw mm0, mm7 /* mm0 = 33 23 12 03 */
+
+ movq [edi+48], mm1 /* write 02 12 22 32 */
+ movq mm3, mm2 /* mm3 = 35 25 15 05 34 24 14 04 */
+
+ movq mm5, mm4 /* mm5 = 37 27 17 07 36 26 16 06 */
+ movq [edi+64], mm0 /* write 03 13 23 33 */
+
+ punpcklbw mm2, mm7 /* mm2 = 34 24 14 04 */
+ punpckhbw mm3, mm7 /* mm3 = 35 25 15 05 */
+
+ movq [edi+80], mm2 /* write 04 14 24 34 */
+ punpcklbw mm4, mm7 /* mm4 = 36 26 16 06 */
+
+ punpckhbw mm5, mm7 /* mm5 = 37 27 17 07 */
+ movq [edi+96], mm3 /* write 05 15 25 35 */
+
+ movq mm0, [eax] /* mm0 = 47 46 45 44 43 42 41 40 */
+ movq mm1, [eax + ecx ] /* mm1 = 57 56 55 54 53 52 51 50 */
+
+ movq [edi+112], mm4 /* write 06 16 26 37 */
+ movq mm2, [eax+ecx*2] /* mm2 = 67 66 65 64 63 62 61 60 */
+
+ lea eax, [eax+ ecx*4] /* Go down four rows */
+ movq [edi+128], mm5 /* write 07 17 27 37 */
+
+ movq mm4, mm0 /* mm4 = 47 46 45 44 43 42 41 40 */
+ movq mm3, [eax+edx] /* mm3 = 77 76 75 74 73 72 71 70 */
+
+ punpcklbw mm0, mm1 /* mm0 = 53 43 52 42 51 41 50 40 */
+ punpckhbw mm4, mm1 /* mm4 = 57 57 56 46 55 45 54 44 */
+
+ movq mm5, mm2 /* mm5 = 67 66 65 64 63 62 61 60 */
+ punpcklbw mm2, mm3 /* mm2 = 73 63 72 62 71 61 70 60 */
+
+ punpckhbw mm5, mm3 /* mm5 = 77 67 76 66 75 65 74 64 */
+ movq mm1, mm0 /* mm1 = 53 43 52 42 51 41 50 40 */
+
+ punpcklwd mm0, mm2 /* mm0 = 71 61 51 41 70 60 50 40 */
+ punpckhwd mm1, mm2 /* mm1 = 73 63 53 43 72 62 52 42 */
+
+ movq mm2, mm4 /* mm2 = 57 57 56 46 55 45 54 44 */
+ punpckhwd mm4, mm5 /* mm4 = 77 67 57 47 76 66 56 46 */
+
+ punpcklwd mm2, mm5 /* mm2 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 70 60 50 40 */
+
+ movq [edi+24], mm0 /* write 40 50 60 70 */
+ punpckhbw mm5, mm7 /* mm5 = 71 61 51 41 */
+
+ movq mm0, mm1 /* mm0 = 73 63 53 43 72 62 52 42 */
+ movq [edi+40], mm5 /* write 41 51 61 71 */
+
+ punpcklbw mm1, mm7 /* mm1 = 72 62 52 42 */
+ punpckhbw mm0, mm7 /* mm0 = 73 63 53 43 */
+
+ movq [edi+56], mm1 /* write 42 52 62 72 */
+ movq mm3, mm2 /* mm3 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm4 /* mm5 = 77 67 57 47 76 66 56 46 */
+ movq [edi+72], mm0 /* write 43 53 63 73 */
+
+ punpcklbw mm2, mm7 /* mm2 = 74 64 54 44 */
+ punpckhbw mm3, mm7 /* mm3 = 75 65 55 45 */
+
+ movq [edi+88], mm2 /* write 44 54 64 74 */
+ punpcklbw mm4, mm7 /* mm4 = 76 66 56 46 */
+
+ punpckhbw mm5, mm7 /* mm5 = 77 67 57 47 */
+ movq [edi+104], mm3 /* write 45 55 65 75 */
+
+ movq [edi+120], mm4 /* write 46 56 66 76 */
+ movq [edi+136], mm5 /* write 47 57 67 77 */
+
+ /* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */
+ /* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */
+
+ pcmpeqw xmm3, xmm3 /* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+ psllw xmm3, 15 /* xmm3 = 80008000800080008000800080008000 */
+ psrlw xmm3, 8 /* xmm3 = 00800080008000800080008000800080 */
+
+ movdqa xmm2, [edi+16] /* Pixel 1 */
+ movdqa xmm6, [edi+80] /* Pixel 5 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ movdqa xmm0, xmm2 /* xmm0 = pixel 1 */
+ movdqa xmm4, xmm6 /* xmm4 = pixel 5 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel1 * pixel1 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel5 * pixel5 */
+
+ movdqa xmm1, xmm2 /* xmm1 = pixel1^2 */
+ movdqa xmm5, xmm6 /* xmm5 = pixel5^2 */
+
+ movdqa xmm2, [edi+32] /* Pixel 2 */
+ movdqa xmm6, [edi+96] /* Pixel 6 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 2 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 6 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel2^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel6^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel2^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel6^2 */
+
+ movdqa xmm2, [edi+48] /* Pixel 3 */
+ movdqa xmm6, [edi+112] /* Pixel 7 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 3 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 7 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel3^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel7^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel3^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel7^2 */
+
+ movdqa xmm2, [edi+64] /* Pixel 4 */
+ movdqa xmm6, [edi+128] /* Pixel 8 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 4 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 8 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel4^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel8^2 */
+
+ paddw xmm1, xmm2 /* xmm1 = pixel4^2 */
+ paddw xmm5, xmm6 /* xmm5 = pixel8^2 */
+
+ /* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* xmm1 = x1 + x2 + x3 + x4 */
+ /* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */
+ /* xmm5 = x5 + x6 + x7 + x8 */
+
+ movdqa xmm7, xmm3 /* xmm7 = xmm3 */
+ psrlw xmm7, 7 /* xmm7 = 00010001000100010001000100010001 */
+
+ movdqa xmm2, xmm0 /* make copy of sum1 */
+ movdqa xmm6, xmm4 /* make copy of sum2 */
+
+ paddw xmm0, xmm7 /* (sum1 + 1) */
+ paddw xmm4, xmm7 /* (sum2 + 1) */
+
+ psraw xmm2, 1 /* sum1 /2 */
+ psraw xmm6, 1 /* sum2 /2 */
+
+ psraw xmm0, 1 /* (sum1 + 1)/2 */
+ psraw xmm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw xmm2, xmm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw xmm6, xmm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw xmm1, xmm2 /* Variance 1 */
+ psubw xmm5, xmm6 /* Variance 2 */
+
+ movdqa xmm7, FLimitWMT /* xmm7 = FLimit */
+ movdqa xmm2, xmm1 /* copy of Varinace 1*/
+
+ movdqa [Variance1], xmm1 /* save the varinace1 */
+ movdqa [Variance2], xmm5 /* save the varinace2 */
+
+ movdqa xmm6, xmm5 /* Variance 2 */
+ psubw xmm1, xmm7 /* Variance 1 < Flimit? */
+
+ psubw xmm5, xmm7 /* Variance 2 < Flimit? */
+ psraw xmm2, 15 /* Variance 1 > 32768? */
+
+ psraw xmm6, 15 /* Vaiance 2 > 32768? */
+ psraw xmm1, 15 /* FFFF/0000 for true/false */
+
+ psraw xmm5, 15 /* FFFF/0000 for true/false */
+ movdqa xmm7, [edi+64] /* xmm0 = Pixel 4 */
+
+ pandn xmm2, xmm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn xmm6, xmm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movdqa xmm4, [edi+80] /* xmm4 = Pixel 5 */
+ pand xmm6, xmm2 /* xmm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movdqa xmm2, xmm7 /* make copy of Pixel4 */
+
+ psubusw xmm7, xmm4 /* 4 - 5 */
+ psubusw xmm4, xmm2 /* 5 - 4 */
+
+ por xmm7, xmm4 /* abs(4 - 5) */
+ psubw xmm7, QStepWMT /* abs(4-5)<QStepxmmx ? */
+
+ psraw xmm7, 15 /* FFFF/0000 for True/Flase */
+ pand xmm7, xmm6
+
+ /* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* xmm7 now are in use */
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movdqa xmm5, [edi] /* xmm5 = -5 */
+ movdqa xmm4, [edi + 16] /* xmm4 = -4 */
+
+ movdqa xmm3, xmm4 /* copy of -4 */
+ movdqa xmm6, xmm5 /* copy of -5 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [-4] - [-5] */
+ psubusw xmm5, xmm3 /* xmm5 = [-5] - [-4] */
+
+ por xmm4, xmm5 /* abs([-4]-[-5] ) */
+ psubw xmm4, QStepWMT /* abs([-4]-[-5] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm1, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm1, xmm3 /* */
+
+ por xmm1, xmm4 /* xmm1 = p1 */
+
+ /* now find P2 */
+
+ movdqa xmm4, [edi+128] /* xmm4 = [3] */
+ movdqa xmm5, [edi+144] /* xmm5 = [4] */
+
+ movdqa xmm3, xmm4 /* copy of 3 */
+ movdqa xmm6, xmm5 /* copy of 4 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [3] - [4] */
+ psubusw xmm5, xmm3 /* xmm5 = [4] - [3] */
+
+ por xmm4, xmm5 /* abs([3]-[4] ) */
+ psubw xmm4, QStepWMT /* abs([3]-[4] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm2, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm2, xmm3 /* */
+
+ por xmm2, xmm4 /* xmm2 = p2 */
+
+ /* Data is ready, now do the filtering */
+
+ pxor xmm0, xmm0 /* clear xmm0 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+
+ movdqa xmm3, xmm1 /* xmm3 = p1 */
+ paddw xmm3, xmm3 /* xmm3 = p1 + p1 */
+
+ paddw xmm3, xmm1 /* xmm3 = p1 + p1 + p1 */
+ movdqa xmm4, [edi+16] /* xmm4 = x1 */
+
+ paddw xmm3, [edi+32] /* xmm3 = p1+p1+p1+ x2 */
+ paddw xmm4, [edi+48] /* xmm4 = x1+x3 */
+
+ paddw xmm3, [edi+64] /* xmm3 += x4 */
+ paddw xmm4, EightFours /* xmm4 = x1 + x3 + 4 */
+
+ paddw xmm3, xmm4 /* xmm3 = 3*p1+x1+x2+x3+x4+4 */
+ movdqa xmm4, xmm3 /* xmm4 = xmm3 */
+
+ movdqa xmm5, [edi+16] /* xmm5 = x1 */
+ paddw xmm4, xmm5 /* xmm4 = sum+x1 */
+
+ psllw xmm4, 1 /* xmm4 = (sum+x1)<<1 */
+ psubw xmm4, [edi+64] /* xmm4 = (sum+x1)<<1-x4 */
+
+ paddw xmm4, [edi+80] /* xmm4 = (sum+x1)<<1-x4+x5 */
+ psraw xmm4, 4 /* xmm4 >>=4 */
+
+ psubw xmm4, xmm5 /* New Value - old Value */
+ pand xmm4, xmm7 /* And the flag */
+
+ paddw xmm4, xmm5 /* add the old value back */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movdq2q mm0, xmm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movdqa xmm5, [edi+32] /* xmm5= x2 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+80] /* sum=sum+x5 */
+ movdqa xmm4, xmm5 /* copy sum */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x2 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+80] /* xmm4 =(sum+x2)<<1-x5 */
+ paddw xmm4, [edi+96] /* xmm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movdq2q mm1, xmm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movdqa xmm5, [edi+48] /* xmm5= x3 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+96] /* sum=sum+x6 */
+ movdqa xmm4, xmm5 /* copy x3 */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x3 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+96] /* xmm4 =(sum+x3)<<1-x6 */
+ paddw xmm4, [edi+112] /* xmm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movdq2q mm2, xmm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movdqa xmm5, [edi+64] /* xmm5 = x4 */
+ psubw xmm3, xmm1 /* sum = sum-p1 */
+
+ paddw xmm3, [edi+112] /* sum = sum+x7 */
+ movdqa xmm4, xmm5 /* xmm4 = x4 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum + x4 */
+ paddw xmm4, xmm4 /* xmm4 *=2 */
+
+ paddw xmm4, xmm1 /* += p1 */
+ psubw xmm4, [edi+16] /* -= x1 */
+
+ psubw xmm4, [edi+112] /* -= x7 */
+ paddw xmm4, [edi+128] /* += x8 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x4 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x4 */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movdq2q mm3, xmm4 /* write new x4 */
+
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movdqa xmm5, [edi+80] /* xmm5 = x5 */
+ psubw xmm3, [edi+16] /* sum -= x1 */
+
+ paddw xmm3, [edi+128] /* sub += x8 */
+ movdqa xmm4, xmm5 /* xmm4 = x5 */
+
+ paddw xmm4, xmm3 /* xmm4= sum+x5 */
+ paddw xmm4, xmm4 /* xmm4 *= 2 */
+
+ paddw xmm4, [edi+16] /* += x1 */
+ psubw xmm4, [edi+32] /* -= x2 */
+
+ psubw xmm4, [edi+128] /* -= x8 */
+ paddw xmm4, xmm2 /* += p2 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x5 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x5 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm4, xmm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movdqa xmm5, [edi+96] /* xmm5 = x6 */
+ psubw xmm3, [edi+32] /* -= x2 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x6 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x6 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+32] /* +=x2 */
+ psubw xmm4, [edi+48] /* -=x3 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x6 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x6 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm5, xmm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movdqa xmm5, [edi+112] /* xmm5 = x7 */
+ psubw xmm3, [edi+48] /* -= x3 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x7 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x7 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+48] /* +=x3 */
+ psubw xmm4, [edi+64] /* -=x4 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x7 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x7 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm6, xmm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movdqa xmm5, [edi+128] /* xmm5 = x8 */
+ psubw xmm3, [edi+64] /* -= x4 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x8 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x8 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+64] /* +=x4 */
+ psubw xmm4, [edi+80] /* -=x5 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x8 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x8 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm7, xmm4 /* write new x8 */
+
+
+ /* transpose */
+ movq2dq xmm0, mm0 /* xmm0 = 70 60 50 40 30 20 10 00 */
+ movq2dq xmm1, mm1 /* xmm1 = 71 61 51 41 31 21 11 01 */
+
+ movq2dq xmm2, mm2 /* xmm2 = 72 62 52 42 32 22 12 02 */
+ movq2dq xmm3, mm3 /* xmm3 = 73 63 53 43 33 23 13 03 */
+
+ punpcklbw xmm0, xmm1 /* xmm0 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+ punpcklbw xmm2, xmm3 /* xmm2 = 7372 6362 5352 4342 3332 2322 1312 0302 */
+
+ movdqa xmm1, xmm0 /* xmm1 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+ punpcklwd xmm0, xmm2 /* xmm0 = 33323130 23222120 13121110 03020100 */
+
+ punpckhwd xmm1, xmm2 /* xmm1 = 73727170 63626160 53525150 43424140 */
+
+ movq2dq xmm4, mm4 /* xmm4 = 74 64 54 44 34 24 14 04 */
+ movq2dq xmm5, mm5 /* xmm5 = 75 65 55 45 35 25 15 05 */
+
+ movq2dq xmm6, mm6 /* xmm6 = 76 66 56 46 36 26 16 06 */
+ movq2dq xmm7, mm7 /* xmm7 = 77 67 57 47 37 27 17 07 */
+
+ punpcklbw xmm4, xmm5 /* xmm4 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+ punpcklbw xmm6, xmm7 /* xmm6 = 7776 6766 5756 4746 3736 2726 1716 0706 */
+
+ movdqa xmm5, xmm4 /* xmm5 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+ punpcklwd xmm4, xmm6 /* xmm4 = 37363534 27262524 17161514 07060504 */
+
+ punpckhwd xmm5, xmm6 /* xmm5 = 77767574 67666564 57565554 47464544 */
+ movdqa xmm2, xmm0 /* xmm2 = 33323130 23222120 13121110 03020100 */
+
+ punpckldq xmm0, xmm4 /* xmm0 = 1716151413121110 0706050403020100 */
+ movq QWORD PTR [esi+edx*2],xmm0 /* write 00 01 02 03 04 05 06 07 */
+
+ psrldq xmm0, 8 /* xmm0 = 1716151413121110 */
+ punpckhdq xmm2, xmm4 /* xmm2 = 3736353433323130 2726252423222120 */
+
+ movq QWORD PTR [esi+edx], xmm0 /* write 10 11 12 13 14 15 16 17 */
+ movdqa xmm3, xmm1 /* xmm3 = 73727170 63626160 53525150 43424140 */
+
+ punpckldq xmm1, xmm5 /* xmm1 = 5756555453525150 4746454443424140 */
+ movq QWORD PTR [esi], xmm2 /* write 20 21 22 23 24 25 26 27 */
+
+ psrldq xmm2, 8 /* xmm2 = 3736353433323130 */
+ punpckhdq xmm3, xmm5 /* xmm3 = 7776757473727170 6766656463626160 */
+
+ movq QWORD PTR [esi+ecx], xmm2 /* write 30 31 32 33 34 35 36 37 */
+ lea esi, [esi+ecx*4] /* esi= Des - 4 + 4 *pitch */
+
+ movq QWORD PTR [esi+edx*2], xmm1 /* write 40 41 42 43 44 45 46 47 */
+ movq QWORD PTR [esi], xmm3 /* write 60 61 62 63 64 65 66 67 */
+
+ psrldq xmm1, 8 /* xmm1 = 5756555453525150 */
+ psrldq xmm3, 8 /* xmm3 = 7776757473727170 */
+
+ movq QWORD PTR [esi+edx], xmm1 /* write 50 51 52 53 54 55 56 57 */
+ movq QWORD PTR [esi+ecx], xmm3 /* write 70 71 72 73 74 75 76 77 */
+
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop eax
+ }// end of __asm
+
+ Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+ Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+ pbi->FragmentVariances[CurrentFrag + 1] += Var2;
+ }// end of if
+ CurrentFrag ++;
+ Src += 8;
+ Des += 8;
+ }//end of while
+#endif
+}
+
+
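+/****************************************************************************
+ *
+ * ROUTINE : DeblockNonFilteredBand_WMT
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Post-processor instance; FrameQIndex and
+ *                                   FragQIndex are read from it.
+ *          UINT8 *SrcPtr          : Source pixels at the first fragment
+ *                                   (StartFrag) of the band.
+ *          UINT8 *DesPtr          : Destination pixels at the first fragment
+ *                                   (StartFrag) of the band.
+ *          UINT32 PlaneLineStep   : Pitch (bytes between successive rows).
+ *          UINT32 FragAcross      : Number of fragments processed across
+ *                                   the band.
+ *          UINT32 StartFrag       : Index of the first fragment in the band.
+ *          UINT32 *QuantScale     : Table indexed by a fragment's FragQIndex
+ *                                   entry to obtain QStep.
+ *
+ * OUTPUTS : Filtered pixels are written through DesPtr, and the computed
+ *           variances are accumulated into pbi->FragmentVariances.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Filter both horizontal and vertical edges in a band
+ *
+ * SPECIAL NOTES : Implemented with MMX/SSE2 inline assembly. When
+ *                 _WIN32_WCE is defined the routine returns immediately
+ *                 without filtering.
+ *
+ * REFERENCE :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/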
+
+void DeblockNonFilteredBand_WMT(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *SrcPtr,
+ UINT8 *DesPtr,
+ UINT32 PlaneLineStep,
+ UINT32 FragAcross,
+ UINT32 StartFrag,
+ UINT32 *QuantScale
+ )
+{
+ UINT32 j;
+ UINT32 CurrentFrag=StartFrag;
+ UINT32 QStep;
+ UINT32 LoopFLimit;
+ UINT8 *Src, *Des;
+ UINT32 Var1, Var2;
+#if defined(_WIN32_WCE)
+ return;
+#else
+__declspec(align(16)) short QStepWMT[8];
+__declspec(align(16)) short FLimitWMT[8];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short LoopFLimitWMT[8];
+__declspec(align(16)) short LoopFilteredValuesUp[8];
+__declspec(align(16)) short LoopFilteredValuesDown[8];
+
+__declspec(align(16)) unsigned short Variance1[8];
+__declspec(align(16)) unsigned short Variance2[8];
+
+
+ LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+ LoopFLimitWMT[0] = (INT16)LoopFLimit;
+ LoopFLimitWMT[1] = (INT16)LoopFLimit;
+ LoopFLimitWMT[2] = (INT16)LoopFLimit;
+ LoopFLimitWMT[3] = (INT16)LoopFLimit;
+ LoopFLimitWMT[4] = (INT16)LoopFLimit;
+ LoopFLimitWMT[5] = (INT16)LoopFLimit;
+ LoopFLimitWMT[6] = (INT16)LoopFLimit;
+ LoopFLimitWMT[7] = (INT16)LoopFLimit;
+
+
+ while(CurrentFrag < StartFrag + FragAcross )
+ {
+
+ Src=SrcPtr+8*(CurrentFrag-StartFrag);
+ Des=DesPtr+8*(CurrentFrag-StartFrag);
+
+ QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+
+
+ __asm
+ {
+
+ push eax
+ push ecx
+ push edx
+ push esi
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+ /* Copy the data to the intermediate buffer */
+ mov eax, QStep
+ xor edx, edx /* clear edx */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ pcmpeqw xmm6, xmm6 /* xmm6 = FFFFFF... */
+
+
+ movd mm5, eax /* mm5 = QStep */
+ psrlw xmm6, 14 /* xmm6 = 3, 3, 3, 3, 3, 3, 3, 3*/
+
+ punpcklwd mm5, mm5 /* mm5 = QQ */
+ mov eax, Src /* eax = Src */
+
+ punpckldq mm5, mm5 /* mm5 = QQQQ */
+ sub edx, ecx /* edx = - Pitch */
+
+ movq2dq xmm5, mm5 /* xmm5 = QQQQ */
+ punpcklqdq xmm5, xmm5 /* xmm5 = QQQQQQQQ */
+
+ pmullw xmm6, xmm5 /* Qstep * 3 */
+ movdqa QStepWMT, xmm5
+
+ lea edi, Rows /* edi = Rows */
+ pxor xmm7, xmm7 /* Clear mm7 */
+
+ mov esi, Des /* esi = des */
+ pmullw xmm6, xmm5
+
+ lea eax, [eax + edx * 4 ] /* eax = Src - 4*Pitch */
+ lea esi, [esi + edx * 2] /* esi = Des - 2*Pitch */
+
+ psraw xmm6, 5
+ movdqa FLimitWMT, xmm6
+
+ /* Copy the data to the intermediate buffer */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[-5*Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[-4*Pitch */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi], xmm0 /* write 8 words */
+ movdqa [edi+16], xmm1 /* write 8 words */
+
+ movq xmm2, QWORD PTR [eax+ecx] /* xmm2 = Src[-3*Pitch] */
+ movq xmm3, QWORD PTR [eax+ecx*2]/* xmm3 = Src[-2*Pitch] */
+
+ punpcklbw xmm2, xmm7 /* expand to words */
+ punpcklbw xmm3, xmm7 /* expand to words */
+
+ movdqa [edi+32], xmm2 /* write 8 words */
+ movdqa [edi+48], xmm3 /* write 8 words */
+
+ lea eax, [eax+ecx*4] /* eax= Src */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[-Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[0] */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi+64], xmm0 /* write 8 words */
+ movdqa [edi+80], xmm1 /* write 8 words */
+
+ movq xmm2, QWORD PTR [eax+ecx] /* xmm2 = Src[Pitch] */
+ movq xmm3, QWORD PTR [eax+ecx*2]/* xmm3 = Src[2*Pitch] */
+
+ punpcklbw xmm2, xmm7 /* expand to words */
+ punpcklbw xmm3, xmm7 /* expand to words */
+
+ movdqa [edi+96], xmm2 /* write 8 words */
+ movdqa [edi+112], xmm3 /* write 8 words */
+
+ lea eax, [eax+ecx*4] /* eax= Src+4*Pitch */
+
+ movq xmm0, QWORD PTR [eax + edx]/* xmm0 = Src[3*Pitch] */
+ movq xmm1, QWORD PTR [eax ] /* xmm1 = Src[4*Pitch] */
+
+ punpcklbw xmm0, xmm7 /* expand to words */
+ punpcklbw xmm1, xmm7 /* expand to words */
+
+ movdqa [edi+128], xmm0 /* write 8 words */
+ movdqa [edi+144], xmm1 /* write 8 words */
+
+
+ /* done with copying everything to intermediate buffer */
+ /* Now, compute the variances for Pixel 1-4 and 5-8 */
+
+ /* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */
+ /* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */
+
+ pcmpeqw xmm3, xmm3 /* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+ psllw xmm3, 15 /* xmm3 = 80008000800080008000800080008000 */
+ psrlw xmm3, 8 /* xmm3 = 00800080008000800080008000800080 */
+
+ movdqa xmm2, [edi+16] /* Pixel 1 */
+ movdqa xmm6, [edi+80] /* Pixel 5 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ movdqa xmm0, xmm2 /* xmm0 = pixel 1 */
+ movdqa xmm4, xmm6 /* xmm4 = pixel 5 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel1 * pixel1 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel5 * pixel5 */
+
+ movdqa xmm1, xmm2 /* xmm1 = pixel1^2 */
+ movdqa xmm5, xmm6 /* xmm5 = pixel5^2 */
+
+ movdqa xmm2, [edi+32] /* Pixel 2 */
+ movdqa xmm6, [edi+96] /* Pixel 6 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 2 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 6 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel2^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel6^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel2^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel6^2 */
+
+ movdqa xmm2, [edi+48] /* Pixel 3 */
+ movdqa xmm6, [edi+112] /* Pixel 7 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 3 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 7 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel3^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel7^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel3^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel7^2 */
+
+ movdqa xmm2, [edi+64] /* Pixel 4 */
+ movdqa xmm6, [edi+128] /* Pixel 8 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 4 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 8 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel4^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel8^2 */
+
+ paddw xmm1, xmm2 /* xmm1 = pixel4^2 */
+ paddw xmm5, xmm6 /* xmm5 = pixel8^2 */
+
+ /* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* xmm1 = x1 + x2 + x3 + x4 */
+ /* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */
+ /* xmm5 = x5 + x6 + x7 + x8 */
+
+ movdqa xmm7, xmm3 /* xmm7 = xmm3 */
+ psrlw xmm7, 7 /* xmm7 = 00010001000100010001000100010001 */
+
+ movdqa xmm2, xmm0 /* make copy of sum1 */
+ movdqa xmm6, xmm4 /* make copy of sum2 */
+
+ paddw xmm0, xmm7 /* (sum1 + 1) */
+ paddw xmm4, xmm7 /* (sum2 + 1) */
+
+ psraw xmm2, 1 /* sum1 /2 */
+ psraw xmm6, 1 /* sum2 /2 */
+
+ psraw xmm0, 1 /* (sum1 + 1)/2 */
+ psraw xmm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw xmm2, xmm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw xmm6, xmm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw xmm1, xmm2 /* Variance 1 */
+ psubw xmm5, xmm6 /* Variance 2 */
+
+ movdqa xmm7, FLimitWMT /* xmm7 = FLimit */
+ movdqa xmm2, xmm1 /* copy of Varinace 1*/
+
+ movdqa [Variance1], xmm1 /* save the varinace1 */
+ movdqa [Variance2], xmm5 /* save the varinace2 */
+
+ movdqa xmm6, xmm5 /* Variance 2 */
+ psubw xmm1, xmm7 /* Variance 1 < Flimit? */
+
+ psubw xmm5, xmm7 /* Variance 2 < Flimit? */
+ psraw xmm2, 15 /* Variance 1 > 32768? */
+
+ psraw xmm6, 15 /* Vaiance 2 > 32768? */
+ psraw xmm1, 15 /* FFFF/0000 for true/false */
+
+ psraw xmm5, 15 /* FFFF/0000 for true/false */
+ movdqa xmm7, [edi+64] /* xmm0 = Pixel 4 */
+
+ pandn xmm2, xmm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn xmm6, xmm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movdqa xmm4, [edi+80] /* xmm4 = Pixel 5 */
+ pand xmm6, xmm2 /* xmm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movdqa xmm2, xmm7 /* make copy of Pixel4 */
+
+ psubusw xmm7, xmm4 /* 4 - 5 */
+ psubusw xmm4, xmm2 /* 5 - 4 */
+
+ por xmm7, xmm4 /* abs(4 - 5) */
+ psubw xmm7, QStepWMT /* abs(4-5)<QStepxmmx ? */
+
+ psraw xmm7, 15 /* FFFF/0000 for True/Flase */
+ pand xmm7, xmm6
+
+ /* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* xmm7 now are in use */
+
+
+ /* find the loop filtered values for the pixels on block boundary */
+ movdqa xmm1, LoopFLimitWMT; /* Get the Flimit values for loop filter */
+ movdqa xmm3, [edi + 48] /* xmm3 = x3 = p[-2] */
+
+ movdqa xmm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movdqa xmm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movdqa xmm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw xmm5, xmm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw xmm3, xmm6 /* mm3 = p[-2] - p[ 1] */
+ movdqa xmm4, xmm5 /* make a copy */
+
+ paddw xmm4, xmm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw xmm3, EightFours /* mm3 + 4 */
+
+ paddw xmm5, xmm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw xmm3, xmm5 /* Filtval before shift */
+
+ psraw xmm3, 3 /* FiltVal */
+ movdqa xmm2, xmm3 /* make a copy */
+
+ psraw xmm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor xmm2, xmm3
+
+ psubsw xmm2, xmm3 /* mm2 = abs(FiltVal) */
+ por xmm3, EightOnes /* -1 and 1 for + and - */
+
+ movdqa xmm4, xmm1 /* make a copy of Flimit */
+ psubw xmm1, xmm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movdqa xmm5, xmm1 /* copy Flimit - abs(FiltVal) */
+ psraw xmm1, 15 /* FFFF or 0000 */
+
+ pxor xmm5, xmm1
+ psubsw xmm5, xmm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw xmm4, xmm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw xmm4, xmm3 /* get the sign back */
+
+ movdqa xmm1, [edi+64] /* p[-1] */
+ movdqa xmm2, [edi+80] /* p[0] */
+
+ paddw xmm1, mm4 /* p[-1] + NewFiltVal */
+ psubw xmm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor xmm6, xmm6 /* clear mm6 */
+ packuswb xmm1, xmm1 /* clamping */
+
+ packuswb xmm2, xmm2
+ punpcklbw xmm1, xmm6 /* unpack to word */
+
+ movdqa LoopFilteredValuesUp, xmm1 /* save the values */
+ punpcklbw xmm2, xmm6 /* unpack to word */
+
+ movdqa LoopFilteredValuesDown, xmm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movdqa xmm5, [edi] /* xmm5 = -5 */
+ movdqa xmm4, [edi + 16] /* xmm4 = -4 */
+
+ movdqa xmm3, xmm4 /* copy of -4 */
+ movdqa xmm6, xmm5 /* copy of -5 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [-4] - [-5] */
+ psubusw xmm5, xmm3 /* xmm5 = [-5] - [-4] */
+
+ por xmm4, xmm5 /* abs([-4]-[-5] ) */
+ psubw xmm4, QStepWMT /* abs([-4]-[-5] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm1, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm1, xmm3 /* */
+
+ por xmm1, xmm4 /* xmm1 = p1 */
+
+ /* now find P2 */
+
+ movdqa xmm4, [edi+128] /* xmm4 = [3] */
+ movdqa xmm5, [edi+144] /* xmm5 = [4] */
+
+ movdqa xmm3, xmm4 /* copy of 3 */
+ movdqa xmm6, xmm5 /* copy of 4 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [3] - [4] */
+ psubusw xmm5, xmm3 /* xmm5 = [4] - [3] */
+
+ por xmm4, xmm5 /* abs([3]-[4] ) */
+ psubw xmm4, QStepWMT /* abs([3]-[4] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm2, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm2, xmm3 /* */
+
+ por xmm2, xmm4 /* xmm2 = p2 */
+
+ /* Data is ready, now do the filtering */
+
+ pxor xmm0, xmm0 /* clear xmm0 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+
+ movdqa xmm3, xmm1 /* xmm3 = p1 */
+ paddw xmm3, xmm3 /* xmm3 = p1 + p1 */
+
+ paddw xmm3, xmm1 /* xmm3 = p1 + p1 + p1 */
+ movdqa xmm4, [edi+16] /* xmm4 = x1 */
+
+ paddw xmm3, [edi+32] /* xmm3 = p1+p1+p1+ x2 */
+ paddw xmm4, [edi+48] /* xmm4 = x1+x3 */
+
+ paddw xmm3, [edi+64] /* xmm3 += x4 */
+ paddw xmm4, EightFours /* xmm4 = x1 + x3 + 4 */
+
+ paddw xmm3, xmm4 /* xmm3 = 3*p1+x1+x2+x3+x4+4 */
+ movdqa xmm4, xmm3 /* xmm4 = xmm3 */
+
+ movdqa xmm5, [edi+16] /* xmm5 = x1 */
+ paddw xmm4, xmm5 /* xmm4 = sum+x1 */
+
+ psllw xmm4, 1 /* xmm4 = (sum+x1)<<1 */
+ psubw xmm4, [edi+64] /* xmm4 = (sum+x1)<<1-x4 */
+
+ paddw xmm4, [edi+80] /* xmm4 = (sum+x1)<<1-x4+x5 */
+ psraw xmm4, 4 /* xmm4 >>=4 */
+
+ psubw xmm4, xmm5 /* New Value - old Value */
+ pand xmm4, xmm7 /* And the flag */
+
+ paddw xmm4, xmm5 /* add the old value back */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movq QWORD PTR [esi+edx*2], xmm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movdqa xmm5, [edi+32] /* xmm5= x2 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+80] /* sum=sum+x5 */
+ movdqa xmm4, xmm5 /* copy sum */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x2 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+80] /* xmm4 =(sum+x2)<<1-x5 */
+ paddw xmm4, [edi+96] /* xmm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movq QWORD PTR [esi+edx], xmm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movdqa xmm5, [edi+48] /* xmm5= x3 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+96] /* sum=sum+x6 */
+ movdqa xmm4, xmm5 /* copy x3 */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x3 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+96] /* xmm4 =(sum+x3)<<1-x6 */
+ paddw xmm4, [edi+112] /* xmm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movq QWORD PTR [esi],xmm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movdqa xmm5, [edi+64] /* xmm5 = x4 */
+ psubw xmm3, xmm1 /* sum = sum-p1 */
+
+ paddw xmm3, [edi+112] /* sum = sum+x7 */
+ movdqa xmm4, xmm5 /* xmm4 = x4 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum + x4 */
+ paddw xmm4, xmm4 /* xmm4 *=2 */
+
+ paddw xmm4, xmm1 /* += p1 */
+ psubw xmm4, [edi+16] /* -= x1 */
+
+ psubw xmm4, [edi+112] /* -= x7 */
+ paddw xmm4, [edi+128] /* += x8 */
+
+ movdqa xmm5, LoopFilteredValuesUp /* Read the loop filtered value of x4 */
+ psraw xmm4, 4 /* >>=4 */
+
+ psubw xmm4, xmm5 /* -=x4 */
+ pand xmm4, xmm7 /* and flag */
+
+ paddw xmm4, xmm5 /* += x4 */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movq QWORD PTR [esi+ecx], xmm4 /* write new x4 */
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movdqa xmm5, [edi+80] /* xmm5 = x5 */
+ psubw xmm3, [edi+16] /* sum -= x1 */
+
+ paddw xmm3, [edi+128] /* sub += x8 */
+ movdqa xmm4, xmm5 /* xmm4 = x5 */
+
+ paddw xmm4, xmm3 /* xmm4= sum+x5 */
+ paddw xmm4, xmm4 /* xmm4 *= 2 */
+
+ paddw xmm4, [edi+16] /* += x1 */
+ psubw xmm4, [edi+32] /* -= x2 */
+
+ psubw xmm4, [edi+128] /* -= x8 */
+ paddw xmm4, xmm2 /* += p2 */
+
+ movdqa xmm5, LoopFilteredValuesDown /* Read the loop filtered value of x5 */
+ psraw xmm4, 4 /* >>=4 */
+
+ psubw xmm4, xmm5 /* -=x5 */
+ pand xmm4, xmm7 /* and flag */
+
+ paddw xmm4, xmm5 /* += x5 */
+ lea esi, [esi+ecx*4] /* esi=des + 2*pitch */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi+edx*2], xmm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movdqa xmm5, [edi+96] /* xmm5 = x6 */
+ psubw xmm3, [edi+32] /* -= x2 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x6 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x6 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+32] /* +=x2 */
+ psubw xmm4, [edi+48] /* -=x3 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x6 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x6 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi+edx], xmm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movdqa xmm5, [edi+112] /* xmm5 = x7 */
+ psubw xmm3, [edi+48] /* -= x3 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x7 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x7 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+48] /* +=x3 */
+ psubw xmm4, [edi+64] /* -=x4 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x7 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x7 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi],xmm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movdqa xmm5, [edi+128] /* xmm5 = x8 */
+ psubw xmm3, [edi+64] /* -= x4 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x8 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x8 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+64] /* +=x4 */
+ psubw xmm4, [edi+80] /* -=x5 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x8 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x8 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movq QWORD PTR [esi+ecx], xmm4 /* write new x8 */
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop eax
+
+
+ } /* end of the macro */
+
+ Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+ Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+ pbi->FragmentVariances[CurrentFrag] += Var1;
+ pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+
+
+ if(CurrentFrag==StartFrag)
+ CurrentFrag++;
+ else
+ {
+
+ Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+ Src=Des;
+
+ QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
+ QStepWMT[0] = (INT16)QStep;
+ QStepWMT[1] = (INT16)QStep;
+ QStepWMT[2] = (INT16)QStep;
+ QStepWMT[3] = (INT16)QStep;
+ QStepWMT[4] = (INT16)QStep;
+ QStepWMT[5] = (INT16)QStep;
+ QStepWMT[6] = (INT16)QStep;
+ QStepWMT[7] = (INT16)QStep;
+
+ for( j=0; j<8;j++)
+ {
+ Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+ Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);
+ }
+
+ __asm
+ {
+ /* Save the registers */
+ push eax
+ push ecx
+ push edx
+ push esi
+ push edi
+
+ /* Calculate the FLimit and store FLimit and QStep */
+
+ movdqa xmm0, QStepWMT /* Get QStep */
+ movdqa xmm1, EightThrees /* mm1 = 03030303 */
+
+ pmullw xmm1, xmm0 /* mm1 = QStep * 3 */
+ pmullw xmm1, xmm0 /* mm1 = QStep * QStep * 3 */
+
+ psrlw xmm1, 5 /* mm1 = FLimit */
+ movdqa [FLimitWMT], xmm1 /* Save FLimit */
+
+ /* setup the pointers to data */
+
+ mov eax, Src /* eax = Src */
+ xor edx, edx /* clear edx */
+
+ mov esi, Des /* esi = Des */
+ sub eax, 4 /* eax = Src-4 */
+
+ sub esi, 4 /* esi = Des-4 */
+ lea edi, Rows /* edi = Rows */
+
+ mov ecx, PlaneLineStep /* ecx = Pitch */
+ sub edx, ecx /* edx = -Pitch */
+
+ lea esi, [esi+ecx*2] /* esi = Des-4 + 2 * Pitch */
+
+ /* Get the data to the intermediate buffer */
+
+ movq mm0, [eax] /* mm0 = 07 06 05 04 03 02 01 00 */
+ movq mm1, [eax+ecx] /* mm1 = 17 16 15 14 13 12 11 10 */
+
+ movq mm2, [eax+ecx*2] /* mm2 = 27 26 25 24 23 22 21 20 */
+ lea eax, [eax+ecx*4] /* Go down four Rows */
+
+ movq mm3, [eax+edx] /* mm3 = 37 36 35 34 33 32 31 30 */
+ movq mm4, mm0 /* mm4 = 07 06 05 04 03 02 01 00 */
+
+ punpcklbw mm0, mm1 /* mm0 = 13 03 12 02 11 01 10 00 */
+ punpckhbw mm4, mm1 /* mm4 = 17 07 16 06 15 05 14 04 */
+
+ movq mm5, mm2 /* mm5 = 27 26 25 24 23 22 21 20 */
+ punpcklbw mm2, mm3 /* mm2 = 33 23 32 22 31 21 30 20 */
+
+ punpckhbw mm5, mm3 /* mm5 = 37 27 36 26 35 25 34 24 */
+ movq mm1, mm0 /* mm1 = 13 03 12 02 11 01 10 00 */
+
+ punpcklwd mm0, mm2 /* mm0 = 31 21 11 01 30 20 10 00 */
+ punpckhwd mm1, mm2 /* mm1 = 33 23 13 03 32 22 12 02 */
+
+ movq mm2, mm4 /* mm2 = 17 07 16 06 15 05 14 04 */
+ punpckhwd mm4, mm5 /* mm4 = 37 27 17 07 36 26 16 06 */
+
+ punpcklwd mm2, mm5 /* mm2 = 35 25 15 05 34 24 14 04 */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 30 20 10 00 */
+
+ movq [edi+16], mm0 /* write 00 10 20 30 */
+ punpckhbw mm5, mm7 /* mm5 = 31 21 11 01 */
+
+ movq mm0, mm1 /* mm0 =33 23 13 03 32 22 12 02 */
+ movq [edi+32], mm5 /* write 01 11 21 31 */
+
+ punpcklbw mm1, mm7 /* mm1 = 32 22 12 02 */
+ punpckhbw mm0, mm7 /* mm0 = 33 23 12 03 */
+
+ movq [edi+48], mm1 /* write 02 12 22 32 */
+ movq mm3, mm2 /* mm3 = 35 25 15 05 34 24 14 04 */
+
+ movq mm5, mm4 /* mm5 = 37 27 17 07 36 26 16 06 */
+ movq [edi+64], mm0 /* write 03 13 23 33 */
+
+ punpcklbw mm2, mm7 /* mm2 = 34 24 14 04 */
+ punpckhbw mm3, mm7 /* mm3 = 35 25 15 05 */
+
+ movq [edi+80], mm2 /* write 04 14 24 34 */
+ punpcklbw mm4, mm7 /* mm4 = 36 26 16 06 */
+
+ punpckhbw mm5, mm7 /* mm5 = 37 27 17 07 */
+ movq [edi+96], mm3 /* write 05 15 25 35 */
+
+ movq mm0, [eax] /* mm0 = 47 46 45 44 43 42 41 40 */
+ movq mm1, [eax + ecx ] /* mm1 = 57 56 55 54 53 52 51 50 */
+
+ movq [edi+112], mm4 /* write 06 16 26 37 */
+ movq mm2, [eax+ecx*2] /* mm2 = 67 66 65 64 63 62 61 60 */
+
+ lea eax, [eax+ ecx*4] /* Go down four rows */
+ movq [edi+128], mm5 /* write 07 17 27 37 */
+
+ movq mm4, mm0 /* mm4 = 47 46 45 44 43 42 41 40 */
+ movq mm3, [eax+edx] /* mm3 = 77 76 75 74 73 72 71 70 */
+
+ punpcklbw mm0, mm1 /* mm0 = 53 43 52 42 51 41 50 40 */
+ punpckhbw mm4, mm1 /* mm4 = 57 57 56 46 55 45 54 44 */
+
+ movq mm5, mm2 /* mm5 = 67 66 65 64 63 62 61 60 */
+ punpcklbw mm2, mm3 /* mm2 = 73 63 72 62 71 61 70 60 */
+
+ punpckhbw mm5, mm3 /* mm5 = 77 67 76 66 75 65 74 64 */
+ movq mm1, mm0 /* mm1 = 53 43 52 42 51 41 50 40 */
+
+ punpcklwd mm0, mm2 /* mm0 = 71 61 51 41 70 60 50 40 */
+ punpckhwd mm1, mm2 /* mm1 = 73 63 53 43 72 62 52 42 */
+
+ movq mm2, mm4 /* mm2 = 57 57 56 46 55 45 54 44 */
+ punpckhwd mm4, mm5 /* mm4 = 77 67 57 47 76 66 56 46 */
+
+ punpcklwd mm2, mm5 /* mm2 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm0 /* make a copy */
+ punpcklbw mm0, mm7 /* mm0 = 70 60 50 40 */
+
+ movq [edi+24], mm0 /* write 40 50 60 70 */
+ punpckhbw mm5, mm7 /* mm5 = 71 61 51 41 */
+
+ movq mm0, mm1 /* mm0 = 73 63 53 43 72 62 52 42 */
+ movq [edi+40], mm5 /* write 41 51 61 71 */
+
+ punpcklbw mm1, mm7 /* mm1 = 72 62 52 42 */
+ punpckhbw mm0, mm7 /* mm0 = 73 63 53 43 */
+
+ movq [edi+56], mm1 /* write 42 52 62 72 */
+ movq mm3, mm2 /* mm3 = 75 65 55 45 74 64 54 44 */
+
+ movq mm5, mm4 /* mm5 = 77 67 57 47 76 66 56 46 */
+ movq [edi+72], mm0 /* write 43 53 63 73 */
+
+ punpcklbw mm2, mm7 /* mm2 = 74 64 54 44 */
+ punpckhbw mm3, mm7 /* mm3 = 75 65 55 45 */
+
+ movq [edi+88], mm2 /* write 44 54 64 74 */
+ punpcklbw mm4, mm7 /* mm4 = 76 66 56 46 */
+
+ punpckhbw mm5, mm7 /* mm5 = 77 67 57 47 */
+ movq [edi+104], mm3 /* write 45 55 65 75 */
+
+ movq [edi+120], mm4 /* write 46 56 66 76 */
+ movq [edi+136], mm5 /* write 47 57 67 77 */
+
+ /* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */
+ /* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */
+
+ pcmpeqw xmm3, xmm3 /* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+ psllw xmm3, 15 /* xmm3 = 80008000800080008000800080008000 */
+ psrlw xmm3, 8 /* xmm3 = 00800080008000800080008000800080 */
+
+ movdqa xmm2, [edi+16] /* Pixel 1 */
+ movdqa xmm6, [edi+80] /* Pixel 5 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ movdqa xmm0, xmm2 /* xmm0 = pixel 1 */
+ movdqa xmm4, xmm6 /* xmm4 = pixel 5 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel1 * pixel1 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel5 * pixel5 */
+
+ movdqa xmm1, xmm2 /* xmm1 = pixel1^2 */
+ movdqa xmm5, xmm6 /* xmm5 = pixel5^2 */
+
+ movdqa xmm2, [edi+32] /* Pixel 2 */
+ movdqa xmm6, [edi+96] /* Pixel 6 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 2 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 6 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel2^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel6^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel2^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel6^2 */
+
+ movdqa xmm2, [edi+48] /* Pixel 3 */
+ movdqa xmm6, [edi+112] /* Pixel 7 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 3 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 7 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel3^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel7^2 */
+
+ paddw xmm1, xmm2 /* xmm1 += pixel3^2 */
+ paddw xmm5, xmm6 /* xmm5 += pixel7^2 */
+
+ movdqa xmm2, [edi+64] /* Pixel 4 */
+ movdqa xmm6, [edi+128] /* Pixel 8 */
+
+ psubw xmm2, xmm3 /* xmm2 -=128 */
+ psubw xmm6, xmm3 /* xmm6 -=128 */
+
+ paddw xmm0, xmm2 /* xmm0 += pixel 4 */
+ paddw xmm4, xmm6 /* xmm4 += pixel 8 */
+
+ pmullw xmm2, xmm2 /* xmm2 = pixel4^2 */
+ pmullw xmm6, xmm6 /* xmm6 = pixel8^2 */
+
+ paddw xmm1, xmm2 /* xmm1 = pixel4^2 */
+ paddw xmm5, xmm6 /* xmm5 = pixel8^2 */
+
+ /* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */
+ /* xmm1 = x1 + x2 + x3 + x4 */
+ /* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */
+ /* xmm5 = x5 + x6 + x7 + x8 */
+
+ movdqa xmm7, xmm3 /* xmm7 = xmm3 */
+ psrlw xmm7, 7 /* xmm7 = 00010001000100010001000100010001 */
+
+ movdqa xmm2, xmm0 /* make copy of sum1 */
+ movdqa xmm6, xmm4 /* make copy of sum2 */
+
+ paddw xmm0, xmm7 /* (sum1 + 1) */
+ paddw xmm4, xmm7 /* (sum2 + 1) */
+
+ psraw xmm2, 1 /* sum1 /2 */
+ psraw xmm6, 1 /* sum2 /2 */
+
+ psraw xmm0, 1 /* (sum1 + 1)/2 */
+ psraw xmm4, 1 /* (sum2 + 1)/2 */
+
+ pmullw xmm2, xmm0 /* (sum1)/2*(sum1+1)/2 */
+ pmullw xmm6, xmm4 /* (sum2)/2*(sum2+1)/2 */
+
+ psubw xmm1, xmm2 /* Variance 1 */
+ psubw xmm5, xmm6 /* Variance 2 */
+
+ movdqa xmm7, FLimitWMT /* xmm7 = FLimit */
+ movdqa xmm2, xmm1 /* copy of Varinace 1*/
+
+ movdqa [Variance1], xmm1 /* save the varinace1 */
+ movdqa [Variance2], xmm5 /* save the varinace2 */
+
+ movdqa xmm6, xmm5 /* Variance 2 */
+ psubw xmm1, xmm7 /* Variance 1 < Flimit? */
+
+ psubw xmm5, xmm7 /* Variance 2 < Flimit? */
+ psraw xmm2, 15 /* Variance 1 > 32768? */
+
+ psraw xmm6, 15 /* Vaiance 2 > 32768? */
+ psraw xmm1, 15 /* FFFF/0000 for true/false */
+
+ psraw xmm5, 15 /* FFFF/0000 for true/false */
+ movdqa xmm7, [edi+64] /* xmm0 = Pixel 4 */
+
+ pandn xmm2, xmm1 /* Variance1<32678 &&
+ Variance1<Limit */
+ pandn xmm6, xmm5 /* Variance2<32678 &&
+ Variance1<Limit */
+
+ movdqa xmm4, [edi+80] /* xmm4 = Pixel 5 */
+ pand xmm6, xmm2 /* xmm6 = Variance1 < Flimit */
+ /* &&Variance2 < Flimit */
+
+ movdqa xmm2, xmm7 /* make copy of Pixel4 */
+
+ psubusw xmm7, xmm4 /* 4 - 5 */
+ psubusw xmm4, xmm2 /* 5 - 4 */
+
+ por xmm7, xmm4 /* abs(4 - 5) */
+ psubw xmm7, QStepWMT /* abs(4-5)<QStepxmmx ? */
+
+ psraw xmm7, 15 /* FFFF/0000 for True/Flase */
+ pand xmm7, xmm6
+
+ /* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+ /* xmm7 now are in use */
+ /* find the loop filtered values for the pixels on block boundary */
+ movdqa xmm1, LoopFLimitWMT; /* Get the Flimit values for loop filter */
+ movdqa xmm3, [edi + 48] /* xmm3 = x3 = p[-2] */
+
+ movdqa xmm4, [edi + 64] /* mm4 = x4 = p[-1] */
+ movdqa xmm5, [edi + 80] /* mm5 = x5 = p[ 0] */
+
+ movdqa xmm6, [edi + 96] /* mm6 = x6 = p[ 1] */
+ psubw xmm5, xmm4 /* mm5 = p[ 0] - p[-1] */
+
+ psubw xmm3, xmm6 /* mm3 = p[-2] - p[ 1] */
+ movdqa xmm4, xmm5 /* make a copy */
+
+ paddw xmm4, xmm5 /* 2 * ( p[0] - p[-1] ) */
+ paddw xmm3, EightFours /* mm3 + 4 */
+
+ paddw xmm5, xmm4 /* 3 * ( p[0] - p[-1] ) */
+ paddw xmm3, xmm5 /* Filtval before shift */
+
+ psraw xmm3, 3 /* FiltVal */
+ movdqa xmm2, xmm3 /* make a copy */
+
+ psraw xmm3, 15 /* FFFF->Neg, 0000->Pos */
+ pxor xmm2, xmm3
+
+ psubsw xmm2, xmm3 /* mm2 = abs(FiltVal) */
+ por xmm3, EightOnes /* -1 and 1 for + and - */
+
+ movdqa xmm4, xmm1 /* make a copy of Flimit */
+ psubw xmm1, xmm2 /* mm1= Flimit - abs(FiltVal) */
+
+ movdqa xmm5, xmm1 /* copy Flimit - abs(FiltVal) */
+ psraw xmm1, 15 /* FFFF or 0000 */
+
+ pxor xmm5, xmm1
+ psubsw xmm5, xmm1 /* abs(Flimit - abs(FiltVal)) */
+
+ psubusw xmm4, xmm5 /* Flimit-abs(Flimit - abs(FiltVal)) */
+ pmullw xmm4, xmm3 /* get the sign back */
+
+ movdqa xmm1, [edi+64] /* p[-1] */
+ movdqa xmm2, [edi+80] /* p[0] */
+
+ paddw xmm1, mm4 /* p[-1] + NewFiltVal */
+ psubw xmm2, mm4 /* p[0] - NewFiltVal */
+
+ pxor xmm6, xmm6 /* clear mm6 */
+ packuswb xmm1, xmm1 /* clamping */
+
+ packuswb xmm2, xmm2
+ punpcklbw xmm1, xmm6 /* unpack to word */
+
+ movdqa LoopFilteredValuesUp, xmm1 /* save the values */
+ punpcklbw xmm2, xmm6 /* unpack to word */
+
+ movdqa LoopFilteredValuesDown, xmm2 /* save the values */
+
+ /* Let's do the filtering now */
+ /* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ? Src[-5] : Src[-4]; */
+ /* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ? Src[+4] : Src[+3]; */
+
+ movdqa xmm5, [edi] /* xmm5 = -5 */
+ movdqa xmm4, [edi + 16] /* xmm4 = -4 */
+
+ movdqa xmm3, xmm4 /* copy of -4 */
+ movdqa xmm6, xmm5 /* copy of -5 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [-4] - [-5] */
+ psubusw xmm5, xmm3 /* xmm5 = [-5] - [-4] */
+
+ por xmm4, xmm5 /* abs([-4]-[-5] ) */
+ psubw xmm4, QStepWMT /* abs([-4]-[-5] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm1, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm1, xmm3 /* */
+
+ por xmm1, xmm4 /* xmm1 = p1 */
+
+ /* now find P2 */
+
+ movdqa xmm4, [edi+128] /* xmm4 = [3] */
+ movdqa xmm5, [edi+144] /* xmm5 = [4] */
+
+ movdqa xmm3, xmm4 /* copy of 3 */
+ movdqa xmm6, xmm5 /* copy of 4 */
+
+ psubusw xmm4, xmm6 /* xmm4 = [3] - [4] */
+ psubusw xmm5, xmm3 /* xmm5 = [4] - [3] */
+
+ por xmm4, xmm5 /* abs([3]-[4] ) */
+ psubw xmm4, QStepWMT /* abs([3]-[4] )<QStep? */
+
+ psraw xmm4, 15 /* FFFF/0000 for True/False */
+ movdqa xmm2, xmm4 /* copy of the xmm4 */
+
+ pand xmm4, xmm6 /* */
+ pandn xmm2, xmm3 /* */
+
+ por xmm2, xmm4 /* xmm2 = p2 */
+
+ /* Data is ready, now do the filtering */
+
+ pxor xmm0, xmm0 /* clear xmm0 */
+
+ /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */
+ /* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */
+ /* Des[-w4] = Src[-w4]; */
+ /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */
+
+
+ movdqa xmm3, xmm1 /* xmm3 = p1 */
+ paddw xmm3, xmm3 /* xmm3 = p1 + p1 */
+
+ paddw xmm3, xmm1 /* xmm3 = p1 + p1 + p1 */
+ movdqa xmm4, [edi+16] /* xmm4 = x1 */
+
+ paddw xmm3, [edi+32] /* xmm3 = p1+p1+p1+ x2 */
+ paddw xmm4, [edi+48] /* xmm4 = x1+x3 */
+
+ paddw xmm3, [edi+64] /* xmm3 += x4 */
+ paddw xmm4, EightFours /* xmm4 = x1 + x3 + 4 */
+
+ paddw xmm3, xmm4 /* xmm3 = 3*p1+x1+x2+x3+x4+4 */
+ movdqa xmm4, xmm3 /* xmm4 = xmm3 */
+
+ movdqa xmm5, [edi+16] /* xmm5 = x1 */
+ paddw xmm4, xmm5 /* xmm4 = sum+x1 */
+
+ psllw xmm4, 1 /* xmm4 = (sum+x1)<<1 */
+ psubw xmm4, [edi+64] /* xmm4 = (sum+x1)<<1-x4 */
+
+ paddw xmm4, [edi+80] /* xmm4 = (sum+x1)<<1-x4+x5 */
+ psraw xmm4, 4 /* xmm4 >>=4 */
+
+ psubw xmm4, xmm5 /* New Value - old Value */
+ pand xmm4, xmm7 /* And the flag */
+
+ paddw xmm4, xmm5 /* add the old value back */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movdq2q mm0, xmm4 /* Write new x1 */
+
+ /* sum += x5 -p1 */
+ /* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */
+
+ movdqa xmm5, [edi+32] /* xmm5= x2 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+80] /* sum=sum+x5 */
+ movdqa xmm4, xmm5 /* copy sum */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x2 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+80] /* xmm4 =(sum+x2)<<1-x5 */
+ paddw xmm4, [edi+96] /* xmm4 =(sum+x2)<<1-x5+x6 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movdq2q mm1, xmm4 /* write new x2 */
+
+ /* sum += x6 - p1 */
+ /* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */
+
+ movdqa xmm5, [edi+48] /* xmm5= x3 */
+ psubw xmm3, xmm1 /* sum=sum-p1 */
+
+ paddw xmm3, [edi+96] /* sum=sum+x6 */
+ movdqa xmm4, xmm5 /* copy x3 */
+
+ paddw xmm4, xmm3 /* xmm4=sum+x3 */
+ paddw xmm4, xmm4 /* xmm4 <<= 1 */
+
+ psubw xmm4, [edi+96] /* xmm4 =(sum+x3)<<1-x6 */
+ paddw xmm4, [edi+112] /* xmm4 =(sum+x3)<<1-x6+x7 */
+
+ psraw xmm4, 4 /* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+ psubw xmm4, xmm5 /* new value - old value */
+
+ pand xmm4, xmm7 /* And the flag */
+ paddw xmm4, xmm5 /* add the old value back */
+
+ packuswb xmm4, xmm0 /* pack it to bytes */
+ movdq2q mm2, xmm4 /* write new x3 */
+
+ /* sum += x7 - p1 */
+ /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */
+
+ movdqa xmm5, [edi+64] /* xmm5 = x4 */
+ psubw xmm3, xmm1 /* sum = sum-p1 */
+
+ paddw xmm3, [edi+112] /* sum = sum+x7 */
+ movdqa xmm4, xmm5 /* xmm4 = x4 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum + x4 */
+ paddw xmm4, xmm4 /* xmm4 *=2 */
+
+ paddw xmm4, xmm1 /* += p1 */
+ psubw xmm4, [edi+16] /* -= x1 */
+
+ psubw xmm4, [edi+112] /* -= x7 */
+ paddw xmm4, [edi+128] /* += x8 */
+
+ movdqa xmm5, LoopFilteredValuesUp /* Read the loop filtered value of x4 */
+ psraw xmm4, 4 /* >>=4 */
+
+ psubw xmm4, xmm5 /* -=x4 */
+ pand xmm4, xmm7 /* and flag */
+
+ paddw xmm4, xmm5 /* += x4 */
+ packuswb xmm4, xmm0 /* pack it to bytes */
+
+ movdq2q mm3, xmm4 /* write new x4 */
+
+
+ /* sum+= x8-x1 */
+ /* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */
+
+ movdqa xmm5, [edi+80] /* xmm5 = x5 */
+ psubw xmm3, [edi+16] /* sum -= x1 */
+
+ paddw xmm3, [edi+128] /* sub += x8 */
+ movdqa xmm4, xmm5 /* xmm4 = x5 */
+
+ paddw xmm4, xmm3 /* xmm4= sum+x5 */
+ paddw xmm4, xmm4 /* xmm4 *= 2 */
+
+ paddw xmm4, [edi+16] /* += x1 */
+ psubw xmm4, [edi+32] /* -= x2 */
+
+ psubw xmm4, [edi+128] /* -= x8 */
+ paddw xmm4, xmm2 /* += p2 */
+
+ movdqa xmm5, LoopFilteredValuesDown /* Read the loop filtered value of x4 */
+ psraw xmm4, 4 /* >>=4 */
+
+ psubw xmm4, xmm5 /* -=x5 */
+ pand xmm4, xmm7 /* and flag */
+
+ paddw xmm4, xmm5 /* += x5 */
+ packuswb xmm4, xmm0 /* pack to bytes */
+
+ movdq2q mm4, xmm4 /* write new x5 */
+
+ /* sum += p2 - x2 */
+ /* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */
+
+ movdqa xmm5, [edi+96] /* xmm5 = x6 */
+ psubw xmm3, [edi+32] /* -= x2 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x6 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x6 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+32] /* +=x2 */
+ psubw xmm4, [edi+48] /* -=x3 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x6 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x6 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm5, xmm4 /* write new x6 */
+
+ /* sum += p2 - x3 */
+ /* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */
+
+ movdqa xmm5, [edi+112] /* xmm5 = x7 */
+ psubw xmm3, [edi+48] /* -= x3 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x7 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x7 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+48] /* +=x3 */
+ psubw xmm4, [edi+64] /* -=x4 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x7 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x7 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm6, xmm4 /* write new x7 */
+
+ /* sum += p2 - x4 */
+ /* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */
+
+ movdqa xmm5, [edi+128] /* xmm5 = x8 */
+ psubw xmm3, [edi+64] /* -= x4 */
+
+ paddw xmm3, xmm2 /* += p2 */
+ movdqa xmm4, xmm5 /* xmm4 = x8 */
+
+ paddw xmm4, xmm3 /* xmm4 = sum+x8 */
+ paddw xmm4, xmm4 /* xmm4 *= 2*/
+
+ paddw xmm4, [edi+64] /* +=x4 */
+ psubw xmm4, [edi+80] /* -=x5 */
+
+ psraw xmm4, 4 /* >>=4 */
+ psubw xmm4, xmm5 /* -=x8 */
+
+ pand xmm4, xmm7 /* and flag */
+ paddw xmm4, xmm5 /* += x8 */
+
+ packuswb xmm4, xmm0 /* pack to bytes */
+ movdq2q mm7, xmm4 /* write new x8 */
+
+
+ /* transpose */
+ movq2dq xmm0, mm0 /* xmm0 = 70 60 50 40 30 20 10 00 */
+ movq2dq xmm1, mm1 /* xmm1 = 71 61 51 41 31 21 11 01 */
+
+ movq2dq xmm2, mm2 /* xmm2 = 72 62 52 42 32 22 12 02 */
+ movq2dq xmm3, mm3 /* xmm3 = 73 63 53 43 33 23 13 03 */
+
+ punpcklbw xmm0, xmm1 /* xmm0 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+ punpcklbw xmm2, xmm3 /* xmm2 = 7372 6362 5352 4342 3332 2322 1312 0302 */
+
+ movdqa xmm1, xmm0 /* xmm1 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+ punpcklwd xmm0, xmm2 /* xmm0 = 33323130 23222120 13121110 03020100 */
+
+ punpckhwd xmm1, xmm2 /* xmm1 = 73727170 63626160 53525150 43424140 */
+
+ movq2dq xmm4, mm4 /* xmm4 = 74 64 54 44 34 24 14 04 */
+ movq2dq xmm5, mm5 /* xmm5 = 75 65 55 45 35 25 15 05 */
+
+ movq2dq xmm6, mm6 /* xmm6 = 76 66 56 46 36 26 16 06 */
+ movq2dq xmm7, mm7 /* xmm7 = 77 67 57 47 37 27 17 07 */
+
+ punpcklbw xmm4, xmm5 /* xmm4 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+ punpcklbw xmm6, xmm7 /* xmm6 = 7776 6766 5756 4746 3736 2726 1716 0706 */
+
+ movdqa xmm5, xmm4 /* xmm5 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+ punpcklwd xmm4, xmm6 /* xmm4 = 37363534 27262524 17161514 07060504 */
+
+ punpckhwd xmm5, xmm6 /* xmm5 = 77767574 67666564 57565554 47464544 */
+ movdqa xmm2, xmm0 /* xmm2 = 33323130 23222120 13121110 03020100 */
+
+ punpckldq xmm0, xmm4 /* xmm0 = 1716151413121110 0706050403020100 */
+ movq QWORD PTR [esi+edx*2],xmm0 /* write 00 01 02 03 04 05 06 07 */
+
+ psrldq xmm0, 8 /* xmm0 = 1716151413121110 */
+ punpckhdq xmm2, xmm4 /* xmm2 = 3736353433323130 2726252423222120 */
+
+ movq QWORD PTR [esi+edx], xmm0 /* write 10 11 12 13 14 15 16 17 */
+ movdqa xmm3, xmm1 /* xmm3 = 73727170 63626160 53525150 43424140 */
+
+ punpckldq xmm1, xmm5 /* xmm1 = 5756555453525150 4746454443424140 */
+ movq QWORD PTR [esi], xmm2 /* write 20 21 22 23 24 25 26 27 */
+
+ psrldq xmm2, 8 /* xmm2 = 3736353433323130 */
+ punpckhdq xmm3, xmm5 /* xmm3 = 7776757473727170 6766656463626160 */
+
+ movq QWORD PTR [esi+ecx], xmm2 /* write 30 31 32 33 34 35 36 37 */
+ lea esi, [esi+ecx*4] /* esi= Des - 4 + 4 *pitch */
+
+ movq QWORD PTR [esi+edx*2], xmm1 /* write 40 41 42 43 44 45 46 47 */
+ movq QWORD PTR [esi], xmm3 /* write 60 61 62 63 64 65 66 67 */
+
+ psrldq xmm1, 8 /* xmm1 = 5756555453525150 */
+ psrldq xmm3, 8 /* xmm3 = 7776757473727170 */
+
+ movq QWORD PTR [esi+edx], xmm1 /* write 50 51 52 53 54 55 56 57 */
+ movq QWORD PTR [esi+ecx], xmm3 /* write 70 71 72 73 74 75 76 77 */
+
+
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop eax
+ }// end of __asm
+
+ Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+ Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+ pbi->FragmentVariances[CurrentFrag-1] += Var1;
+ pbi->FragmentVariances[CurrentFrag] += Var2;
+ CurrentFrag ++;
+
+ }//else
+
+ }//while
+#endif
+
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : PlaneAddNoise_wmt
+ *
+ *  INPUTS        : UINT8 *Start   Address of the first pixel of the plane to
+ *                                 add gaussian noise to.
+ *                  UINT32 Width   Width of the plane in pixels.
+ *                  UINT32 Height  Height of the plane in rows.
+ *                  INT32 Pitch    Distance in bytes between subsequent rows.
+ *                  int q          Quantizer used to determine the amount of
+ *                                 noise: sigma = 1 + 0.8 * (63 - q) / 63.
+ *
+ *  OUTPUTS       : The pixels of the plane are modified in place.
+ *
+ *  RETURNS       : void.
+ *
+ *  FUNCTION      : Adds gaussian noise to a plane of pixels.
+ *
+ *  SPECIAL NOTES : Does nothing when built with _WIN32_WCE defined.
+ *                  Pixels are handled 16 at a time with unaligned loads and
+ *                  stores, so every row is touched up to the next multiple
+ *                  of 16 bytes past Width, and one 16-byte group is
+ *                  processed per row even when Width is 0.
+ *                  The noise table holds 2048 samples and each row starts
+ *                  at a random offset of 0..255 into it, so rows wider than
+ *                  1792 pixels are processed in spans of 1792 pixels that
+ *                  each restart at the row's offset.  This keeps every noise
+ *                  read inside the table; narrower planes are unaffected.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_wmt( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+#if defined(_WIN32_WCE)
+    /* Compiled out on Windows CE: the plane is left untouched. */
+    return;
+#else
+    enum
+    {
+        NOISE_SAMPLES  = 2048,                              /* entries in Rand[]              */
+        NOISE_OFFSETS  = 256,                               /* row offsets are rand() & 0xff  */
+        NOISE_SPAN     = NOISE_SAMPLES - NOISE_OFFSETS      /* pixels safely readable per run */
+    };
+
+    /* The clamp vectors are used as memory operands of psubusb/paddusb, */
+    /* hence the 16-byte alignment.                                      */
+    __declspec(align(16)) unsigned char blackclamp[16];
+    __declspec(align(16)) unsigned char whiteclamp[16];
+    __declspec(align(16)) unsigned char bothclamp[16];
+    char CharDist[300];
+    char Rand[NOISE_SAMPLES];
+    double sigma;
+    unsigned int i;
+
+    /* Leave MMX state before the floating-point math below. */
+    __asm emms
+    sigma = 1 + .8*(63-q) / 63.0;
+
+    // set up a lookup table of 256 entries that matches
+    // a gaussian distribution with sigma determined by q.
+    //
+    {
+        double level;
+        int next = 0;
+        int j;
+
+        for (level = -32; level < 32; level++)
+        {
+            /* number of table slots given to this noise level */
+            int a = (int)(.5+256*gaussian(sigma,0,level));
+
+            /* Only entries 0..255 are ever read (index is rand() & 0xff), */
+            /* so stop filling there instead of trusting the weights to    */
+            /* add up to something that fits in CharDist.                  */
+            for (j = 0; j < a && next < 256; j++)
+                CharDist[next++] = (char) level;
+        }
+
+        /* pad whatever rounding left unused with "no noise" */
+        for (; next < 256; next++)
+            CharDist[next] = 0;
+    }
+
+    /* Pre-draw the noise samples that the rows will read from. */
+    for (i = 0; i < NOISE_SAMPLES; i++)
+    {
+        Rand[i] = CharDist[rand() & 0xff];
+    }
+
+    /* CharDist[0] is the first level written, i.e. the most negative one; */
+    /* its magnitude m is used as the clamping margin.                     */
+    for (i = 0; i < 16; i++)
+    {
+        blackclamp[i] = -CharDist[0];
+        whiteclamp[i] = -CharDist[0];
+        bothclamp[i]  = -2*CharDist[0];
+    }
+
+    for (i = 0; i < Height; i++)
+    {
+        UINT8 *Pos = Start + i *Pitch;
+        INT8 *Ref = Rand + (rand() & 0xff);     /* one random table offset per row */
+        UINT32 Remaining = Width;
+
+        /* do/while: the assembly loop below always handles at least one */
+        /* 16-byte group, exactly as it did before spans were introduced. */
+        do
+        {
+            UINT32 Span = (Remaining > (UINT32)NOISE_SPAN) ? (UINT32)NOISE_SPAN : Remaining;
+
+            __asm
+            {
+                mov         ecx, [Span]             /* pixels in this span          */
+                mov         esi, Pos                /* first pixel of the span      */
+                mov         edi, Ref                /* first noise sample           */
+                xor         eax, eax                /* byte offset within the span  */
+
+            nextset:
+                movdqu      xmm1, [esi+eax]         /* get the source               */
+
+                /* (x -sat m) +sat 2m -sat m  ==  clamp(x, m, 255-m), so the   */
+                /* wrapping paddb below cannot overflow for noise within +/-m. */
+                psubusb     xmm1, blackclamp
+                paddusb     xmm1, bothclamp
+                psubusb     xmm1, whiteclamp
+
+                movdqu      xmm2, [edi+eax]         /* get the noise for these pixels */
+                paddb       xmm1, xmm2              /* add it in                    */
+                movdqu      [esi+eax], xmm1         /* store the result             */
+
+                add         eax, 16                 /* move to the next 16 pixels   */
+
+                cmp         eax, ecx
+                jl          nextset
+            }
+
+            Pos += Span;
+            Remaining -= Span;
+        } while (Remaining != 0);
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c
new file mode 100644
index 00000000..35a39265
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c
@@ -0,0 +1,2529 @@
+/****************************************************************************
+ *
+ * Module Title : DeRingingOpt.c
+ *
+ * Description : Optimized functions for PostProcessor
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#pragma warning(disable:4799)
+#pragma warning(disable:4731)
+#pragma warning(disable:4305)
+
+/****************************************************************************
+* Module constants.
+****************************************************************************/
+/*
+ * Constant vectors used as memory operands by the inline assembly in this
+ * module.  The tables are the same on every target; only the alignment
+ * request differs:
+ *   - _WIN32_WCE builds bracket the tables with #pragma pack(16) / pack()
+ *   - all other builds tag each table with __declspec(align(16))
+ * NOTE(review): #pragma pack governs structure member packing - confirm it
+ * gives these arrays the intended alignment on Windows CE.
+ */
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+#define DERING_ALIGN16
+#else
+#define DERING_ALIGN16 __declspec(align(16))
+#endif
+
+/* four 16-bit lanes each */
+DERING_ALIGN16 static unsigned short FourOnes[] = { 1, 1, 1, 1 };
+DERING_ALIGN16 static unsigned short Four128s[] = { 128, 128, 128, 128 };
+DERING_ALIGN16 static unsigned short Four64s[] = { 64, 64, 64, 64};
+
+/* eight 8-bit lanes each */
+DERING_ALIGN16 static char eight64s [] = { 64, 64, 64, 64, 64, 64, 64, 64 };
+DERING_ALIGN16 static char eight32s [] = { 32, 32, 32, 32, 32, 32, 32, 32 };
+DERING_ALIGN16 static char eight127s []= { 127, 127, 127, 127, 127, 127, 127, 127 };
+DERING_ALIGN16 static char eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128 };
+DERING_ALIGN16 static unsigned char eight223s[] = { 223, 223, 223, 223, 223, 223, 223, 223 };
+DERING_ALIGN16 static unsigned char eight231s[] = { 231, 231, 231, 231, 231, 231, 231, 231 };
+
+#undef DERING_ALIGN16
+#if defined(_WIN32_WCE)
+#pragma pack()
+#endif
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 SharpenModifier[];
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockStrong_MMX
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Image stride.
+ * UINT32 FragQIndex : Q-index block encoded with.
+ * UINT32 *QuantScale : Array of quantization scale factors.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filtering a block for de-ringing purpose.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockStrong_MMX
+(
+ const POSTPROC_INSTANCE *pbi,
+ const UINT8 *SrcPtr,
+ UINT8 *DstPtr,
+ const INT32 Pitch,
+ UINT32 FragQIndex,
+ UINT32 *QuantScale
+)
+{
+
+#if defined(_WIN32_WCE)
+ #pragma pack(16)
+ short UDMod[72];
+ short LRMod[128];
+ #pragma pack()
+#else
+ __declspec(align(16)) short UDMod[72];
+ __declspec(align(16)) short LRMod[128];
+#endif
+ unsigned int PlaneLineStep = Pitch;
+ const unsigned char *Src = SrcPtr;
+ unsigned char *Des = DstPtr;
+
+ short *UDPointer = UDMod;
+ short *LRPointer = LRMod;
+
+ UINT32 QStep = QuantScale[FragQIndex];
+ INT32 Sharpen = SharpenModifier[FragQIndex];
+ (void) pbi;
+
+ __asm
+ {
+ push esi
+ push edi
+
+ mov esi, Src /* Source Pointer */
+ mov edi, UDPointer /* UD modifier pointer */
+
+ push ecx
+ push edx
+
+ mov ecx, PlaneLineStep /* Pitch Step */
+ xor edx, edx
+
+ push eax
+ push ebx
+
+ mov eax, QStep /* QValue */
+ mov ebx, Sharpen /* Sharpen */
+
+ movd mm0, eax /* QValue */
+ movd mm2, ebx /* sharpen */
+
+ punpcklbw mm0, mm0 /* 00 00 00 QQ */
+ sub edx, ecx /* Negative Pitch */
+
+ punpcklbw mm2, mm2 /* 00 00 00 SS */
+ pxor mm7, mm7 /* clear mm7 for unpacks */
+
+ punpcklbw mm0, mm0 /* 00 00 qq qq */
+ mov eax, LRPointer /* Left and Right Modifier */
+
+ punpcklbw mm2, mm2 /* 00 00 ss ss */
+ lea ebx, [esi+ecx*8] /* Source Pointer of last row */
+
+ punpcklbw mm0, mm0 /* qq qq qq qq */
+ movq mm1, mm0; /* make a copy */
+
+ punpcklbw mm2, mm2 /* ss ss ss ss */
+ paddb mm1, mm0 /* QValue * 2 */
+
+ paddb mm1, mm0 /* High = 3 * Qvalue */
+ paddusb mm1, eight223s /* clamping high to 32 */
+
+ paddb mm0, eight32s /* 32+QValues */
+ psubusb mm1, eight223s /* Get the real value back */
+
+ movq mm3, eight127s /* 7f 7f 7f 7f 7f 7f 7f 7f */
+ pandn mm1, mm3 /* ClampHigh */
+
+ /* mm0,mm1,mm2,mm7 are in use */
+ /* mm0---> QValue+32 */
+ /* mm1---> ClampHigh */
+ /* mm2---> Sharpen */
+ /* mm7---> Cleared for unpack */
+
+FillModLoop1:
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [edi], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [edi+8], mm4 /* writeout UDmod, high four */
+
+
+ /* left Mod */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi-1] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [eax], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [eax+8], mm4 /* writeout UDmod, high four */
+
+
+
+ /* Right Mod */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+1] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [eax+128], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [eax+136], mm4 /* writeout UDmod, high four */
+ add esi, ecx
+
+
+ add edi, 16
+ add eax, 16
+
+ cmp esi, ebx
+ jne FillModLoop1
+
+ /* last UDMod */
+
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [edi], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [edi+8], mm4 /* writeout UDmod, high four */
+
+ mov esi, Src
+ mov edi, Des
+
+ mov eax, UDPointer
+ mov ebx, LRPointer
+
+ /* First Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Second Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Third Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+
+
+ /* Fourth Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Fifth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Sixth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Seventh Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+ /* Eighth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ pop ebx
+ pop eax
+
+ pop edx
+ pop ecx
+
+ pop edi
+ pop esi
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockWeak_MMX
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ * const UINT8 *SrcPtr : Pointer to input image.
+ * UINT8 *DstPtr : Pointer to output image.
+ * const INT32 Pitch : Image stride.
+ * UINT32 FragQIndex : Q-index block encoded with.
+ * UINT32 *QuantScale : Array of quantization scale factors.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters a block for de-ringing purposes.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockWeak_MMX
+(
+ const POSTPROC_INSTANCE *pbi,
+ const UINT8 *SrcPtr,
+ UINT8 *DstPtr,
+ const INT32 Pitch,
+ UINT32 FragQIndex,
+ UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+ #pragma pack(16)
+ short UDMod[72];
+ short LRMod[128];
+ #pragma pack()
+#else
+ __declspec(align(16)) short UDMod[72];
+ __declspec(align(16)) short LRMod[128];
+#endif
+
+ unsigned int PlaneLineStep = Pitch;
+ const unsigned char *Src = SrcPtr;
+ unsigned char *Des = DstPtr;
+
+ short *UDPointer = UDMod;
+ short *LRPointer = LRMod;
+
+ UINT32 QStep = QuantScale[FragQIndex];
+ INT32 Sharpen = SharpenModifier[FragQIndex];
+ (void) pbi;
+
+ __asm
+ {
+ push esi
+ push edi
+
+ mov esi, Src /* Source Pointer */
+ mov edi, UDPointer /* UD modifier pointer */
+
+ push ecx
+ push edx
+
+ mov ecx, PlaneLineStep /* Pitch Step */
+ xor edx, edx
+
+ push eax
+ push ebx
+
+ mov eax, QStep /* QValue */
+ mov ebx, Sharpen /* Sharpen */
+
+ movd mm0, eax /* QValue */
+ movd mm2, ebx /* sharpen */
+
+ punpcklbw mm0, mm0 /* 00 00 00 QQ */
+ sub edx, ecx /* Negative Pitch */
+
+ punpcklbw mm2, mm2 /* 00 00 00 SS */
+ pxor mm7, mm7 /* clear mm7 for unpacks */
+
+ punpcklbw mm0, mm0 /* 00 00 qq qq */
+ mov eax, LRPointer /* Left and Right Modifier */
+
+ punpcklbw mm2, mm2 /* 00 00 ss ss */
+ lea ebx, [esi+ecx*8] /* Source Pointer of last row */
+
+ punpcklbw mm0, mm0 /* qq qq qq qq */
+ movq mm1, mm0; /* make a copy */
+
+ punpcklbw mm2, mm2 /* ss ss ss ss */
+ paddb mm1, mm0 /* QValue * 2 */
+
+ paddb mm1, mm0 /* High = 3 * Qvalue */
+ paddusb mm1, eight231s /* clamping high to 24 */
+
+ paddb mm0, eight32s /* 32+QValues */
+ psubusb mm1, eight231s /* Get the real value back */
+
+ movq mm3, eight127s /* 7f 7f 7f 7f 7f 7f 7f 7f */
+ pandn mm1, mm3 /* ClampHigh */
+
+ /* mm0,mm1,mm2,mm7 are in use */
+ /* mm0---> QValue+32 */
+ /* mm1---> ClampHigh */
+ /* mm2---> Sharpen */
+ /* mm7---> Cleared for unpack */
+
+FillModLoop1:
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [edi], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [edi+8], mm4 /* writeout UDmod, high four */
+
+
+ /* left Mod */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi-1] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [eax], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [eax+8], mm4 /* writeout UDmod, high four */
+
+
+
+ /* Right Mod */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+1] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [eax+128], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [eax+136], mm4 /* writeout UDmod, high four */
+ add esi, ecx
+
+
+ add edi, 16
+ add eax, 16
+
+ cmp esi, ebx
+ jne FillModLoop1
+
+ /* last UDMod */
+
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+
+ movq mm5, mm3 /* make a copy of p */
+ psubusb mm3, mm4 /* p-pu */
+
+ psubusb mm4, mm5 /* pu-p */
+ por mm3, mm4 /* abs(p-pu) */
+
+ movq mm6, mm0 /* 32+QValues */
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128s /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64s /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ pxor mm5, mm5 /* clear mm5 */
+
+ pxor mm4, mm4 /* clear mm4 */
+ punpcklbw mm5, mm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw mm5, 8 /* sign extended */
+ movq QWORD PTR [edi], mm5 /* writeout UDmod, low four */
+
+ punpckhbw mm4, mm6
+ psraw mm4, 8
+
+ movq QWORD PTR [edi+8], mm4 /* writeout UDmod, high four */
+
+ mov esi, Src
+ mov edi, Des
+
+ mov eax, UDPointer
+ mov ebx, LRPointer
+
+ /* First Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Second Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Third Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+
+
+ /* Fourth Row */
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Fifth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Sixth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+
+ /* Seventh Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ add esi, ecx /* Src += Pitch */
+ add edi, ecx /* Des += Pitch */
+
+ add eax, 16 /* UDPointer += 8 */
+ add ebx, 16 /* LPointer +=8 */
+
+ /* Eighth Row */
+
+ movq mm0, [esi+edx] /* mm0 = Pixels above */
+ pxor mm7, mm7 /* clear mm7 */
+
+ movq mm1, mm0 /* make a copy of mm0 */
+ punpcklbw mm0, mm7 /* lower four pixels */
+
+ movq mm4, [eax] /* au */
+ punpckhbw mm1, mm7 /* high four pixels */
+
+ movq mm5, [eax+8] /* au */
+
+ pmullw mm0, mm4 /* pu*au */
+ movq mm2, [esi+ecx] /* mm2 = pixels below */
+
+ pmullw mm1, mm5 /* pu*au */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* lower four */
+ movq mm6, [eax+16] /* ad */
+
+ punpckhbw mm3, mm7 /* higher four */
+ paddw mm4, mm6 /* au+ad */
+
+ pmullw mm2, mm6 /* au*pu+ad*pd */
+ movq mm6, [eax+24] /* ad */
+
+ paddw mm0, mm2
+ paddw mm5, mm6 /* au+ad */
+
+ pmullw mm3, mm6 /* ad*pd */
+ movq mm2, [esi-1] /* pixel to the left */
+
+ paddw mm1, mm3 /* au*pu+ad*pd */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx] /* al */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al */
+
+ pmullw mm2, mm6 /* pl * al */
+ movq mm6, [ebx+8] /* al */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl */
+ paddw mm5, mm6 /* au+ad+al */
+
+ pmullw mm3, mm6 /* al*pl */
+ movq mm2, [esi+1] /* pixel to the right */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl */
+ movq mm3, mm2 /* make a copy of mm2 */
+
+ punpcklbw mm2, mm7 /* four left pixels */
+ movq mm6, [ebx+128] /* ar */
+
+ punpckhbw mm3, mm7 /* four right pixels */
+ paddw mm4, mm6 /* au + ad + al + ar */
+
+ pmullw mm2, mm6 /* pr * ar */
+ movq mm6, [ebx+136] /* ar */
+
+ paddw mm0, mm2 /* au*pu+ad*pd+al*pl+pr*ar */
+ paddw mm5, mm6 /* au+ad+al+ar */
+
+ pmullw mm3, mm6 /* ar*pr */
+ movq mm2, [esi] /* p */
+
+ paddw mm1, mm3 /* au*pu+ad*pd+al*pl+ar*pr */
+ movq mm3, mm2 /* make a copy of the pixel */
+
+ /* mm0, mm1 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* mm4, mm5 --- au + ad + al + ar */
+
+ punpcklbw mm2, mm7 /* left four pixels */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ punpckhbw mm3, mm7 /* right four pixels */
+ psubw mm6, mm4 /* 128-(au+ad+al+ar) */
+
+ pmullw mm2, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four128s /* 0080 0080 0080 0080 */
+
+ paddw mm0, mm2 /* sum */
+ psubw mm6, mm5 /* 128-(au+ad+al+ar) */
+
+ pmullw mm3, mm6 /* p*(128-(au+ad+al+ar)) */
+ movq mm6, Four64s /* {64, 64, 64, 64 } */
+
+ movq mm7, mm6 /* {64, 64, 64, 64} */
+ paddw mm0, mm6 /* sum+B */
+
+ paddw mm1, mm3 /* sum */
+ psllw mm7, 8 /* {16384, .. } */
+
+ paddw mm0, mm7 /* clamping */
+ paddw mm1, mm6 /* sum+B */
+
+ paddw mm1, mm7 /* clamping */
+ psubusw mm0, mm7 /* clamping */
+
+ psubusw mm1, mm7 /* clamping */
+ psrlw mm0, 7 /* (sum+B)>>7 */
+
+ psrlw mm1, 7 /* (sum+B)>>7 */
+ packuswb mm0, mm1 /* pack to 8 bytes */
+
+ movq [edi], mm0 /* write to destination */
+
+ pop ebx
+ pop eax
+
+ pop edx
+ pop ecx
+
+ pop edi
+ pop esi
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c
new file mode 100644
index 00000000..574ac1ba
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c
@@ -0,0 +1,748 @@
+/****************************************************************************
+ *
+ * Module Title : DeRingingWmtOpt.c
+ *
+ * Description : Optimized functions for PostProcessor
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+*
+* C4305 (truncation in initialisation) and C4731 (frame pointer register
+* 'ebp' modified by inline assembly code) are silenced for this file; the
+* inline assembly below deliberately uses ebp as a loop counter.
+****************************************************************************/
+#pragma warning(disable:4305)
+#pragma warning(disable:4731)
+
+/****************************************************************************
+* Module Statics
+*
+* 16-byte aligned constant vectors referenced directly from the inline
+* assembly. The two "s" tables are eight 16-bit words (one full XMM
+* register); the "c" tables are eight bytes (one MMX register).
+****************************************************************************/
+#if defined(_WIN32_WCE)
+#else
+__declspec(align(16)) static unsigned short eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128}; /* total filter weight: centre weight = 128 - (au+ad+al+ar) */
+__declspec(align(16)) static unsigned short eight64s[] = { 64, 64, 64, 64, 64, 64, 64, 64}; /* rounding term for >>7; also shifted left by 8 to form the 16384 clamp bias */
+__declspec(align(16)) static char eight64c [] = { 64, 64, 64,64,64,64,64,64}; /* added to (32+QValue) to form the sharpen threshold */
+__declspec(align(16)) static char eight32c [] = { 32,32,32,32,32,32,32,32}; /* added to QValue to form 32+QValue */
+__declspec(align(16)) static char eight127c []= { 127, 127, 127, 127, 127, 127, 127, 127}; /* pandn mask: turns High into 127-High for the signed-saturation clamp */
+__declspec(align(16)) static char eight128c []= { 128, 128, 128, 128, 128, 128, 128, 128}; /* 0x80 sign bias so pcmpgtb can compare unsigned bytes. NOTE(review): 128 does not fit a signed char; this relies on truncation to 0x80 - consider unsigned char */
+__declspec(align(16)) static unsigned char eight223c[] = { 223,223,223,223,223,223,223,223}; /* 255-32: saturating add then subtract clamps 3*QValue to at most 32 */
+__declspec(align(16)) static unsigned char eight231c[] = { 231,231,231,231,231,231,231,231}; /* 255-24; not referenced by DeringBlockStrong_WMT - presumably used by the weak filter, TODO confirm */
+#endif
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 SharpenModifier[];
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockStrong_WMT
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
+ * const UINT8 *SrcPtr : Pointer to top-left pixel of the 8x8 input block.
+ * UINT8 *DstPtr : Pointer to top-left pixel of the 8x8 output block.
+ * const INT32 Pitch : Image stride (used for both source and destination).
+ * UINT32 FragQIndex : Q-index block encoded with.
+ * UINT32 *QuantScale : Array of quantization scale factors.
+ *
+ * OUTPUTS : 8 rows of 8 filtered pixels written to DstPtr.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filtering a block for de-ringing purpose.
+ *
+ * Pass 1 (FillModLoop1) derives a signed weight for each pixel
+ * against its upper, left and right neighbour:
+ * Mod = min(max(32+QValue-abs(p-n), 0), min(3*QValue, 32))
+ * or the Sharpen value when abs(p-n) exceeds 32+QValue+64.
+ * Pass 2 (FilterLoop1) computes, per pixel,
+ * (au*pu + ad*pd + al*pl + ar*pr
+ * + p*(128-(au+ad+al+ar)) + 64) >> 7
+ * clamped to 0..255.
+ *
+ * SPECIAL NOTES : Reads source rows -1..8 and columns -1..8 relative to SrcPtr,
+ * so the block needs a one pixel readable border.
+ * Only the low byte of QStep and Sharpen is used.
+ * ebp is used as the row counter in pass 2; every C local is
+ * loaded into a register before ebp is overwritten.
+ * MMX registers are used and no EMMS is issued here - presumably
+ * the caller clears the MMX state, TODO confirm.
+ * Compiles to an empty function on _WIN32_WCE.
+ *
+ ****************************************************************************/
+void DeringBlockStrong_WMT
+(
+ const POSTPROC_INSTANCE *pbi,
+ const UINT8 *SrcPtr,
+ UINT8 *DstPtr,
+ const INT32 Pitch,
+ UINT32 FragQIndex,
+ UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+ return;
+#else
+
+ __declspec(align(16)) short UDMod[72]; /* 9 rows x 8 up/down modifiers */
+ __declspec(align(16)) short LRMod[128]; /* 8 rows x 8 left modifiers, then 8 rows x 8 right modifiers */
+
+ unsigned int PlaneLineStep = Pitch;
+ const unsigned char *Src = SrcPtr;
+ unsigned char *Des = DstPtr;
+
+ short *UDPointer = UDMod;
+ short *LRPointer = LRMod;
+
+ UINT32 QStep = QuantScale[FragQIndex];
+ INT32 Sharpen = SharpenModifier[FragQIndex];
+ (void) pbi;
+
+ __asm
+ {
+ push esi
+ push edi
+
+ mov esi, Src /* Source Pointer */
+ mov edi, UDPointer /* UD modifier pointer */
+
+ push ecx
+ push edx
+
+ mov ecx, PlaneLineStep /* Pitch Step */
+ xor edx, edx
+
+ push eax
+ push ebx
+
+ mov eax, QStep /* QValue */
+ mov ebx, Sharpen /* Sharpen */
+
+ movd mm0, eax /* QValue */
+ movd mm2, ebx /* sharpen */
+
+ push ebp /* saved: ebp becomes the row counter in pass 2 */
+
+ punpcklbw mm0, mm0 /* replicate QValue low byte, step 1 of 3 */
+ sub edx, ecx /* Negative Pitch */
+
+ punpcklbw mm2, mm2 /* replicate Sharpen low byte, step 1 of 3 */
+ pxor mm7, mm7 /* clear mm7 for unpacks */
+
+ punpcklbw mm0, mm0 /* replicate QValue low byte, step 2 of 3 */
+ mov eax, LRPointer /* Left and Right Modifier */
+
+ punpcklbw mm2, mm2 /* replicate Sharpen low byte, step 2 of 3 */
+ lea ebx, [esi+ecx*8] /* Source row 8: end marker for pass 1 */
+
+ punpcklbw mm0, mm0 /* QValue in all 8 bytes */
+ movq mm1, mm0 /* make a copy */
+
+ punpcklbw mm2, mm2 /* Sharpen in all 8 bytes */
+ paddb mm1, mm0 /* QValue * 2 */
+
+ paddb mm1, mm0 /* High = 3 * Qvalue */
+ paddusb mm1, eight223c /* clamping high to 32 */
+
+ paddb mm0, eight32c /* 32+QValues */
+ psubusb mm1, eight223c /* Get the real value back */
+
+ movq mm3, eight127c /* 7f 7f 7f 7f 7f 7f 7f 7f */
+ pandn mm1, mm3 /* ClampHigh = 127 - High */
+
+ /* mm0,mm1,mm2,mm7 are in use */
+ /* mm0---> QValue+32 */
+ /* mm1---> ClampHigh */
+ /* mm2---> Sharpen */
+ /* mm7---> Cleared for unpack */
+
+FillModLoop1:
+ /* --- up modifier for this row: p against the pixel above --- */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+ movq mm5, mm3 /* make a copy of p */
+
+ psubusb mm3, mm4 /* p-pu */
+ psubusb mm4, mm5 /* pu-p */
+
+ por mm3, mm4 /* abs(p-pu) */
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clamped TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a signed number */
+ pxor mm3, mm5 /* convert to a signed number */
+
+ pcmpgtb mm3, mm4 /* abs(p-pu) > 32+QValue+64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ /* --- left modifier for this row: p against the pixel to the left --- */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+
+ movq2dq xmm0, mm6
+ movq mm4, QWORD PTR [esi-1] /* Pixels to the left pl */
+
+ punpcklbw xmm7, xmm0 /* bytes into the high half of each word */
+ movq mm5, mm3 /* make a copy of p */
+
+ psraw xmm7, 8 /* sign extended */
+ psubusb mm3, mm4 /* p-pl */
+
+ movdqa [edi], xmm7 /* writeout UDmod*/
+ psubusb mm4, mm5 /* pl-p */
+
+ por mm3, mm4 /* abs(p-pl) */
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clamped TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a signed number */
+ pxor mm3, mm5 /* convert to a signed number */
+
+ pcmpgtb mm3, mm4 /* abs(p-pl) > 32+QValue+64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ /* --- right modifier for this row: p against the pixel to the right --- */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+
+ pxor xmm7, xmm7 /* clear xmm7 */
+ movq mm4, QWORD PTR [esi+1] /* Pixels to the right pr */
+
+ movq2dq xmm0, mm6
+ movq mm5, mm3 /* make a copy of p */
+
+ punpcklbw xmm7, xmm0 /* extended to shorts */
+ psubusb mm3, mm4 /* p-pr */
+
+ psraw xmm7, 8 /* sign extended */
+ psubusb mm4, mm5 /* pr-p */
+
+ movdqa [eax], xmm7 /* writeout LRmod (left) */
+ por mm3, mm4 /* abs(p-pr) */
+
+ movq mm6, mm0 /* 32+QValues */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clamped TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a signed number */
+ pxor mm3, mm5 /* convert to a signed number */
+
+ pcmpgtb mm3, mm4 /* abs(p-pr) > 32+QValue+64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ add esi, ecx
+
+ movq2dq xmm0, mm6
+ add edi, 16
+
+ punpcklbw xmm7, xmm0 /* extended to shorts; source must be xmm0 (was written as mm0) */
+ add eax, 16
+
+ psraw xmm7, 8 /* sign extended */
+ cmp esi, ebx
+
+ movdqa [eax+112], xmm7 /* writeout LRmod (right): eax already advanced, so this is +128 bytes from this row's left entry */
+ jne FillModLoop1
+
+ /* last UDMod: row 8 against row 7, used as the "down" weight of row 7 */
+
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+ movq mm5, mm3 /* make a copy of p */
+
+ psubusb mm3, mm4 /* p-pu */
+ psubusb mm4, mm5 /* pu-p */
+
+ por mm3, mm4 /* abs(p-pu) */
+ movq mm6, mm0 /* 32+QValues */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clamped TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a signed number */
+ pxor mm3, mm5 /* convert to a signed number */
+
+ pcmpgtb mm3, mm4 /* abs(p-pu) > 32+QValue+64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ movq2dq xmm6, mm6
+
+ punpcklbw xmm7, xmm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw xmm7, 8 /* sign extended */
+ movdqa [edi], xmm7 /* writeout UDmod */
+
+ /* Reload every C local before ebp is overwritten below */
+ mov esi, Src
+ mov edi, Des
+
+ mov eax, UDPointer
+ mov ebx, LRPointer
+
+ mov ebp, 8 /* row counter; no C locals may be referenced after this */
+
+FilterLoop1:
+
+ movq xmm0, QWORD PTR [esi+edx] /* xmm0 = Pixels above */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+ movdqa xmm4, [eax] /* au */
+ punpcklbw xmm0, xmm7 /* extended to shorts */
+
+ movq xmm2, QWORD PTR [esi+ecx] /* xmm2 = pixels below */
+ pmullw xmm0, xmm4 /* pu*au */
+
+ movdqa xmm6, [eax+16] /* ad: the next row's up modifier */
+ punpcklbw xmm2, xmm7 /* extended to shorts */
+
+ movq xmm1, QWORD PTR [esi-1] /* pixel to the left */
+ pmullw xmm2, xmm6 /* ad*pd */
+
+ movdqa xmm3, [ebx] /* al */
+ punpcklbw xmm1, xmm7 /* extended to shorts */
+
+ movq xmm5, QWORD PTR [esi+1] /* pixel to the right */
+ pmullw xmm1, xmm3 /* al * pl */
+
+ paddw xmm4, xmm6 /* au+ad */
+ punpcklbw xmm5, xmm7 /* extended to shorts */
+
+ movdqa xmm6, [ebx+128] /* ar */
+ pmullw xmm5, xmm6 /* ar * pr */
+
+ paddw xmm0, xmm2 /* au*pu + ad*pd */
+ paddw xmm4, xmm3 /* au+ad+al */
+
+ paddw xmm0, xmm1 /* au*pu+ad*pd+al*pl */
+ paddw xmm4, xmm6 /* au+ad+al+ar */
+
+ movq xmm2, QWORD PTR [esi] /* p */
+ paddw xmm0, xmm5 /* au*pu+ad*pd+al*pl+ar*pr */
+
+
+ /* xmm0 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* xmm4 --- au + ad + al + ar */
+
+ movdqa xmm1, eight128s /* 0080 0080 0080 0080 0080 0080 0080 0080 */
+ punpcklbw xmm2, xmm7 /* extended to shorts */
+
+ psubw xmm1, xmm4 /* 128-(au+ad+al+ar) */
+ pmullw xmm2, xmm1 /* p*(128-(au+ad+al+ar)) */
+
+ add esi, ecx /* Src += Pitch */
+ movdqa xmm6, eight64s /* 64, 64, 64, 64, 64, 64, 64, 64 */
+
+ movdqa xmm7, xmm6 /* 64, 64, 64, 64, 64, 64, 64, 64 */
+ add eax, 16 /* UDPointer += 8 */
+
+ psllw xmm7, 8 /* {16384, .. } */
+ paddw xmm0, xmm2 /* sum */
+
+ add edi, ecx /* Des += Pitch */
+ paddw xmm0, xmm6 /* sum+B */
+
+ add ebx, 16 /* LRPointer += 8 */
+ paddw xmm0, xmm7 /* clamping: bias by 16384 ... */
+
+ psubusw xmm0, xmm7 /* ... and remove it with unsigned saturation, so negative sums become 0 */
+ dec ebp
+
+ psrlw xmm0, 7 /* (sum+B)>>7 */
+ packuswb xmm0, xmm7 /* pack to 8 bytes (upper 8 bytes are unused) */
+
+ movq QWORD PTR [edi+edx], xmm0 /* write to destination row just processed (edi already advanced) */
+ jnz FilterLoop1 /* flags still come from dec ebp: SSE ops and movq leave EFLAGS alone */
+
+
+ pop ebp
+
+ pop ebx
+ pop eax
+
+ pop edx
+ pop ecx
+
+ pop edi
+ pop esi
+ }
+#endif
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : DeringBlockWeak_WMT
+ *
+ * INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
+ * const UINT8 *SrcPtr : Pointer to top-left pixel of the 8x8 input block.
+ * UINT8 *DstPtr : Pointer to top-left pixel of the 8x8 output block.
+ * const INT32 Pitch : Image stride (used for both source and destination).
+ * UINT32 FragQIndex : Q-index block encoded with.
+ * UINT32 *QuantScale : Array of quantization scale factors.
+ *
+ * OUTPUTS : 8 rows of 8 filtered pixels written at DstPtr.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filtering a block for de-ringing purpose.
+ *
+ * SPECIAL NOTES : Two passes:
+ * 1. FillModLoop1 derives a per-pixel weight ("modifier")
+ * from the absolute difference between each pixel and
+ * its upper, left and right neighbour. Vertical weights
+ * go to UDMod (9 rows), left weights to LRMod[0..63] and
+ * right weights to LRMod[64..127].
+ * 2. FilterLoop1 blends every pixel with its 4 neighbours:
+ * (au*pu + ad*pd + al*pl + ar*pr
+ * + p*(128-(au+ad+al+ar)) + 64) >> 7
+ * The source is read one row above and below and one
+ * pixel left and right of the 8x8 block.
+ * MMX registers are used and no EMMS is executed here;
+ * NOTE(review): presumably the caller clears the MMX state - confirm.
+ * Compiles to an empty function on Windows CE.
+ *
+ ****************************************************************************/
+void DeringBlockWeak_WMT
+(
+ const POSTPROC_INSTANCE *pbi,
+ const UINT8 *SrcPtr,
+ UINT8 *DstPtr,
+ const INT32 Pitch,
+ UINT32 FragQIndex,
+ UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+ return;
+#else
+
+ /* 9 rows x 8 words: weight between each row and the row above it (rows 0..8) */
+ __declspec(align(16)) short UDMod[72];
+ /* 8 rows x 8 words of left weights followed by 8 rows x 8 words of right weights */
+ __declspec(align(16)) short LRMod[128];
+
+ unsigned int PlaneLineStep = Pitch;
+ const unsigned char *Src = SrcPtr;
+ unsigned char *Des = DstPtr;
+
+ short *UDPointer = UDMod;
+ short *LRPointer = LRMod;
+
+ UINT32 QStep = QuantScale[FragQIndex];
+ INT32 Sharpen = SharpenModifier[FragQIndex];
+ (void) pbi;
+
+ __asm
+ {
+ push esi
+ push edi
+
+ mov esi, Src /* Source Pointer */
+ mov edi, UDPointer /* UD modifier pointer */
+
+ push ecx
+ push edx
+
+ mov ecx, PlaneLineStep /* Pitch Step */
+ xor edx, edx
+
+ push eax
+ push ebx
+
+ mov eax, QStep /* QValue */
+ mov ebx, Sharpen /* Sharpen */
+
+ movd mm0, eax /* QValue */
+ movd mm2, ebx /* sharpen */
+
+ push ebp
+
+ punpcklbw mm0, mm0 /* 00 00 00 QQ */
+ sub edx, ecx /* Negative Pitch */
+
+ punpcklbw mm2, mm2 /* 00 00 00 SS */
+ pxor mm7, mm7 /* clear mm7 for unpacks */
+
+ punpcklbw mm0, mm0 /* 00 00 qq qq */
+ mov eax, LRPointer /* Left and Right Modifier */
+
+ punpcklbw mm2, mm2 /* 00 00 ss ss */
+ lea ebx, [esi+ecx*8] /* Source Pointer of last row */
+
+ punpcklbw mm0, mm0 /* qq qq qq qq */
+ movq mm1, mm0; /* make a copy */
+
+ punpcklbw mm2, mm2 /* ss ss ss ss */
+ paddb mm1, mm0 /* QValue * 2 */
+
+ paddb mm1, mm0 /* High = 3 * Qvalue */
+ paddusb mm1, eight231c /* clamping high to 24 */
+
+ paddb mm0, eight32c /* 32+QValues */
+ psubusb mm1, eight231c /* Get the real value back */
+
+ movq mm3, eight127c /* 7f 7f 7f 7f 7f 7f 7f 7f */
+ pandn mm1, mm3 /* ClampHigh = 127 - High */
+
+ /* mm0,mm1,mm2,mm7 are in use */
+ /* mm0---> QValue+32 */
+ /* mm1---> ClampHigh */
+ /* mm2---> Sharpen */
+ /* mm7---> Cleared for unpack */
+
+FillModLoop1:
+ /* ---- up modifier for this row: p versus pixel above ---- */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+ movq mm5, mm3 /* make a copy of p */
+
+ psubusb mm3, mm4 /* p-pu */
+ psubusb mm4, mm5 /* pu-p */
+
+ por mm3, mm4 /* abs(p-pu) */
+ movq mm6, mm0 /* 32+QValues */
+
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+
+ /* ---- left modifier: p versus pixel to the left (interleaved with the UDMod store) ---- */
+ movq2dq xmm0, mm6
+ movq mm4, QWORD PTR [esi-1] /* Pixels to the left pl */
+
+ punpcklbw xmm7, xmm0 /* extended to words */
+ movq mm5, mm3 /* make a copy of p */
+
+ psraw xmm7, 8 /* sign extended */
+ psubusb mm3, mm4 /* p-pl */
+
+ movdqa [edi], xmm7 /* writeout UDmod*/
+ psubusb mm4, mm5 /* pl-p */
+
+ por mm3, mm4 /* abs(p-pl) */
+ movq mm6, mm0 /* 32+QValues */
+
+ paddusb mm3, mm3 /* 2*abs(p-pl) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pl) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+
+ /* ---- right modifier: p versus pixel to the right (interleaved with the left LRMod store) ---- */
+ pxor xmm7, xmm7 /* clear xmm7 */
+ movq mm4, QWORD PTR [esi+1] /* Pixels to the right pr */
+
+ movq2dq xmm0, mm6
+ movq mm5, mm3 /* make a copy of p */
+
+ punpcklbw xmm7, xmm0 /* extened to shorts */
+ psubusb mm3, mm4 /* p-pr */
+
+ psraw xmm7, 8 /* sign extended */
+ psubusb mm4, mm5 /* pr-p */
+
+ movdqa [eax], xmm7 /* writeout left LRmod */
+ por mm3, mm4 /* abs(p-pr) */
+
+ movq mm6, mm0 /* 32+QValues */
+ paddusb mm3, mm3 /* 2*abs(p-pr) */
+
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pr) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ add esi, ecx
+
+ movq2dq xmm0, mm6
+ add edi, 16
+
+ /* was "punpcklbw xmm7, mm0": the source must be the XMM register just loaded by movq2dq */
+ punpcklbw xmm7, xmm0 /* extended to shorts */
+ add eax, 16
+
+ psraw xmm7, 8 /* sign extended */
+ cmp esi, ebx
+
+ /* eax was already advanced by 16, so +112 lands 128 bytes after this row's left entry */
+ movdqa [eax+112], xmm7 /* writeout right LRmod */
+ jne FillModLoop1
+
+ /* last UDMod */
+
+ movq mm3, QWORD PTR [esi] /* read 8 pixels p */
+ pxor xmm7, xmm7 /* clear xmm7 */
+
+
+ movq mm4, QWORD PTR [esi+edx] /* Pixels on top pu */
+ movq mm5, mm3 /* make a copy of p */
+
+ psubusb mm3, mm4 /* p-pu */
+ psubusb mm4, mm5 /* pu-p */
+
+ por mm3, mm4 /* abs(p-pu) */
+ movq mm6, mm0 /* 32+QValues */
+
+ paddusb mm3, mm3 /* 2*abs(p-pu) */
+
+ movq mm4, mm0 /* 32+QValues */
+ psubusb mm6, mm3 /* zero clampled TmpMod */
+
+ movq mm5, eight128c /* 80 80 80 80 80 80 80 80 */
+ paddb mm4, eight64c /* 32+QValues + 64 */
+
+ pxor mm4, mm5 /* convert to a sign number */
+ pxor mm3, mm5 /* convert to a sign number */
+
+ pcmpgtb mm3, mm4 /* 32+QValue- 2*abs(p-pu) <-64 ? */
+ pand mm3, mm2 /* use sharpen */
+
+ paddsb mm6, mm1 /* clamping to high */
+ psubsb mm6, mm1 /* offset back */
+
+ por mm6, mm3 /* Mod value to be stored */
+ movq2dq xmm6, mm6
+
+ punpcklbw xmm7, xmm6 /* 03 xx 02 xx 01 xx 00 xx */
+
+ psraw xmm7, 8 /* sign extended */
+ movdqa [edi], xmm7 /* writeout UDmod */
+
+ mov esi, Src
+ mov edi, Des
+
+ mov eax, UDPointer
+ mov ebx, LRPointer
+
+ /* EBP becomes the row counter; no stack locals are referenced by name from here to "pop ebp" */
+ mov ebp, 8
+
+FilterLoop1:
+
+ movq xmm0, QWORD PTR [esi+edx] /* mm0 = Pixels above */
+ pxor xmm7, xmm7 /* clear mm7 */
+
+ movdqa xmm4, [eax] /* au */
+ punpcklbw xmm0, xmm7 /* extended to shorts */
+
+ movq xmm2, QWORD PTR [esi+ecx] /* mm2 = pixels below */
+ pmullw xmm0, xmm4 /* pu*au */
+
+ movdqa xmm6, [eax+16] /* ad */
+ punpcklbw xmm2, xmm7 /* extened to shorts*/
+
+ movq xmm1, QWORD PTR [esi-1] /* pixel to the left */
+ pmullw xmm2, xmm6 /* ad*pd */
+
+ movdqa xmm3, [ebx] /* al */
+ punpcklbw xmm1, xmm7 /* extended to shorts */
+
+ movq xmm5, QWORD PTR [esi+1] /* pixel to the right */
+ pmullw xmm1, xmm3 /* al * pl */
+
+ paddw xmm4, xmm6 /* au+ad */
+ punpcklbw xmm5, xmm7 /* extends to shorts */
+
+ movdqa xmm6, [ebx+128] /* ar */
+ pmullw xmm5, xmm6 /* ar * pr */
+
+ paddw xmm0, xmm2 /* au*pu + ad*pd */
+ paddw xmm4, xmm3 /* au+ad+al */
+
+ paddw xmm0, xmm1 /* au*pu+ad*pd+al*pl */
+ paddw xmm4, xmm6 /* au+ad+al+ar */
+
+ movq xmm2, QWORD PTR [esi] /* p */
+ paddw xmm0, xmm5 /* au*pu+ad*pd+al*pl+ar*pr */
+
+
+ /* xmm0 --- au*pu+ad*pd+al*pl+ar*pr */
+ /* xmm4 --- au + ad + al + ar */
+
+ movdqa xmm1, eight128s /* 0080 0080 0080 0080 0080 0080 0080 0080 */
+ punpcklbw xmm2, xmm7 /* extended to shorts */
+
+ psubw xmm1, xmm4 /* 128-(au+ad+al+ar) */
+ pmullw xmm2, xmm1 /* p*(128-(au+ad+al+ar)) */
+
+ add esi, ecx /* Src += Pitch */
+ movdqa xmm6, eight64s /* 64, 64, 64, 64, 64, 64, 64, 64 */
+
+ movdqa xmm7, xmm6 /* 64, 64, 64, 64, 64, 64, 64, 64 */
+ add eax, 16 /* UDPointer += 8 */
+
+ psllw xmm7, 8 /* {16384, .. } */
+ paddw xmm0, xmm2 /* sum */
+
+ add edi, ecx /* Des += Pitch */
+ paddw xmm0, xmm6 /* sum+B */
+
+ add ebx, 16 /* LPointer +=8 */
+ paddw xmm0, xmm7 /* clamping */
+
+ psubusw xmm0, xmm7 /* clamping */
+ dec ebp
+
+ psrlw xmm0, 7 /* (sum+B)>>7 */
+ packuswb xmm0, xmm7 /* pack to 8 bytes */
+
+ /* edi was already advanced by Pitch, edx is -Pitch: this addresses the current row */
+ movq QWORD PTR [edi+edx], xmm0 /* write to destination */
+ jnz FilterLoop1
+
+
+ pop ebp
+
+ pop ebx
+ pop eax
+
+ pop edx
+ pop ecx
+
+ pop edi
+ pop esi
+ }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c
new file mode 100644
index 00000000..633174a8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c
@@ -0,0 +1,211 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+* Imports
+*****************************************************************************/
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+// c imports
+extern void FilteringVert_12_C(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void FilteringHoriz_12_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringVert_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringHoriz_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
+extern void DeblockLoopFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+// mmx imports
+extern void FilteringVert_12_MMX(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void FilteringHoriz_12_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringVert_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringHoriz_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
+extern void DeblockLoopFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void FillLoopFilterLimitValues_MMX(void);
+// Defined in newlooptest_asm.c as an array (INT16[64*4]); it must be declared as an
+// array here too, because a pointer declaration would reinterpret the table's first
+// bytes as an address.
+extern INT16 LoopFilterLimitValuesV2_MMX[];
+extern void PlaneAddNoise_mmx( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+
+// wmt imports
+extern void DeblockLoopFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeringBlockWeak_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void MmxFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void WmtFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void ClampLevels_wmt( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void ClampLevels_mmx( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void PlaneAddNoise_wmt( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ *
+ * ROUTINE : PostProcMachineSpecificConfig
+ *
+ * INPUTS : UINT32 Version : Codec version number (not referenced here).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Queries the processor capability flags and points every
+ * post-processing function pointer at the matching
+ * implementation: Willamette (WMT), MMX or plain C.
+ *
+ * SPECIAL NOTES : The WMT configuration is the MMX configuration with the
+ * de-ring, band de-block, de-interlace, clamp and noise
+ * routines replaced by their WMT versions.
+ * The XMM flag is fetched but never tested.
+ *
+ ****************************************************************************/
+void PostProcMachineSpecificConfig ( UINT32 Version )
+{
+    INT32 MmxEnabled;
+    INT32 XmmEnabled;
+    INT32 WmtEnabled;
+
+    GetProcessorFlags( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+
+    if ( !WmtEnabled && !MmxEnabled )
+    {
+        // No SIMD support: use the original 'C' versions throughout.
+        FilterHoriz                     = FilterHoriz_Generic;
+        FilterVert                      = FilterVert_Generic;
+        SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
+        SetupDeblockValueArray          = SetupDeblockValueArray_Generic;
+        DeringBlockWeak                 = DeringBlockWeak_C;
+        DeringBlockStrong               = DeringBlockStrong_C;
+        DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
+        DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
+        DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;
+        FilterHoriz_Simple              = FilterHoriz_Simple_C;
+        FilterVert_Simple               = FilterVert_Simple_C;
+        HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
+        VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
+        LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;
+        HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
+        VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
+        LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;
+        HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
+        VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
+        LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;
+        FilteringHoriz_8                = FilteringHoriz_8_C;
+        FilteringVert_8                 = FilteringVert_8_C;
+        FilteringHoriz_12               = FilteringHoriz_12_C;
+        FilteringVert_12                = FilteringVert_12_C;
+        FastDeInterlace                 = CFastDeInterlace;
+        ClampLevels                     = ClampLevels_C;
+        PlaneAddNoise                   = PlaneAddNoise_C;
+        return;
+    }
+
+    // MMX is usable from here on (Willamette builds on the MMX set).
+    FillLoopFilterLimitValues_MMX();
+
+    // Routines that are MMX for both the MMX and the Willamette configuration.
+    FilterHoriz                     = FilterHoriz_MMX;
+    FilterVert                      = FilterVert_MMX;
+    SetupBoundingValueArray         = SetupBoundingValueArray_ForMMX;
+    SetupDeblockValueArray          = SetupDeblockValueArray_ForMMX;
+    DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_MMX;
+    FilterHoriz_Simple              = FilterHoriz_Simple_MMX;
+    FilterVert_Simple               = FilterVert_Simple_MMX;
+    HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_MMX;
+    HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_MMX;
+    HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_MMX;
+    VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_MMX;
+    LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_MMX;
+    VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_MMX;
+    LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_MMX;
+    VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_MMX;
+    LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_MMX;
+    FilteringHoriz_8                = FilteringHoriz_8_MMX;
+    FilteringVert_8                 = FilteringVert_8_MMX;
+    FilteringHoriz_12               = FilteringHoriz_12_MMX;
+    FilteringVert_12                = FilteringVert_12_MMX;
+
+    // Routines that have a dedicated Willamette version.
+    if ( WmtEnabled )
+    {
+        DeringBlockWeak         = DeringBlockWeak_WMT;
+        DeringBlockStrong       = DeringBlockStrong_WMT;
+        DeblockLoopFilteredBand = DeblockLoopFilteredBand_WMT;
+        DeblockNonFilteredBand  = DeblockNonFilteredBand_WMT;
+        FastDeInterlace         = WmtFastDeInterlace;
+        ClampLevels             = ClampLevels_wmt;
+        PlaneAddNoise           = PlaneAddNoise_wmt;
+    }
+    else
+    {
+        DeringBlockWeak         = DeringBlockWeak_MMX;
+        DeringBlockStrong       = DeringBlockStrong_MMX;
+        DeblockLoopFilteredBand = DeblockLoopFilteredBand_MMX;
+        DeblockNonFilteredBand  = DeblockNonFilteredBand_MMX;
+        FastDeInterlace         = MmxFastDeInterlace;
+        ClampLevels             = ClampLevels_mmx;
+        PlaneAddNoise           = PlaneAddNoise_mmx;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c
new file mode 100644
index 00000000..68f779a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c
@@ -0,0 +1,540 @@
+/****************************************************************************
+*
+* Module Title : loopf_asm.c
+*
+* Description : Optimized version of the loop filter.
+*
+****************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <memory.h>
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+/* C4799: function has no EMMS instruction (the MMX routines below do not issue EMMS) */
+#pragma warning (disable:4799)
+/* C4731: frame pointer register EBP modified by inline assembly (FilterHoriz_MMX uses EBP as scratch) */
+#pragma warning (disable:4731)
+
+/* Byte offsets into the scratch area set up by SetupBoundingValueArray_ForMMX */
+#define LIMIT_OFFSET 0 /* 4 words of FLimit */
+#define FOURONES_OFFSET 8 /* 4 words of 0x0001 */
+#define LFABS_OFFSET 16 /* 4 words of 0x0004, added before the >>3 */
+#define TRANS_OFFSET 24 /* temporary storage for transposed pixels */
+
+/****************************************************************************
+ *
+ * ROUTINE : SetupBoundingValueArray_ForMMX
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance.
+ * INT32 FLimit : Filter limiting value.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32*: Pointer to bounding value array.
+ *
+ * FUNCTION : Sets up bounding value array used in filtering operations.
+ *
+ * SPECIAL NOTES : The returned pointer is the address of
+ * pbi->FiltBoundingValue[256] rounded DOWN to a 32-byte
+ * boundary. Entries [0..5] are initialised here; the
+ * entries that follow are scratch space for the filters.
+ *
+ ****************************************************************************/
+INT32 *SetupBoundingValueArray_ForMMX ( POSTPROC_INSTANCE *pbi, INT32 FLimit )
+{
+    INT32 *BoundingValuePtr;
+
+    /* Since the FiltBoundingValue array is currently only used in the generic */
+    /* version, we are going to reuse this memory for our own purposes. */
+    /* 2 longs for limit, 2 longs for _4ONES, 2 longs for LFABS_MMX, and */
+    /* 8 longs for temp work storage */
+    /* The mask is applied in a pointer-sized integer (size_t): the previous */
+    /* UINT32 cast would truncate the address where pointers are wider than */
+    /* 32 bits. On 32-bit targets the result is unchanged. */
+    BoundingValuePtr = (INT32 *)((size_t)(&pbi->FiltBoundingValue[256]) & ~(size_t)0x1f);
+
+    // expand for mmx code: every 16-bit lane of a quadword holds the same value
+    BoundingValuePtr[0] = BoundingValuePtr[1] = FLimit * 0x00010001;    // limit
+    BoundingValuePtr[2] = BoundingValuePtr[3] = 0x00010001;             // four ones
+    BoundingValuePtr[4] = BoundingValuePtr[5] = 0x00040004;             // adjust before shift
+
+    return BoundingValuePtr;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterHoriz_MMX
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance (unused).
+ * UINT8 *PixelPtr : Pointer to input frame.
+ * INT32 LineLength : Length of line in input frame.
+ * INT32 *BoundingValuePtr : Pointer to bounding value array
+ * (see SetupBoundingValueArray_ForMMX).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the vertical edge (i.e. horizontally).
+ *
+ * SPECIAL NOTES : This version attempts to fix the DC_misalign stalls.
+ * Works on 8 rows. On each row the 4 pixels
+ * PixelPtr[0..3] are read and PixelPtr[1] (p[ms]) and
+ * PixelPtr[2] (p[0]) are rewritten. The rows are
+ * transposed through the TRANS_OFFSET scratch area so
+ * that each column can be handled as 4 words at a time.
+ * Loads start at PixelPtr-2 and extend to PixelPtr+5.
+ * EBP is used as scratch between push/pop ebp; no C
+ * locals are referenced inside that window.
+ * No EMMS is executed here.
+ *
+ ****************************************************************************/
+void FilterHoriz_MMX
+(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *PixelPtr,
+ INT32 LineLength,
+ INT32 *BoundingValuePtr
+)
+{
+ (void) pbi;
+
+ /* A somewhat optimized MMX version of the left edge filter. */
+ __asm
+ {
+ mov eax,[BoundingValuePtr]
+ mov edx,[LineLength] //stride
+
+ mov ebx,[PixelPtr]
+ mov ecx,[LineLength] //stride
+
+ movd mm0,[ebx + -2] //xx xx xx xx 01 00 xx xx
+ ;-
+
+ movd mm4,[ebx + 2] //xx xx xx xx xx xx 03 02
+ psrld mm0,16 //xx xx xx xx 00 00 01 00
+
+ movd mm1,[ebx + ecx + -2] //xx xx xx xx 11 10 xx xx
+ punpcklwd mm0,mm4 //xx xx xx xx 03 02 01 00
+
+ movd mm4,[ebx + ecx + 2] //xx xx xx xx xx xx 13 12
+ psrld mm1,16 //xx xx xx xx 00 00 11 10
+
+ punpcklwd mm1,mm4 //xx xx xx xx 13 12 11 10
+ lea edx,[edx + edx*2] //stride * 3
+
+ movd mm2,[ebx + ecx*2 + -2] //xx xx xx xx 21 20 xx xx
+ punpcklbw mm0,mm1 //13 03 12 02 11 01 10 00
+
+ movd mm4,[ebx + ecx*2 + 2] //xx xx xx xx xx xx 23 22
+ psrld mm2,16 //xx xx xx xx 00 00 21 20
+
+ movd mm1,[ebx + edx + -2] //xx xx xx xx 31 30 xx xx
+ punpcklwd mm2,mm4 //xx xx xx xx 23 22 21 20
+
+ movd mm4,[ebx + edx + 2] //xx xx xx xx xx xx 33 32
+ psrld mm1,16 //xx xx xx xx 00 00 31 30
+
+ punpcklwd mm1,mm4 //xx xx xx xx 33 32 31 30
+ pxor mm4,mm4
+
+ punpcklbw mm2,mm1 //33 23 32 22 31 21 30 20
+ movq mm1,mm0
+
+ punpcklwd mm0,mm2 //31 21 11 01 30 20 10 00
+ lea ebx,[ebx + ecx*4] //base + (stride * 4)
+
+ punpckhwd mm1,mm2 //33 23 13 03 32 22 12 02
+ movq mm6,mm0 //xx xx xx xx 30 20 10 00
+
+ movq [eax + TRANS_OFFSET + 0],mm0
+ movq mm2,mm1
+
+ movq [eax + TRANS_OFFSET + 8],mm1
+ psrlq mm0,32 //xx xx xx xx 31 21 11 01
+
+;-----------
+ movd mm7,[ebx + -2] //xx xx xx xx 41 40 xx xx
+ punpcklbw mm1,mm4 //convert to words
+
+ movd mm4,[ebx + 2] //xx xx xx xx xx xx 43 42
+ psrld mm7,16 //xx xx xx xx 00 00 41 40
+
+ movd mm5,[ebx + ecx + -2] //xx xx xx xx 51 50 xx xx
+ punpcklwd mm7,mm4 //xx xx xx xx 43 42 41 40
+
+ movd mm4,[ebx + ecx + 2] //xx xx xx xx xx xx 53 52
+ psrld mm5,16
+
+ punpcklwd mm5,mm4
+ pxor mm4,mm4
+
+ punpcklbw mm0,mm4
+;-
+
+ psrlq mm2,32 //xx xx xx xx 33 23 13 03
+ psubw mm1,mm0 //x = p[0] - p[ms]
+
+ punpcklbw mm7,mm5 //53 43 52 42 51 41 50 40
+ movq mm3,mm1
+;-------------------
+ punpcklbw mm6,mm4
+ paddw mm3,mm1
+
+ punpcklbw mm2,mm4
+ paddw mm1,mm3
+
+ paddw mm1,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
+ psubw mm6,mm2
+
+ movd mm2,[ebx + ecx*2 + -2] //xx xx xx xx 61 60 xx xx
+ paddw mm6,mm1
+
+ movd mm4,[ebx + ecx*2 + 2] //xx xx xx xx xx xx 63 62
+ psrld mm2,16
+
+ movd mm5,[ebx + edx + -2] //xx xx xx xx 71 70 xx xx
+ punpcklwd mm2,mm4 //xx xx xx xx 63 62 61 60
+
+ movd mm4,[ebx + edx + 2] //xx xx xx xx xx xx 73 72
+ psrld mm5,16 //xx xx xx xx 00 00 71 70
+
+ mov ebx,[PixelPtr] //restore PixelPtr
+ punpcklwd mm5,mm4 //xx xx xx xx 73 72 71 70
+
+ psraw mm6,3 //values to be clipped
+ pxor mm4,mm4
+
+ punpcklbw mm2,mm5 //73 63 72 62 71 61 70 60
+ movq mm5,mm7 //53 43 52 42 51 41 50 40
+
+ movq mm1,mm6
+ punpckhwd mm5,mm2 //73 63 53 43 72 62 52 42
+
+
+ movq [eax + TRANS_OFFSET + 24],mm5 //save for later
+ punpcklwd mm7,mm2 //71 61 51 41 70 60 50 40
+
+ movq [eax + TRANS_OFFSET + 16],mm7 //save for later
+ psraw mm6,15
+
+ movq mm2,[eax + LIMIT_OFFSET] //get the limit value
+ movq mm0,mm7 //xx xx xx xx 70 60 50 40
+
+ psrlq mm7,32 //xx xx xx xx 71 61 51 41
+ pxor mm1,mm6
+
+ psubsw mm1,mm6 //abs(i)
+ punpcklbw mm5,mm4
+
+ por mm6,[eax + FOURONES_OFFSET] //now have -1 or 1
+ movq mm3,mm2
+
+ punpcklbw mm7,mm4
+ psubw mm3,mm1 //limit - abs(i)
+
+ movq mm4,mm3
+ psraw mm3,15
+
+ push ebp
+ ;-
+
+ psubw mm5,mm7 //x = p[0] - p[ms]
+ pxor mm4,mm3
+
+ psubsw mm4,mm3 //abs(limit - abs(i))
+ pxor mm3,mm3
+
+ //movd, not movq: only the low 4 bytes are consumed (punpcklbw below) and an
+ //8-byte load at +28 would run 4 bytes past the 32-byte transpose area
+ movd mm1,[eax + TRANS_OFFSET + 28] //xx xx xx xx 73 63 53 43
+ psubusw mm2,mm4 //limit - abs(limit - abs(i))
+
+ punpcklbw mm0,mm3
+ movq mm7,mm5
+
+ paddw mm7,mm5
+ pmullw mm2,mm6 //new y -- wait 3 cycles
+
+ punpcklbw mm1,mm3
+ paddw mm5,mm7
+
+ paddw mm5,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
+ psubw mm0,mm1
+
+ paddw mm0,mm5
+ pxor mm6,mm6
+
+ movd mm7,[eax + TRANS_OFFSET + 8] //xx xx xx xx 32 22 12 02
+ psraw mm0,3 //values to be clipped
+
+ movd mm3,[eax + TRANS_OFFSET + 4] //xx xx xx xx 31 21 11 01
+ punpcklbw mm7,mm6
+
+ psubw mm7,mm2 //p[0] - y
+ punpcklbw mm3,mm6
+
+ paddw mm3,mm2 //p[ms] + y
+ packuswb mm7,mm7 //clamp[ p[0] - y]
+
+ packuswb mm3,mm3 //clamp[ p[ms] + y]
+ movq mm1,mm0
+
+ movq mm2,[eax + LIMIT_OFFSET] //get the limit value
+ psraw mm0,15
+
+ //values to write out
+ punpcklbw mm3,mm7 //32 31 22 21 12 11 02 01
+ movq mm7,mm0 //save sign
+
+ movd ebp,mm3 //12 11 02 01
+ pxor mm1,mm0
+
+ //xor bp,bp
+
+ mov WORD PTR[ebx + 1],bp //02 01
+ psubsw mm1,mm0 //abs(i)
+
+ shr ebp,16
+ movq mm5,mm2
+
+ mov WORD PTR[ebx + ecx + 1],bp
+ psrlq mm3,32 //xx xx xx xx 32 31 22 21
+
+ por mm7,[eax + FOURONES_OFFSET] //now have -1 or 1
+ psubw mm5,mm1 //limit - abs(i)
+
+ movd ebp,mm3 //32 31 22 21
+ movq mm4,mm5
+
+ mov [ebx + ecx*2 + 1],bp
+ psraw mm5,15
+
+ shr ebp,16
+ pxor mm4,mm5
+
+ mov [ebx + edx + 1],bp
+ psubsw mm4,mm5 //abs(limit - abs(i))
+
+ movd mm5,[eax + TRANS_OFFSET + 24] //xx xx xx xx 72 62 52 42
+ psubusw mm2,mm4 //limit - abs(limit - abs(i))
+
+ pmullw mm2,mm7 //new y
+ pxor mm6,mm6
+
+ movd mm3,[eax + TRANS_OFFSET + 20] //xx xx xx xx 71 61 51 41
+ punpcklbw mm5,mm6
+
+ lea ebx,[ebx + ecx*4]
+ punpcklbw mm3,mm6
+
+ paddw mm3,mm2 //p[ms] + y
+ psubw mm5,mm2 //p[0] - y
+
+ packuswb mm3,mm3 //clamp[ p[ms] + y]
+ pop ebp
+ ;-
+
+//
+//NOTE: optimize the following somehow
+//
+ packuswb mm5,mm5 //clamp[ p[0] - y]
+ ;-
+ punpcklbw mm3,mm5 //72 71 62 61 52 51 42 41
+ ;-
+
+ movd eax,mm3 //52 51 42 41
+ psrlq mm3,32 //xx xx xx xx 72 71 62 61
+
+ mov [ebx + 1],ax
+ ;-
+ shr eax,16
+ ;-
+
+ mov [ebx + ecx + 1],ax
+ ;-
+
+
+ movd eax,mm3
+ ;-
+
+ mov [ebx + ecx*2 + 1],ax
+ ;-
+
+ shr eax,16
+ ;-
+
+ mov [ebx + edx + 1],ax
+ ;-
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterVert_MMX
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance (unused).
+ * UINT8 *PixelPtr : Pointer to input frame.
+ * INT32 LineLength : Length of line in input frame.
+ * INT32 *BoundingValuePtr : Pointer to bounding value array
+ * (see SetupBoundingValueArray_ForMMX).
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a loop filter to the horizontal edge (i.e. vertically).
+ *
+ * SPECIAL NOTES : This version attempts to fix the DC_misalign stalls.
+ * Handles 8 pixels as two interleaved groups of 4
+ * (byte offsets 0 and 4). Rows PixelPtr-2*LineLength
+ * through PixelPtr+LineLength are read; the row above
+ * (p[ms]) and the row at PixelPtr (p[0]) are rewritten.
+ * No EMMS is executed here.
+ *
+ ****************************************************************************/
+void FilterVert_MMX
+(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 *PixelPtr,
+ INT32 LineLength,
+ INT32 *BoundingValuePtr
+)
+{
+ INT32 ms = -LineLength;
+ (void) pbi;
+
+ /* A somewhat optimized MMX version of the top edge filter. */
+ __asm
+ {
+ mov eax,[BoundingValuePtr]
+ ;-
+
+ mov ebx,[PixelPtr]
+ mov ecx,[ms] //negative stride
+
+ movd mm1,[ebx + 0] //p[0]
+ pxor mm4,mm4
+
+ movd mm0,[ebx + ecx] //get row above -- p[ms]
+ punpcklbw mm1,mm4 //convert to words
+
+ mov edx,[LineLength]
+ punpcklbw mm0,mm4
+
+ movd mm6,[ebx + ecx*2] //p[ms2]
+ psubw mm1,mm0 //x = p[0] - p[ms]
+
+ //NOTE(review): 8-byte load, but only the low 4 bytes are used (punpcklbw below);
+ //the sibling loads use movd
+ movq mm2,[ebx + edx] //p[stride]
+ movq mm3,mm1
+
+ punpcklbw mm6,mm4
+ paddw mm3,mm1
+
+ punpcklbw mm2,mm4
+ paddw mm1,mm3
+
+ paddw mm1,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
+ psubw mm6,mm2
+
+ movq mm2,[eax + LIMIT_OFFSET] //get the limit value
+ paddw mm6,mm1
+
+ movd mm5,[ebx + 4] //p[0]
+ psraw mm6,3 //values to be clipped
+
+ movq mm1,mm6
+ psraw mm6,15
+
+ movd mm7,[ebx + ecx + 4] //p[ms]
+ pxor mm1,mm6
+
+ psubsw mm1,mm6 //abs(i)
+ pxor mm0,mm0
+
+ punpcklbw mm5,mm0
+ movq mm3,mm2
+
+ por mm6,[eax + FOURONES_OFFSET] //now have -1 or 1
+ punpcklbw mm7,mm0
+
+ psubw mm3,mm1 //limit - abs(i)
+ psubw mm5,mm7 //x = p[0] - p[ms]
+
+ movq mm4,mm3
+ psraw mm3,15
+
+ movd mm0,[ebx + ecx*2 + 4] //p[ms2]
+ pxor mm4,mm3
+
+ movd mm1,[ebx + edx +4] //p[stride]
+ psubsw mm4,mm3 //abs(limit - abs(i))
+
+ pxor mm3,mm3
+ psubusw mm2,mm4 //limit - abs(limit - abs(i))
+
+ punpcklbw mm0,mm3
+ movq mm7,mm5
+
+ paddw mm7,mm5
+ pmullw mm2,mm6 //new y -- wait 3 cycles
+
+ punpcklbw mm1,mm3
+ paddw mm5,mm7
+
+ paddw mm5,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
+ psubw mm0,mm1
+
+ paddw mm0,mm5
+ pxor mm6,mm6
+
+ movd mm7,[ebx + 0] //p[0]
+ psraw mm0,3 //values to be clipped
+
+ movd mm3,[ebx + ecx] //get row above -- p[ms]
+ punpcklbw mm7,mm6
+
+ psubw mm7,mm2 //p[0] - y
+ punpcklbw mm3,mm6
+
+ paddw mm3,mm2 //p[ms] + y
+ packuswb mm7,mm7 //clamp[ p[0] - y]
+
+ packuswb mm3,mm3 //clamp[ p[ms] + y]
+ movq mm1,mm0
+
+ movd [ebx + 0],mm7 //write p[0]
+ psraw mm0,15
+
+ movq mm7,mm0 //save sign
+ pxor mm1,mm0
+
+;
+;
+ movq mm2,[eax + LIMIT_OFFSET] //get the limit value
+;
+;
+
+ psubsw mm1,mm0 //abs(i)
+ movq mm5,mm2
+
+ por mm7,[eax + FOURONES_OFFSET] //now have -1 or 1
+ psubw mm5,mm1 //limit - abs(i)
+
+ movq mm4,mm5
+ psraw mm5,15
+
+ movd [ebx + ecx],mm3 //write p[ms]
+ pxor mm4,mm5
+
+ psubsw mm4,mm5 //abs(limit - abs(i))
+ pxor mm6,mm6
+
+ movd mm5,[ebx + 4] //p[0]
+ psubusw mm2,mm4 //limit - abs(limit - abs(i))
+
+ movd mm3,[ebx + ecx + 4] //p[ms]
+ pmullw mm2,mm7 //new y
+
+ punpcklbw mm5,mm6
+ ;-
+
+ punpcklbw mm3,mm6
+ ;-
+
+ paddw mm3,mm2 //p[ms] + y
+ psubw mm5,mm2 //p[0] - y
+
+ packuswb mm3,mm3 //clamp[ p[ms] + y]
+ ;-
+
+ packuswb mm5,mm5 //clamp[ p[0] - y]
+ ;-
+
+ movd [ebx + ecx + 4],mm3 //write p[ms]
+;
+
+ movd [ebx + 4],mm5 //write p[0]
+ }
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c
new file mode 100644
index 00000000..f33df64a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c
@@ -0,0 +1,1123 @@
+/****************************************************************************
+ *
+ * Module Title : newlooptest_asm.c
+ *
+ * Description : Codec specific functions
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h>
+#include "postp.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* the selected argument is evaluated twice: do not pass expressions with side effects */
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 *LoopFilterLimitValuesV2;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+INT16 LoopFilterLimitValuesV2_MMX[64*4]; /* 64 limits, each stored as 4 identical 16-bit words; written by FillLoopFilterLimitValues_MMX, read 4 words at a time (indexed by QIndex*4) by the filters below */
+
+/****************************************************************************
+ *
+ * ROUTINE : FillLoopFilterLimitValues_MMX
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Sets-up array of limit values for use in loop-filter.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void FillLoopFilterLimitValues_MMX ( void )
+{
+    /* Walk the MMX table once, emitting every scalar limit four times in a
+       row so that a single 64-bit load yields the limit in all four 16-bit
+       lanes. */
+    INT16 *Lanes = LoopFilterLimitValuesV2_MMX;
+    int QIndex;
+
+    for ( QIndex = 0; QIndex < 64; QIndex++ )
+    {
+        /* Narrowed from the 32-bit source table to one 16-bit lane value. */
+        const INT16 Limit = (INT16)LoopFilterLimitValuesV2[QIndex];
+        int Lane;
+
+        for ( Lane = 0; Lane < 4; Lane++ )
+        {
+            *Lanes++ = Limit;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilteringHoriz_8_MMX
+ *
+ * INPUTS : UINT32 QIndex : Quantization index.
+ * UINT8 *Src : Pointer to source block.
+ * INT32 Pitch : Pitch of input image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters the vertical block edge inside a prediction
+ * block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void FilteringHoriz_8_MMX ( UINT32 QIndex, UINT8 *Src, INT32 Pitch )
+{
+ INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];
+ /* Filters the edge lying between Src[-1] and Src[0] on 8 consecutive rows
+  * (Src, Src+Pitch, ... Src+7*Pitch), four rows per pass.
+  *
+  * Per row:  FiltVal = (Src[-2] - 3*Src[-1] + 3*Src[0] - Src[1] + 4) >> 3
+  *           y       = sign(FiltVal) * max(0, limit - |limit - |FiltVal||)
+  *           Src[-1] = sat8(Src[-1] + y);  Src[0] = sat8(Src[0] - y)
+  * where limit is the 16-bit value at FLimitPtr and sat8 is the unsigned
+  * byte saturation performed by packuswb.
+  *
+  * Each row is fetched with two 4-byte loads (at Src-4 and at Src), so the
+  * bytes Src[-4..3] of every row are read although only Src[-2..1] are
+  * used; results are written back with 16-bit stores at Src-1.
+  *
+  * Lane diagrams in the comments list register bytes from most to least
+  * significant; "rc" is row r, column c, with columns 0..3 = Src[-2..1].
+  * "p[ms]" in the comments is the pixel just before the edge (column 1).
+  *
+  * NOTE(review): no emms is issued here; presumably the caller clears the
+  * MMX state before any x87 code runs -- confirm.
+  */
+ __declspec(align(16)) const short fourFours[] = {4, 4, 4, 4}; /* rounding term added before the >>3 */
+ __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1}; /* OR-ed into a sign mask to give +1 / -1 per lane */
+ __declspec(align(16)) unsigned char Temp[32]; /* transposed columns: bytes 0..15 for rows 0-3, bytes 16..31 for rows 4-7 */
+
+ __asm
+ {
+ mov eax, FLimitPtr //eax -> four 16-bit copies of the limit
+ mov edx, Pitch
+
+ mov esi, Src
+ lea edi, Temp //edi -> transpose scratch
+ //---- rows 0-3: gather Src[-2..1] of each row, then transpose ----
+ mov ecx, edx //stride
+ movd mm0, [esi + -4] //xx xx xx xx 01 00 xx xx
+
+ movd mm4, [esi] //xx xx xx xx xx xx 03 02
+ psrld mm0, 16 //xx xx xx xx 00 00 01 00
+
+ movd mm1, [esi + ecx + -4] //xx xx xx xx 11 10 xx xx
+ punpcklwd mm0, mm4 //xx xx xx xx 03 02 01 00
+
+ movd mm4, [esi + ecx] //xx xx xx xx xx xx 13 12
+ psrld mm1, 16 //xx xx xx xx 00 00 11 10
+
+ punpcklwd mm1, mm4 //xx xx xx xx 13 12 11 10
+ lea edx, [edx + edx*2] //stride * 3
+
+ movd mm2, [esi + ecx*2 + -4] //xx xx xx xx 21 20 xx xx
+ punpcklbw mm0, mm1 //13 03 12 02 11 01 10 00
+
+ movd mm4, [esi + ecx*2] //xx xx xx xx xx xx 23 22
+ psrld mm2, 16 //xx xx xx xx 00 00 21 20
+
+ movd mm1, [esi + edx + -4] //xx xx xx xx 31 30 xx xx
+ punpcklwd mm2, mm4 //xx xx xx xx 23 22 21 20
+
+ movd mm4, [esi + edx] //xx xx xx xx xx xx 33 32
+ psrld mm1, 16 //xx xx xx xx 00 00 31 30
+
+ punpcklwd mm1, mm4 //xx xx xx xx 33 32 31 30
+ pxor mm4, mm4 // clear mm4
+
+ punpcklbw mm2, mm1 //33 23 32 22 31 21 30 20
+ movq mm1, mm0 //13 03 12 02 11 01 10 00
+
+ punpcklwd mm0, mm2 //31 21 11 01 30 20 10 00
+ lea esi, [esi + ecx*4] //base + (stride * 4)
+
+ punpckhwd mm1, mm2 //33 23 13 03 32 22 12 02
+ movq mm6, mm0 //xx xx xx xx 30 20 10 00
+
+ movq [edi], mm0 // Temp[0..7]: column 0 then column 1 of rows 0-3
+ movq mm2, mm1 // make a copy
+
+ movq [edi+8], mm1 // Temp[8..15]: column 2 then column 3 of rows 0-3
+ psrlq mm0, 32 //xx xx xx xx 31 21 11 01
+ //(loads for rows 4-7 are interleaved with the rows 0-3 arithmetic below)
+ movd mm7, [esi + -4] //xx xx xx xx 41 40 xx xx
+ punpcklbw mm1, mm4 //xx 32 xx 22 xx 12 xx 02
+
+ movd mm4, [esi] //xx xx xx xx xx xx 43 42
+ psrld mm7, 16 //xx xx xx xx 00 00 41 40
+
+ movd mm5, [esi + ecx + -4] //xx xx xx xx 51 50 xx xx
+ punpcklwd mm7, mm4 //xx xx xx xx 43 42 41 40
+
+ movd mm4, [esi + ecx] //xx xx xx xx xx xx 53 52
+ psrld mm5, 16 //xx xx xx xx xx xx 51 50
+
+ punpcklwd mm5, mm4 //xx xx xx xx 53 52 51 50
+ pxor mm4, mm4 // clear mm4
+
+ punpcklbw mm0, mm4 //xx 31 xx 21 xx 11 xx 01
+
+ psrlq mm2, 32 //xx xx xx xx 33 23 13 03
+ psubw mm1, mm0 //x = p[0] - p[ms]
+
+ punpcklbw mm7, mm5 //53 43 52 42 51 41 50 40
+ movq mm3, mm1 // make a copy of x
+
+ punpcklbw mm6, mm4 //xx 30 xx 20 xx 10 xx 00
+ paddw mm3, mm1 //x = 2*(p[0] - p[ms])
+
+ punpcklbw mm2, mm4 //xx 33 xx 23 xx 13 xx 03
+ paddw mm1, mm3 //mm1 = 3*(p[0] - p[-1])
+
+ paddw mm1, fourFours //mm1 += LoopFilterAdjustBeforeShift
+ psubw mm6, mm2 //mm6 = (p[-2]-p[1])
+
+ movd mm2, [esi + ecx*2 + -4] //xx xx xx xx 61 60 xx xx
+ paddw mm6, mm1 //mm6 = 3*(p[0] - p[-1]) +(p[-2]-p[1]) + 4
+
+ movd mm4, [esi + ecx*2] //xx xx xx xx xx xx 63 62
+ psrld mm2, 16 //xx xx xx xx xx xx 61 60
+
+ movd mm5, [esi + edx + -4] //xx xx xx xx 71 70 xx xx
+ punpcklwd mm2, mm4 //xx xx xx xx 63 62 61 60
+
+ movd mm4, [esi + edx] //xx xx xx xx xx xx 73 72
+ psrld mm5, 16 //xx xx xx xx 00 00 71 70
+
+ mov esi, Src //restore PixelPtr
+ punpcklwd mm5, mm4 //xx xx xx xx 73 72 71 70
+
+ psraw mm6, 3 //values to be clipped
+ pxor mm4, mm4 // clear mm4
+
+ punpcklbw mm2, mm5 //73 63 72 62 71 61 70 60
+ movq mm5, mm7 //53 43 52 42 51 41 50 40
+
+ movq mm1, mm6 // make a copy of results
+ punpckhwd mm5, mm2 //73 63 53 43 72 62 52 42
+
+
+ movq [edi+24], mm5 //Temp[24..31]: column 2 then column 3 of rows 4-7
+ punpcklwd mm7, mm2 //71 61 51 41 70 60 50 40
+
+ movq [edi+16], mm7 //Temp[16..23]: column 0 then column 1 of rows 4-7
+ psraw mm6, 15 // FFFF or 0000
+
+ movq mm2, [eax] //get the limit value
+ movq mm0, mm7 //xx xx xx xx 70 60 50 40
+
+ psrlq mm7, 32 //xx xx xx xx 71 61 51 41
+ pxor mm1, mm6
+
+ psubsw mm1, mm6 //abs(i)
+ punpcklbw mm5, mm4 //xx 72 xx 62 xx 52 xx 42
+
+ por mm6, fourOnes //now have -1 or 1
+ movq mm3, mm2
+
+ punpcklbw mm7, mm4 //xx 71 xx 61 xx 51 xx 41
+ psubw mm3, mm1 //limit - abs(i)
+
+ movq mm4, mm3
+ psraw mm3, 15
+
+ psubw mm5, mm7 //x = p[0] - p[ms]
+ pxor mm4, mm3
+
+ psubsw mm4, mm3 //abs(limit - abs(i))
+ pxor mm3, mm3
+
+ movd mm1, [edi + 28] //xx xx xx xx 73 63 53 43
+ psubusw mm2, mm4 //limit - abs(limit - abs(i))
+
+ punpcklbw mm0, mm3 //xx 70 xx 60 xx 50 xx 40
+ movq mm7, mm5
+
+ paddw mm7, mm5
+ pmullw mm2, mm6 //new y -- wait 3 cycles
+
+ punpcklbw mm1, mm3 //xx 73 xx 63 xx 53 xx 43
+ paddw mm5, mm7
+
+ paddw mm5, fourFours //x += LoopFilterAdjustBeforeShift
+ psubw mm0, mm1 //(p[-2]-p[1]) for rows 4-7
+
+ paddw mm0, mm5
+ pxor mm6, mm6
+
+ movd mm7, [edi + 8] //xx xx xx xx 32 22 12 02
+ psraw mm0, 3 //values to be clipped
+
+ movd mm3, [edi + 4] //xx xx xx xx 31 21 11 01
+ punpcklbw mm7, mm6
+
+ psubw mm7, mm2 //p[0] - y (mm7 holds column 2)
+ punpcklbw mm3, mm6
+
+ paddw mm3, mm2 //p[ms] + y (mm3 holds column 1)
+ packuswb mm7, mm7 //clamp[ p[0] - y]
+
+ packuswb mm3, mm3 //clamp[ p[ms] + y]
+ movq mm1, mm0
+
+ movq mm2, [eax] //get the limit value
+ psraw mm0, 15
+
+ punpcklbw mm3, mm7 //32 31 22 21 12 11 02 01
+ movq mm7, mm0 //save sign
+
+ movd eax, mm3 //12 11 02 01 (eax no longer holds FLimitPtr)
+ pxor mm1, mm0
+
+
+ mov WORD PTR [esi - 1],ax //02 01
+ psubsw mm1, mm0 //abs(i)
+
+ shr eax, 16
+ movq mm5, mm2
+
+ mov WORD PTR [esi + ecx - 1],ax //12 11
+ psrlq mm3, 32 //xx xx xx xx 32 31 22 21
+
+ por mm7, fourOnes //now have -1 or 1
+ psubw mm5, mm1 //limit - abs(i)
+
+ movd eax, mm3 //32 31 22 21
+ movq mm4, mm5
+
+ mov [esi + ecx*2 - 1],ax //22 21
+ psraw mm5, 15
+
+ shr eax, 16
+ pxor mm4, mm5
+
+ mov [esi + edx - 1],ax //32 31
+ psubsw mm4, mm5 //abs(limit - abs(i))
+
+ movd mm5, [edi + 24] //xx xx xx xx 72 62 52 42
+ psubusw mm2, mm4 //limit - abs(limit - abs(i))
+
+ pmullw mm2, mm7 //new y
+ pxor mm6, mm6
+
+ movd mm3, [edi + 20] //xx xx xx xx 71 61 51 41
+ punpcklbw mm5, mm6
+
+ lea esi, [esi + ecx*4] //esi -> row 4
+ punpcklbw mm3, mm6
+
+ paddw mm3, mm2 //p[ms] + y
+ psubw mm5, mm2 //p[0] - y
+
+ packuswb mm3, mm3 //clamp[ p[ms] + y]
+
+
+ packuswb mm5, mm5 //clamp[ p[0] - y]
+ punpcklbw mm3, mm5 //72 71 62 61 52 51 42 41
+
+ movd eax, mm3 //52 51 42 41
+ psrlq mm3, 32 //xx xx xx xx 72 71 62 61
+
+ mov [esi - 1],ax //42 41
+ shr eax, 16
+
+ mov [esi + ecx - 1],ax //52 51
+ movd eax, mm3
+
+ mov [esi + ecx*2 - 1],ax //62 61
+ shr eax,16
+
+ mov [esi + edx - 1],ax //72 71
+
+ }
+/* Scalar form of the per-row computation, kept for reference only (not
+   compiled; its identifiers QValue, Length, Dest, SrcPitch and DestPitch
+   do not match this function's parameters):
+
+ INT32 j;
+ INT32 FiltVal;
+ UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+ UINT32 FLimit;
+
+ FLimit = LoopFilterLimitValuesV2[QValue];
+
+ for ( j=0; j<Length; j++ )
+ {
+ // set up blur kernel for differences
+ FiltVal = (( Src[-2] ) -
+ ( Src[-1] * 3 ) +
+ ( Src[ 0] * 3 ) -
+ ( Src[ 1] ) + 4 ) >> 3;
+
+ FiltVal = Bound ( FLimit, FiltVal );
+
+ Dest[-1] = LimitTable[(INT32)Src[-1] + FiltVal];
+ Dest[ 0] = LimitTable[(INT32)Src[ 0] - FiltVal];
+
+ Src += SrcPitch;
+ Dest += DestPitch;
+ }
+*/
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilteringVert_8_MMX
+ *
+ * INPUTS : UINT32 QIndex : Quantization index.
+ * UINT8 *PixelPtr : Pointer to source block.
+ * INT32 Pitch : Pitch of input image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters the horizontal block edge inside a prediction
+ * block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void FilteringVert_8_MMX ( UINT32 QIndex, UINT8 *PixelPtr, INT32 Pitch )
+{
+ INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];
+ /* Filters the edge lying between the row at PixelPtr-Pitch and the row at
+  * PixelPtr, for 8 horizontally adjacent pixels, as two identical passes of
+  * four pixels (byte offsets 0 and 4).
+  *
+  * In the comments p[k] is the row at PixelPtr + k*Pitch.  Per pixel:
+  *   FiltVal = (p[-2] - 3*p[-1] + 3*p[0] - p[1] + 4) >> 3
+  *   y       = sign(FiltVal) * max(0, FLimit - |FLimit - |FiltVal||)
+  *   p[-1]   = sat8(p[-1] + y);  p[0] = sat8(p[0] - y)
+  * where FLimit is the 16-bit value at FLimitPtr and sat8 is the unsigned
+  * byte saturation performed by packuswb.
+  *
+  * NOTE(review): no emms is issued here; presumably the caller clears the
+  * MMX state before any x87 code runs -- confirm.
+  */
+ __declspec(align(16)) const short fourFours[] = { 4, 4, 4, 4 }; /* rounding term added before the >>3 */
+ __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1 }; /* OR-ed into a sign mask to give +1 / -1 per lane */
+
+ __asm
+ {
+ mov eax, FLimitPtr // 4 FLimit Values in shorts
+ mov edx, Pitch // Pitch
+
+ xor ecx, ecx // clear ecx to get negative Pitch
+ sub ecx, edx // Negative Pitch
+ // ---- pixels 0..3 ----
+ mov esi, PixelPtr // Src and Dest pointer
+ movd mm0, [esi] // p[0], four pixels
+
+ pxor mm7, mm7 // clear mm7 (stays zero; used as the unpack partner throughout)
+ movd mm1, [esi+ecx] // p[-1], four pixels
+
+ punpcklbw mm0, mm7 // unpack to short
+ movd mm2, [esi+edx] // p[1], four pixels
+
+ punpcklbw mm1, mm7 // unpack p[-1] to shorts
+ movd mm3, [esi+ecx*2] // p[-2], four pixels
+
+ movq mm5, mm0 // copy of unpacked p[0]
+ movq mm6, mm1 // copy of unpacked p[-1]
+
+ psubw mm0, mm1 // p[0] - p[-1]
+ punpcklbw mm2, mm7 // unpack p[1]
+
+ movq mm1, mm0 // make a copy of p[0]-p[-1]
+ punpcklbw mm3, mm7 // unpack p[-2]
+
+ paddw mm0, mm1 // (p[0]-p[-1]) * 2
+ psubw mm3, mm2 // (p[-2]-p[1])
+
+ paddw mm1, mm0 // (p[0]-p[-1]) * 3
+ paddw mm3, mm1 // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+ paddw mm3, fourFours // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+ movq mm0, [eax] // FLimit
+
+ psraw mm3, 3 // FiltVal
+ movq mm1, mm3 // FiltVal
+
+ psraw mm3, 15 // FFFF-> Neg, 0->Pos
+ pxor mm1, mm3 // invert the lanes that are negative
+
+ psubsw mm1, mm3 // abs(FiltVal)
+ por mm3, fourOnes // -1 or 1, corresponding the sign
+
+ movq mm2, mm0 // Copy of FLimit
+ psubw mm0, mm1 // FLimit - abs(FiltVal)
+
+ movq mm4, mm0 // copy FLimit-abs(FiltVal)
+ psraw mm0, 15 // FFFF->Neg, 0->Pos
+
+ pxor mm4, mm0 // invert the lanes that are negative
+ psubsw mm4, mm0 // abs(FLimit-abs(FiltVal))
+
+ psubusw mm2, mm4 // FLimit-abs(FLimit-abs(FiltVal))
+ pmullw mm2, mm3 // Get the sign back
+
+ psubw mm5, mm2 // p[0] - FiltVal
+ paddw mm6, mm2 // p[-1] + FiltVal
+
+ packuswb mm5, mm5 // clamping
+ packuswb mm6, mm6 // clamping
+
+ movd [esi], mm5 // write p[0]
+ movd [esi+ecx], mm6 // write p[-1]
+ // ---- pixels 4..7: same computation at byte offset +4 ----
+ movd mm0, [esi+4] // p[0], four pixels
+ movd mm1, [esi+ecx+4] // p[-1], four pixels
+
+ punpcklbw mm0, mm7 // unpack to short
+ movd mm2, [esi+edx+4] // p[1], four pixels
+
+ punpcklbw mm1, mm7 // unpack p[-1] to shorts
+ movd mm3, [esi+ecx*2+4] // p[-2], four pixels
+
+ movq mm5, mm0 // copy of unpacked p[0]
+ movq mm6, mm1 // copy of unpacked p[-1]
+
+ psubw mm0, mm1 // p[0] - p[-1]
+ punpcklbw mm2, mm7 // unpack p[1]
+
+ movq mm1, mm0 // make a copy of p[0]-p[-1]
+ punpcklbw mm3, mm7 // unpack p[-2]
+
+ paddw mm0, mm1 // (p[0]-p[-1]) * 2
+ psubw mm3, mm2 // (p[-2]-p[1])
+
+ paddw mm1, mm0 // (p[0]-p[-1]) * 3
+ paddw mm3, mm1 // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+ paddw mm3, fourFours // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+ movq mm0, [eax] // FLimit
+
+ psraw mm3, 3 // FiltVal
+ movq mm1, mm3 // FiltVal
+
+ psraw mm3, 15 // FFFF-> Neg, 0->Pos
+ pxor mm1, mm3 // invert the lanes that are negative
+
+ psubsw mm1, mm3 // abs(FiltVal)
+ por mm3, fourOnes // -1 or 1, corresponding the sign
+
+ movq mm2, mm0 // Copy of FLimit
+ psubw mm0, mm1 // FLimit - abs(FiltVal)
+
+ movq mm4, mm0 // copy FLimit-abs(FiltVal)
+ psraw mm0, 15 // FFFF->Neg, 0->Pos
+
+ pxor mm4, mm0 // invert the lanes that are negative
+ psubsw mm4, mm0 // abs(FLimit-abs(FiltVal))
+
+ psubusw mm2, mm4 // FLimit-abs(FLimit-abs(FiltVal))
+ pmullw mm2, mm3 // Get the sign back
+
+ psubw mm5, mm2 // p[0] - FiltVal
+ paddw mm6, mm2 // p[-1] + FiltVal
+
+ packuswb mm5, mm5 // clamping
+ packuswb mm6, mm6 // clamping
+
+ movd [esi+4], mm5 // write p[0]
+ movd [esi+ecx+4], mm6 // write p[-1]
+
+ }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilteringHoriz_12_MMX
+ *
+ * INPUTS : UINT32 QIndex : Quantization index.
+ * UINT8 *Src : Pointer to source block.
+ * INT32 Pitch : Pitch of input image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters the vertical block edge inside a prediction
+ * block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ *
+ ****************************************************************************/
+void FilteringHoriz_12_MMX ( UINT32 QIndex, UINT8 *Src, INT32 Pitch )
+{
+ INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];
+ /* Filters the edge lying between Src[-1] and Src[0] on 12 consecutive rows
+  * (Src, Src+Pitch, ... Src+11*Pitch), four rows per pass.  Rows 0-3 and
+  * 4-7 are processed interleaved; rows 8-11 follow as a separate pass.
+  *
+  * Per row:  FiltVal = (Src[-2] - 3*Src[-1] + 3*Src[0] - Src[1] + 4) >> 3
+  *           y       = sign(FiltVal) * max(0, limit - |limit - |FiltVal||)
+  *           Src[-1] = sat8(Src[-1] + y);  Src[0] = sat8(Src[0] - y)
+  * where limit is the 16-bit value at FLimitPtr and sat8 is the unsigned
+  * byte saturation performed by packuswb.
+  *
+  * Each row is fetched with two 4-byte loads (at Src-4 and at Src), so the
+  * bytes Src[-4..3] of every row are read although only Src[-2..1] are
+  * used; results are written back with 16-bit stores at Src-1.
+  *
+  * Lane diagrams in the comments list register bytes from most to least
+  * significant; "rc" is row r, column c, with columns 0..3 = Src[-2..1].
+  * "p[ms]" in the comments is the pixel just before the edge (column 1).
+  *
+  * NOTE(review): no emms is issued here; presumably the caller clears the
+  * MMX state before any x87 code runs -- confirm.
+  */
+ __declspec(align(16)) const short fourFours[] = {4, 4, 4, 4}; /* rounding term added before the >>3 */
+ __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1}; /* OR-ed into a sign mask to give +1 / -1 per lane */
+ __declspec(align(16)) unsigned char Temp[32]; /* transposed columns; bytes 0..15 are reused for rows 8-11 */
+
+ __asm
+ {
+ mov eax, FLimitPtr //eax -> four 16-bit copies of the limit
+ mov edx, Pitch
+
+ mov esi, Src
+ lea edi, Temp //edi -> transpose scratch
+ //---- rows 0-3: gather Src[-2..1] of each row, then transpose ----
+ mov ecx, edx //stride
+ movd mm0, [esi + -4] //xx xx xx xx 01 00 xx xx
+
+ movd mm4, [esi] //xx xx xx xx xx xx 03 02
+ psrld mm0, 16 //xx xx xx xx 00 00 01 00
+
+ movd mm1, [esi + ecx + -4] //xx xx xx xx 11 10 xx xx
+ punpcklwd mm0, mm4 //xx xx xx xx 03 02 01 00
+
+ movd mm4, [esi + ecx] //xx xx xx xx xx xx 13 12
+ psrld mm1, 16 //xx xx xx xx 00 00 11 10
+
+ punpcklwd mm1, mm4 //xx xx xx xx 13 12 11 10
+ lea edx, [edx + edx*2] //stride * 3
+
+ movd mm2, [esi + ecx*2 + -4] //xx xx xx xx 21 20 xx xx
+ punpcklbw mm0, mm1 //13 03 12 02 11 01 10 00
+
+ movd mm4, [esi + ecx*2] //xx xx xx xx xx xx 23 22
+ psrld mm2, 16 //xx xx xx xx 00 00 21 20
+
+ movd mm1, [esi + edx + -4] //xx xx xx xx 31 30 xx xx
+ punpcklwd mm2, mm4 //xx xx xx xx 23 22 21 20
+
+ movd mm4, [esi + edx] //xx xx xx xx xx xx 33 32
+ psrld mm1, 16 //xx xx xx xx 00 00 31 30
+
+ punpcklwd mm1, mm4 //xx xx xx xx 33 32 31 30
+ pxor mm4, mm4 //clear mm4
+
+ punpcklbw mm2, mm1 //33 23 32 22 31 21 30 20
+ movq mm1, mm0 //13 03 12 02 11 01 10 00
+
+ punpcklwd mm0, mm2 //31 21 11 01 30 20 10 00
+ lea esi, [esi + ecx*4] //base + (stride * 4)
+
+ punpckhwd mm1, mm2 //33 23 13 03 32 22 12 02
+ movq mm6, mm0 //xx xx xx xx 30 20 10 00
+
+ movq [edi], mm0 //Temp[0..7]: column 0 then column 1 of rows 0-3
+ movq mm2, mm1 //make a copy
+
+ movq [edi+8], mm1 //Temp[8..15]: column 2 then column 3 of rows 0-3
+ psrlq mm0, 32 //xx xx xx xx 31 21 11 01
+ //(loads for rows 4-7 are interleaved with the rows 0-3 arithmetic below)
+ movd mm7, [esi + -4] //xx xx xx xx 41 40 xx xx
+ punpcklbw mm1, mm4 //convert to words
+
+ movd mm4, [esi] //xx xx xx xx xx xx 43 42
+ psrld mm7, 16 //xx xx xx xx 00 00 41 40
+
+ movd mm5, [esi + ecx + -4] //xx xx xx xx 51 50 xx xx
+ punpcklwd mm7, mm4 //xx xx xx xx 43 42 41 40
+
+ movd mm4, [esi + ecx] //xx xx xx xx xx xx 53 52
+ psrld mm5, 16 //xx xx xx xx 00 00 51 50
+
+ punpcklwd mm5, mm4 //xx xx xx xx 53 52 51 50
+ pxor mm4, mm4 //clear mm4
+
+ punpcklbw mm0, mm4 //xx 31 xx 21 xx 11 xx 01
+
+ psrlq mm2, 32 //xx xx xx xx 33 23 13 03
+ psubw mm1, mm0 //x = p[0] - p[ms]
+
+ punpcklbw mm7, mm5 //53 43 52 42 51 41 50 40
+ movq mm3, mm1 //copy of x
+
+ punpcklbw mm6, mm4 //xx 30 xx 20 xx 10 xx 00
+ paddw mm3, mm1 //2*x
+
+ punpcklbw mm2, mm4 //xx 33 xx 23 xx 13 xx 03
+ paddw mm1, mm3 //3*x
+
+ paddw mm1, fourFours //x += LoopFilterAdjustBeforeShift
+ psubw mm6, mm2 //p[-2] - p[1]
+
+ movd mm2, [esi + ecx*2 + -4] //xx xx xx xx 61 60 xx xx
+ paddw mm6, mm1 //3*(p[0] - p[-1]) + (p[-2] - p[1]) + 4
+
+ movd mm4, [esi + ecx*2] //xx xx xx xx xx xx 63 62
+ psrld mm2, 16 //xx xx xx xx 00 00 61 60
+
+ movd mm5, [esi + edx + -4] //xx xx xx xx 71 70 xx xx
+ punpcklwd mm2, mm4 //xx xx xx xx 63 62 61 60
+
+ movd mm4, [esi + edx] //xx xx xx xx xx xx 73 72
+ psrld mm5, 16 //xx xx xx xx 00 00 71 70
+
+ mov esi, Src //restore PixelPtr
+ punpcklwd mm5, mm4 //xx xx xx xx 73 72 71 70
+
+ psraw mm6, 3 //values to be clipped
+ pxor mm4, mm4 //clear mm4
+
+ punpcklbw mm2, mm5 //73 63 72 62 71 61 70 60
+ movq mm5, mm7 //53 43 52 42 51 41 50 40
+
+ movq mm1, mm6 //copy of the rows 0-3 results
+ punpckhwd mm5, mm2 //73 63 53 43 72 62 52 42
+
+
+ movq [edi+24], mm5 //Temp[24..31]: column 2 then column 3 of rows 4-7
+ punpcklwd mm7, mm2 //71 61 51 41 70 60 50 40
+
+ movq [edi+16], mm7 //Temp[16..23]: column 0 then column 1 of rows 4-7
+ psraw mm6, 15 //FFFF or 0000
+
+ movq mm2, [eax] //get the limit value
+ movq mm0, mm7 //xx xx xx xx 70 60 50 40
+
+ psrlq mm7, 32 //xx xx xx xx 71 61 51 41
+ pxor mm1, mm6
+
+ psubsw mm1, mm6 //abs(i)
+ punpcklbw mm5, mm4 //xx 72 xx 62 xx 52 xx 42
+
+ por mm6, fourOnes //now have -1 or 1
+ movq mm3, mm2
+
+ punpcklbw mm7, mm4 //xx 71 xx 61 xx 51 xx 41
+ psubw mm3, mm1 //limit - abs(i)
+
+ movq mm4, mm3
+ psraw mm3, 15
+
+ psubw mm5, mm7 //x = p[0] - p[ms]
+ pxor mm4, mm3
+
+ psubsw mm4, mm3 //abs(limit - abs(i))
+ pxor mm3, mm3
+
+ movd mm1, [edi + 28] //xx xx xx xx 73 63 53 43
+ psubusw mm2, mm4 //limit - abs(limit - abs(i))
+
+ punpcklbw mm0, mm3 //xx 70 xx 60 xx 50 xx 40
+ movq mm7, mm5
+
+ paddw mm7, mm5
+ pmullw mm2, mm6 //new y -- wait 3 cycles
+
+ punpcklbw mm1, mm3 //xx 73 xx 63 xx 53 xx 43
+ paddw mm5, mm7
+
+ paddw mm5, fourFours //x += LoopFilterAdjustBeforeShift
+ psubw mm0, mm1 //(p[-2]-p[1]) for rows 4-7
+
+ paddw mm0, mm5
+ pxor mm6, mm6
+
+ movd mm7, [edi + 8] //xx xx xx xx 32 22 12 02
+ psraw mm0, 3 //values to be clipped
+
+ movd mm3, [edi + 4] //xx xx xx xx 31 21 11 01
+ punpcklbw mm7, mm6
+
+ psubw mm7, mm2 //p[0] - y (mm7 holds column 2)
+ punpcklbw mm3, mm6
+
+ paddw mm3, mm2 //p[ms] + y (mm3 holds column 1)
+ packuswb mm7, mm7 //clamp[ p[0] - y]
+
+ packuswb mm3, mm3 //clamp[ p[ms] + y]
+ movq mm1, mm0
+
+ movq mm2, [eax] //get the limit value
+ psraw mm0, 15
+
+ punpcklbw mm3, mm7 //32 31 22 21 12 11 02 01
+ movq mm7, mm0 //save sign
+
+ movd eax, mm3 //12 11 02 01 (eax no longer holds FLimitPtr)
+ pxor mm1, mm0
+
+ mov [esi - 1],ax //02 01
+ psubsw mm1, mm0 //abs(i)
+
+ shr eax, 16
+ movq mm5, mm2
+
+ mov [esi + ecx - 1],ax //12 11
+ psrlq mm3, 32 //xx xx xx xx 32 31 22 21
+
+ por mm7, fourOnes //now have -1 or 1
+ psubw mm5, mm1 //limit - abs(i)
+
+ movd eax, mm3 //32 31 22 21
+ movq mm4, mm5
+
+ mov [esi + ecx*2 - 1],ax //22 21
+ psraw mm5, 15
+
+ shr eax, 16
+ pxor mm4, mm5
+
+ mov [esi + edx - 1],ax //32 31
+ psubsw mm4, mm5 //abs(limit - abs(i))
+
+ movd mm5, [edi + 24] //xx xx xx xx 72 62 52 42
+ psubusw mm2, mm4 //limit - abs(limit - abs(i))
+
+ pmullw mm2, mm7 //new y
+ pxor mm6, mm6
+
+ movd mm3, [edi + 20] //xx xx xx xx 71 61 51 41
+ punpcklbw mm5, mm6
+
+ lea esi, [esi + ecx*4] //esi -> row 4
+ punpcklbw mm3, mm6
+
+ paddw mm3, mm2 //p[ms] + y
+ psubw mm5, mm2 //p[0] - y
+
+ packuswb mm3, mm3 //clamp[ p[ms] + y]
+ packuswb mm5, mm5 //clamp[ p[0] - y]
+
+ punpcklbw mm3, mm5 //72 71 62 61 52 51 42 41
+ movd eax, mm3 //52 51 42 41
+
+ psrlq mm3, 32 //xx xx xx xx 72 71 62 61
+ mov [esi - 1],ax //42 41
+
+ shr eax, 16
+ mov [esi + ecx - 1],ax //52 51
+
+ movd eax, mm3
+ mov [esi + ecx*2 - 1],ax //62 61
+
+ shr eax,16
+ mov [esi + edx - 1],ax //72 71
+ //---- rows 8-11 (lane labels below restart at row 0 for this group) ----
+ mov eax, FLimitPtr // reload: eax was used as store scratch above
+ lea esi, [esi+ ecx * 4] // four lines below: esi -> row 8
+
+ movd mm0, [esi + -4] //xx xx xx xx 01 00 xx xx
+ movd mm4, [esi] //xx xx xx xx xx xx 03 02
+
+ psrld mm0, 16 //xx xx xx xx 00 00 01 00
+ movd mm1, [esi + ecx + -4] //xx xx xx xx 11 10 xx xx
+
+ punpcklwd mm0, mm4 //xx xx xx xx 03 02 01 00
+ movd mm4, [esi + ecx] //xx xx xx xx xx xx 13 12
+
+ psrld mm1, 16 //xx xx xx xx 00 00 11 10
+ punpcklwd mm1, mm4 //xx xx xx xx 13 12 11 10
+
+ movd mm2, [esi + ecx*2 + -4] //xx xx xx xx 21 20 xx xx
+ punpcklbw mm0, mm1 //13 03 12 02 11 01 10 00
+
+ movd mm4, [esi + ecx*2] //xx xx xx xx xx xx 23 22
+ psrld mm2, 16 //xx xx xx xx 00 00 21 20
+
+ movd mm1, [esi + edx + -4] //xx xx xx xx 31 30 xx xx
+ punpcklwd mm2, mm4 //xx xx xx xx 23 22 21 20
+
+ movd mm4, [esi + edx] //xx xx xx xx xx xx 33 32
+ psrld mm1, 16 //xx xx xx xx 00 00 31 30
+
+ punpcklwd mm1, mm4 //xx xx xx xx 33 32 31 30
+ pxor mm4, mm4 //clear mm4 for unpacking
+
+ punpcklbw mm2, mm1 //33 23 32 22 31 21 30 20
+ movq mm1, mm0 //13 03 12 02 11 01 10 00
+
+ punpcklwd mm0, mm2 //31 21 11 01 30 20 10 00
+ punpckhwd mm1, mm2 //33 23 13 03 32 22 12 02
+
+ movq mm6, mm0 //xx xx xx xx 30 20 10 00
+ movq [edi], mm0 //Temp[0..7]: column 0 then column 1 of this group
+
+ movq mm2, mm1
+ movq [edi+8], mm1 //Temp[8..15]: column 2 then column 3 of this group
+
+ psrlq mm0, 32 //xx xx xx xx 31 21 11 01
+ punpcklbw mm1, mm4 //-- 32 -- 22 -- 12 -- 02
+
+ punpcklbw mm0, mm4 //-- 31 -- 21 -- 11 -- 01
+ psrlq mm2, 32 //xx xx xx xx 33 23 13 03
+
+ psubw mm1, mm0 // mm1 = p[0] - p[ms]
+ movq mm3, mm1 // mm3 = p[0] - p[ms]
+
+ punpcklbw mm6, mm4 //-- 30 -- 20 -- 10 -- 00
+ paddw mm3, mm1 // mm3 = (p[0] - p[ms])*2
+
+ punpcklbw mm2, mm4 //-- 33 -- 23 -- 13 -- 03
+ paddw mm1, mm3 // mm1 = (p[0] - p[ms])*3
+
+ paddw mm1, fourFours // mm1 = (p[0] - p[ms])*3 + 4
+ psubw mm6, mm2 // mm6 = (p[ms2]-p[1])
+
+ paddw mm6, mm1 // mm6 = (p[0] - p[ms])*3 + 4 + (p[ms2]-p[1])
+ psraw mm6, 3 // mm6 = mm6 >> 3 (arithmetic shift)
+
+ movq mm1, mm6 // make a copy of initial FiltVal
+ psraw mm6, 15 // FFFF for negative, 0000 for positive
+
+ pxor mm1, mm6 // invert the lanes that are negative
+ psubsw mm1, mm6 // abs(FiltVal)
+
+ por mm6, fourOnes // -1 or 1 for negative or positive
+ movq mm2, [eax] // mm2 = FLimit
+
+ movq mm3, mm2 // mm3 = FLimit
+ psubw mm3, mm1 // mm3 = FLimit - abs(FiltVal)
+
+ movq mm4, mm3 // Make a copy of FLimit - abs(FiltVal)
+ psraw mm3, 15 // FFFF and 0000 for - and +
+
+ pxor mm4, mm3 // invert the lanes that are negative
+ psubsw mm4, mm3 // abs(Limit-abs(FiltVal))
+
+ psubusw mm2, mm4 // Limit - abs(Limit-abs(FiltVal))
+ pmullw mm2, mm6 // get the sign back
+
+ pxor mm5, mm5 // clear mm5 for unpacking
+ movd mm7, [edi+8] // xx xx xx xx 32 22 12 02
+
+ punpcklbw mm7, mm5 // -- 32 -- 22 -- 12 -- 02
+ movd mm3, [edi+4] // xx xx xx xx 31 21 11 01
+
+ psubw mm7, mm2 // p[0] - FiltVal (mm7 holds column 2)
+ punpcklbw mm3, mm5 // -- 31 -- 21 -- 11 -- 01
+
+ paddw mm3, mm2 // p[ms] + FiltVal (mm3 holds column 1)
+ packuswb mm7, mm7 // clamping
+
+ packuswb mm3, mm3 // clamping
+ punpcklbw mm3, mm7 // 32 31 22 21 12 11 02 01
+
+ movd eax, mm3 // 12 11 02 01
+ psrlq mm3, 32 // xx xx xx xx 32 31 22 21
+
+ mov [esi-1], ax // write 01 02
+ shr eax, 16 // xx xx 12 11
+
+ mov [esi+ecx -1], ax // write 11 12
+ movd eax, mm3 // 32 31 22 21
+
+ mov [esi+ecx*2 -1], ax // write 21 22
+ shr eax, 16 // xx xx 32 31
+
+ mov [esi+edx-1], ax // write 31 32
+
+ }
+
+/* Scalar form of the per-row computation, kept for reference only (not
+   compiled; its identifiers QValue, Length, Dest, SrcPitch and DestPitch
+   do not match this function's parameters):
+
+ INT32 j;
+ INT32 FiltVal;
+ UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+ UINT32 FLimit;
+ FLimit = LoopFilterLimitValuesV2[QValue];
+ for ( j=0; j<Length; j++ )
+ {
+ // set up blur kernel for differences
+ FiltVal = (( Src[-2] ) -
+ ( Src[-1] * 3 ) +
+ ( Src[ 0] * 3 ) -
+ ( Src[ 1] ) + 4 ) >> 3;
+
+ FiltVal = Bound ( FLimit, FiltVal );
+
+ Dest[-1] = LimitTable[(INT32)Src[-1] + FiltVal];
+ Dest[ 0] = LimitTable[(INT32)Src[ 0] - FiltVal];
+
+ Src += SrcPitch;
+ Dest += DestPitch;
+ }
+*/
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilteringVert_12_MMX
+ *
+ * INPUTS : UINT32 QIndex : Quantization index.
+ * UINT8 *PixelPtr : Pointer to source block.
+ * INT32 Pitch : Pitch of input image.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Filters the horizontal block edge inside a prediction
+ * block.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+
+void FilteringVert_12_MMX ( UINT32 QIndex, UINT8 *PixelPtr, INT32 Pitch )
+{
+ INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];
+ /* Filters the edge lying between the row at PixelPtr-Pitch and the row at
+  * PixelPtr, for 12 horizontally adjacent pixels, as three identical passes
+  * of four pixels (byte offsets 0, 4 and 8).
+  *
+  * In the comments p[k] is the row at PixelPtr + k*Pitch.  Per pixel:
+  *   FiltVal = (p[-2] - 3*p[-1] + 3*p[0] - p[1] + 4) >> 3
+  *   y       = sign(FiltVal) * max(0, FLimit - |FLimit - |FiltVal||)
+  *   p[-1]   = sat8(p[-1] + y);  p[0] = sat8(p[0] - y)
+  * where FLimit is the 16-bit value at FLimitPtr and sat8 is the unsigned
+  * byte saturation performed by packuswb.
+  *
+  * NOTE(review): no emms is issued here; presumably the caller clears the
+  * MMX state before any x87 code runs -- confirm.
+  */
+ __declspec(align(16)) const short fourFours[] = { 4, 4, 4, 4 }; /* rounding term added before the >>3 */
+ __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1 }; /* OR-ed into a sign mask to give +1 / -1 per lane */
+
+ __asm
+ {
+ mov eax, FLimitPtr // 4 FLimit Values in shorts
+ mov edx, Pitch // Pitch
+
+ xor ecx, ecx // clear ecx to get negative Pitch
+ sub ecx, edx // Negative Pitch
+ // ---- pixels 0..3 ----
+ mov esi, PixelPtr // Src and Dest pointer
+ movd mm0, [esi] // p[0], four pixels
+
+ pxor mm7, mm7 // clear mm7 (stays zero; used as the unpack partner throughout)
+ movd mm1, [esi+ecx] // p[-1], four pixels
+
+ punpcklbw mm0, mm7 // unpack to short
+ movd mm2, [esi+edx] // p[1], four pixels
+
+ punpcklbw mm1, mm7 // unpack p[-1] to shorts
+ movd mm3, [esi+ecx*2] // p[-2], four pixels
+
+ movq mm5, mm0 // copy of unpacked p[0]
+ movq mm6, mm1 // copy of unpacked p[-1]
+
+ psubw mm0, mm1 // p[0] - p[-1]
+ punpcklbw mm2, mm7 // unpack p[1]
+
+ movq mm1, mm0 // make a copy of p[0]-p[-1]
+ punpcklbw mm3, mm7 // unpack p[-2]
+
+ paddw mm0, mm1 // (p[0]-p[-1]) * 2
+ psubw mm3, mm2 // (p[-2]-p[1])
+
+ paddw mm1, mm0 // (p[0]-p[-1]) * 3
+ paddw mm3, mm1 // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+ paddw mm3, fourFours // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+ movq mm0, [eax] // FLimit
+
+ psraw mm3, 3 // FiltVal
+ movq mm1, mm3 // FiltVal
+
+ psraw mm3, 15 // FFFF-> Neg, 0->Pos
+ pxor mm1, mm3 // invert the lanes that are negative
+
+ psubsw mm1, mm3 // abs(FiltVal)
+ por mm3, fourOnes // -1 or 1, corresponding the sign
+
+ movq mm2, mm0 // Copy of FLimit
+ psubw mm0, mm1 // FLimit - abs(FiltVal)
+
+ movq mm4, mm0 // copy FLimit-abs(FiltVal)
+ psraw mm0, 15 // FFFF->Neg, 0->Pos
+
+ pxor mm4, mm0 // invert the lanes that are negative
+ psubsw mm4, mm0 // abs(FLimit-abs(FiltVal))
+
+ psubusw mm2, mm4 // FLimit-abs(FLimit-abs(FiltVal))
+ pmullw mm2, mm3 // Get the sign back
+
+ psubw mm5, mm2 // p[0] - FiltVal
+ paddw mm6, mm2 // p[-1] + FiltVal
+
+ packuswb mm5, mm5 // clamping
+ packuswb mm6, mm6 // clamping
+
+ movd [esi], mm5 // write p[0]
+ movd [esi+ecx], mm6 // write p[-1]
+ // ---- pixels 4..7: same computation at byte offset +4 ----
+ movd mm0, [esi+4] // p[0], four pixels
+ movd mm1, [esi+ecx+4] // p[-1], four pixels
+
+ punpcklbw mm0, mm7 // unpack to short
+ movd mm2, [esi+edx+4] // p[1], four pixels
+
+ punpcklbw mm1, mm7 // unpack p[-1] to shorts
+ movd mm3, [esi+ecx*2+4] // p[-2], four pixels
+
+ movq mm5, mm0 // copy of unpacked p[0]
+ movq mm6, mm1 // copy of unpacked p[-1]
+
+ psubw mm0, mm1 // p[0] - p[-1]
+ punpcklbw mm2, mm7 // unpack p[1]
+
+ movq mm1, mm0 // make a copy of p[0]-p[-1]
+ punpcklbw mm3, mm7 // unpack p[-2]
+
+ paddw mm0, mm1 // (p[0]-p[-1]) * 2
+ psubw mm3, mm2 // (p[-2]-p[1])
+
+ paddw mm1, mm0 // (p[0]-p[-1]) * 3
+ paddw mm3, mm1 // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+ paddw mm3, fourFours // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+ movq mm0, [eax] // FLimit
+
+ psraw mm3, 3 // FiltVal
+ movq mm1, mm3 // FiltVal
+
+ psraw mm3, 15 // FFFF-> Neg, 0->Pos
+ pxor mm1, mm3 // invert the lanes that are negative
+
+ psubsw mm1, mm3 // abs(FiltVal)
+ por mm3, fourOnes // -1 or 1, corresponding the sign
+
+ movq mm2, mm0 // Copy of FLimit
+ psubw mm0, mm1 // FLimit - abs(FiltVal)
+
+ movq mm4, mm0 // copy FLimit-abs(FiltVal)
+ psraw mm0, 15 // FFFF->Neg, 0->Pos
+
+ pxor mm4, mm0 // invert the lanes that are negative
+ psubsw mm4, mm0 // abs(FLimit-abs(FiltVal))
+
+ psubusw mm2, mm4 // FLimit-abs(FLimit-abs(FiltVal))
+ pmullw mm2, mm3 // Get the sign back
+
+ psubw mm5, mm2 // p[0] - FiltVal
+ paddw mm6, mm2 // p[-1] + FiltVal
+
+ packuswb mm5, mm5 // clamping
+ packuswb mm6, mm6 // clamping
+
+ movd [esi+4], mm5 // write p[0]
+ movd [esi+ecx+4], mm6 // write p[-1]
+ // ---- pixels 8..11: same computation at byte offset +8 ----
+ movd mm0, [esi+8] // p[0], four pixels
+ movd mm1, [esi+ecx+8] // p[-1], four pixels
+
+ punpcklbw mm0, mm7 // unpack to short
+ movd mm2, [esi+edx+8] // p[1], four pixels
+
+ punpcklbw mm1, mm7 // unpack p[-1] to shorts
+ movd mm3, [esi+ecx*2+8] // p[-2], four pixels
+
+ movq mm5, mm0 // copy of unpacked p[0]
+ movq mm6, mm1 // copy of unpacked p[-1]
+
+ psubw mm0, mm1 // p[0] - p[-1]
+ punpcklbw mm2, mm7 // unpack p[1]
+
+ movq mm1, mm0 // make a copy of p[0]-p[-1]
+ punpcklbw mm3, mm7 // unpack p[-2]
+
+ paddw mm0, mm1 // (p[0]-p[-1]) * 2
+ psubw mm3, mm2 // (p[-2]-p[1])
+
+ paddw mm1, mm0 // (p[0]-p[-1]) * 3
+ paddw mm3, mm1 // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+ paddw mm3, fourFours // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+ movq mm0, [eax] // FLimit
+
+ psraw mm3, 3 // FiltVal
+ movq mm1, mm3 // FiltVal
+
+ psraw mm3, 15 // FFFF-> Neg, 0->Pos
+ pxor mm1, mm3 // invert the lanes that are negative
+
+ psubsw mm1, mm3 // abs(FiltVal)
+ por mm3, fourOnes // -1 or 1, corresponding the sign
+
+ movq mm2, mm0 // Copy of FLimit
+ psubw mm0, mm1 // FLimit - abs(FiltVal)
+
+ movq mm4, mm0 // copy FLimit-abs(FiltVal)
+ psraw mm0, 15 // FFFF->Neg, 0->Pos
+
+ pxor mm4, mm0 // invert the lanes that are negative
+ psubsw mm4, mm0 // abs(FLimit-abs(FiltVal))
+
+ psubusw mm2, mm4 // FLimit-abs(FLimit-abs(FiltVal))
+ pmullw mm2, mm3 // Get the sign back
+
+ psubw mm5, mm2 // p[0] - FiltVal
+ paddw mm6, mm2 // p[-1] + FiltVal
+
+ packuswb mm5, mm5 // clamping
+ packuswb mm6, mm6 // clamping
+
+ movd [esi+8], mm5 // write p[0]
+ movd [esi+ecx+8], mm6 // write p[-1]
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c
new file mode 100644
index 00000000..e0aa3c57
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c
@@ -0,0 +1,1267 @@
+/****************************************************************************
+*
+* Module Title : scaleopt.c
+*
+* Description : Optimized scaling functions
+*
+****************************************************************************/
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+__declspec(align(16)) const static unsigned short oneFifth[] = { 51, 51, 51, 51 }; // 51/256 ~= 1/5, one weight per 16-bit lane
+__declspec(align(16)) const static unsigned short twoFifths[] = { 102, 102, 102, 102 }; // 102/256 ~= 2/5
+__declspec(align(16)) const static unsigned short threeFifths[] = { 154, 154, 154, 154 }; // 154/256 ~= 3/5
+__declspec(align(16)) const static unsigned short fourFifths[] = { 205, 205, 205, 205 }; // 205/256 ~= 4/5
+__declspec(align(16)) const static unsigned short roundValues[] = { 128, 128, 128, 128 }; // +128 rounding term added before the >> 8
+__declspec(align(16)) const static unsigned short fourOnes[]= { 1, 1, 1, 1}; // +1 rounding term for the (a + b + 1) / 2 averages
+__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; // 4:5 horizontal weights applied to the right-hand neighbour
+__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; // 4:5 horizontal weights applied to the left-hand pixel
+__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; // selects byte 6; used to replicate the last pixel at the end of a 4:5 line
+__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; // 3:5 horizontal weights applied to the right-hand neighbour
+__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; // 3:5 horizontal weights applied to the left-hand pixel
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/****************************************************************************
+ *
+ * ROUTINE : HorizontalLine_3_5_Scale_MMX
+ *
+ * INPUTS : const unsigned char *source :
+ * unsigned int sourceWidth :
+ * unsigned char *dest :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void HorizontalLine_3_5_Scale_MMX
+(
+ const unsigned char *source, // input line; read one DWORD at a time, 3 bytes consumed per step
+ unsigned int sourceWidth, // source pixel count; only used to compute the loop end pointer
+ unsigned char *dest, // output line; 5 bytes are produced for every 3 source bytes
+ unsigned int destWidth // unused
+)
+{
+ (void) destWidth;
+
+ __asm
+ {
+ // Every step expands 3 source pixels (plus the first pixel of the next group) into 5 output pixels.
+ push ebx // ebx is used as scratch below; popped again before leaving the block
+
+ mov esi, source
+ mov edi, dest
+
+ mov ecx, sourceWidth
+ lea edx, [esi+ecx-3]; // edx = source + sourceWidth - 3, the loop end pointer
+
+ movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
+ movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
+
+ movq mm4, roundValues // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
+
+HorizLine_3_5_Loop:
+
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
+
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
+
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
+
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
+
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
+
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
+
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 // right-hand pixels * const35_2 weights
+
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 // left-hand pixels * const35_1 weights
+
+ mov [edi], ebx // write output 00; the other three bytes are overwritten by the movd below
+ add esi, 3
+
+ add edi, 5
+ paddw mm0, mm1 // left * w1 + right * w2
+
+ paddw mm0, mm4 // + 128
+ psrlw mm0, 8 // / 256
+
+ cmp esi, edx
+ packuswb mm0, mm7
+
+ movd DWORD Ptr [edi-4], mm0 // write outputs 01 02 03 04 of this group (edi was already advanced by 5)
+ jl HorizLine_3_5_Loop // loop while esi < edx (signed compare)
+
+//Exit: last group - no pixel 03 follows, so pixel 02 is replicated in its place
+ mov eax, DWORD PTR [esi] // eax = 00 01 02 03
+ mov ebx, eax
+
+ and ebx, 0xffff00 // ebx = xx 01 02 xx
+ mov ecx, eax // ecx = 00 01 02 03
+
+ and eax, 0xffff0000 // eax = xx xx 02 03
+ xor ecx, eax // ecx = 00 01 xx xx
+
+ shr ebx, 8 // ebx = 01 02 xx xx
+ or eax, ebx // eax = 01 02 02 03
+
+ shl eax, 8 // eax = xx 01 02 02
+ and eax, 0xffff0000 // eax = xx xx 02 02
+
+ or eax, ebx // eax = 01 02 02 02
+
+ shl ebx, 16 // ebx = xx xx 01 02
+ movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
+
+ or ebx, ecx // ebx = 00 01 01 02
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
+
+ movd mm0, ebx // mm0 = 00 01 01 02
+ pmullw mm1, mm6 // right-hand pixels * const35_2 weights
+
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
+ pmullw mm0, mm5 // left-hand pixels * const35_1 weights
+
+ mov [edi], ebx // write output 00; the other three bytes are overwritten by the movd below
+ paddw mm0, mm1
+
+ paddw mm0, mm4
+ psrlw mm0, 8
+
+ packuswb mm0, mm7
+ movd DWORD Ptr [edi+1], mm0 // write outputs 01 02 03 04; output 04 works out to pixel 02 itself (154 + 102 = 256)
+
+ pop ebx
+ // NOTE(review): no emms is issued here; presumably the caller clears the MMX state - confirm.
+ }
+
+ /* C version of the same scaling, kept for reference:
+ const unsigned char *src = source;
+ unsigned char *des = dest;
+ unsigned int a, b, c ;
+ unsigned int i;
+ (void) destWidth;
+
+ for ( i=0; i<sourceWidth-3; i+=3 )
+ {
+ a = src[0];
+ b = src[1];
+ des [0] = (UINT8) (a);
+ // 2 * left + 3 * right /5
+ des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
+ c = src[2] ;
+ // 4 * left + 1 * right /5
+ des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
+ // 1 * left + 4 * right /5
+ des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
+
+ a = src[3];
+ // 3 * left + 2 * right /5
+ des [4] = (UINT8) (( c * 154 + a * 102 + 128 ) >> 8);
+
+ src += 3;
+ des += 5;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (UINT8) (a);
+ // 2 * left + 3 * right /5
+ des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
+ c = src[2] ;
+ // 4 * left + 1 * right /5
+ des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
+ // 1 * left + 4 * right /5
+ des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
+
+ des [4] = (UINT8) (c);
+*/
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : HorizontalLine_4_5_Scale_MMX
+ *
+ * INPUTS : const unsigned char *source :
+ * unsigned int sourceWidth :
+ * unsigned char *dest :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void HorizontalLine_4_5_Scale_MMX
+(
+ const unsigned char *source, // input line; 8 bytes consumed per step
+ unsigned int sourceWidth, // source pixel count; only used to compute the loop end pointer
+ unsigned char *dest, // output line; 10 bytes are produced for every 8 source bytes
+ unsigned int destWidth // unused
+)
+{
+ (void)destWidth;
+
+ __asm
+ {
+ // Every step expands two 4-pixel groups (8 source pixels) into two 5-pixel groups (10 output pixels).
+ mov esi, source
+ mov edi, dest
+
+ mov ecx, sourceWidth
+ lea edx, [esi+ecx-8]; // edx = source + sourceWidth - 8, the loop end pointer
+
+ movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
+ movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
+
+ movq mm4, roundValues // mm4 = 80 xx 80 xx 80 xx 80 xx
+ pxor mm7, mm7 // clear mm7
+
+HorizLine_4_5_Loop:
+
+ movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
+
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
+
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+
+ movd DWORD PTR [edi+5], mm2 // write output 05 xx xx xx (source pixel 04 copied unchanged)
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
+
+ paddw mm0, mm1 // left * w1 + right * w2
+ paddw mm0, mm4 // added round values
+
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7
+
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ add edi, 10
+
+ add esi, 8
+ paddw mm2, mm3 // left * w1 + right * w2
+
+ paddw mm2, mm4 // added round values
+ cmp esi, edx
+
+ psrlw mm2, 8
+ packuswb mm2, mm7
+
+ movd DWORD PTR [edi-4], mm2 // write output 06 07 08 09 (edi was already advanced by 10)
+ jl HorizLine_4_5_Loop // loop while esi < edx (signed compare)
+
+//Exit: last eight pixels - no pixel 08 follows, so pixel 07 is replicated in its place
+ movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
+ movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
+
+ movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
+
+ movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
+ pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
+
+ psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
+
+ movq mm3, mm1
+
+ movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
+ punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
+
+ punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
+ pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
+
+ pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
+ punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
+
+ movd DWORD PTR [edi+5], mm2 // write output 05 xx xx xx (source pixel 04 copied unchanged)
+ pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
+
+ punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 07 xx
+ pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
+
+ paddw mm0, mm1 // left * w1 + right * w2
+ paddw mm0, mm4 // added round values
+
+ psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
+ packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
+
+ movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
+ paddw mm2, mm3 // left * w1 + right * w2
+
+ paddw mm2, mm4 // added round values
+ psrlw mm2, 8
+
+ packuswb mm2, mm7
+ movd DWORD PTR [edi+6], mm2 // write output 06 07 08 09; output 09 works out to pixel 07 itself (205 + 51 = 256)
+
+ // NOTE(review): no emms is issued here; presumably the caller clears the MMX state - confirm.
+ }
+/* C version of the same scaling, kept for reference:
+ const unsigned char *src = source;
+ unsigned char *des = dest;
+ unsigned int a, b, c ;
+ unsigned i;
+ (void) destWidth;
+
+ for ( i=0; i<sourceWidth-4; i+=4 )
+ {
+ a = src[0];
+ b = src[1];
+ des [0] = (UINT8) a;
+ des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
+ c = src[2] * 154;
+ a = src[3];
+ des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
+ des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
+ b = src[4];
+ des [4] = (UINT8) (( a * 205 + 51 * b + 128) >> 8);
+
+ src += 4;
+ des += 5;
+ }
+
+ a = src[0];
+ b = src[1];
+ des [0] = (UINT8) (a);
+ des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
+ c = src[2] * 154;
+ a = src[3];
+ des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
+ des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
+ des [4] = (UINT8) (a);
+*/
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VerticalBand_4_5_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels.
+ *
+ * SPECIAL NOTES : The routine uses the first line of the band below
+ * the current band. The function also has a "C" only
+ * version.
+ *
+ ****************************************************************************/
+void VerticalBand_4_5_Scale_MMX
+(
+ unsigned char *dest, // top line of the band; the band is scaled in place
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ // Reads lines 0..3 of the band plus line 5 (first line of the next band) and rewrites lines 1..4 in place.
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ lea edi, [esi+ecx*2] // two lines below
+ add edi, ecx // three lines below
+
+ pxor mm7, mm7 // clear out mm7
+ mov edx, destWidth // Loop counter
+
+VS_4_5_loop:
+
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
+
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
+
+ movq mm5, oneFifth
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm0, mm5 // a * 1/5
+
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
+
+ pmullw mm2, mm5 // a * 1/5
+ movq mm6, fourFifths // constant 4/5
+
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
+
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
+
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
+
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, roundValues // + 128
+
+ paddw mm2, roundValues // + 128
+ psrlw mm0, 8
+
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
+
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
+
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
+
+ movq mm5, twoFifths
+ movq mm2, mm0 // make a copy
+
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, threeFifths
+
+
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
+
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
+
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
+
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, roundValues // + 128
+
+ paddw mm3, roundValues // + 128
+ psrlw mm1, 8
+
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
+
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
+
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
+
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, twoFifths // mm5 = 2/5
+
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
+
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
+
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
+
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
+
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
+
+ paddw mm0, roundValues // + 128
+ paddw mm2, roundValues // + 128
+
+ psrlw mm0, 8
+ psrlw mm2, 8
+
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
+
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
+
+ movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group (five lines below dest)
+
+ movq mm5, fourFifths // mm5 = 4/5
+ pmullw mm1, mm5 // d * 4/5
+
+ movq mm6, oneFifth // mm6 = 1/5
+ movq mm2, mm0 // make a copy
+
+ pmullw mm3, mm5 // d * 4/5
+ punpcklbw mm0, mm7 // unpack low
+
+ pmullw mm0, mm6 // an * 1/5
+ punpckhbw mm2, mm7 // unpack high
+
+ paddw mm1, mm0 // d * 4/5 + an * 1/5
+ pmullw mm2, mm6 // an * 1/5
+
+ paddw mm3, mm2 // d * 4/5 + an * 1/5
+ paddw mm1, roundValues // + 128
+
+ paddw mm3, roundValues // + 128
+ psrlw mm1, 8
+
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[4]
+
+ movq QWORD ptr [edi+ecx], mm1 // write des[4]
+
+ add edi, 8 // advance both column pointers by eight pixels
+ add esi, 8
+
+ sub edx, 8
+ jg VS_4_5_loop // repeat while columns remain
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LastVerticalBand_4_5_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image.
+ *
+ * SPECIAL NOTES : There is no band below the last one, so the final output
+ * line (des[4]) is a straight copy of the last source line.
+ * The function also has a "C" only version.
+ *
+ ****************************************************************************/
+void LastVerticalBand_4_5_Scale_MMX
+(
+ unsigned char *dest, // top line of the band; the band is scaled in place
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ lea edi, [esi+ecx*2] // two lines below
+ add edi, ecx // three lines below
+
+ pxor mm7, mm7 // clear out mm7
+ mov edx, destWidth // Loop counter
+
+LastVS_4_5_loop:
+
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
+
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
+
+ movq mm5, oneFifth
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm0, mm5 // a * 1/5
+
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
+
+ pmullw mm2, mm5 // a * 1/5
+ movq mm6, fourFifths // constant 4/5
+
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 4/5
+
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
+
+ pmullw mm5, mm6 // b * 4/5
+ paddw mm0, mm4 // a * 1/5 + b * 4/5
+
+ paddw mm2, mm5 // a * 1/5 + b * 4/5
+ paddw mm0, roundValues // + 128
+
+ paddw mm2, roundValues // + 128
+ psrlw mm0, 8
+
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
+
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
+
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
+
+ movq mm5, twoFifths
+ movq mm2, mm0 // make a copy
+
+ pmullw mm1, mm5 // b * 2/5
+ movq mm6, threeFifths
+
+
+ punpcklbw mm0, mm7 // unpack low to word
+ pmullw mm3, mm5 // b * 2/5
+
+ movq mm4, mm0 // make copy of c
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm4, mm6 // c * 3/5
+ movq mm5, mm2
+
+ pmullw mm5, mm6 // c * 3/5
+ paddw mm1, mm4 // b * 2/5 + c * 3/5
+
+ paddw mm3, mm5 // b * 2/5 + c * 3/5
+ paddw mm1, roundValues // + 128
+
+ paddw mm3, roundValues // + 128
+ psrlw mm1, 8
+
+ psrlw mm3, 8
+ packuswb mm1, mm3 // des[2]
+
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+ movq mm1, [edi] // mm1=Src[3];
+
+ movq QWORD ptr [edi+ecx], mm1 // write des[4]; last band, so it is a plain copy of Src[3]
+
+ // mm0, mm2 --- Src[2]
+ // mm1 --- Src[3]
+ // mm6 --- 3/5
+ // mm7 for unpacking
+
+ pmullw mm0, mm6 // c * 3/5
+ movq mm5, twoFifths // mm5 = 2/5
+
+ movq mm3, mm1 // make a copy
+ pmullw mm2, mm6 // c * 3/5
+
+ punpcklbw mm1, mm7 // unpack low
+ movq mm4, mm1 // make a copy
+
+ punpckhbw mm3, mm7 // unpack high
+ pmullw mm4, mm5 // d * 2/5
+
+ movq mm6, mm3 // make a copy
+ pmullw mm6, mm5 // d * 2/5
+
+ paddw mm0, mm4 // c * 3/5 + d * 2/5
+ paddw mm2, mm6 // c * 3/5 + d * 2/5
+
+ paddw mm0, roundValues // + 128
+ paddw mm2, roundValues // + 128
+
+ psrlw mm0, 8
+ psrlw mm2, 8
+
+ packuswb mm0, mm2 // des[3]
+ movq QWORD ptr [edi], mm0 // write des[3]
+
+ // mm1, mm3 --- Src[3]
+ // mm7 -- cleared for unpacking
+ add edi, 8 // advance both column pointers by eight pixels
+ add esi, 8
+
+ sub edx, 8
+ jg LastVS_4_5_loop // repeat while columns remain
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VerticalBand_3_5_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels.
+ *
+ * SPECIAL NOTES : The routine uses the first line of the band below
+ * the current band. The function also has an "C" only
+ * version.
+ *
+ ****************************************************************************/
+void VerticalBand_3_5_Scale_MMX
+(
+ unsigned char *dest, // top line of the band; the band is scaled in place
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ lea edi, [esi+ecx*2] // two lines below
+ add edi, ecx // three lines below
+
+ pxor mm7, mm7 // clear out mm7
+ mov edx, destWidth // Loop counter
+
+VS_3_5_loop:
+
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
+
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
+
+ movq mm5, twoFifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm0, mm5 // a * 2/5
+
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
+
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, threeFifths // mm6 = 3/5
+
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
+
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
+
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
+
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, roundValues // + 128
+
+ paddw mm2, roundValues // + 128
+ psrlw mm0, 8
+
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
+
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
+
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
+
+ movq mm4, mm1 // b low
+ pmullw mm1, fourFifths // b * 4/5 low
+
+ movq mm5, mm3 // b high
+ pmullw mm3, fourFifths // b * 4/5 high
+
+ movq mm2, mm0 // c
+ pmullw mm4, oneFifth // b * 1/5
+
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, oneFifth // b * 1/5
+
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
+
+ pmullw mm6, oneFifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high; mm7 is scratch from here until it is re-cleared below
+
+ pmullw mm7, oneFifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
+
+ pmullw mm6, fourFifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
+
+ pmullw mm7, fourFifths // c * 4/5 high
+
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+
+ paddw mm1, roundValues // + 128
+ paddw mm3, roundValues // + 128
+
+ psrlw mm1, 8
+ psrlw mm3, 8
+
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+
+ paddw mm4, roundValues // + 128
+ paddw mm5, roundValues // + 128
+
+ psrlw mm4, 8
+ psrlw mm5, 8
+
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
+
+ // mm0, mm2 --- Src[2] (c), unpacked low / high
+
+ pxor mm7, mm7 // clear mm7 for unpacking (also needed by the next loop iteration)
+ movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group (five lines below dest)
+
+ movq mm5, threeFifths // mm5 = 3/5
+ pmullw mm0, mm5 // c * 3/5
+
+ movq mm6, twoFifths // mm6 = 2/5
+ movq mm3, mm1 // make a copy
+
+ pmullw mm2, mm5 // c * 3/5
+ punpcklbw mm1, mm7 // unpack low
+
+ pmullw mm1, mm6 // an * 2/5
+ punpckhbw mm3, mm7 // unpack high
+
+ paddw mm0, mm1 // c * 3/5 + an * 2/5
+ pmullw mm3, mm6 // an * 2/5
+
+ paddw mm2, mm3 // c * 3/5 + an * 2/5
+ paddw mm0, roundValues // + 128
+
+ paddw mm2, roundValues // + 128
+ psrlw mm0, 8
+
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des[4]
+
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]
+
+ add edi, 8 // advance both column pointers by eight pixels
+ add esi, 8
+
+ sub edx, 8
+ jg VS_3_5_loop // repeat while columns remain
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LastVerticalBand_3_5_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 3 to 5 up-scaling of the last 3-pixel high band in an image.
+ *
+ * SPECIAL NOTES : There is no band below the last one, so the final output
+ * line (des[4]) is a straight copy of the last source line.
+ * The function also has a "C" only version.
+ *
+ ****************************************************************************/
+void LastVerticalBand_3_5_Scale_MMX
+(
+ unsigned char *dest, // top line of the band; the band is scaled in place
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ // Last band: reads lines 0..2, rewrites lines 1..3 and copies source line 2 into line 4.
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ lea edi, [esi+ecx*2] // two lines below
+ add edi, ecx // three lines below
+
+ pxor mm7, mm7 // clear out mm7
+ mov edx, destWidth // Loop counter
+
+
+LastVS_3_5_loop:
+
+ movq mm0, QWORD ptr [esi] // src[0];
+ movq mm1, QWORD ptr [esi+ecx] // src[1];
+
+ movq mm2, mm0 // Make a copy
+ punpcklbw mm0, mm7 // unpack low to word
+
+ movq mm5, twoFifths // mm5 = 2/5
+ punpckhbw mm2, mm7 // unpack high to word
+
+ pmullw mm0, mm5 // a * 2/5
+
+ movq mm3, mm1 // make a copy
+ punpcklbw mm1, mm7 // unpack low to word
+
+ pmullw mm2, mm5 // a * 2/5
+ movq mm6, threeFifths // mm6 = 3/5
+
+ movq mm4, mm1 // copy of low b
+ pmullw mm4, mm6 // b * 3/5
+
+ punpckhbw mm3, mm7 // unpack high to word
+ movq mm5, mm3 // copy of high b
+
+ pmullw mm5, mm6 // b * 3/5
+ paddw mm0, mm4 // a * 2/5 + b * 3/5
+
+ paddw mm2, mm5 // a * 2/5 + b * 3/5
+ paddw mm0, roundValues // + 128
+
+ paddw mm2, roundValues // + 128
+ psrlw mm0, 8
+
+ psrlw mm2, 8
+ packuswb mm0, mm2 // des [1]
+
+ movq QWORD ptr [esi+ecx], mm0 // write des[1]
+ movq mm0, [esi+ecx*2] // mm0 = src[2]
+
+
+
+ // mm1, mm3 --- Src[1]
+ // mm0 --- Src[2]
+ // mm7 for unpacking
+
+ movq mm4, mm1 // b low
+ pmullw mm1, fourFifths // b * 4/5 low
+
+ movq QWORD ptr [edi+ecx], mm0 // write des[4]; last band, so it is a plain copy of Src[2]
+
+ movq mm5, mm3 // b high
+ pmullw mm3, fourFifths // b * 4/5 high
+
+ movq mm2, mm0 // c
+ pmullw mm4, oneFifth // b * 1/5
+
+ punpcklbw mm0, mm7 // c low
+ pmullw mm5, oneFifth // b * 1/5
+
+ movq mm6, mm0 // make copy of c low
+ punpckhbw mm2, mm7 // c high
+
+ pmullw mm6, oneFifth // c * 1/5 low
+ movq mm7, mm2 // make copy of c high; mm7 is scratch from here on
+
+ pmullw mm7, oneFifth // c * 1/5 high
+ paddw mm1, mm6 // b * 4/5 + c * 1/5 low
+
+ paddw mm3, mm7 // b * 4/5 + c * 1/5 high
+ movq mm6, mm0 // make copy of c low
+
+ pmullw mm6, fourFifths // c * 4/5 low
+ movq mm7, mm2 // make copy of c high
+
+ pmullw mm7, fourFifths // c * 4/5 high
+
+ paddw mm4, mm6 // b * 1/5 + c * 4/5 low
+ paddw mm5, mm7 // b * 1/5 + c * 4/5 high
+
+ paddw mm1, roundValues // + 128
+ paddw mm3, roundValues // + 128
+
+ psrlw mm1, 8
+ psrlw mm3, 8
+
+ packuswb mm1, mm3 // des[2]
+ movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
+
+ paddw mm4, roundValues // + 128
+ paddw mm5, roundValues // + 128
+
+ psrlw mm4, 8
+ psrlw mm5, 8
+
+ packuswb mm4, mm5 // des[3]
+ movq QWORD ptr [edi], mm4 // write des[3]
+
+ // mm0, mm2 --- Src[2] (c), unpacked low / high
+
+ pxor mm7, mm7 // mm7 was used as scratch above; re-clear it so the next iteration unpacks against zero
+ add edi, 8 // advance both column pointers by eight pixels
+ add esi, 8
+
+ sub edx, 8
+ jg LastVS_3_5_loop // repeat while columns remain
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : VerticalBand_1_2_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 1 to 2 up-scaling of a band of pixels.
+ *
+ * SPECIAL NOTES : The routine uses the first line of the band below
+ * the current band. The function also has an "C" only
+ * version.
+ *
+ ****************************************************************************/
+void VerticalBand_1_2_Scale_MMX
+(
+ unsigned char *dest, // upper source line; the line beneath it is written
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ // Fills the line between two source lines (dest and dest + 2 * pitch) with their rounded average.
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ pxor mm7, mm7 // clear out mm7
+ mov edx, destWidth // Loop counter
+
+VS_1_2_loop:
+
+ movq mm0, [esi] // get Src[0]
+ movq mm1, [esi + ecx * 2] // get Src[1]
+
+ movq mm2, mm0 // make copy before unpack
+ movq mm3, mm1 // make copy before unpack
+
+ punpcklbw mm0, mm7 // low Src[0]
+ movq mm6, fourOnes // mm6= 1, 1, 1, 1
+
+ punpcklbw mm1, mm7 // low Src[1]
+ paddw mm0, mm1 // low (a + b)
+
+ punpckhbw mm2, mm7 // high Src[0]
+ paddw mm0, mm6 // low (a + b + 1)
+
+ punpckhbw mm3, mm7 // high Src[1]
+ paddw mm2, mm3 // high (a + b )
+
+ psraw mm0, 1 // low (a + b +1 )/2
+ paddw mm2, mm6 // high (a + b + 1)
+
+ psraw mm2, 1 // high (a + b + 1)/2
+ packuswb mm0, mm2 // pack results
+
+ movq [esi+ecx], mm0 // write out eight bytes
+ add esi, 8
+
+ sub edx, 8
+ jg VS_1_2_loop // repeat while columns remain
+ }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : LastVerticalBand_1_2_Scale_MMX
+ *
+ * INPUTS : unsigned char *dest :
+ * unsigned int destPitch :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 1 to 2 up-scaling of the last band of pixels in an image.
+ *
+ * SPECIAL NOTES : There is no band below the last one, so the source line
+ * is simply copied to the line beneath it. The function
+ * also has a "C" only version.
+ *
+ ****************************************************************************/
+void LastVerticalBand_1_2_Scale_MMX
+(
+ unsigned char *dest, // source line; it is duplicated into the line beneath it
+ unsigned int destPitch, // byte distance between consecutive lines
+ unsigned int destWidth // bytes per line to process; handled 8 at a time
+)
+{
+ __asm
+ {
+ mov esi, dest // Get the source and destination pointer
+ mov ecx, destPitch // Get the pitch size
+
+ mov edx, destWidth // Loop counter
+
+LastVS_1_2_loop:
+
+ movq mm0, [esi] // get Src[0]
+ movq [esi+ecx], mm0 // write out eight bytes (plain copy, no averaging)
+
+ add esi, 8
+ sub edx, 8
+
+ jg LastVS_1_2_loop // repeat while columns remain
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : HorizontalLine_1_2_Scale_MMX
+ *
+ * INPUTS : const unsigned char *source :
+ * unsigned int sourceWidth :
+ * unsigned char *dest :
+ * unsigned int destWidth :
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void HorizontalLine_1_2_Scale_MMX
+(
+ const unsigned char *source, // input line; 8 bytes consumed per step
+ unsigned int sourceWidth, // source pixel count, used as the loop counter
+ unsigned char *dest, // output line; 16 bytes are produced for every 8 source bytes
+ unsigned int destWidth // unused
+)
+{
+ (void) destWidth;
+
+ __asm
+ {
+ mov esi, source
+ mov edi, dest
+
+ pxor mm7, mm7 // mm7 = 0, used for unpacking bytes to words
+ movq mm6, fourOnes // mm6 = 1, 1, 1, 1 (rounding term)
+
+ mov ecx, sourceWidth
+
+HS_1_2_Loop:
+
+ movq mm0, [esi] // pixels 00..07
+ movq mm1, [esi+1] // pixels 01..08 (right-hand neighbours)
+
+ movq mm2, mm0
+ movq mm3, mm1
+
+ movq mm4, mm0 // keep the original pixels for interleaving
+ punpcklbw mm0, mm7
+
+ punpcklbw mm1, mm7
+ paddw mm0, mm1 // low (a + b)
+
+ paddw mm0, mm6 // low (a + b + 1)
+ punpckhbw mm2, mm7
+
+ punpckhbw mm3, mm7
+ paddw mm2, mm3 // high (a + b)
+
+ paddw mm2, mm6 // high (a + b + 1)
+ psraw mm0, 1 // low (a + b + 1) / 2
+
+ psraw mm2, 1 // high (a + b + 1) / 2
+ packuswb mm0, mm2 // mm0 = eight averaged pixels
+
+ movq mm2, mm4
+ punpcklbw mm2, mm0 // interleave: original, average, original, average ...
+
+ movq [edi], mm2 // write first eight output bytes
+ punpckhbw mm4, mm0
+
+ movq [edi+8], mm4 // write second eight output bytes
+ add esi, 8
+
+ add edi, 16
+ sub ecx, 8
+
+ cmp ecx, 8
+ jg HS_1_2_Loop // loop while more than eight source pixels remain
+
+// last eight pixels: there is no pixel to the right of the final one, so it is replicated
+
+ movq mm0, [esi]
+ movq mm1, mm0
+
+ movq mm2, mm0
+ movq mm3, mm1
+
+ psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
+ psrlq mm3, 56
+
+ psllq mm3, 56 // mm3 = 00 00 00 00 00 00 00 07
+ por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
+
+ movq mm3, mm1
+ movq mm4, mm0
+
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+
+ paddw mm0, mm1
+ paddw mm0, mm6
+
+ punpckhbw mm2, mm7
+ punpckhbw mm3, mm7
+
+ paddw mm2, mm3
+ paddw mm2, mm6
+
+ psraw mm0, 1
+ psraw mm2, 1
+
+ packuswb mm0, mm2
+ movq mm2, mm4
+
+ punpcklbw mm2, mm0
+ movq [edi], mm2
+
+ punpckhbw mm4, mm0
+ movq [edi+8], mm4
+ }
+}
+
+#if defined(__cplusplus)
+} // closes the extern "C" { block opened before the first routine in this file
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c
new file mode 100644
index 00000000..063c15d3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c
@@ -0,0 +1,733 @@
+/****************************************************************************
+ *
+ * Module Title : simpledeblock_asm.c
+ *
+ * Description : Simple deblocking filter for low end machines
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include "postp.h"
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+__declspec(align(16)) const unsigned char eightNOnes[]= {255, 255, 255, 255, 255, 255, 255, 255}; // eight 0xFF bytes, i.e. -1 per byte for the signed pcmpgtb tests
+__declspec(align(16)) const short fourFours[] = {4, 4, 4, 4}; // +4 rounding term added before the >> 3
+__declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1}; // OR-ed into a 0 / -1 word mask to form a +1 / -1 sign
+__declspec(align(16)) const unsigned char eightFours[] = {4, 4, 4, 4, 4, 4, 4, 4}; // in this file only referenced from commented-out assembly
+__declspec(align(16)) const unsigned char eightOnes[] = {1, 1, 1, 1, 1, 1, 1, 1}; // threshold for the "difference > 1" tests
+__declspec(align(16)) const unsigned char eight128s[] = {128, 128, 128, 128, 128, 128, 128, 128}; // bias used to move pixels between unsigned and signed byte range
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+extern UINT32 LoopFilterLimitValuesV1[]; // declared but not referenced by either filter in this file
+extern UINT32 *DeblockLimitValuesV2; // indexed by pbi->FrameQIndex to obtain the filter limit (FLimit)
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterHoriz_Simple_MMX
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : only pbi->FrameQIndex is read; it
+ * selects the filter limit from
+ * DeblockLimitValuesV2.
+ * UINT8 *PixelPtr : first of the four pixels p[0]..p[3]
+ * that straddle the edge, on row 0.
+ * INT32 LineLength : byte offset from one row to the next.
+ * INT32 *BoundingValuePtr : unused by this implementation.
+ *
+ * OUTPUTS : PixelPtr[0..3] on eight consecutive rows are rewritten in
+ * place (all four bytes of every row are stored).
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Applies a loop filter to the vertical edge horizontally
+ *
+ * SPECIAL NOTES : The 8x4 pixel block is transposed into registers (and
+ * WorkingBuffer) so that each of p[0]..p[3] holds one
+ * pixel from each of the eight rows, filtered, and
+ * transposed back on the way out.
+ * The loads start two bytes before PixelPtr and two
+ * bytes after PixelPtr + 2, so bytes PixelPtr[-2..5] of
+ * each row are read.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterHoriz_Simple_MMX(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 * PixelPtr,
+ INT32 LineLength,
+ INT32 *BoundingValuePtr
+ )
+{
+ /*************************************************************
+ The following code in comments is the C version of the
+ function, provided here for reference
+ *************************************************************
+
+ INT32 j;
+ INT32 FiltVal;
+ UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+
+
+ for ( j = 0; j < 8; j++ )
+ {
+ INT32 UseHighVariance;
+
+ FiltVal = ( PixelPtr[2] * 3 ) -
+ ( PixelPtr[1] * 3 );
+
+ UseHighVariance = abs(PixelPtr[0] - PixelPtr[1]) > 1 ||
+ abs(PixelPtr[2] - PixelPtr[3]) > 1;
+
+ if(UseHighVariance)
+ {
+ FiltVal += ( PixelPtr[0] ) -
+ ( PixelPtr[3] );
+ }
+
+ FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+
+ PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
+ PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];
+
+ if(!UseHighVariance)
+ {
+ FiltVal >>= 1;
+
+ PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
+ PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
+ }
+
+ PixelPtr += LineLength;
+ }
+ ************************************************************/
+
+ UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+ __declspec(align(16)) unsigned char WorkingBuffer[32]; // transposed p[0]..p[3], 8 bytes each
+ (void)BoundingValuePtr;
+
+ __asm
+ {
+ mov eax, FLimit // Flimit
+ xor ecx, ecx // clear ecx
+
+ mov edx, LineLength // pitch
+ mov esi, PixelPtr // src and des pointer
+
+ sub ecx, edx // negative pitch
+ lea esi, [esi + edx] // next line
+
+ movd mm0, [esi + ecx + -2] // xx xx xx xx 01 00 xx xx
+ movd mm4, [esi + -2] // xx xx xx xx 11 10 xx xx
+
+ movd mm2, [esi + ecx + 2] // xx xx xx xx xx xx 03 02
+ punpcklbw mm0, mm4 // 11 01 10 00 xx xx xx xx
+
+ movd mm3, [esi + 2] // xx xx xx xx xx xx 13 12
+ punpcklbw mm2, mm3 // xx xx xx xx 13 03 12 02
+
+ movd mm1, [esi+ edx + -2] // xx xx xx xx 21 20 xx xx
+ movd mm5, [esi+ edx *2 + -2] // xx xx xx xx 31 30 xx xx
+
+ movd mm6, [esi+ edx + 2] // xx xx xx xx xx xx 23 22
+ punpcklbw mm1, mm5 // 31 21 30 20 xx xx xx xx
+
+ movd mm7, [esi+ edx*2 + 2] // xx xx xx xx xx xx 33 32
+ punpckhwd mm0, mm1 // 31 21 11 01 30 20 10 00
+
+ punpcklbw mm6, mm7 // xx xx xx xx 33 23 32 22
+ lea edi, WorkingBuffer
+
+ punpcklwd mm2, mm6 // 33 23 13 03 32 22 12 02
+ lea esi, [esi+edx*4] // four lines below
+
+ movd mm4, [esi+ecx + -2] // xx xx xx xx 41 40 xx xx
+ movd mm1, [esi + -2] // xx xx xx xx 51 50 xx xx
+
+ movd mm3, [esi+ecx + 2] // xx xx xx xx xx xx 43 42
+ punpcklbw mm4, mm1 // 51 41 50 40 xx xx xx xx
+
+ movd mm6, [esi + 2] // xx xx xx xx xx xx 53 52
+ movd mm1, [esi + edx + -2] // xx xx xx xx 61 60 xx xx
+
+ // Rows 6 and 7 use 32-bit loads like every other row: only the low
+ // four bytes are consumed by the punpcklbw that follows, so a 64-bit
+ // load would just read four extra bytes past the pixels of interest.
+ punpcklbw mm3, mm6 // xx xx xx xx 53 43 52 42
+ movd mm5, [esi + edx*2 -2] // xx xx xx xx 71 70 xx xx
+
+ movd mm6, [esi + edx +2] // xx xx xx xx xx xx 63 62
+ punpcklbw mm1, mm5 // 71 61 70 60 xx xx xx xx
+
+ movd mm7, [esi + edx*2 + 2] // xx xx xx xx xx xx 73 72
+ punpckhwd mm4, mm1 // 71 61 51 41 70 60 50 40
+
+ punpcklbw mm6, mm7 // xx xx xx xx 73 63 72 62
+ movq mm1, mm0 // 31 21 11 01 30 20 10 00
+
+ punpcklwd mm3, mm6 // 73 63 53 43 72 62 52 42
+ movq mm7, mm2 // 33 23 13 03 32 22 12 02
+
+ punpckldq mm0, mm4 // 70 60 50 40 30 20 10 00
+ movq [edi], mm0 // save p[0]
+
+ punpckhdq mm1, mm4 // 71 61 51 41 31 21 11 01
+ movq mm4, mm0 // copy of p[0]
+
+ movq [edi+8], mm1 // save p[1]
+ punpckldq mm2, mm3 // 72 62 52 42 32 22 12 02
+
+ movq mm5, mm1 // copy of p[1]
+ movq [edi+16], mm2 // save p[2]
+
+ punpckhdq mm7, mm3 // 73 63 53 43 33 23 13 03
+ movq mm6, mm2 // copy of p[2]
+
+ movq [edi+24], mm7 // save p[3]
+
+ // mm0, 4 ---> p[0]
+ // mm1, 5 ---> p[1]
+ // mm2, 6 ---> p[2]
+ // mm7, 3 ---> p[3]
+
+ movq mm1, eightNOnes // mm1 = FFFFFFFFFFFFFFFF
+ psubb mm0, mm5 // p[0]-p[1]
+
+ movq mm7, eightOnes // mm7 = 0101010101010101
+ pcmpgtb mm1, mm0 // p[0]-p[1]<-1?
+
+ pcmpgtb mm0, mm7 // p[0]-p[1]>1?
+ movq mm3, eightNOnes // mm3 = FFFFFFFFFFFFFFFF
+
+ por mm0, mm1 // abs(p[0]-p[1])>1?
+ movq mm1, mm7 // mm1 = 0101010101010101
+
+ movq mm7, [edi+24] // p[3]
+ psubb mm2, mm7 // p[2]-p[3]
+
+ pcmpgtb mm3, mm2 // p[2]-p[3]<-1?
+ pcmpgtb mm2, mm1 // p[2]-p[3]>1?
+
+ por mm2, mm3 // abs(p[3]-p[2])>1?
+ movq mm3, eight128s // mm3 = 8080808080808080 (overwritten below before it is used)
+
+ por mm0, mm2 // mm0 = UseHighVariance
+
+ // mm0 = UseHighVariance
+ // mm4 = P[0]
+ // mm5 = P[1]
+ // mm6 = P[2]
+ // mm7 = P[3]
+ // mm3 = 8080808080808080
+
+ pxor mm1, mm1 // clear mm1 for unpack
+ movq mm2, mm5 // copy p[1]
+
+ movq mm3, mm6 // copy of p[2]
+ punpcklbw mm2, mm1 // low four p[1]
+
+ punpcklbw mm3, mm1 // low four p[2]
+ psubw mm3, mm2 // low four p[2]-p[1]
+
+ punpckhbw mm5, mm1 // high four p[1]
+ movq mm2, mm3 // low p[2]-p[1]
+
+ punpckhbw mm6, mm1 // high four p[2]
+ paddw mm3, mm3 // 2*(p[2]-p[1]) low four
+
+ psubw mm6, mm5 // high four p[2]-p[1]
+ paddw mm2, mm3 // 3*(p[2]-p[1]) low four
+
+ movq mm5, mm6 // high four p[2]-p[1]
+ movq mm3, mm4 // copy of p[0]
+
+ paddw mm6, mm6 // 2*(p[2]-p[1]) highfour
+ punpcklbw mm3, mm1 // low four p[0]
+
+ paddw mm5, mm6 // 3*(p[2]-p[1]) highfour
+ punpckhbw mm4, mm1 // high four p[0]
+
+ movq mm6, mm7 // copy of p[3]
+ punpcklbw mm7, mm1 // low four p[3]
+
+ punpckhbw mm6, mm1 // high four p[3]
+ psubw mm3, mm7 // low four p[0]-p[3]
+
+ punpcklbw mm1, mm0 // UseHighVariance Low four
+ pxor mm7, mm7 // clear mm7 for unpack
+
+ psraw mm1, 8 // FFFF or 0000
+ punpckhbw mm7, mm0 // UseHighVariance high four
+
+ psubw mm4, mm6 // high four p[0]-p[3]
+ psraw mm7, 8 // FFFF or 0000
+
+ pand mm3, mm1 // And UseHighVariance
+ pand mm4, mm7 // And UseHighVariance
+
+ paddw mm2, mm3 // Low four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
+ paddw mm4, mm5 // High four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
+
+ paddw mm2, fourFours // adjust before shift
+ movd mm1, eax // Flimit
+
+ paddw mm4, fourFours // adjust before shift
+ psraw mm2, 3 // shift
+
+ psraw mm4, 3 // shift
+ movq mm3, mm2 // copy of low four
+
+ punpcklwd mm1, mm1 // Flimit Flimit
+ movq mm5, mm4 // copy of Highfour
+
+ punpckldq mm1, mm1 // Four Flimit
+ psraw mm2, 15 // FFFF or 0000
+
+ movq mm6, mm1 // copy of FLimit
+ psraw mm4, 15 // FFFF or 0000
+
+ pxor mm3, mm2
+ psubsw mm3, mm2 // abs(FiltVal) for Low
+
+ pxor mm5, mm4
+ psubsw mm5, mm4 // abs(FiltVal) for High
+
+ por mm2, fourOnes // -1 or +1 for sign
+ por mm4, fourOnes // -1 or +1 for sign
+
+ // mm0 = UseHIghVariance?
+ // mm1 = FLimit in shorts
+ // mm2 = sign for lower four FiltVal
+ // mm3 = abs for lower four FiltVal
+ // mm4 = sign for higher four FiltVal
+ // mm5 = abs for higher four FiltVal
+ //
+ // Bounding step (replaces the BoundingValuePtr table of the C version):
+ // result = sign * max(0, Flimit - abs(Flimit - abs(FiltVal)))
+ movq mm6, mm1 // copy of Flimit
+ psubusw mm1, mm3 // Flimit - abs(FiltVal), clamped at 0
+
+ psubusw mm3, mm6 // abs(Filtval) - FLimit, clamped at 0
+ por mm3, mm1 // abs(Flimit-abs(FiltVal)) (one of the two is 0)
+
+ movq mm1, mm6 // Flimit
+ psubusw mm1, mm3 // Flimit-abs(FLimit-abs(FiltVal)), clamped at 0
+
+ movq mm3, mm6 // copy of the Flimit
+ pmullw mm1, mm2 // Get the sign back
+
+ psubusw mm3, mm5 // Flimit-abs(Filtval), clamped at 0
+ psubusw mm5, mm6 // abs(Filtval)-Flimit, clamped at 0
+
+ por mm5, mm3 // abs(Flimit-abs(FiltVal))
+ movq mm3, mm6 // Flimit
+
+ psubusw mm3, mm5 // Flimit-abs(FLimit-abs(FiltVal)), clamped at 0
+ pmullw mm4, mm3 // Get the sign back
+
+ movq mm2, mm4
+
+ // mm0 = UseHighVariance
+ // mm1 = low four
+ // mm2 = high four
+
+ movq mm5, [edi+8] // p[1]
+ movq mm3, mm1 // copy of low four
+
+ movq mm4, eight128s // 128 for offset
+ packsswb mm1, mm2 // pack to chars
+
+ movq mm6, [edi+16] // p[2]
+ psubb mm5, mm4 // unsigned -> signed
+
+ psubb mm6, mm4 // unsigned -> signed
+ paddsb mm5, mm1 // p[1]+delta
+
+ psubsb mm6, mm1 // p[2]-delta
+ paddb mm5, mm4 // offset back
+
+ paddb mm6, mm4 // offset back
+ movq mm1, [edi] // p[0]
+
+ psraw mm3, 1 // delta/2
+ psraw mm2, 1 // delta/2
+
+ movq mm7, [edi+24] // p[3]
+ packsswb mm3, mm2 // pack to chars
+
+ psubb mm1, mm4 // unsigned -> signed
+ pandn mm0, mm3 // and !UseHighVariance
+
+ psubb mm7, mm4 // unsigned -> signed
+ psubsb mm7, mm0 // p[3] - delta/2 (zero where UseHighVariance)
+
+ paddsb mm0, mm1 // p[0] + delta/2 (zero where UseHighVariance)
+ paddb mm7, mm4 // offset back
+
+ paddb mm0, mm4 // offset back
+ lea esi, [esi+ecx*4] // esi now point to the second line
+
+ //done with calculation, now write back the results
+ // mm0 -> 7060504030201000
+ // mm5 -> 7161514131211101
+ // mm6 -> 7262524232221202
+ // mm7 -> 7363534333231303
+
+ movq mm4, mm0 // 7060504030201000
+ punpcklbw mm0, mm5 // 3130212011100100
+
+ punpckhbw mm4, mm5 // 7170616051504140
+ movq mm2, mm6 // 7262524232221202
+
+ punpcklbw mm2, mm7 // 3332232213120302
+ punpckhbw mm6, mm7 // 7372636253524342
+
+ movq mm1, mm0 // 3130212011100100
+ punpcklwd mm0, mm2 // 1312111003020100
+
+ movd [esi+ecx], mm0 // write 03020100
+ punpckhwd mm1, mm2 // 3332313023222120
+
+ psrlq mm0, 32 // xxxxxxxx13121110
+ movd [esi], mm0 // write 13121110
+
+ movq mm5, mm4 // 7170616051504140
+ punpcklwd mm4, mm6 // 5352515043424140
+
+ movd [esi+edx], mm1 // write 23222120
+ psrlq mm1, 32 // xxxxxxxx33323130
+
+ punpckhwd mm5, mm6 // 7372717063626160
+ movd [esi+edx*2],mm1 // write 33323130
+
+ lea esi, [esi+edx*4] // fifth line
+ movd [esi+ecx], mm4 // write 43424140
+
+ psrlq mm4, 32 // xxxxxxxx53525150
+ movd [esi], mm4 // write 53525150
+
+ movd [esi+edx], mm5 // write 63626160
+ psrlq mm5, 32 // xxxxxxxx73727170
+
+ movd [esi+edx*2], mm5 // write 73727170
+
+ }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterVert_Simple_MMX
+ *
+ * INPUTS : POSTPROC_INSTANCE *pbi : only pbi->FrameQIndex is read; it
+ * selects the filter limit from
+ * DeblockLimitValuesV2.
+ * UINT8 *PixelPtr : first pixel of the row just below
+ * the edge (p[2] in the comments).
+ * INT32 Pitch : byte offset from one row to the next
+ * (called LineLength in the C
+ * reference below).
+ * INT32 *BoundingValuePtr : unused by this implementation.
+ *
+ * OUTPUTS : Eight pixels on each of the rows PixelPtr - 2*Pitch,
+ * PixelPtr - Pitch, PixelPtr and PixelPtr + Pitch are
+ * rewritten in place.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Applies a loop filter to a horizontal edge vertically
+ *
+ * SPECIAL NOTES : p[0]..p[3] in the comments are the rows at offsets
+ * -2*Pitch, -Pitch, 0 and +Pitch; each register holds
+ * eight horizontally adjacent pixels of one row.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterVert_Simple_MMX(
+ POSTPROC_INSTANCE *pbi,
+ UINT8 * PixelPtr,
+ INT32 Pitch,
+ INT32 *BoundingValuePtr
+
+ )
+{
+
+ /************************************************************
+ The following code in comments is the C version of the
+ function, provided here for reference
+ ************************************************************
+
+ INT32 j;
+ INT32 FiltVal;
+ UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+ for ( j = 0; j < 8; j++ )
+ {
+ INT32 UseHighVariance;
+
+ FiltVal = ( ( (INT32) PixelPtr[0] * 3 ) -
+ ( (INT32)PixelPtr[- LineLength] * 3 ));
+
+ UseHighVariance = abs(PixelPtr[- (2 * LineLength)] - PixelPtr[- LineLength]) > 1 ||
+ abs(PixelPtr[0] - PixelPtr[LineLength]) > 1;
+
+ if(UseHighVariance)
+ {
+ FiltVal += ( (INT32)PixelPtr[- (2 * LineLength)] ) -
+ ( (INT32)PixelPtr[LineLength] );
+ }
+
+
+ FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+
+ PixelPtr[- LineLength] = LimitTable[(INT32)PixelPtr[- LineLength] + FiltVal];
+ PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] - FiltVal];
+
+ if(!UseHighVariance)
+ {
+ FiltVal >>=1 ;
+
+ PixelPtr[- 2* LineLength] = LimitTable[(INT32)PixelPtr[- 2 * LineLength] + FiltVal];
+ PixelPtr[LineLength] = LimitTable[(INT32)PixelPtr[LineLength] - FiltVal];
+ }
+
+ PixelPtr ++;
+ }
+ *************************************************************/
+
+
+ UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+ (void)BoundingValuePtr;
+ __asm
+ {
+
+ mov eax, FLimit // Flimit Values
+ xor ecx, ecx // clear ecx for negative pitch
+
+ mov edx, Pitch // Pitch
+ mov esi, PixelPtr // Pointer to Src and Destination
+
+ sub ecx, edx // negative pitch
+ movq mm2, [esi] // p[2]
+
+ movq mm7, eightOnes // mm7 = 0101010101010101
+ movq mm0, [esi+ecx*2] // p[0]
+
+ movq mm6, mm2 // Make a copy
+ movq mm5, [esi+ecx] // p[1]
+
+ movq mm4, mm0 // Make a copy
+ movq mm1, eightNOnes // mm1 = FFFFFFFFFFFFFFFF
+
+ psubb mm0, mm5 // p[0]-p[1]
+ pcmpgtb mm1, mm0 // p[0]-p[1]<-1?
+
+ pcmpgtb mm0, mm7 // p[0]-p[1]>1?
+ movq mm3, eightNOnes // mm3 = FFFFFFFFFFFFFFFF
+
+ por mm0, mm1 // abs(p[0]-p[1])>1?
+ movq mm1, mm7 // mm1 = 0101010101010101
+
+ movq mm7, [esi+edx] // p[3]
+ psubb mm2, mm7 // p[2]-p[3]
+
+ pcmpgtb mm3, mm2 // p[2]-p[3]<-1?
+ pcmpgtb mm2, mm1 // p[2]-p[3]>1?
+
+ por mm2, mm3 // abs(p[3]-p[2])>1?
+ movq mm3, eight128s // mm3 = 8080808080808080 (overwritten below before it is used)
+
+ por mm0, mm2 // mm0 = UseHighVariance
+
+ // mm0 = UseHighVariance
+ // mm4 = P[0]
+ // mm5 = P[1]
+ // mm6 = P[2]
+ // mm7 = P[3]
+ // mm3 = 8080808080808080
+
+ pxor mm1, mm1 // clear mm1 for unpack
+ movq mm2, mm5 // copy p[1]
+
+ movq mm3, mm6 // copy of p[2]
+ punpcklbw mm2, mm1 // low four p[1]
+
+ punpcklbw mm3, mm1 // low four p[2]
+ psubw mm3, mm2 // low four p[2]-p[1]
+
+ punpckhbw mm5, mm1 // high four p[1]
+ movq mm2, mm3 // low p[2]-p[1]
+
+ punpckhbw mm6, mm1 // high four p[2]
+ paddw mm3, mm3 // 2*(p[2]-p[1]) low four
+
+ psubw mm6, mm5 // high four p[2]-p[1]
+ paddw mm2, mm3 // 3*(p[2]-p[1]) low four
+
+ movq mm5, mm6 // high four p[2]-p[1]
+ movq mm3, mm4 // copy of p[0]
+
+ paddw mm6, mm6 // 2*(p[2]-p[1]) highfour
+ punpcklbw mm3, mm1 // low four p[0]
+
+ paddw mm5, mm6 // 3*(p[2]-p[1]) highfour
+ punpckhbw mm4, mm1 // high four p[0]
+
+ movq mm6, mm7 // copy of p[3]
+ punpcklbw mm7, mm1 // low four p[3]
+
+ punpckhbw mm6, mm1 // high four p[3]
+ psubw mm3, mm7 // low four p[0]-p[3]
+
+ punpcklbw mm1, mm0 // UseHighVariance Low four
+ pxor mm7, mm7 // clear mm7 for unpack
+
+ psraw mm1, 8 // FFFF or 0000
+ punpckhbw mm7, mm0 // UseHighVariance high four
+
+ psubw mm4, mm6 // high four p[0]-p[3]
+ psraw mm7, 8 // FFFF or 0000
+
+ pand mm3, mm1 // And UseHighVariance
+ pand mm4, mm7 // And UseHighVariance
+
+ paddw mm2, mm3 // Low four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
+ paddw mm4, mm5 // High four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
+
+ paddw mm2, fourFours // adjust before shift
+ paddw mm4, fourFours // adjust before shift
+
+ movd mm1, eax // Flimit
+ psraw mm2, 3 // shift
+
+ psraw mm4, 3 // shift
+ movq mm3, mm2 // copy of low four
+
+ punpcklwd mm1, mm1 // Flimit Flimit
+ movq mm5, mm4 // copy of Highfour
+
+ punpckldq mm1, mm1 // Four Flimit
+ psraw mm2, 15 // FFFF or 0000
+
+ movq mm6, mm1 // copy of FLimit
+ psraw mm4, 15 // FFFF or 0000
+
+ pxor mm3, mm2
+ psubsw mm3, mm2 // abs(FiltVal) for Low
+
+ pxor mm5, mm4
+ psubsw mm5, mm4 // abs(FiltVal) for High
+
+ por mm2, fourOnes // -1 or +1 for sign
+ por mm4, fourOnes // -1 or +1 for sign
+
+ /*
+ THE FOLLOWING CODE TRIED TO DO IT IN CHARS, BUT GENERATES DIFFERENT RESULTS
+ THAN THE C VERSION BECAUSE OF OVERFLOW IN VERY RARE CASES
+
+ pxor mm4, mm3 // offset all the pixels by 128
+ pxor mm5, mm3
+
+ pxor mm6, mm3
+ pxor mm7, mm3
+
+ psubsb mm6, mm5 // p[2]-p[1]
+ psubsb mm4, mm7 // p[0]-p[3]
+
+ movq mm2, mm6 // Make a copy p[2] - p[1]
+ paddsb mm6, mm6 // 2 * p[2] - p[1]
+
+ pand mm4, mm0 // UseHighVariance * (p[0]-p[3])
+ paddsb mm2, mm6 // 3*(p[2]-p[1])
+
+ paddsb mm4, mm2 // 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+ paddsb mm4, eightFours // adjust before shift
+
+ pxor mm7, mm7 // clear mm7 for unpack
+ movd mm1, eax // FLimit
+
+ pxor mm2, mm2 // make a copy
+ punpcklwd mm1, mm1 // FLimit FLimit
+
+ punpcklbw mm2, mm4 // Unpack to shorts
+ punpckldq mm1, mm1 // 4 Flimit in short
+
+ punpckhbw mm7, mm4 // Unpcak to shorts
+ psraw mm2, 11 // >> 3-> FiltVal low four
+
+ psraw mm7, 11 // >> 3-> FiltVal High four
+ movq mm3, mm2 // make a copy of Low 4
+
+ movq mm4, mm7
+ pxor mm7, mm7
+
+ movq mm5, mm4 // make a copy of high 4
+ psraw mm2, 15 // FFFF or 0000
+
+ movq mm6, mm1 // copy of FLimit
+ psraw mm4, 15 // FFFF or 0000
+
+ pxor mm3, mm2
+ psubsw mm3, mm2 // abs(FiltVal) for Low
+
+ pxor mm5, mm4
+ psubsw mm5, mm4 // abs(FiltVal) for Low
+
+ por mm2, fourOnes // -1 or -1 for sign
+ por mm4, fourOnes // -1 or +1 for sign
+
+ */
+ // mm0 = UseHIghVariance?
+ // mm1 = FLimit in shorts
+ // mm2 = sign for lower four FiltVal
+ // mm3 = abs for lower four FiltVal
+ // mm4 = sign for higher four FiltVal
+ // mm5 = abs for higher four FiltVal
+ //
+ // Bounding step (replaces the BoundingValuePtr table of the C version):
+ // result = sign * max(0, Flimit - abs(Flimit - abs(FiltVal)))
+
+ movq mm6, mm1 // copy of Flimit
+ psubusw mm1, mm3 // Flimit - abs(FiltVal), clamped at 0
+
+ psubusw mm3, mm6 // abs(Filtval) - FLimit, clamped at 0
+ por mm3, mm1 // abs(Flimit-abs(FiltVal)) (one of the two is 0)
+
+ movq mm1, mm6 // Flimit
+ psubusw mm1, mm3 // Flimit-abs(FLimit-abs(FiltVal)), clamped at 0
+
+ movq mm3, mm6 // copy of the Flimit
+ pmullw mm2, mm1 // Get the sign back
+
+ psubusw mm3, mm5 // Flimit-abs(Filtval), clamped at 0
+ psubusw mm5, mm6 // abs(Filtval)-Flimit, clamped at 0
+
+ por mm5, mm3 // abs(Flimit-abs(FiltVal))
+ movq mm3, mm6 // Flimit
+
+ psubusw mm3, mm5 // Flimit-abs(FLimit-abs(FiltVal)), clamped at 0
+ pmullw mm4, mm3 // Get the sign back
+
+ // mm0 = UseHighVariance
+ // mm2 = Final value with sign for lower four
+ // mm4 = Final value with sign for higher four
+ movq mm5, [esi+ecx] // p[1]
+ movq mm1, mm2 // make a copy of low four
+
+ movq mm7, eight128s // 128 for offset
+ packsswb mm2, mm4 // pack to chars for operation
+
+ movq mm6, [esi] // p[2]
+ psubb mm5, mm7 // unsigned -> signed
+
+ psubb mm6, mm7 // unsigned -> signed
+ paddsb mm5, mm2 // p[1] + Delta
+
+ psubsb mm6, mm2 // p[2] - Delta
+ paddb mm5, mm7 // offset back
+
+ paddb mm6, mm7 // offset back
+ movq [esi+ecx], mm5 // write out p[1]
+ psraw mm1, 1 // Delta/2
+
+ psraw mm4, 1 // Delta/2
+ movq [esi], mm6 // write out p[2]
+
+ movq mm2, [esi+ecx*2] // p[0]
+ packsswb mm1, mm4 // pack to chars
+
+ movq mm3, [esi+edx] // p[3]
+ pandn mm0, mm1 // and !UseHighVariance
+
+ psubb mm2, mm7 // unsigned -> signed
+ psubb mm3, mm7 // unsigned -> signed
+
+ paddsb mm2, mm0 // p[0] + Delta/2 (zero where UseHighVariance)
+ paddb mm2, mm7 // offset back
+
+ movq [esi+ecx*2], mm2 // write p[0]
+ psubsb mm3, mm0 // p[3] - Delta/2 (zero where UseHighVariance)
+
+ paddb mm3, mm7 // offset back
+ movq [esi+edx], mm3 // write p[3]
+
+ }
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/Makefile b/Src/libvpShared/corelibs/cdxv/vputil/Makefile
new file mode 100644
index 00000000..54b763d0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/Makefile
@@ -0,0 +1,61 @@
+## Target to build: base name of the static library (archived as ${TARGET}.a)
+
+TARGET =libvputil
+
+## TOOLS (LD and OBJDUMP are defined but not used by any rule in this file)
+CC = ecc
+LD = ecc
+AR = ar
+OBJDUMP = objdump
+RM = rm -f
+
+## Directories (Windows-style paths, all rooted at TOPDIR)
+TOPDIR =C:\DuckSoft
+PRIVATEINCLUDE =${TOPDIR}\private\include
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
+VPPPINCLUDE =${TOPDIR}\private\corelibs\cdxv\vputil\include
+CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\vputil
+LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
+
+## Compile Flags (ALLFLAGS is what the compile rule actually passes; DEBUG holds the optimisation level)
+ALLINCLUDES =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
+VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES =-DMAPCA
+ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
+DEBUG =-O2
+CFLAGS =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+ -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+ -magen_interroutine_padding
+ALLFLAGS =$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files (objects to archive; SRCS is derived from OBJS by swapping .o for .c)
+OBJS =generic\fdct.o \
+ generic\idctpart.o \
+ generic\reconstruct.o \
+ generic\vputil.o \
+ bsp\bspFdct.o \
+ bsp\bspIDct.o \
+ bsp\bsprecon.o \
+ bsp\bspvputil.o \
+ bsp\uoptsystemdependant.o
+
+
+SRCS =$(OBJS:.o=.c)
+
+ARTARGET =${TARGET}.a
+
+# archive: first rule in the file, so it is the default goal; builds the
+# archive and then moves it into LIBDIR.
+# NOTE(review): the rule's target is the literal name ARTARGET, not
+# ${ARTARGET} - confirm this is intentional.
+
+ARTARGET:${OBJS}
+ ${AR} -cr ${ARTARGET} ${OBJS}
+ mv ${ARTARGET} ${LIBDIR}
+
+# compile: every object lists all of SRCS as prerequisites; each one is
+# compiled from the .c file that shares its stem.
+${OBJS} : ${SRCS}
+ $(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+# clean: removes the objects and a locally built archive (not the copy in LIBDIR)
+clean:
+ ${RM} ${OBJS} ${ARTARGET}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
new file mode 100644
index 00000000..91ddba73
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
@@ -0,0 +1,312 @@
+/****************************************************************************
+*
+* Module Title : fdct.c
+*
+* Description : Fast 8x8 DCT C-Implementation.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "dct.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define SIGNBITDUPPED(X) ( (signed )((X & 0x80000000)) >> 31 ) // isolates the sign bit of X and shifts it down by 31; X is not parenthesised, so pass a simple lvalue
+#define DOROUND(X) X = ( (SIGNBITDUPPED(X) & (0xffff)) + X ); // adds (SIGNBITDUPPED(X) & 0xffff) to X in place; used before each ">>= 16". The expansion already ends in ';'
+
+/****************************************************************************
+* Module statics
+****************************************************************************/
+// Fixed-point cosine multipliers: cos(k*pi/16) scaled by 65536 and rounded.
+// They are only ever read, so they are declared const.
+static const INT32 xC1S7 = 64277; // cos(1*pi/16) * 65536
+static const INT32 xC2S6 = 60547; // cos(2*pi/16) * 65536
+static const INT32 xC3S5 = 54491; // cos(3*pi/16) * 65536
+static const INT32 xC4S4 = 46341; // cos(4*pi/16) * 65536
+static const INT32 xC5S3 = 36410; // cos(5*pi/16) * 65536
+static const INT32 xC6S2 = 25080; // cos(6*pi/16) * 65536
+static const INT32 xC7S1 = 12785; // cos(7*pi/16) * 65536
+
+/****************************************************************************
+ *
+ * ROUTINE : fdct_mul_round
+ *
+ * INPUTS : INT32 Coeff : 16.16 fixed-point multiplier.
+ * INT32 Value : value to scale.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : INT32 : (Coeff * Value), adjusted by DOROUND, shifted
+ * right by 16.
+ *
+ * FUNCTION : The multiply / DOROUND / >>16 step shared by every
+ * product in the forward DCT.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+static INT32 fdct_mul_round ( INT32 Coeff, INT32 Value )
+{
+ INT32 Product = Coeff * Value;
+
+ DOROUND ( Product );
+ return Product >> 16;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : fdct_1d_pass
+ *
+ * INPUTS : const INT32 *In : 8 input samples.
+ *
+ * OUTPUTS : INT32 *Out : 8 transform outputs, Out[k] being
+ * output k of the 1-D transform.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : One 8-point 1-D forward DCT butterfly, used for both
+ * the row pass and the column pass.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+static void fdct_1d_pass ( const INT32 *In, INT32 *Out )
+{
+ // Common sums and differences of the input pairs.
+ INT32 Sum07 = In[0] + In[7];
+ INT32 Sum12 = In[1] + In[2];
+ INT32 Sum34 = In[3] + In[4];
+ INT32 Sum56 = In[5] + In[6];
+
+ INT32 Diff07 = In[0] - In[7];
+ INT32 Diff12 = In[1] - In[2];
+ INT32 Diff34 = In[3] - In[4];
+ INT32 Diff56 = In[5] - In[6];
+
+ INT32 Sum0734 = Sum07 + Sum34;
+ INT32 Sum1256 = Sum12 + Sum56;
+
+ // Products that are re-used by outputs 1, 3, 5 and 7.
+ INT32 Common1 = fdct_mul_round ( xC4S4, Sum12 - Sum56 ); // c4s4 * (s12 - s56)
+ INT32 Common2 = fdct_mul_round ( xC4S4, Diff12 + Diff56 ); // c4s4 * (d12 + d56)
+
+ INT32 RotX, RotY; // inputs to the current rotation
+
+ // Outputs 0 and 4.
+ Out[0] = fdct_mul_round ( xC4S4, Sum0734 + Sum1256 );
+ Out[4] = fdct_mul_round ( xC4S4, Sum0734 - Sum1256 );
+
+ // Rotation for outputs 2 and 6.
+ RotX = Diff12 - Diff56;
+ RotY = Sum07 - Sum34;
+ Out[2] = fdct_mul_round ( xC6S2, RotX ) + fdct_mul_round ( xC2S6, RotY );
+ Out[6] = fdct_mul_round ( xC6S2, RotY ) - fdct_mul_round ( xC2S6, RotX );
+
+ // Rotation for outputs 1 and 7.
+ RotX = Common1 + Diff07;
+ RotY = -( Diff34 + Common2 );
+ Out[1] = fdct_mul_round ( xC1S7, RotX ) - fdct_mul_round ( xC7S1, RotY );
+ Out[7] = fdct_mul_round ( xC7S1, RotX ) + fdct_mul_round ( xC1S7, RotY );
+
+ // Rotation for outputs 3 and 5.
+ RotX = Diff07 - Common1;
+ RotY = Diff34 - Common2;
+ Out[3] = fdct_mul_round ( xC3S5, RotX ) - fdct_mul_round ( xC5S3, RotY );
+ Out[5] = fdct_mul_round ( xC5S3, RotX ) + fdct_mul_round ( xC3S5, RotY );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : fdct_short_C_orig
+ *
+ * INPUTS : INT16 *InputData : 16-bit input data (8x8, row-major).
+ *
+ * OUTPUTS : INT16 *OutputData : 16-bit transform coefficients
+ * (8x8, row-major).
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs an 8x8 2-D fast DCT.
+ *
+ * The algorithm used is derived from the flowgraph for
+ * the Vetterli and Ligtenberg fast 1-D dct given in the
+ * JPEG reference book by Pennebaker and Mitchell.
+ *
+ * SPECIAL NOTES : All eight rows are transformed into a 32-bit
+ * intermediate block before any column is transformed,
+ * so OutputData is not written until InputData has been
+ * read completely.
+ *
+ ****************************************************************************/
+void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
+{
+ INT32 InterData[64]; // row-transformed block, 32-bit precision
+ INT32 Line[8]; // one row or column gathered for the 1-D pass
+ INT32 Result[8]; // 1-D outputs for the current column
+ int row, col, k;
+
+ // First pass: 1-D DCT on each row, kept at 32 bits.
+ for ( row = 0; row < 8; row++ )
+ {
+ for ( k = 0; k < 8; k++ )
+ Line[k] = InputData[row * 8 + k];
+
+ fdct_1d_pass ( Line, &InterData[row * 8] );
+ }
+
+ // Second pass: 1-D DCT on each column, narrowed to 16 bits on output.
+ for ( col = 0; col < 8; col++ )
+ {
+ for ( k = 0; k < 8; k++ )
+ Line[k] = InterData[k * 8 + col];
+
+ fdct_1d_pass ( Line, Result );
+
+ for ( k = 0; k < 8; k++ )
+ OutputData[k * 8 + col] = (INT16)Result[k];
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : fdct_short_C
+ *
+ * INPUTS : INT16 *DCTDataBuffer : 64 16-bit input values. The buffer is
+ * scaled up in place by
+ * FDCT_PRECISION_BITS, so the caller's
+ * data is modified.
+ *
+ * OUTPUTS : INT16 *DCT_codes : 64 16-bit transform coefficients.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Performs an 8x8 2-D fast DCT.
+ *
+ * The function to up the precision of FDCT by number of bits
+ * defined by FDCT_PRECISION_BITS.
+ *
+ * SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
+{
+
+ INT32 i;
+
+ // Increase precision on input to fdct.
+ // Multiply by a power of two rather than left-shifting: the inputs can be
+ // negative and left-shifting a negative value is undefined behaviour in C.
+ for ( i = 0; i < 64; i++ )
+ DCTDataBuffer[i] = (INT16)( DCTDataBuffer[i] * (1 << FDCT_PRECISION_BITS) );
+
+ // Transform the error signal using the forward DCT to get set of transform coefficients
+ fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
+
+ // Strip off the extra bits from the DCT output.
+ // This should ultimately be merged into the quantize process but there are also
+ // implications for DC prediction that would then need to be sorted
+ for ( i = 0; i < 64; i++ )
+ {
+ // signed shift modified so behaves like "/" (truncates towards 0 for + and -)
+ if ( DCT_codes[i] >= 0 )
+ DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
+ else
+ DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
+ }
+
+} \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
new file mode 100644
index 00000000..980e4fc0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
@@ -0,0 +1,921 @@
+/****************************************************************************
+*
+* Module Title : idctpart.c
+*
+* Description : IDCT with multiple versions based on # of non 0 coeffs
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+
+#include "dct.h"
+#include "string.h"
+
+/****************************************************************************
+* Macros
+*
+* int32 / int16         : local shorthand for the built-in int / short types.
+* IdctAdjustBeforeShift : rounding term added to the column-pass sums ahead of
+*                         their final ">> 4" (8 is half of 1 << 4).
+* xCnSm                 : the Cos/Sin multipliers C1S7 .. C7S1 (0.98078.. down
+*                         to 0.19509..) scaled by 65536 and rounded, so that
+*                         (xCnSm * v) >> 16 approximates CnSm * v.
+****************************************************************************/
+#define int32 int
+#define int16 short
+#define IdctAdjustBeforeShift 8
+
+#define xC1S7 64277
+#define xC2S6 60547
+#define xC3S5 54491
+#define xC4S4 46341
+#define xC5S3 36410
+#define xC6S2 25080
+#define xC7S1 12785
+
+/****************************************************************************
+* Module statics
+*
+* dequant_index[k] is the raster position (row * 8 + column) inside the 8x8
+* block of the k-th coefficient in zig-zag scan order. The dequantizers in
+* this module index their output through it to undo the zig-zag ordering.
+****************************************************************************/
+static const UINT32 dequant_index[64] =
+{
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+#if 0 // AWG CODE NO LONGER USED IN CODEBASE.
+/* Cos and Sin constant multipliers used during DCT and IDCT.
+   Floating-point values; InitDctTables multiplies each table index by the
+   matching constant to build the lookup tables declared below. */
+const double C1S7 = (double)0.9807852804032;
+const double C2S6 = (double)0.9238795325113;
+const double C3S5 = (double)0.8314696123025;
+const double C4S4 = (double)0.7071067811865;
+const double C5S3 = (double)0.5555702330196;
+const double C6S2 = (double)0.3826834323651;
+const double C7S1 = (double)0.1950903220161;
+
+/****************************************************************************
+* Exports
+*
+* One table per multiplier plus a pointer into it. InitDctTables aims each
+* pointer at the middle entry of its table so that the pointer can be indexed
+* with negative as well as positive values. The C4S4 table spans twice the
+* range of the others.
+****************************************************************************/
+
+// DCT lookup tables (rounded products index * constant, filled by InitDctTables)
+INT32 * C4S4_TablePtr;
+INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
+
+INT32 * C6S2_TablePtr;
+INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C2S6_TablePtr;
+INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C1S7_TablePtr;
+INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C7S1_TablePtr;
+INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C3S5_TablePtr;
+INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C5S3_TablePtr;
+INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitDctTables_Fill
+ *
+ *  INPUTS        : INT32 HalfRange : Entries are written for the indices
+ *                                    -HalfRange .. HalfRange - 1.
+ *                  double Scale    : Multiplier applied to each index.
+ *
+ *  OUTPUTS       : INT32 *Centre   : Pointer to the zero-index entry of the
+ *                                    table being filled.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Stores index * Scale, rounded half away from zero, for
+ *                  every index in the range above.
+ *
+ *  SPECIAL NOTES : NO LONGER USED IN CODEBASE.
+ *
+ ****************************************************************************/
+static void InitDctTables_Fill ( INT32 *Centre, INT32 HalfRange, double Scale )
+{
+    INT32 v;
+
+    for ( v = -HalfRange; v < HalfRange; v++ )
+        Centre[v] = (INT32)( (v * Scale) + ((v < 0) ? -0.5 : 0.5) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitDctTables
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Points every xxx_TablePtr at the centre of its table and
+ *                  fills the table with rounded multiples of the matching
+ *                  Cos/Sin constant. The C4S4 table covers +/- 2*COEFF_MAX,
+ *                  all the others +/- COEFF_MAX.
+ *
+ *  SPECIAL NOTES : NO LONGER USED IN CODEBASE.
+ *                  NOTE(review): each table is declared one entry longer than
+ *                  the range filled here, so the entry for the largest
+ *                  positive index is never written - confirm whether the
+ *                  upper bound was meant to be inclusive.
+ *
+ ****************************************************************************/
+void InitDctTables ( void )
+{
+    C4S4_TablePtr = &C4S4_Table[COEFF_MAX*2];
+    InitDctTables_Fill ( C4S4_TablePtr, 2 * COEFF_MAX, C4S4 );
+
+    C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
+    InitDctTables_Fill ( C6S2_TablePtr, COEFF_MAX, C6S2 );
+
+    C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
+    InitDctTables_Fill ( C2S6_TablePtr, COEFF_MAX, C2S6 );
+
+    C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
+    InitDctTables_Fill ( C1S7_TablePtr, COEFF_MAX, C1S7 );
+
+    C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
+    InitDctTables_Fill ( C7S1_TablePtr, COEFF_MAX, C7S1 );
+
+    C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
+    InitDctTables_Fill ( C3S5_TablePtr, COEFF_MAX, C3S5 );
+
+    C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
+    InitDctTables_Fill ( C5S3_TablePtr, COEFF_MAX, C5S3 );
+}
+#endif
+
+/****************************************************************************
+ *
+ *  ROUTINE       : dequant_slow
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : 64 dequantization step sizes.
+ *                  INT16 *quantized_list : 64 quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Multiplies each quantized coefficient by its step size
+ *                  and stores the product at the coefficient's raster
+ *                  position.
+ *
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
+ *                  Coefficients are processed in ascending zig-zag order.
+ *
+ ****************************************************************************/
+void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+    UINT32 zz;      // position in zig-zag scan order
+
+    for ( zz = 0; zz < 64; zz++ )
+        DCT_block[dequant_index[zz]] = quantized_list[zz] * dequant_coeffs[zz];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDctSlow_1D
+ *
+ *  INPUTS        : const int32 *in : First of the 8 coefficients of one line.
+ *                  int step        : Distance between successive coefficients
+ *                                    (1 walks a row, 8 walks a column).
+ *                  int32 round     : Term added to the sums before the final
+ *                                    shift (0 for none).
+ *                  int shift       : Final right shift applied to each result.
+ *
+ *  OUTPUTS       : int16 *out      : The 8 results, out[0] .. out[7].
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : One 8-point 1-D inverse DCT stage built from the xCnSm
+ *                  multipliers, each product being followed by ">> 16".
+ *
+ *  SPECIAL NOTES : Every result is truncated to 16 bits.
+ *
+ ****************************************************************************/
+static void IDctSlow_1D ( const int32 *in, int step, int32 round, int shift, int16 *out )
+{
+    const int32 x0 = in[0 * step], x1 = in[1 * step], x2 = in[2 * step], x3 = in[3 * step];
+    const int32 x4 = in[4 * step], x5 = in[5 * step], x6 = in[6 * step], x7 = in[7 * step];
+
+    // Odd-indexed inputs
+    const int32 A   = ((xC1S7 * x1) >> 16) + ((xC7S1 * x7) >> 16);
+    const int32 B   = ((xC7S1 * x1) >> 16) - ((xC1S7 * x7) >> 16);
+    const int32 C   = ((xC3S5 * x3) >> 16) + ((xC5S3 * x5) >> 16);
+    const int32 D   = ((xC3S5 * x5) >> 16) - ((xC5S3 * x3) >> 16);
+
+    const int32 Ad  = (xC4S4 * (A - C)) >> 16;
+    const int32 Bd  = (xC4S4 * (B - D)) >> 16;
+    const int32 Cd  = A + C;
+    const int32 Dd  = B + D;
+
+    // Even-indexed inputs
+    const int32 E   = (xC4S4 * (x0 + x4)) >> 16;
+    const int32 F   = (xC4S4 * (x0 - x4)) >> 16;
+    const int32 G   = ((xC2S6 * x2) >> 16) + ((xC6S2 * x6) >> 16);
+    const int32 H   = ((xC6S2 * x2) >> 16) - ((xC2S6 * x6) >> 16);
+
+    // The rounding term goes onto the four sums that feed every output pair
+    const int32 Ed  = (E - G) + round;
+    const int32 Gd  = (E + G) + round;
+    const int32 Add = (F + Ad) + round;
+    const int32 Fd  = (F - Ad) + round;
+
+    const int32 Bdd = Bd - H;
+    const int32 Hd  = Bd + H;
+
+    out[0] = (int16)((Gd + Cd) >> shift);
+    out[7] = (int16)((Gd - Cd) >> shift);
+
+    out[1] = (int16)((Add + Hd) >> shift);
+    out[2] = (int16)((Add - Hd) >> shift);
+
+    out[3] = (int16)((Ed + Dd) >> shift);
+    out[4] = (int16)((Ed - Dd) >> shift);
+
+    out[5] = (int16)((Fd + Bdd) >> shift);
+    out[6] = (int16)((Fd - Bdd) >> shift);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDctSlow
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes (dequant_slow) and inverse DCT's the
+ *                  input 8x8 block to reproduce prediction error. The rows
+ *                  are transformed first, in place in a scratch block, then
+ *                  the columns, with rounding and a final ">> 4".
+ *
+ *  SPECIAL NOTES : Rows that are entirely zero are skipped; columns that are
+ *                  entirely zero produce zero output directly.
+ *
+ ****************************************************************************/
+void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+    int32 Work[64];     // de-quantized block, then row-transformed block
+    int16 Line[8];      // results of one 1-D stage
+    int   n, k;
+
+    // dequantize the input
+    dequant_slow ( QuantMatrix, InputData, Work );
+
+    // Inverse DCT on the rows: results over-write the row they came from
+    for ( n = 0; n < 8; n++ )
+    {
+        int32 *Row = &Work[n * 8];
+
+        if ( Row[0] | Row[1] | Row[2] | Row[3] | Row[4] | Row[5] | Row[6] | Row[7] )
+        {
+            IDctSlow_1D ( Row, 1, 0, 0, Line );
+
+            for ( k = 0; k < 8; k++ )
+                Row[k] = Line[k];
+        }
+    }
+
+    // Inverse DCT on the columns: results go to the output block
+    for ( n = 0; n < 8; n++ )
+    {
+        const int32 *Col = &Work[n];
+
+        // bitwise | rather than logical || to test all eight values at once
+        if ( Col[0 * 8] | Col[1 * 8] | Col[2 * 8] | Col[3 * 8] |
+             Col[4 * 8] | Col[5 * 8] | Col[6 * 8] | Col[7 * 8] )
+        {
+            IDctSlow_1D ( Col, 8, IdctAdjustBeforeShift, 4, Line );
+
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = Line[k];
+        }
+        else
+        {
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = 0;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : dequant_slow10
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
+ *                  INT16 *quantized_list : Pointer to quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : Pointer to 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-quantizes an 8x8 block of quantized DCT coeffs that
+ *                  only has non-zero coefficients in the first 10, i.e.
+ *                  only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
+ *
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
+ *                  Only the top four rows of DCT_block are cleared; rows 4-7
+ *                  are left untouched apart from the entry noted below.
+ *                  Zig-zag entries 0..10 are written (eleven in all); entry
+ *                  10 lands at raster position 32, i.e. the start of row 4.
+ *                  The clear size is derived from the element size rather
+ *                  than a literal byte count, so it still covers four rows
+ *                  if INT32 is not 4 bytes wide.
+ *
+ ****************************************************************************/
+void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+    UINT32 zz;      // position in zig-zag scan order
+
+    // Zero rows 0-3 (4 rows x 8 entries)
+    memset ( DCT_block, 0, 4 * 8 * sizeof(DCT_block[0]) );
+
+    for ( zz = 0; zz <= 10; zz++ )
+        DCT_block[dequant_index[zz]] = quantized_list[zz] * dequant_coeffs[zz];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDct10_1D
+ *
+ *  INPUTS        : const int32 *in : First of the 4 leading coefficients of
+ *                                    one line (the other 4 are taken as 0).
+ *                  int step        : Distance between successive coefficients
+ *                                    (1 walks a row, 8 walks a column).
+ *                  int32 round     : Term added to the sums before the final
+ *                                    shift (0 for none).
+ *                  int shift       : Final right shift applied to each result.
+ *
+ *  OUTPUTS       : int16 *out      : The 8 results, out[0] .. out[7].
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : One 1-D inverse DCT stage that reads only coefficients
+ *                  0..3 of the line.
+ *
+ *  SPECIAL NOTES : Every result is truncated to 16 bits.
+ *
+ ****************************************************************************/
+static void IDct10_1D ( const int32 *in, int step, int32 round, int shift, int16 *out )
+{
+    const int32 x0 = in[0 * step], x1 = in[1 * step], x2 = in[2 * step], x3 = in[3 * step];
+
+    // Odd-indexed inputs (coefficients 5 and 7 contribute nothing)
+    const int32 A   = (xC1S7 * x1) >> 16;
+    const int32 B   = (xC7S1 * x1) >> 16;
+    const int32 C   = (xC3S5 * x3) >> 16;
+    const int32 D   = -((xC5S3 * x3) >> 16);
+
+    const int32 Ad  = (xC4S4 * (A - C)) >> 16;
+    const int32 Bd  = (xC4S4 * (B - D)) >> 16;
+    const int32 Cd  = A + C;
+    const int32 Dd  = B + D;
+
+    // Even-indexed inputs (coefficients 4 and 6 contribute nothing, so the
+    // sum and difference terms of coefficient 0 are the same value E)
+    const int32 E   = (xC4S4 * x0) >> 16;
+    const int32 G   = (xC2S6 * x2) >> 16;
+    const int32 H   = (xC6S2 * x2) >> 16;
+
+    // The rounding term goes onto the four sums that feed every output pair
+    const int32 Ed  = (E - G) + round;
+    const int32 Gd  = (E + G) + round;
+    const int32 Add = (E + Ad) + round;
+    const int32 Fd  = (E - Ad) + round;
+
+    const int32 Bdd = Bd - H;
+    const int32 Hd  = Bd + H;
+
+    out[0] = (int16)((Gd + Cd) >> shift);
+    out[7] = (int16)((Gd - Cd) >> shift);
+
+    out[1] = (int16)((Add + Hd) >> shift);
+    out[2] = (int16)((Add - Hd) >> shift);
+
+    out[3] = (int16)((Ed + Dd) >> shift);
+    out[4] = (int16)((Ed - Dd) >> shift);
+
+    out[5] = (int16)((Fd + Bdd) >> shift);
+    out[6] = (int16)((Fd - Bdd) >> shift);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDct10
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's input 8x8 block
+ *                  with non-zero coeffs only in DC & the first 9 AC coeffs.
+ *                  i.e. non-zeros ONLY in the following 10 positions:
+ *
+ *                          x  x  x  x  0  0  0  0
+ *                          x  x  x  0  0  0  0  0
+ *                          x  x  0  0  0  0  0  0
+ *                          x  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : Output data is in raster, not zig-zag, order.
+ *                  Only rows 0-3 of the scratch block are ever read: the row
+ *                  pass visits four rows and the column pass reads the top
+ *                  four entries of each column.
+ *
+ ****************************************************************************/
+void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+    int32 Work[64];     // de-quantized block, then row-transformed block
+    int16 Line[8];      // results of one 1-D stage
+    int   n, k;
+
+    // dequantize the input
+    dequant_slow10 ( QuantMatrix, InputData, Work );
+
+    // Inverse DCT on the first four rows: results over-write the whole row
+    for ( n = 0; n < 4; n++ )
+    {
+        int32 *Row = &Work[n * 8];
+
+        if ( Row[0] | Row[1] | Row[2] | Row[3] )
+        {
+            IDct10_1D ( Row, 1, 0, 0, Line );
+
+            for ( k = 0; k < 8; k++ )
+                Row[k] = Line[k];
+        }
+    }
+
+    // Inverse DCT on all eight columns: results go to the output block
+    for ( n = 0; n < 8; n++ )
+    {
+        const int32 *Col = &Work[n];
+
+        // bitwise | rather than logical || to test all four values at once
+        if ( Col[0 * 8] | Col[1 * 8] | Col[2 * 8] | Col[3 * 8] )
+        {
+            IDct10_1D ( Col, 8, IdctAdjustBeforeShift, 4, Line );
+
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = Line[k];
+        }
+        else
+        {
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = 0;
+        }
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDct1
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients
+ *                                       (only entry 0 is read).
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix
+ *                                       (only entry 0 is read).
+ *
+ *  OUTPUTS       : INT16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse DCT's input 8x8 block with only one non-zero
+ *                  coeff in the DC position:
+ *
+ *                          x  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *
+ *                  All 64 outputs receive the same value:
+ *                  (DC * step size + 15) >> 5.
+ *
+ *  SPECIAL NOTES : Output data is in raster, not zig-zag, order.
+ *
+ ****************************************************************************/
+void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
+{
+    // De-quantized DC term with the rounding offset already added
+    const INT32 Biased = (INT32)(InputData[0] * QuantMatrix[0] + 15);
+    const INT16 Flat   = (INT16)(Biased >> 5);
+
+    INT16 *Dst = OutputData;
+    INT16 *End = OutputData + 64;
+
+    while ( Dst != End )
+        *Dst++ = Flat;
+}
+
+
+#if 0
+/****************************************************************************
+ *
+ *  ROUTINE       : IDct4_1D
+ *
+ *  INPUTS        : const int16 *in : First of the 2 leading coefficients of
+ *                                    one line (the other 6 are taken as 0).
+ *                  int step        : Distance between successive coefficients
+ *                                    (1 walks a row, 8 walks a column).
+ *                  int32 round     : Term added to the sums before the final
+ *                                    shift (0 for none).
+ *                  int shift       : Final right shift applied to each result.
+ *
+ *  OUTPUTS       : int16 *out      : The 8 results, out[0] .. out[7].
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : One 1-D inverse DCT stage that reads only coefficients
+ *                  0 and 1 of the line.
+ *
+ *  SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
+ *
+ ****************************************************************************/
+static void IDct4_1D ( const int16 *in, int step, int32 round, int shift, int16 *out )
+{
+    const int32 x0 = in[0 * step], x1 = in[1 * step];
+
+    const int32 A   = (xC1S7 * x1) >> 16;
+    const int32 B   = (xC7S1 * x1) >> 16;
+
+    const int32 Ad  = (xC4S4 * A) >> 16;
+    const int32 Bd  = (xC4S4 * B) >> 16;
+
+    const int32 Eraw = (xC4S4 * x0) >> 16;
+
+    // Sum/difference are formed before the rounding term is added
+    const int32 Add = (Eraw + Ad) + round;
+    const int32 Fd  = (Eraw - Ad) + round;
+    const int32 E   = Eraw + round;
+
+    out[0] = (int16)((E + A) >> shift);
+    out[7] = (int16)((E - A) >> shift);
+
+    out[1] = (int16)((Add + Bd) >> shift);
+    out[2] = (int16)((Add - Bd) >> shift);
+
+    out[3] = (int16)((E + B) >> shift);
+    out[4] = (int16)((E - B) >> shift);
+
+    out[5] = (int16)((Fd + Bd) >> shift);
+    out[6] = (int16)((Fd - Bd) >> shift);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : IDct4
+ *
+ *  INPUTS        : int16 *InputData  : Pointer to 8x8 DCT coefficients in
+ *                                      zig-zag order. NOTE: re-ordered and
+ *                                      over-written in place.
+ *
+ *  OUTPUTS       : int16 *OutputData : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse DCT's input 8x8 block with at most four non-zero
+ *                  coeffs in the following positions:
+ *
+ *                          x  x  0  0  0  0  0  0
+ *                          x  x  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
+ *                  After zig-zag entry 4 has been moved to raster position 9
+ *                  its old slot, ip[4], is now cleared. Previously ip[5] was
+ *                  cleared instead, so when row 0 was skipped (entries 0 and
+ *                  1 both zero) the stale value leaked into column 4.
+ *
+ ****************************************************************************/
+void IDct4 ( int16 *InputData, int16 *OutputData )
+{
+    int16 Line[8];      // results of one 1-D stage
+    int   n, k;
+
+    int16 *ip = InputData;
+
+    // Unzigzag the coefficents: entry 2 -> raster 8, entry 4 -> raster 9
+    ip[8] = ip[2];
+    ip[9] = ip[4];
+    ip[2] = 0;
+    ip[4] = 0;
+    ip[5] = 0;      // kept from the original; already zero for valid input
+
+    // Inverse DCT on the first two rows: results over-write the whole row
+    for ( n = 0; n < 2; n++ )
+    {
+        int16 *Row = &ip[n * 8];
+
+        if ( Row[0] | Row[1] )
+        {
+            IDct4_1D ( Row, 1, 0, 0, Line );
+
+            for ( k = 0; k < 8; k++ )
+                Row[k] = Line[k];
+        }
+    }
+
+    // Inverse DCT on all eight columns: results go to the output block
+    for ( n = 0; n < 8; n++ )
+    {
+        const int16 *Col = &ip[n];
+
+        // bitwise | rather than logical || to test both values at once
+        if ( Col[0 * 8] | Col[1 * 8] )
+        {
+            IDct4_1D ( Col, 8, IdctAdjustBeforeShift, 4, Line );
+
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = Line[k];
+        }
+        else
+        {
+            for ( k = 0; k < 8; k++ )
+                OutputData[k * 8 + n] = 0;
+        }
+    }
+}
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
new file mode 100644
index 00000000..0db4652a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
@@ -0,0 +1,243 @@
+/****************************************************************************
+*
+* Module Title : Reconstruct.c
+*
+* Description : Block reconstruction functions.
+*
+****************************************************************************/
+#define STRICT // Strict type checking
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "reconstruct.h"
+#include "codec_common.h"
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SatUnsigned8
+ *
+ *  INPUTS        : INT16 *DataBlock      : Pointer to 8x8 input block.
+ *                  UINT32 ResultLineStep : Stride of output block.
+ *                  UINT32 DataLineStep   : Stride of input block.
+ *
+ *  OUTPUTS       : UINT8 *ResultPtr      : Pointer to 8x8 output block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Saturates the input data to 8 bits unsigned and stores
+ *                  in the output buffer.
+ *
+ *  SPECIAL NOTES : The saturation itself is done by the LIMIT macro, which
+ *                  is defined elsewhere. Results are cast to UINT8, the
+ *                  destination type, rather than to plain char, whose
+ *                  signedness is implementation-defined.
+ *
+ ****************************************************************************/
+void SatUnsigned8 ( UINT8 *ResultPtr, INT16 *DataBlock, UINT32 ResultLineStep, UINT32 DataLineStep )
+{
+    INT32 Row;
+    INT32 Col;
+
+    for ( Row = 0; Row < BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples per row
+        for ( Col = 0; Col < 8; Col++ )
+            ResultPtr[Col] = (UINT8) LIMIT(DataBlock[Col]);
+
+        // Step both pointers to the next row using their own strides
+        DataBlock += DataLineStep;
+        ResultPtr += ResultLineStep;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ScalarReconIntra
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT16 *ChangePtr    : Pointer to 8x8 intra prediction block.
+ *                  UINT32 LineStep      : Stride of reconstruction block.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an intra block: adds 128 to every value of
+ *                  the change block, collecting the sums in TmpDataBuffer,
+ *                  then saturates them into the reconstruction buffer.
+ *
+ *  SPECIAL NOTES : TmpDataBuffer is over-written.
+ *
+ ****************************************************************************/
+void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *Dst = TmpDataBuffer;
+
+    for ( Row = 0; Row < BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples per row
+        for ( Col = 0; Col < 8; Col++ )
+            Dst[Col] = (INT16) ( ChangePtr[Col] + 128 );
+
+        Dst       += BLOCK_HEIGHT_WIDTH;
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ScalarReconInter
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr        : Pointer to 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference and output blocks.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block by adding a prediction
+ *                  error to a reference block in the previous frame
+ *                  reconstruction buffer. The sums are collected in
+ *                  TmpDataBuffer and then saturated into ReconPtr.
+ *
+ *  SPECIAL NOTES : TmpDataBuffer is over-written.
+ *
+ ****************************************************************************/
+void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *Dst = TmpDataBuffer;
+
+    for ( Row = 0; Row < BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Reference sample plus prediction error, eight samples per row
+        for ( Col = 0; Col < 8; Col++ )
+            Dst[Col] = (INT16)(RefPtr[Col] + ChangePtr[Col]);
+
+        // The error and temporary blocks are packed; the reference uses LineStep
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+        Dst       += BLOCK_HEIGHT_WIDTH;
+        RefPtr    += LineStep;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ScalarReconInterHalfPixel2
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr1       : Pointer to first 8x8 reference block.
+ *                  UINT8 *RefPtr2       : Pointer to second 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference blocks (also used
+ *                                         as the stride of the output block).
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block by adding a prediction
+ *                  error to a reference block computed by averaging the two
+ *                  specified reference blocks. The two reference blocks are
+ *                  those that bracket the 1/2-pixel accuracy motion vector.
+ *
+ *  SPECIAL NOTES : The average is (a + b) >> 1, i.e. it rounds down.
+ *                  TmpDataBuffer is over-written.
+ *
+ ****************************************************************************/
+void ScalarReconInterHalfPixel2
+(
+    INT16 *TmpDataBuffer,
+    UINT8 *ReconPtr,
+    UINT8 *RefPtr1,
+    UINT8 *RefPtr2,
+    INT16 *ChangePtr,
+    UINT32 LineStep
+)
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *Dst = TmpDataBuffer;
+
+    for ( Row = 0; Row < BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Averaged reference sample plus prediction error, eight samples per row
+        for ( Col = 0; Col < 8; Col++ )
+            Dst[Col] = (INT16)( (((INT32)RefPtr1[Col] + (INT32)RefPtr2[Col]) >> 1) + ChangePtr[Col] );
+
+        // The error and temporary blocks are packed; the references use LineStep
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+        Dst       += BLOCK_HEIGHT_WIDTH;
+        RefPtr1   += LineStep;
+        RefPtr2   += LineStep;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ReconBlock_C
+ *
+ *  INPUTS        : INT16 *SrcBlock    : Pointer to 8x8 prediction error.
+ *                                       NOTE: over-written with the sums.
+ *                  INT16 *ReconRefPtr : Pointer to 8x8 block prediction.
+ *                  UINT32 LineStep    : Stride of output block.
+ *
+ *  OUTPUTS       : UINT8 *DestBlock   : Pointer to 8x8 reconstructed block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reconstruct a block by adding the block prediction to
+ *                  the prediction error (in place in SrcBlock) and then
+ *                  clipping the sums into DestBlock.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void ReconBlock_C ( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *Sum = SrcBlock;      // walks the block; SrcBlock keeps its start
+
+    // For each block row
+    for ( Row = 0; Row < BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples per row
+        for ( Col = 0; Col < 8; Col++ )
+            Sum[Col] = (INT16)(Sum[Col] + ReconRefPtr[Col]);
+
+        // Next row...
+        Sum         += BLOCK_HEIGHT_WIDTH;
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8( DestBlock, SrcBlock, LineStep, BLOCK_HEIGHT_WIDTH );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c
new file mode 100644
index 00000000..aa745ff9
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "vputil_if.h"
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+// Scalar (no mmx) reconstruction functions
+extern void ClearSysState_C ( void );
+extern void IDctSlow ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2 ( INT16 *TmpDataBuffer, UINT8 *ReconPtr,UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
+extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
+extern void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride );
+extern void Copy12x12_C ( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
+extern void fdct_short_C ( INT16 *InputData, INT16 *OutputData );
+extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+
+/****************************************************************************
+ *
+ * ROUTINE : fillidctconstants
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : STUB FUNCTION. The body is intentionally empty.
+ *
+ * SPECIAL NOTES : Does nothing in this generic C build. Presumably kept so
+ * that shared initialisation code can call it on every
+ * platform -- TODO confirm against the optimised builds.
+ *
+ ****************************************************************************/
+void fillidctconstants ( void )
+{
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UtilMachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Installs the generic C implementations into the utility
+ * function pointers and the IDCT dispatch tables.
+ *
+ * SPECIAL NOTES : This version performs no processor feature detection
+ * (no MMX/XMM checks); every pointer is set unconditionally.
+ * idctc[] and idct[] are filled identically for indices
+ * 0..64: IDct1 for index <= 1, IDct10 for index <= 10 and
+ * IDctSlow otherwise. The index is presumably a count of
+ * coded coefficients -- TODO confirm against the callers.
+ *
+ ****************************************************************************/
+void UtilMachineSpecificConfig ( void )
+{
+ int i;
+ for(i=0;i<=64;i++)
+ {
+ if(i<=1)idctc[i]=IDct1;
+ else if(i<=10)idctc[i]=IDct10;
+ else idctc[i]=IDctSlow;
+ }
+ fdct_short=fdct_short_C ;
+ for(i=0;i<=64;i++)
+ {
+ if(i<=1)idct[i]=IDct1;
+ else if(i<=10)idct[i]=IDct10;
+ else idct[i]=IDctSlow;
+ }
+ ClearSysState = ClearSysState_C;
+ ReconIntra = ScalarReconIntra;
+ ReconInter = ScalarReconInter;
+ ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
+ AverageBlock = AverageBlock_C;
+ UnpackBlock = UnpackBlock_C;
+ ReconBlock = ReconBlock_C;
+ SubtractBlock = SubtractBlock_C;
+ CopyBlock = CopyBlock_C;
+ Copy12x12 = Copy12x12_C;
+ FilterBlockBil_8 = FilterBlockBil_8_C;
+ FilterBlock=FilterBlock_C;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
new file mode 100644
index 00000000..1705dc39
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
@@ -0,0 +1,1285 @@
+/****************************************************************************
+ *
+ * Module Title : vputil.c
+ *
+ * Description : Codec utility functions.
+ *
+ ***************************************************************************/
+#define STRICT /* Strict type checking */
+
+/****************************************************************************
+ * Header Files
+ ***************************************************************************/
+#include <math.h>
+#include "codec_common.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT 7
+#define MIN(a, b) ( ( a < b ) ? a : b )
+
+/****************************************************************************
+ * Imports
+ ***************************************************************************/
+extern void UtilMachineSpecificConfig ( void );
+extern void fillidctconstants ( void );
+
+/****************************************************************************
+ * Module Statics
+ ****************************************************************************/
+
+INT32 BilinearFilters[8][2] =
+{
+ { 128, 0 },
+ { 112, 16 },
+ { 96, 32 },
+ { 80, 48 },
+ { 64, 64 },
+ { 48, 80 },
+ { 32, 96 },
+ { 16, 112 }
+};
+
+// VP6.2 Bicubic filter taps calculated for 16 values of 'A' from -0.25 to -1.00 in steps of -0.05
+// For each 'A' there are 8 sets of data corresponding to 1/8 pel offsets 0 to 7/8.
+// These are only used in VP6.2 and upwards
+// The last (17th) entry is a dummy entry used for backwards compatibility with VP61
+static INT32 BicubicFilterSet[17][8][4] =
+{
+ { { 0, 128, 0, 0 },
+ { -3, 122, 9, 0 },
+ { -4, 109, 24, -1 },
+ { -5, 91, 45, -3 },
+ { -4, 68, 68, -4 },
+ { -3, 45, 91, -5 },
+ { -1, 24, 109, -4 },
+ { 0, 9, 122, -3 },
+ },
+ { { 0, 128, 0, 0 },
+ { -4, 124, 9, -1 },
+ { -5, 110, 25, -2 },
+ { -6, 91, 46, -3 },
+ { -5, 69, 69, -5 },
+ { -3, 46, 91, -6 },
+ { -2, 25, 110, -5 },
+ { -1, 9, 124, -4 },
+ },
+ { { 0, 128, 0, 0 },
+ { -4, 123, 10, -1 },
+ { -6, 110, 26, -2 },
+ { -7, 92, 47, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 47, 92, -7 },
+ { -2, 26, 110, -6 },
+ { -1, 10, 123, -4 },
+ },
+ { { 0, 128, 0, 0 }, // Approx A=-0.4
+ { -5, 124, 10, -1 },
+ { -7, 110, 27, -2 },
+ { -7, 91, 48, -4 },
+ { -6, 70, 70, -6 },
+ { -4, 48, 92, -8 },
+ { -2, 27, 110, -7 },
+ { -1, 10, 124, -5 },
+ },
+ { { 0, 128, 0, 0 },
+ { -6, 124, 11, -1 },
+ { -8, 111, 28, -3 },
+ { -8, 92, 49, -5 },
+ { -7, 71, 71, -7 },
+ { -5, 49, 92, -8 },
+ { -3, 28, 111, -8 },
+ { -1, 11, 124, -6 },
+ },
+ { { 0, 128, 0, 0 }, // Corresponds approximately to VDub bicubic A=-0.50
+ { -6, 123, 12, -1 },
+ { -9, 111, 29, -3 },
+ { -9, 93, 50, -6 },
+ { -8, 72, 72, -8 },
+ { -6, 50, 93, -9 },
+ { -3, 29, 111, -9 },
+ { -1, 12, 123, -6 },
+ },
+ { { 0, 128, 0, 0 },
+ { -7, 124, 12, -1 },
+ { -10, 111, 30, -3 },
+ { -10, 93, 51, -6 },
+ { -9, 73, 73, -9 },
+ { -6, 51, 93, -10 },
+ { -3, 30, 111, -10 },
+ { -1, 12, 124, -7 },
+ },
+ { { 0, 128, 0, 0 },
+ { -7, 123, 13, -1 },
+ { -11, 112, 31, -4 },
+ { -11, 94, 52, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 52, 94, -11 },
+ { -4, 31, 112, -11 },
+ { -1, 13, 123, -7 },
+ },
+ { { 0, 128, 0, 0 },
+ { -8, 124, 13, -1 },
+ { -12, 112, 32, -4 },
+ { -12, 94, 53, -7 },
+ { -10, 74, 74, -10 },
+ { -7, 53, 94, -12 },
+ { -4, 32, 112, -12 },
+ { -1, 13, 124, -8 },
+ },
+ { { 0, 128, 0, 0 },
+ { -9, 124, 14, -1 },
+ { -13, 112, 33, -4 },
+ { -13, 95, 54, -8 },
+ { -11, 75, 75, -11 },
+ { -8, 54, 95, -13 },
+ { -4, 33, 112, -13 },
+ { -1, 14, 124, -9 },
+ },
+ { { 0, 128, 0, 0 }, // Corresponds approximately to VDub bicubic A=-0.75
+ { -9, 123, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -14, 95, 55, -8 },
+ { -12, 76, 76, -12 },
+ { -8, 55, 95, -14 },
+ { -5, 34, 112, -13 },
+ { -1, 15, 123, -9 },
+ },
+ { { 0, 128, 0, 0 },
+ { -10, 124, 15, -1 },
+ { -14, 113, 34, -5 },
+ { -15, 96, 56, -9 },
+ { -13, 77, 77, -13 },
+ { -9, 56, 96, -15 },
+ { -5, 34, 113, -14 },
+ { -1, 15, 124, -10 },
+ },
+ { { 0, 128, 0, 0 },
+ { -10, 123, 16, -1 },
+ { -15, 113, 35, -5 },
+ { -16, 98, 56, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 56, 98, -16 },
+ { -5, 35, 113, -15 },
+ { -1, 16, 123, -10 },
+ },
+ { { 0, 128, 0, 0 },
+ { -11, 124, 17, -2 },
+ { -16, 113, 36, -5 },
+ { -17, 98, 57, -10 },
+ { -14, 78, 78, -14 },
+ { -10, 57, 98, -17 },
+ { -5, 36, 113, -16 },
+ { -2, 17, 124, -11 },
+ },
+ { { 0, 128, 0, 0 },
+ { -12, 125, 17, -2 },
+ { -17, 114, 37, -6 },
+ { -18, 99, 58, -11 },
+ { -15, 79, 79, -15 },
+ { -11, 58, 99, -18 },
+ { -6, 37, 114, -17 },
+ { -2, 17, 125, -12 },
+ },
+ { { 0, 128, 0, 0 },
+ { -12, 124, 18, -2 },
+ { -18, 114, 38, -6 },
+ { -19, 99, 59, -11 },
+ { -16, 80, 80, -16 },
+ { -11, 59, 99, -19 },
+ { -6, 38, 114, -18 },
+ { -2, 18, 124, -12 },
+ },
+
+ // Dummy entry for backwards VP61 compatibility
+ {
+ { 0, 128, 0, 0 },
+ { -4, 118, 16, -2 },
+ { -7, 106, 34, -5 },
+ { -8, 90, 53, -7 },
+ { -8, 72, 72, -8 },
+ { -7, 53, 90, -8 },
+ { -5, 34, 106, -7 },
+ { -2, 16, 118, -4 }
+ }
+};
+
+
+//static INT32 FData[BLOCK_HEIGHT_WIDTH*11]; // Temp data bufffer used in filtering
+
+/****************************************************************************
+ * Exports
+ ****************************************************************************/
+// Function pointers to platform-specific routines
+void (*ReconIntra)( INT16 *tmpBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
+void (*ReconInter)( INT16 *tmpBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
+void (*ReconInterHalfPixel2)( INT16 * tmpBuffer, UINT8 * ReconPtr, UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
+void (*fdct_short)( INT16 *InputData, INT16 *OutputData );
+void (*idct[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+void (*ClearSysState)( void );
+void (*ReconBlock)( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
+void (*SubtractBlock)( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+void (*UnpackBlock)( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+void (*AverageBlock)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+void (*CopyBlock)( unsigned char *src, unsigned char *dest, unsigned int srcstride );
+void (*Copy12x12)( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
+void (*idctc[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+void (*FilterBlockBil_8)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+void (*FilterBlock)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+/****************************************************************************
+ *
+ * ROUTINE : ClearSysState_C
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Null placeholder function; the body is empty.
+ *
+ * SPECIAL NOTES : Stub in the C-code for a function required when using
+ * MMX, XMM, etc. to clear system state. The C version
+ * performs no work.
+ *
+ ****************************************************************************/
+void ClearSysState_C ( void )
+{
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : AverageBlock_C
+ *
+ * INPUTS : UINT8 *ReconPtr1 : Pointer to first reference block.
+ * UINT8 *ReconPtr2 : Pointer to second reference block.
+ * UINT32 ReconPixelsPerLine : Stride of reference blocks.
+ *
+ * OUTPUTS : UINT16 *ReconRefPtr : Pointer to output block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Takes two input blocks and creates an output block
+ * by pixel averaging.
+ *
+ * SPECIAL NOTES : The average is (a + b) >> 1, i.e. it truncates; no
+ * rounding term is added.
+ * Input rows are ReconPixelsPerLine apart; output rows are
+ * packed BLOCK_HEIGHT_WIDTH apart.
+ * Results lie in 0..255, so the (INT16) cast before the
+ * store into the UINT16 output does not change the value.
+ *
+ ****************************************************************************/
+void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine )
+{
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+ ReconRefPtr[0] = (INT16)(((INT32)(ReconPtr1[0]) + ((INT32)ReconPtr2[0]))>>1);
+ ReconRefPtr[1] = (INT16)(((INT32)(ReconPtr1[1]) + ((INT32)ReconPtr2[1]))>>1);
+ ReconRefPtr[2] = (INT16)(((INT32)(ReconPtr1[2]) + ((INT32)ReconPtr2[2]))>>1);
+ ReconRefPtr[3] = (INT16)(((INT32)(ReconPtr1[3]) + ((INT32)ReconPtr2[3]))>>1);
+ ReconRefPtr[4] = (INT16)(((INT32)(ReconPtr1[4]) + ((INT32)ReconPtr2[4]))>>1);
+ ReconRefPtr[5] = (INT16)(((INT32)(ReconPtr1[5]) + ((INT32)ReconPtr2[5]))>>1);
+ ReconRefPtr[6] = (INT16)(((INT32)(ReconPtr1[6]) + ((INT32)ReconPtr2[6]))>>1);
+ ReconRefPtr[7] = (INT16)(((INT32)(ReconPtr1[7]) + ((INT32)ReconPtr2[7]))>>1);
+
+ // Start next row
+ ReconPtr1 += ReconPixelsPerLine;
+ ReconPtr2 += ReconPixelsPerLine;
+
+ ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UnpackBlock_C
+ *
+ * INPUTS : UINT8 *ReconPtr : Pointer to reference block.
+ * UINT32 ReconPixelsPerLine : Stride of reference block.
+ *
+ * OUTPUTS : INT16 *ReconRefPtr : Pointer to output block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Converts block of 8x8 unsigned 8-bit to block of
+ * signed 16-bit.
+ *
+ * SPECIAL NOTES : Input rows are ReconPixelsPerLine apart; output rows are
+ * packed BLOCK_HEIGHT_WIDTH apart.
+ *
+ ****************************************************************************/
+void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine )
+{
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+ ReconRefPtr[0] = (INT16)ReconPtr[0];
+ ReconRefPtr[1] = (INT16)ReconPtr[1];
+ ReconRefPtr[2] = (INT16)ReconPtr[2];
+ ReconRefPtr[3] = (INT16)ReconPtr[3];
+ ReconRefPtr[4] = (INT16)ReconPtr[4];
+ ReconRefPtr[5] = (INT16)ReconPtr[5];
+ ReconRefPtr[6] = (INT16)ReconPtr[6];
+ ReconRefPtr[7] = (INT16)ReconPtr[7];
+
+ // Start next row
+ ReconPtr += ReconPixelsPerLine;
+ ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SubtractBlock_C
+ *
+ * INPUTS : UINT8 *SrcBlock : Pointer to 8x8 source block.
+ * UINT32 LineStep : Stride of source block.
+ * INT16 *DestPtr : On entry, the 8x8 block to subtract.
+ *
+ * OUTPUTS : INT16 *DestPtr : Pointer to 8x8 output block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Subtracts block pointed to by DestPtr from that pointed
+ * to by SrcBlock. Result stored in DestPtr.
+ *
+ * SPECIAL NOTES : DestPtr is both an input and the output: each element is
+ * replaced by SrcBlock[n] - DestPtr[n].
+ * SrcBlock rows are LineStep apart; DestPtr rows are packed
+ * BLOCK_HEIGHT_WIDTH apart.
+ *
+ ****************************************************************************/
+void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep )
+{
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+ DestPtr[0] = (INT16)((INT32)SrcBlock[0] - (INT32)DestPtr[0]);
+ DestPtr[1] = (INT16)((INT32)SrcBlock[1] - (INT32)DestPtr[1]);
+ DestPtr[2] = (INT16)((INT32)SrcBlock[2] - (INT32)DestPtr[2]);
+ DestPtr[3] = (INT16)((INT32)SrcBlock[3] - (INT32)DestPtr[3]);
+ DestPtr[4] = (INT16)((INT32)SrcBlock[4] - (INT32)DestPtr[4]);
+ DestPtr[5] = (INT16)((INT32)SrcBlock[5] - (INT32)DestPtr[5]);
+ DestPtr[6] = (INT16)((INT32)SrcBlock[6] - (INT32)DestPtr[6]);
+ DestPtr[7] = (INT16)((INT32)SrcBlock[7] - (INT32)DestPtr[7]);
+
+ // Start next row
+ SrcBlock += LineStep;
+ DestPtr += BLOCK_HEIGHT_WIDTH;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CopyBlock_C
+ *
+ * INPUTS : unsigned char *src : Pointer to 8x8 source block.
+ * unsigned int srcstride : Stride of blocks (applied to both
+ * source and destination).
+ *
+ * OUTPUTS : unsigned char *dest : Pointer to 8x8 destination block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Copies a block from source to destination.
+ *
+ * SPECIAL NOTES : Copies block in chunks of 32-bits at a time: each of the
+ * 8 rows is moved as two UINT32 words through pointer casts.
+ * NOTE(review): this presumably relies on src and dest rows
+ * being suitably aligned for 32-bit access -- confirm for
+ * strict-alignment targets.
+ *
+ ****************************************************************************/
+void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride )
+{
+ int j;
+ unsigned char *s = src;
+ unsigned char *d = dest;
+ unsigned int stride = srcstride;
+
+ for ( j=0; j<8; j++ )
+ {
+ ((UINT32*)d)[0] = ((UINT32*)s)[0];
+ ((UINT32*)d)[1] = ((UINT32*)s)[1];
+ s += stride;
+ d += stride;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Copy12x12_C
+ *
+ * INPUTS : const unsigned char *src : Pointer to source block.
+ * unsigned int srcstride : Stride of the source block.
+ * unsigned int deststride : Stride of the destination block.
+ *
+ * OUTPUTS : unsigned char *dest : Pointer to destination block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Copies a 12x12 block from source to destination.
+ *
+ * SPECIAL NOTES : Copies one byte at a time, 12 bytes per row for 12 rows.
+ * Source and destination advance by their own strides.
+ *
+ ****************************************************************************/
+void Copy12x12_C
+(
+ const unsigned char *src,
+ unsigned char *dest,
+ unsigned int srcstride,
+ unsigned int deststride
+)
+{
+ int j;
+ const unsigned char *s = src;
+ unsigned char *d = dest;
+
+ for ( j=0; j<12; j++ )
+ {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ d[8] = s[8];
+ d[9] = s[9];
+ d[10] = s[10];
+ d[11] = s[11];
+ s += srcstride;
+ d += deststride;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : InitVPUtil
+ *
+ * INPUTS : None.
+ *
+ * OUTPUTS : None.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Setup static initialized variables for Util.
+ *
+ * SPECIAL NOTES : Calls fillidctconstants() and then
+ * UtilMachineSpecificConfig(), both imported from outside
+ * this module.
+ *
+ ****************************************************************************/
+void InitVPUtil ( void )
+{
+ fillidctconstants ();
+ UtilMachineSpecificConfig ();
+}
+
+/****************************************************************************
+ * Fractional pixel prediction filtering...
+****************************************************************************/
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of source block.
+ * UINT32 PixelStep : 1 for horizontal filtering,
+ * SrcPixelsPerLine for vertical filtering.
+ * UINT32 OutputHeight : Height of the output block.
+ * UINT32 OutputWidth : Width of the output block.
+ * INT32 *Filter : Array of 4 filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to output block.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Applies a 1-D 4-tap filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block.
+ *
+ * SPECIAL NOTES : Four filter taps should sum to FILTER_WEIGHT.
+ * PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Each output reads the samples at offsets -PixelStep, 0,
+ * +PixelStep and +2*PixelStep, so the source must be
+ * readable one step before and two steps after the block.
+ * Results are rounded, shifted by FILTER_SHIFT and clamped
+ * to 0-255. Output rows are packed OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock1d
+(
+ UINT8 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i, j;
+ INT32 Temp;
+
+ for ( i=0; i<OutputHeight; i++ )
+ {
+ for ( j=0; j<OutputWidth; j++ )
+ {
+ // Apply filter...
+ Temp = ((INT32)SrcPtr[-(INT32)PixelStep] * Filter[0]) +
+ ((INT32)SrcPtr[0] * Filter[1]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[2]) +
+ ((INT32)SrcPtr[2*PixelStep] * Filter[3]) +
+ (FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255
+ Temp = Temp >> FILTER_SHIFT;
+ if ( Temp < 0 )
+ Temp = 0;
+ else if ( Temp > 255 )
+ Temp = 255;
+
+ OutputPtr[j] = (INT16)Temp;
+ SrcPtr++;
+ }
+
+ // Next row... (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dFirstPass
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of source block.
+ * UINT32 PixelStep : 1 for horizontal filtering,
+ * SrcPixelsPerLine for vertical filtering.
+ * UINT32 OutputHeight : Height of the output block.
+ * UINT32 OutputWidth : Width of the output block.
+ * INT32 *Filter : Array of 4 filter taps.
+ *
+ * OUTPUTS : INT32 *OutputPtr : Pointer to output block.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Applies a 1-D 4-tap filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement first-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Output is stored as INT32 for the next pass, but each
+ * value is already rounded, shifted by FILTER_SHIFT and
+ * clamped to 0-255 here, so no extra precision is carried.
+ * Four filter taps should sum to FILTER_WEIGHT.
+ * PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Each output reads the samples at offsets -PixelStep, 0,
+ * +PixelStep and +2*PixelStep.
+ * Output rows are packed OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock2dFirstPass
+(
+ UINT8 *SrcPtr,
+ INT32 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i, j;
+ INT32 Temp;
+
+ for ( i=0; i<OutputHeight; i++ )
+ {
+ for ( j=0; j<OutputWidth; j++ )
+ {
+ // Apply filter
+ Temp = ((INT32)SrcPtr[-(INT32)PixelStep] * Filter[0]) +
+ ((INT32)SrcPtr[0] * Filter[1]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[2]) +
+ ((INT32)SrcPtr[2*PixelStep] * Filter[3]) +
+ (FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255
+ Temp = Temp >> FILTER_SHIFT;
+ if ( Temp < 0 )
+ Temp = 0;
+ else if ( Temp > 255 )
+ Temp = 255;
+
+ OutputPtr[j] = Temp;
+ SrcPtr++;
+ }
+
+ // Next row... (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dSecondPass
+ *
+ * INPUTS : INT32 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of source block.
+ * UINT32 PixelStep : 1 for horizontal filtering,
+ * SrcPixelsPerLine for vertical filtering.
+ * UINT32 OutputHeight : Height of the output block.
+ * UINT32 OutputWidth : Width of the output block.
+ * INT32 *Filter : Array of 4 filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to output block.
+ *
+ * RETURNS : void.
+ *
+ * FUNCTION : Applies a 1-D 4-tap filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement second-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dFirstPass.
+ * Four filter taps should sum to FILTER_WEIGHT.
+ * PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Each output reads the samples at offsets -PixelStep, 0,
+ * +PixelStep and +2*PixelStep, so SrcPtr must have one step
+ * of valid data before it and two steps after the block.
+ * Results are rounded, shifted by FILTER_SHIFT and clamped
+ * to 0-255. Output rows are packed OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock2dSecondPass
+(
+ INT32 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i,j;
+ INT32 Temp;
+
+ for ( i=0; i < OutputHeight; i++ )
+ {
+ for ( j = 0; j < OutputWidth; j++ )
+ {
+ // Apply filter
+ Temp = ((INT32)SrcPtr[-(INT32)PixelStep] * Filter[0]) +
+ ((INT32)SrcPtr[0] * Filter[1]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[2]) +
+ ((INT32)SrcPtr[2*PixelStep] * Filter[3]) +
+ (FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255
+ Temp = Temp >> FILTER_SHIFT;
+ if ( Temp < 0 )
+ Temp = 0;
+ else if ( Temp > 255 )
+ Temp = 255;
+
+ OutputPtr[j] = (UINT16)Temp;
+ SrcPtr++;
+ }
+
+ // Start next row (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2d
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of input block.
+ * INT32 *HFilter : Array of 4 horizontal filter taps.
+ * INT32 *VFilter : Array of 4 vertical filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 2-D filters an 8x8 input block by applying a 4-tap
+ * filter horizontally followed by a 4-tap filter vertically
+ * on the result.
+ *
+ * SPECIAL NOTES : The intermediate horizontally filtered block must produce
+ * 3 more points than the input block in each column. This
+ * is to ensure that the 4-tap filter has one extra data-point
+ * at the top & 2 extra data-points at the bottom of each
+ * column so filter taps do not extend beyond data. Thus the
+ * output of the first stage filter is an 8x11 (HxV) block.
+ * The first pass therefore starts one source row above
+ * SrcPtr, and the second pass starts at the second row of
+ * the intermediate buffer.
+ * The horizontal pass also reads one pixel to the left and
+ * two to the right of each 8-pixel row.
+ *
+ ****************************************************************************/
+void FilterBlock2d
+(
+ UINT8 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ INT32 *HFilter,
+ INT32 *VFilter
+)
+{
+ INT32 FData[BLOCK_HEIGHT_WIDTH*11]; // Temp data buffer used in filtering (11 rows of 8)
+
+ // First filter 1-D horizontally...
+ FilterBlock2dFirstPass ( SrcPtr-SrcPixelsPerLine, FData, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+ // then filter vertically...
+ FilterBlock2dSecondPass ( FData+BLOCK_HEIGHT_WIDTH, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1dBil
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of input block.
+ * UINT32 PixelStep : Offset between filter input samples (see notes).
+ * UINT32 OutputHeight : Number of output rows produced.
+ * UINT32 OutputWidth : Number of output samples per row.
+ * INT32 *Filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 2-tap 1-D bi-linear filter to input block in
+ * either horizontal or vertical direction.
+ *
+ * SPECIAL NOTES : PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Each output reads the samples at offsets 0 and +PixelStep,
+ * so one extra sample after the block must be readable.
+ * The result is not clamped. Output rows are packed
+ * OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock1dBil
+(
+ UINT8 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i, j;
+
+ for ( i=0; i<OutputHeight; i++ )
+ {
+ for ( j=0; j<OutputWidth; j++ )
+ {
+ // Apply filter
+ // NOTE: Rounding doesn't improve accuracy but is
+ // easier to implement on certain platforms.
+ OutputPtr[j] = (INT16)( ( ((INT32)SrcPtr[0] * Filter[0]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[1]) +
+ (FILTER_WEIGHT/2) ) >> FILTER_SHIFT );
+ SrcPtr++;
+ }
+
+ // Next row... (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_FirstPass
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of input block.
+ * UINT32 PixelStep : Offset between filter input samples (see notes).
+ * UINT32 OutputHeight : Number of output rows produced.
+ * UINT32 OutputWidth : Number of output samples per row.
+ * INT32 *Filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : INT32 *OutputPtr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement first-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Output is stored as INT32 for the next pass, but each
+ * value is already rounded and shifted down by FILTER_SHIFT
+ * here, so no fractional precision is carried forward.
+ * Two filter taps should sum to FILTER_WEIGHT.
+ * PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Output rows are packed OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_FirstPass
+(
+ UINT8 *SrcPtr,
+ INT32 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i, j;
+
+ for ( i=0; i<OutputHeight; i++ )
+ {
+ for ( j=0; j<OutputWidth; j++ )
+ {
+ // Apply bilinear filter
+ OutputPtr[j] = ( ( (INT32)SrcPtr[0] * Filter[0]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[1]) +
+ (FILTER_WEIGHT/2) ) >> FILTER_SHIFT;
+ SrcPtr++;
+ }
+
+ // Next row... (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_SecondPass
+ *
+ * INPUTS : INT32 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of input block.
+ * UINT32 PixelStep : Offset between filter input samples (see notes).
+ * UINT32 OutputHeight : Number of output rows produced.
+ * UINT32 OutputWidth : Number of output samples per row.
+ * INT32 *Filter : Array of 2 bi-linear filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement second-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dBil_FirstPass.
+ * Two filter taps should sum to FILTER_WEIGHT.
+ * PixelStep defines whether the filter is applied
+ * horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ * Each output reads the samples at offsets 0 and +PixelStep.
+ * The result is rounded and shifted by FILTER_SHIFT but
+ * not clamped. Output rows are packed OutputWidth apart.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_SecondPass
+(
+ INT32 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ UINT32 PixelStep,
+ UINT32 OutputHeight,
+ UINT32 OutputWidth,
+ INT32 *Filter
+)
+{
+ UINT32 i,j;
+ INT32 Temp;
+
+ for ( i=0; i<OutputHeight; i++ )
+ {
+ for ( j=0; j<OutputWidth; j++ )
+ {
+ // Apply filter
+ Temp = ((INT32)SrcPtr[0] * Filter[0]) +
+ ((INT32)SrcPtr[PixelStep] * Filter[1]) +
+ (FILTER_WEIGHT/2);
+ OutputPtr[j] = (UINT16)(Temp >> FILTER_SHIFT);
+ SrcPtr++;
+ }
+
+ // Next row... (SrcPtr has already advanced OutputWidth samples)
+ SrcPtr += SrcPixelsPerLine - OutputWidth;
+ OutputPtr += OutputWidth;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil
+ *
+ * INPUTS : UINT8 *SrcPtr : Pointer to source block.
+ * UINT32 SrcPixelsPerLine : Stride of input block.
+ * INT32 *HFilter : Array of 2 horizontal filter taps.
+ * INT32 *VFilter : Array of 2 vertical filter taps.
+ *
+ * OUTPUTS : UINT16 *OutputPtr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : 2-D filters an 8x8 input block by applying a 2-tap
+ * bi-linear filter horizontally followed by a 2-tap
+ * bi-linear filter vertically on the result.
+ *
+ * SPECIAL NOTES : The intermediate horizontally filtered block must produce
+ * 1 more point than the input block in each column. This
+ * is to ensure that the 2-tap filter has one extra data-point
+ * at the bottom of each column so filter taps do not extend
+ * beyond data. Thus the output of the first stage filter
+ * is an 8x9 (HxV) block.
+ * The first pass starts at SrcPtr itself and the second
+ * pass combines each intermediate row with the row below.
+ * FData is sized for 11 rows although only 9 are written.
+ * The horizontal pass also reads one pixel to the right of
+ * each 8-pixel row.
+ *
+ ****************************************************************************/
+ void FilterBlock2dBil
+(
+ UINT8 *SrcPtr,
+ UINT16 *OutputPtr,
+ UINT32 SrcPixelsPerLine,
+ INT32 *HFilter,
+ INT32 *VFilter
+)
+{
+
+ INT32 FData[BLOCK_HEIGHT_WIDTH*11]; // Temp data buffer used in filtering
+
+ // First filter 1-D horizontally...
+ FilterBlock2dBil_FirstPass ( SrcPtr, FData, SrcPixelsPerLine, 1, 9, 8, HFilter );
+
+ // then 1-D vertically...
+ FilterBlock2dBil_SecondPass ( FData, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock_C
+ *
+ *  INPUTS        : UINT8 *ReconPtr1     : Pointer to first 8x8 input block.
+ *                  UINT8 *ReconPtr2     : Pointer to second 8x8 input block.
+ *                  UINT32 PixelsPerLine : Stride for ReconPtr1 & ReconPtr2.
+ *                  INT32 ModX           : Fractional part of x-component of motion vector.
+ *                  INT32 ModY           : Fractional part of y-component of motion vector.
+ *                  BOOL UseBicubic      : TRUE=Bicubic, FALSE=Bi-Linear filter.
+ *                  UINT8 BicubicAlpha   : Defines which set of bicubic taps to use.
+ *
+ *  OUTPUTS       : UINT16 *ReconRefPtr  : Pointer to 8x8 filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Produces a filtered fractional pel prediction block
+ *                  using bilinear or bicubic filters.
+ *                  This is used by baseline VP6.2 and upwards.
+ *
+ *  SPECIAL NOTES : ReconPtr1 & ReconPtr2 point to blocks that bracket the
+ *                  position of the fractional pixel motion vector. The
+ *                  offset between them (1, stride, stride-1 or stride+1)
+ *                  selects a 1-D horizontal, 1-D vertical or 2-D filter.
+ *                  For any other offset nothing is written to ReconRefPtr.
+ *                  ModX, ModY are used for filter selection--see code
+ *                  comment for definition.
+ *
+ ****************************************************************************/
+void FilterBlock_C
+(
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    UINT16 *ReconRefPtr,
+    UINT32 PixelsPerLine,
+    INT32 ModX,
+    INT32 ModY,
+    BOOL UseBicubic,
+    UINT8 BicubicAlpha
+)
+{
+    int diff;
+
+    // ModX and ModY are the bottom three bits of the signed motion vector
+    // components (in 1/8th pel units). This works out to be what we want
+    // --despite the pointer swapping that goes on below.
+    // For example...
+    // if MV x-component is +ve then ModX = x%8.
+    // if MV x-component is -ve then ModX = 8+(x%8), where x%8 is in the range -7 to -1.
+
+    // Swap pointers to ensure that ReconPtr1 is "smaller than",
+    // i.e. above, left, above-right or above-left of, ReconPtr2
+    diff = (int)(ReconPtr2 - ReconPtr1);    // explicit narrowing, matching the recomputation after the swap
+
+    if ( diff<0 )
+    {
+        // ReconPtr1>ReconPtr2, so swap...
+        UINT8 *temp = ReconPtr1;
+        ReconPtr1 = ReconPtr2;
+        ReconPtr2 = temp;
+        diff = (int)(ReconPtr2-ReconPtr1);
+    }
+
+    if ( diff==1 )
+    {
+        // Fractional pixel in horizontal only...
+        if ( UseBicubic )
+            FilterBlock1d ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BicubicFilterSet[BicubicAlpha][ModX] );
+        else
+            FilterBlock1dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters[ModX] );
+    }
+    else if ( diff == (int)(PixelsPerLine) )
+    {
+        // Fractional pixel in vertical only...
+        if ( UseBicubic )
+            FilterBlock1d ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilterSet[BicubicAlpha][ModY] );
+        else
+            FilterBlock1dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters[ModY] );
+    }
+    else if(diff == (int)(PixelsPerLine - 1))
+    {
+        // ReconPtr1 is Top right, so the 2-D filter starts one pixel to its left...
+        if ( UseBicubic )
+            FilterBlock2d ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BicubicFilterSet[BicubicAlpha][ModX], BicubicFilterSet[BicubicAlpha][ModY] );
+        else
+            FilterBlock2dBil ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+    }
+    else if(diff == (int)(PixelsPerLine + 1) )
+    {
+        // ReconPtr1 is Top left...
+        if ( UseBicubic )
+            FilterBlock2d ( ReconPtr1, ReconRefPtr, PixelsPerLine, BicubicFilterSet[BicubicAlpha][ModX], BicubicFilterSet[BicubicAlpha][ModY] );
+        else
+            FilterBlock2dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock1dBil_8
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Number of rows to produce.
+ *                  UINT32 OutputWidth      : Number of values to produce per row.
+ *                  INT32 *Filter           : Array of 2 bi-linear filter taps.
+ *
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Runs a 2-tap 1-D bi-linear filter over the input block,
+ *                  horizontally or vertically, producing 8-bit output.
+ *
+ *  SPECIAL NOTES : PixelStep is the distance from one filter input to the
+ *                  next: 1 filters horizontally, the source stride filters
+ *                  vertically. Output rows are written contiguously,
+ *                  OutputWidth bytes apart.
+ *
+ ****************************************************************************/
+void FilterBlock1dBil_8
+(
+    UINT8 *SrcPtr,
+    UINT8 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    UINT32 PixelStep,
+    UINT32 OutputHeight,
+    UINT32 OutputWidth,
+    INT32 *Filter )
+{
+    UINT32 Row, Col;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            // Weight this sample and the one PixelStep further on.
+            // NOTE: Rounding doesn't improve accuracy but is
+            //       easier to implement on certain platforms.
+            INT32 Tap0 = (INT32)SrcPtr[0] * Filter[0];
+            INT32 Tap1 = (INT32)SrcPtr[PixelStep] * Filter[1];
+            OutputPtr[Col] = (UINT8)( ( Tap0 + Tap1 + (FILTER_WEIGHT/2) ) >> FILTER_SHIFT );
+            SrcPtr++;
+        }
+
+        // Skip the unread remainder of the source row; output rows are contiguous
+        SrcPtr += SrcPixelsPerLine - OutputWidth;
+        OutputPtr += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock2dBil_SecondPass_8
+ *
+ *  INPUTS        : INT32 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Number of rows to produce.
+ *                  UINT32 OutputWidth      : Number of values to produce per row.
+ *                  INT32 *Filter           : Array of 2 bi-linear filter taps.
+ *
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
+ *                  either horizontal or vertical direction to produce the
+ *                  filtered output block. Used to implement second-pass
+ *                  of 2-D separable bi-linear filter.
+ *
+ *  SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dBil_FirstPass.
+ *                  Two filter taps should sum to FILTER_WEIGHT.
+ *                  PixelStep defines whether the filter is applied
+ *                  horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ *                  The sum is rounded with FILTER_WEIGHT/2 before the shift
+ *                  by FILTER_SHIFT, as in FilterBlock2dBil_SecondPass.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_SecondPass_8
+(
+    INT32 *SrcPtr,
+    UINT8 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    UINT32 PixelStep,
+    UINT32 OutputHeight,
+    UINT32 OutputWidth,
+    INT32 *Filter
+)
+{
+    UINT32 i, j;
+    INT32 Temp;
+
+    for ( i=0; i<OutputHeight; i++ )
+    {
+        for ( j=0; j<OutputWidth; j++ )
+        {
+            // Apply bi-linear filter: two weighted taps PixelStep apart,
+            // plus the FILTER_WEIGHT/2 rounding term...
+            Temp = ((INT32)SrcPtr[0] * Filter[0]) +
+                   ((INT32)SrcPtr[PixelStep] * Filter[1]) +
+                   (FILTER_WEIGHT / 2);
+
+            OutputPtr[j] = (UINT8)(Temp >> FILTER_SHIFT);
+
+            SrcPtr++;
+        }
+
+        // Next row...
+        SrcPtr += SrcPixelsPerLine - OutputWidth;
+        OutputPtr += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock2dBil_8
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  INT32 *HFilter          : Array of 2 horizontal filter taps.
+ *                  INT32 *VFilter          : Array of 2 vertical filter taps.
+ *
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Separable 2-D bi-linear filter for an 8x8 block: a 2-tap
+ *                  horizontal pass into a temporary buffer, followed by a
+ *                  2-tap vertical pass that writes the output.
+ *                  Output is 8-bit unsigned.
+ *
+ *  SPECIAL NOTES : The horizontal pass is asked for 9 rows of 8 values, one
+ *                  row more than the output height, because the vertical
+ *                  pass combines each value with the one
+ *                  BLOCK_HEIGHT_WIDTH entries after it in the temporary
+ *                  buffer. The 8 output rows are written contiguously,
+ *                  8 bytes per row.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_8
+(
+    UINT8 *SrcPtr,
+    UINT8 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    INT32 *HFilter,
+    INT32 *VFilter
+)
+{
+    INT32 HorizPass[BLOCK_HEIGHT_WIDTH*11];  // Temporary buffer holding the horizontally filtered rows
+
+    // Horizontal pass: 9 rows of 8 values, sample step 1
+    FilterBlock2dBil_FirstPass ( SrcPtr, HorizPass, SrcPixelsPerLine, 1, 9, 8, HFilter );
+
+    // Vertical pass: 8 rows of 8 values, sample step BLOCK_HEIGHT_WIDTH
+    FilterBlock2dBil_SecondPass_8 ( HorizPass, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlockBil_8_C
+ *
+ *  INPUTS        : UINT8 *ReconPtr1     : Pointer to first 8x8 input block.
+ *                  UINT8 *ReconPtr2     : Pointer to second 8x8 input block.
+ *                  UINT32 PixelsPerLine : Stride for ReconPtr1 & ReconPtr2.
+ *                  INT32 ModX           : Fractional part of x-component of motion vector.
+ *                  INT32 ModY           : Fractional part of y-component of motion vector.
+ *
+ *  OUTPUTS       : UINT8 *ReconRefPtr   : Pointer to 8x8 filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Produces a filtered fractional pel prediction block
+ *                  using bilinear filter.
+ *
+ *  SPECIAL NOTES : ReconPtr1 & ReconPtr2 point to blocks that bracket the
+ *                  position of the fractional pixel motion vector. The
+ *                  offset between them (1, stride, stride-1 or stride+1)
+ *                  selects a 1-D horizontal, 1-D vertical or 2-D filter.
+ *                  For any other offset nothing is written to ReconRefPtr.
+ *                  ModX, ModY select the taps--see code comment.
+ *
+ ****************************************************************************/
+void FilterBlockBil_8_C
+(
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    UINT8 *ReconRefPtr,
+    UINT32 PixelsPerLine,
+    INT32 ModX,
+    INT32 ModY
+)
+{
+    int diff;
+
+    // ModX and ModY are the bottom three bits of the signed motion vector
+    // components (in 1/8th pel units). This works out to be what we want
+    // --despite the pointer swapping that goes on below.
+    // For example...
+    // if MV x-component is +ve then ModX = x%8.
+    // if MV x-component is -ve then ModX = 8+(x%8), where x%8 is in the range -7 to -1.
+
+    // Swap pointers to ensure that ReconPtr1 is "smaller than",
+    // i.e. above, left, above-right or above-left of, ReconPtr2
+    diff = (int)(ReconPtr2 - ReconPtr1);    // explicit narrowing, matching the recomputation after the swap
+
+    if ( diff<0 )
+    {
+        // ReconPtr1>ReconPtr2, so swap...
+        UINT8 *temp = ReconPtr1;
+        ReconPtr1 = ReconPtr2;
+        ReconPtr2 = temp;
+        diff = (int)(ReconPtr2-ReconPtr1);
+    }
+
+    if ( diff==1 )
+    {
+        // Fractional pixel in horizontal only...
+        FilterBlock1dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters[ModX] );
+    }
+    else if ( diff == (int)(PixelsPerLine) )
+    {
+        // Fractional pixel in vertical only...
+        FilterBlock1dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters[ModY] );
+    }
+    else if ( diff == (int)(PixelsPerLine - 1))
+    {
+        // ReconPtr1 is Top right, so the 2-D filter starts one pixel to its left...
+        FilterBlock2dBil_8 ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+    }
+    else if ( diff == (int)(PixelsPerLine + 1) )
+    {
+        // ReconPtr1 is Top left
+        FilterBlock2dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h b/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
new file mode 100644
index 00000000..40ae448a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
@@ -0,0 +1,74 @@
+/****************************************************************************
+*
+* Module Title : dct.h
+*
+* Description : DCT header file.
+*
+****************************************************************************/
+
+#ifndef __INC_DCT_H
+#define __INC_DCT_H
+
+/****************************************************************************
+* Header files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+#define COEFF_MAX 32768 // Max magnitude of DCT coefficient
+// Extra bits of precision added to the fdct that have to be stripped off during the quantize
+#define FDCT_PRECISION_BITS 1
+#define FDCT_PRECISION_NEG_ADJ ((INT16)((1<<FDCT_PRECISION_BITS)-1)) // cast covers the whole (2^bits - 1) expression
+
+
+
+
+#if 0 // AWG not required any more!!!
+/* Cos and Sin constant multipliers used during DCT and IDCT */
+extern const double C1S7;
+extern const double C2S6;
+extern const double C3S5;
+extern const double C4S4;
+extern const double C5S3;
+extern const double C6S2;
+extern const double C7S1;
+
+// DCT lookup tables and pointers
+extern INT32 * C4S4_TablePtr;
+extern INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
+
+extern INT32 * C6S2_TablePtr;
+extern INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C2S6_TablePtr;
+extern INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C1S7_TablePtr;
+extern INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C7S1_TablePtr;
+extern INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C3S5_TablePtr;
+extern INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C5S3_TablePtr;
+extern INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
+#endif
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+#ifdef COMPDLL
+// Forward Transform
+extern void fdct_slow ( INT32 *InputData, double *OutputData );
+#endif
+
+// Reverse Transform
+extern void IDctSlow( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h b/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
new file mode 100644
index 00000000..c218ac52
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
@@ -0,0 +1,11 @@
+#if !defined(_mac_specs_h)
+#define _mac_specs_h
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int vputil_hasAltivec(void);
+int vputil_cpuMhz(void);
+#if defined(__cplusplus)
+}
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h b/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
new file mode 100644
index 00000000..f87983d9
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
@@ -0,0 +1,60 @@
+/****************************************************************************
+*
+* Module Title : Reconstruct.h
+*
+* Description : Block Reconstruction module header
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.00 PGW 14/10/99 Created
+*
+*****************************************************************************
+*/
+
+#define STRICT /* Strict type checking. */
+
+#ifndef RECONSTRUCT_H
+#define RECONSTRUCT_H
+
+#include "type_aliases.h"
+
+/****************************************************************************
+* Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Types
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Data structures
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Functions
+*****************************************************************************
+*/
+
+// Scalar (no mmx) reconstruction functions
+extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+// MMx versions
+extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+// WMT versions
+extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
new file mode 100644
index 00000000..ca32aed6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
@@ -0,0 +1,388 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{F93716CE-8F89-4334-BE64-43705EF3FB70}</ProjectGuid>
+ <RootNamespace>vputil</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader />
+ <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation />
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>
+ </AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation />
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>
+ </AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\fdct.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\idctpart.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\reconstruct.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\uoptsystemdependant.c">
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="generic\vputil.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\fdctmmx.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\fdctwmt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\filtmmx.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\filtwmt.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\mmxidct.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\mmxrecon.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\uoptsystemdependant.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\vputilasm.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\wmtidct.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="win32\wmtrecon.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
new file mode 100644
index 00000000..204b2144
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="generic">
+ <UniqueIdentifier>{f7966dc8-1d55-46a4-b0e6-8584774d721d}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="win32">
+ <UniqueIdentifier>{ad0ce32e-d033-416c-813e-7a7f913ac3fa}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="generic\fdct.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\idctpart.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\reconstruct.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\uoptsystemdependant.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="generic\vputil.c">
+ <Filter>generic</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\fdctmmx.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\fdctwmt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\filtmmx.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\filtwmt.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\mmxidct.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\mmxrecon.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\uoptsystemdependant.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\vputilasm.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\wmtidct.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ <ClCompile Include="win32\wmtrecon.c">
+ <Filter>win32</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..df47f476
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj
@@ -0,0 +1,213 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 0CAF34950BB78E9F000FB06C /* vputil.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34940BB78E9F000FB06C /* vputil.c */; };
+ 0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A80BB78EDF000FB06C /* idctpart.c */; };
+ 0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A90BB78EDF000FB06C /* fdct.c */; };
+ 0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */; };
+ 0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AB0BB78EDF000FB06C /* reconstruct.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 0CAF34940BB78E9F000FB06C /* vputil.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vputil.c; path = generic/vputil.c; sourceTree = "<group>"; };
+ 0CAF34A80BB78EDF000FB06C /* idctpart.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = idctpart.c; path = generic/idctpart.c; sourceTree = "<group>"; };
+ 0CAF34A90BB78EDF000FB06C /* fdct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = fdct.c; path = generic/fdct.c; sourceTree = "<group>"; };
+ 0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = uoptsystemdependant.c; path = generic/uoptsystemdependant.c; sourceTree = "<group>"; };
+ 0CAF34AB0BB78EDF000FB06C /* reconstruct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = reconstruct.c; path = generic/reconstruct.c; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libvputil.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvputil.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* vputil */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = vputil;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 0CAF34940BB78E9F000FB06C /* vputil.c */,
+ 0CAF34A80BB78EDF000FB06C /* idctpart.c */,
+ 0CAF34A90BB78EDF000FB06C /* fdct.c */,
+ 0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */,
+ 0CAF34AB0BB78EDF000FB06C /* reconstruct.c */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libvputil.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* vputil */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = vputil;
+ productName = vputil;
+ productReference = D2AAC046055464E500DB518D /* libvputil.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */;
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* vputil */;
+ projectDirPath = "";
+ targets = (
+ D2AAC045055464E500DB518D /* vputil */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 0CAF34950BB78E9F000FB06C /* vputil.c in Sources */,
+ 0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */,
+ 0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */,
+ 0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */,
+ 0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1DEB91EC08733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vputil;
+ ZERO_LINK = YES;
+ };
+ name = Debug;
+ };
+ 1DEB91ED08733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ARCHS = (
+ ppc,
+ i386,
+ );
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = vputil;
+ };
+ name = Release;
+ };
+ 1DEB91F008733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+ };
+ name = Debug;
+ };
+ 1DEB91F108733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91EC08733DB70010E9CD /* Debug */,
+ 1DEB91ED08733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91F008733DB70010E9CD /* Debug */,
+ 1DEB91F108733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
new file mode 100644
index 00000000..affb8497
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
@@ -0,0 +1,1002 @@
+;***********************************************************************
+; File: fdct_m.asm
+;
+; Description:
+; This function performs a 2-D forward DCT on an 8x8 block
+;
+;
+; Input: Pointers to input source data buffer and destination
+; buffer.
+;
+; Note: none
+;
+; Special Notes: We try to do the truncation right to match the result
+; of the c version.
+;
+;************************************************************************
+; Revision History:
+;
+; 1.00 YWX 08/05/00 Configuration Baseline
+;
+
+
+ .586
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .MMX
+;
+; macro functions
+;
+Fdct MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+ ; execute stage 1 of forward DCT
+ ; ip0..ip7 are read with movq and then overwritten with the results (in place).
+ ; TID07/TID34/TIRX/TIRY/TIC1/TIC2 are scratch locations defined outside this macro.
+ movq mm0,ip0 ; mm0 = ip0
+ movq mm1,ip1 ; mm1 = ip1
+ movq mm2,ip3 ; mm2 = ip3
+ movq mm3,ip5 ; mm3 = ip5
+ movq mm4,mm1 ; mm4 = ip1
+ movq mm5,mm3 ; mm5 = ip5
+ movq mm6,mm0 ; mm6 = ip0
+ movq mm7,mm2 ; mm7 = ip3
+
+ paddsw mm0,ip7 ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,ip2 ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,ip4 ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,ip6 ; mm3 = ip5 + ip6 = is56
+ psubsw mm6,ip7 ; mm6 = ip0 - ip7 = id07
+ psubsw mm7,ip4 ; mm7 = ip3 - ip4 = id34
+ psubsw mm4,ip2 ; mm4 = ip1 - ip2 = id12
+ psubsw mm5,ip6 ; mm5 = ip5 - ip6 = id56
+
+ movq TID07,mm6 ; save id07
+ movq TID34,mm7 ; save id34
+
+ ; free = mm6, mm7
+
+ movq mm6,mm4 ; mm6 = id12
+ psubsw mm4,mm5 ; mm4 = id12 - id56 = irot_input_x
+
+ movq TIRX,mm4 ; save irot_input_x
+ paddsw mm6,mm5 ; mm6 = id12 + id56
+ movq mm5,mm6 ; mm5 = copy of (id12 + id56)
+
+ pmulhw mm6,xC4S4 ; (xC4S4 * (id12 + id56)) - (id12 + id56)
+ paddw mm6,mm5 ; (xC4S4 * (id12 + id56))
+ psrlw mm5,15 ; mm5 = sign bit of (id12 + id56) in each word
+
+ paddw mm6,mm5; ; add sign bit (truncation fix-up); mm6 = icommon_product2
+
+
+ ; free = mm4 ,mm5, mm7
+
+ movq mm4,mm0 ; mm4 = is07
+ psubsw mm0,mm2 ; mm0 = is07 - is34 = irot_input_y
+
+ movq TIRY,mm0 ; save irot_input_y
+
+ ; free = mm0, mm5, mm7
+
+ movq mm0,mm1 ; mm0 = is12
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+
+ movq TIC2,mm6 ; save icommon_product2
+ movq mm7, mm1 ; mm7 = copy of (is12 - is56)
+
+ pmulhw mm1,xC4S4 ; mm1 = (xC4S4 * (is12 - is56)) - (is12 - is56)
+ paddw mm1, mm7 ; mm1 = (xC4S4 * (is12 - is56))
+ psrlw mm7, 15 ; mm7 = sign bit of (is12 - is56) in each word
+
+ paddw mm1, mm7 ; add sign bit (truncation fix-up)
+ movq TIC1,mm1 ; save icommon_product1
+
+ ; free = mm1, mm5, mm6, mm7
+
+ paddsw mm4,mm2 ; mm4 = is07 + is34 = is0734
+ paddsw mm0,mm3 ; mm0 = is12 + is56 = is1256
+ movq mm1,mm4 ; mm1 = is07 + is34 = is0734
+
+ paddsw mm4,mm0 ; mm4 = is0734 + is1256
+ psubsw mm1,mm0 ; mm1 = is0734 - is1256
+
+ movq mm7,mm4 ; mm7 = copy of (is0734 + is1256)
+ movq mm6,mm1 ; mm6 = copy of (is0734 - is1256)
+
+ pmulhw mm4,xC4S4 ; mm4 = (xC4S4 * (is0734 + is1256)) - (is0734 + is1256)
+ pmulhw mm1,xC4S4 ; mm1 = (xC4S4 * (is0734 - is1256)) - (is0734 - is1256)
+ paddw mm4,mm7 ; mm4 = (xC4S4 * (is0734 + is1256))
+ paddw mm1,mm6 ; mm1 = (xC4S4 * (is0734 - is1256))
+
+ psrlw mm7, 15 ; mm7 = sign bit of (is0734 + is1256) in each word
+ psrlw mm6, 15 ; mm6 = sign bit of (is0734 - is1256) in each word
+
+ paddw mm4, mm7 ; add sign bit (truncation fix-up)
+ movq ip0,mm4 ; write out ip0
+
+ paddw mm1, mm6 ; add sign bit (truncation fix-up)
+ movq ip4,mm1 ; write out ip4
+
+ ; free = mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
+
+ movq mm0,TIRY ; mm0 = irot_input_y
+ movq mm1,TIRX ; mm1 = irot_input_x
+
+ movq mm2,mm0 ; mm2 = irot_input_y
+ movq mm3,mm1 ; mm3 = irot_input_x
+
+ movq mm4,mm0 ; mm4 = irot_input_y (becomes its sign bit below)
+ movq mm5,mm1 ; mm5 = irot_input_x (becomes its sign bit below)
+
+ movq mm6,xC2S6 ; mm6 = xC2S6
+ movq mm7,xC6s2 ; mm7 = xC6S2 (spelled "xC6s2" here - NOTE(review): relies on case-insensitive symbols, confirm)
+
+ pmulhw mm0,mm6 ; mm0 = xC2S6*irot_input_y - irot_input_y
+ pmulhw mm3,mm6 ; mm3 = xC2S6*irot_input_x - irot_input_x
+ psrlw mm4, 15 ; mm4 = sign bit of irot_input_y in each word
+ psrlw mm5, 15 ; mm5 = sign bit of irot_input_x in each word
+ paddw mm0,mm2 ; mm0 = xC2S6*irot_input_y
+ paddw mm3,mm1 ; mm3 = xC2S6*irot_input_x
+ paddw mm0,mm4 ; add sign bit of irot_input_y
+ paddw mm3,mm5; add sign bit of irot_input_x
+
+ pmulhw mm1,mm7 ; mm1 = xC6S2*irot_input_x
+ pmulhw mm2,mm7 ; mm2 = xC6S2*irot_input_y
+
+ paddw mm1,mm5 ; add sign bit of irot_input_x
+ paddw mm2,mm4 ; add sign bit of irot_input_y
+
+ paddsw mm0,mm1 ; mm0 = xC2S6(irot_input_y * 2) + xC6S2(irot_input_x * 2) = ip2
+ psubsw mm2,mm3 ; mm2 = xC6S2(irot_input_y * 2) - xC2S6(irot_input_x * 2) = ip6
+
+ movq ip2,mm0 ; write out ip2
+ movq ip6,mm2 ; write out ip6
+
+ ;
+
+ movq mm6,TIC1 ; mm6 = icommon_product1
+ movq mm4,TID07 ; mm4 = id07
+
+ movq mm5,TID34 ; mm5 = id34
+ movq mm7,TIC2 ; mm7 = icommon_product2
+
+ movq mm1,mm6 ; mm1 = icommon_product1
+ movq mm3,mm7 ; mm3 = icommon_product2
+
+ pxor mm0,mm0 ; clear mm0
+ paddsw mm7,mm5 ; mm7 = icommon_product2 + id34
+
+ paddsw mm6,mm4 ; mm6 = icommon_product1 + id07 = irot_input_x
+ psubsw mm0,mm7 ; mm0 = -(icommon_product2 + id34) = irot_input_y
+
+
+ ; free = mm2, mm7, mm4, mm5;
+
+ movq mm2,mm6 ; mm2 = irot_input_x
+ movq mm7,mm0 ; mm7 = irot_input_y
+
+ movq mm4,mm6; mm4 = irot_input_x (becomes its sign bit below)
+ movq mm5,mm0; mm5 = irot_input_y (becomes its sign bit below)
+
+ pmulhw mm6,xC1S7 ; mm6 = xC1S7*irot_input_x -irot_input_x
+ psrlw mm4,15; mm4 = sign bit of irot_input_x in each word
+
+ psrlw mm5,15; mm5 = sign bit of irot_input_y in each word
+ pmulhw mm7,xC1S7 ; mm7 = xC1S7*irot_input_y -irot_input_y
+
+ paddw mm6,mm2 ; mm6 = xC1S7*irot_input_x
+ paddw mm7,mm0 ; mm7 = xC1S7*irot_input_y
+
+ pmulhw mm0,xC7S1 ; mm0 = xC7S1*irot_input_y
+ paddw mm6,mm4 ; add sign bit of irot_input_x
+
+ paddw mm7,mm5 ; add sign bit of irot_input_y
+ pmulhw mm2,xC7S1 ; mm2 = xC7S1*irot_input_x
+
+ paddw mm0,mm5 ; add sign bit of irot_input_y
+ paddw mm2,mm4 ; add sign bit of irot_input_x
+
+ psubsw mm6,mm0 ; mm6 = xC1S7*irot_input_x - xC7S1*irot_input_y = ip1
+ paddsw mm2,mm7 ; mm2 = xC7S1*irot_input_x + xC1S7*irot_input_y = ip7
+
+ movq ip1,mm6 ; write out ip1
+
+ movq mm4,TID07 ; mm4 = id07
+ movq mm5,TID34 ; mm5 = id34
+
+ movq ip7,mm2 ; write out ip7
+
+
+ psubsw mm4,mm1 ; mm4 = id07 - icommon_product1 = irot_input_x
+ psubsw mm5,mm3 ; mm5 = id34 - icommon_product2 = irot_input_y
+
+ movq mm6,mm4 ; mm6 = irot_input_x
+ movq mm0,mm4 ; mm0 = irot_input_x
+
+ movq mm7,mm5 ; mm7 = irot_input_y
+ movq mm2,mm5 ; mm2 = irot_input_y
+
+ movq mm1,xC3S5 ; mm1 = xC3S5
+ movq mm3,xC5S3 ; mm3 = xC5S3
+
+ pmulhw mm4,mm1 ; mm4 = xC3S5*irot_input_x - irot_input_x
+ pmulhw mm6,mm3 ; mm6 = xC5S3*irot_input_x - irot_input_x
+ pmulhw mm5,mm3 ; mm5 = xC5S3*irot_input_y - irot_input_y
+ pmulhw mm7,mm1 ; mm7 = xC3S5*irot_input_y - irot_input_y
+
+ paddw mm4, mm0 ; mm4 = xC3S5*irot_input_x
+ paddw mm6, mm0 ; mm6 = xC5S3*irot_input_x
+ paddw mm5, mm2 ; mm5 = xC5S3*irot_input_y
+ paddw mm7, mm2 ; mm7 = xC3S5*irot_input_y
+
+
+ psrlw mm0, 15 ; mm0 = sign bit of irot_input_x in each word
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_y in each word
+
+ paddw mm4, mm0 ; add sign bit of irot_input_x
+ paddw mm6, mm0 ; add sign bit of irot_input_x
+ paddw mm5, mm2 ; add sign bit of irot_input_y
+ paddw mm7, mm2 ; add sign bit of irot_input_y
+
+ psubsw mm4,mm5 ; mm4 = xC3S5*irot_input_x - xC5S3*irot_input_y = ip3
+ paddsw mm6,mm7 ; mm6 = xC5S3*irot_input_x + xC3S5*irot_input_y = ip5
+
+ movq ip3,mm4 ; write out ip3
+ movq ip5,mm6 ; write out ip5
+
+
+ENDM
+
+Fdct_new MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+ ; execute stage 1 of forward DCT
+ ; Same operand list as Fdct; results are written back to ip0..ip7 (in place).
+ ; TIRY (defined outside this macro) is the only memory scratch location used.
+ movq mm0,ip0 ; mm0 = ip0
+ movq mm1,ip1 ; mm1 = ip1
+ movq mm2,ip3 ; mm2 = ip3
+ movq mm3,ip5 ; mm3 = ip5
+ movq mm4,ip0 ; mm4 = ip0
+ movq mm5,ip1 ; mm5 = ip1
+ movq mm6,ip3 ; mm6 = ip3
+ movq mm7,ip5 ; mm7 = ip5
+
+
+ paddsw mm0,ip7 ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,ip2 ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,ip4 ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,ip6 ; mm3 = ip5 + ip6 = is56
+ psubsw mm4,ip7 ; mm4 = ip0 - ip7 = id07
+ psubsw mm5,ip2 ; mm5 = ip1 - ip2 = id12
+
+ psubsw mm0,mm2 ; mm0 = is07 - is34
+
+ paddsw mm2,mm2 ; mm2 = 2 * is34
+
+ psubsw mm6,ip4 ; mm6 = ip3 - ip4 = id34
+
+ paddsw mm2,mm0 ; mm2 = is07 + is34 = is0734
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+ movq TIRY,mm0 ; Save is07 - is34 to free mm0;
+ paddsw mm3,mm3 ; mm3 = 2 * is56
+ paddsw mm3,mm1 ; mm3 = is12 + is56 = is1256
+
+ psubsw mm7,ip6 ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+ psubsw mm5,mm7 ; mm5 = id12 - id56
+ paddsw mm7,mm7 ; mm7 = 2 * id56
+ paddsw mm7,mm5 ; mm7 = id12 + id56
+
+ ; mm4 = id07
+
+ ; mm6 = id34
+;---------------------------------------------------------------------
+; ip[0], ip[4]
+; mm0 Free
+; mm2 is0734
+; mm3 is1256
+
+
+ psubsw mm2,mm3 ; mm2 = is0734 - is1256
+ paddsw mm3,mm3 ; mm3 = 2 * is1256
+
+ movq mm0,mm2 ; make a copy
+ paddsw mm3,mm2 ; mm3 = is0734 + is1256
+
+ pmulhw mm0,xC4S4 ; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+ paddw mm0,mm2 ; mm0 = xC4S4 * ( is0734 - is1256 )
+ psrlw mm2,15 ; mm2 = sign bit of ( is0734 - is1256 ) in each word
+ paddw mm0,mm2 ; Truncate mm0, now it is op[4]
+
+ movq mm2,mm3 ; mm2 = copy of ( is0734 + is1256 ), becomes its sign bit below
+ movq ip4,mm0 ; save ip4, now mm0,mm2 are free
+
+ movq mm0,mm3 ; mm0 = copy of ( is0734 + is1256 )
+ pmulhw mm3,xC4S4 ; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+ psrlw mm2,15 ; mm2 = sign bit of ( is0734 + is1256 ) in each word
+ paddw mm3,mm0 ; mm3 = xC4S4 * ( is0734 +is1256 )
+ paddw mm3,mm2 ; Truncate mm3, now it is op[0]
+
+ movq ip0,mm3 ; save ip0
+
+;----------------------------------------------------------------------
+; ip[2], ip[6]
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 id12 - id56 irot_input_x
+; TIRY is07 - is34 irot_input_y
+
+ movq mm3,TIRY ; mm3 = irot_input_y
+ pmulhw mm3,xC2S6 ; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+ movq mm2,TIRY ; mm2 = irot_input_y (becomes its sign bit below)
+ movq mm0,mm2 ; mm0 = irot_input_y
+
+ psrlw mm2,15 ; mm2 = sign bit of irot_input_y in each word
+ paddw mm3,mm0 ; mm3 = xC2S6 * irot_input_y
+
+ paddw mm3,mm2 ; Truncated
+ movq mm0, mm5; ; mm0 = irot_input_x
+
+
+ movq mm2, mm5; mm2 = irot_input_x (becomes its sign bit below)
+ pmulhw mm0, xC6S2 ; mm0 = xC6S2 * irot_input_x
+
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x in each word
+ paddw mm0, mm2 ; Truncated
+
+ paddsw mm3, mm0 ; ip[2]
+ movq ip2, mm3 ; Save ip2
+
+
+ movq mm0, mm5 ; mm0 = irot_input_x
+ movq mm2, mm5 ; mm2 = irot_input_x (becomes its sign bit below)
+
+ pmulhw mm5, xC2S6 ; mm5 = xC2S6 * irot_input_x - irot_input_x
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x in each word
+
+ movq mm3, TIRY ; mm3 = irot_input_y
+ paddw mm5, mm0 ; mm5 = xC2S6 * irot_input_x
+
+ paddw mm5, mm2 ; Truncated
+ movq mm2, mm3 ; mm2 = irot_input_y (becomes its sign bit below)
+
+ pmulhw mm3, xC6S2 ; mm3 = xC6S2 * irot_input_y
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_y in each word
+
+ paddw mm3, mm2 ; Truncated
+ psubsw mm3, mm5 ; mm3 = xC6S2 * irot_input_y - xC2S6 * irot_input_x = ip[6]
+
+ movq ip6, mm3 ; Save ip6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 is12 - is56
+; mm7 id12 + id56
+
+ movq mm0, xC4S4 ; mm0 = xC4S4
+ movq mm2, mm1 ; mm2 = is12 - is56 (becomes its sign bit below)
+ movq mm3, mm1 ; mm3 = is12 - is56
+
+ pmulhw mm1, mm0 ; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+ psrlw mm2, 15 ; mm2 = sign bit of ( is12 - is56 ) in each word
+
+ paddw mm1, mm3 ; mm1 = xC4S4 * ( is12 - is56 )
+ paddw mm1, mm2 ; Truncate mm1, now it is icommon_product1
+
+ movq mm2, mm7 ; mm2 = id12 + id56 (becomes its sign bit below)
+ movq mm3, mm7 ; mm3 = id12 + id56
+
+ pmulhw mm7, mm0 ; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+ psrlw mm2, 15 ; mm2 = sign bit of ( id12 + id56 ) in each word
+
+ paddw mm7, mm3 ; mm7 = xC4S4 * ( id12 + id56 )
+ paddw mm7, mm2 ; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 icommon_product1
+; mm7 icommon_product2
+; mm4 id07
+; mm6 id34
+
+
+ pxor mm0, mm0 ; Clear mm0
+ psubsw mm0, mm6 ; mm0 = - id34
+
+ psubsw mm0, mm7 ; mm0 = - ( id34 + icommon_product2 )
+ paddsw mm6, mm6 ; mm6 = 2 * id34
+ paddsw mm6, mm0 ; mm6 = id34 - icommon_product2
+
+ psubsw mm4, mm1 ; mm4 = id07 - icommon_product1
+ paddsw mm1, mm1 ; mm1 = 2 * icommon_product1
+ paddsw mm1, mm4 ; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; ip1, ip7
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 irot_input_x
+; mm0 irot_input_y
+
+ movq mm7, xC1S7 ; mm7 = xC1S7
+ movq mm2, mm1 ; mm2 = irot_input_x (becomes its sign bit below)
+
+ movq mm3, mm1; mm3 = irot_input_x
+ pmulhw mm1, mm7 ; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+ movq mm7, xC7S1 ; mm7 = xC7S1
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x in each word
+
+ paddw mm1, mm3 ; mm1 = xC1S7 * irot_input_x
+ paddw mm1, mm2 ; Truncated
+
+ pmulhw mm3, mm7 ; mm3 = xC7S1 * irot_input_x
+ paddw mm3, mm2 ; Truncated
+
+ movq mm5, mm0 ; mm5 = irot_input_y
+ movq mm2, mm0 ; mm2 = irot_input_y (becomes its sign bit below)
+
+ movq mm7, xC1S7 ; mm7 = xC1S7
+ pmulhw mm0, mm7 ; mm0 = xC1S7 * irot_input_y - irot_input_y
+
+ movq mm7, xC7S1 ; mm7 = xC7S1
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_y in each word
+
+ paddw mm0, mm5 ; mm0 = xC1S7 * irot_input_y
+ paddw mm0, mm2 ; Truncated
+
+ pmulhw mm5, mm7 ; mm5 = xC7S1 * irot_input_y
+ paddw mm5, mm2 ; Truncated
+
+ psubsw mm1, mm5 ; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1
+ paddsw mm3, mm0 ; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7
+
+ movq ip1, mm1 ; write out ip1
+ movq ip7, mm3 ; write out ip7
+;-----------------------------------------------------------------------------
+; ip3, ip5
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 Free
+; mm0 Free
+; mm4 id07 - icommon_product1 = irot_input_x
+; mm6 id34 - icommon_product2 = irot_input_y
+
+ movq mm0, xC3S5 ; mm0 = xC3S5
+ movq mm1, xC5S3 ; mm1 = xC5S3
+
+ movq mm5, mm6 ; mm5 = irot_input_y (becomes its sign bit below)
+ movq mm7, mm6 ; mm7 = irot_input_y
+
+ movq mm2, mm4 ; mm2 = irot_input_x (becomes its sign bit below)
+ movq mm3, mm4 ; mm3 = irot_input_x
+
+ pmulhw mm4, mm0 ; mm4 = xC3S5 * irot_input_x - irot_input_x
+ pmulhw mm6, mm1 ; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x in each word
+ psrlw mm5, 15 ; mm5 = sign bit of irot_input_y in each word
+
+ paddw mm4, mm3 ; mm4 = xC3S5 * irot_input_x
+ paddw mm6, mm7 ; mm6 = xC5S3 * irot_input_y
+
+ paddw mm4, mm2 ; Truncated
+ paddw mm6, mm5 ; Truncated
+
+ psubsw mm4, mm6 ; ip3
+ movq ip3, mm4 ; write out ip3
+
+ movq mm4, mm3 ; mm4 = irot_input_x
+ movq mm6, mm7 ; mm6 = irot_input_y
+
+ pmulhw mm3, mm1 ; mm3 = xC5S3 * irot_input_x - irot_input_x
+ pmulhw mm7, mm0 ; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+ paddw mm4, mm2 ; mm4 = irot_input_x + its sign bit
+ paddw mm6, mm5 ; mm6 = irot_input_y + its sign bit
+
+ paddw mm3, mm4 ; mm3 = xC5S3 * irot_input_x
+ paddw mm7, mm6 ; mm7 = xC3S5 * irot_input_y
+
+ paddw mm3, mm7 ; ip5 (wrapping paddw; Fdct uses saturating paddsw for ip5 - NOTE(review): confirm intended)
+ movq ip5, mm3 ; write out ip5
+
+ENDM
+; Transpose: op0..op3 = 4x4 word transpose of ip0..ip3, op4..op7 = 4x4 word transpose of ip4..ip7 (clobbers mm0-mm7).
+Transpose MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7,
+ op0, op1, op2, op3, op4, op5, op6, op7
+ movq mm0,ip0 ; mm0 = a3 a2 a1 a0 (words listed high to low)
+ movq mm4,ip4 ; mm4 = e3 e2 e1 e0
+ movq mm1,ip1 ; mm1 = b3 b2 b1 b0
+ movq mm5,ip5 ; mm5 = f3 f2 f1 f0
+ movq mm2,ip2 ; mm2 = c3 c2 c1 c0
+ movq mm6,ip6 ; mm6 = g3 g2 g1 g0
+ movq mm3,ip3 ; mm3 = d3 d2 d1 d0
+ movq op1,mm1 ; spill b into op1; this store precedes the ip7 load, so op1 must not overlap ip7
+ movq mm7,ip7 ; mm7 = h3 h2 h1 h0
+ ; op0 and op1 double as spill slots for a and b until their final values are stored.
+ ; First tile: transpose e, f, g, h (ip4..ip7) into op4..op7.
+ movq mm1, mm4 ; mm1 = e3 e2 e1 e0
+ punpcklwd mm4, mm5 ; mm4 = f1 e1 f0 e0
+ movq op0, mm0 ; spill a3 a2 a1 a0 into op0 (all inputs are loaded by now)
+ punpckhwd mm1, mm5 ; mm1 = f3 e3 f2 e2
+ movq mm0, mm6 ; mm0 = g3 g2 g1 g0
+ punpcklwd mm6, mm7 ; mm6 = h1 g1 h0 g0
+ movq mm5, mm4 ; mm5 = f1 e1 f0 e0
+ punpckldq mm4, mm6 ; mm4 = h0 g0 f0 e0 -> op4
+ punpckhdq mm5, mm6 ; mm5 = h1 g1 f1 e1 -> op5
+ movq mm6, mm1 ; mm6 = f3 e3 f2 e2
+ movq op4, mm4 ; op4 = h0 g0 f0 e0
+ punpckhwd mm0, mm7 ; mm0 = h3 g3 h2 g2
+ movq op5, mm5 ; op5 = h1 g1 f1 e1
+ punpckhdq mm6, mm0 ; mm6 = h3 g3 f3 e3 -> op7
+ movq mm4, op0 ; reload a3 a2 a1 a0 from its spill slot; second tile (a, b, c, d) starts here
+ punpckldq mm1, mm0 ; mm1 = h2 g2 f2 e2 -> op6
+ movq mm5, op1 ; reload b3 b2 b1 b0 from its spill slot
+ movq mm0, mm4 ; mm0 = a3 a2 a1 a0
+ movq op7, mm6 ; op7 = h3 g3 f3 e3
+ punpcklwd mm0, mm5 ; mm0 = b1 a1 b0 a0
+ movq op6, mm1 ; op6 = h2 g2 f2 e2
+ punpckhwd mm4, mm5 ; mm4 = b3 a3 b2 a2
+ movq mm5, mm2 ; mm5 = c3 c2 c1 c0
+ punpcklwd mm2, mm3 ; mm2 = d1 c1 d0 c0
+ movq mm1, mm0 ; mm1 = b1 a1 b0 a0
+ punpckldq mm0, mm2 ; mm0 = d0 c0 b0 a0 -> op0
+ punpckhdq mm1, mm2 ; mm1 = d1 c1 b1 a1 -> op1
+ movq mm2, mm4 ; mm2 = b3 a3 b2 a2
+ movq op0, mm0 ; op0 = d0 c0 b0 a0 (final value replaces the spill)
+ punpckhwd mm5, mm3 ; mm5 = d3 c3 d2 c2
+ movq op1, mm1 ; op1 = d1 c1 b1 a1 (final value replaces the spill)
+ punpckhdq mm4, mm5 ; mm4 = d3 c3 b3 a3 -> op3
+ punpckldq mm2, mm5 ; mm2 = d2 c2 b2 a2 -> op2
+ movq op3, mm4 ; op3 = d3 c3 b3 a3
+ movq op2, mm2 ; op2 = d2 c2 b2 a2
+ENDM
+
+; Stack layout seen through [esp] once a routine's prologue has pushed six registers.
+fdctParams STRUC
+ dd 6 dup (?) ; slots of the six registers pushed by the prologue (24 bytes)
+ dd ? ; return address, at [esp+24]
+ InputPtr dd ? ; first argument, at [esp+28]
+ OutputPtr dd ? ; second argument, at [esp+32]
+fdctParams ENDS
+; The 6 above must match the number of pushes in every prologue that uses this structure.
+
+
+
+ .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
+; xCkS(8-k) holds round(65536 * cos(k*pi/16)) replicated into all four 16-bit lanes.
+ ALIGN 32
+; pmulhw is signed: entries >= 8000h act as (c - 65536), so their users add the multiplicand back.
+xC1S7 QWORD 0fb15fb15fb15fb15h ; 64277 = cos(1*pi/16), multiplicand added back after pmulhw
+xC2S6 QWORD 0ec83ec83ec83ec83h ; 60547 = cos(2*pi/16), multiplicand added back after pmulhw
+xC3S5 QWORD 0d4dbd4dbd4dbd4dbh ; 54491 = cos(3*pi/16), multiplicand added back after pmulhw
+xC4S4 QWORD 0b505b505b505b505h ; 46341 = cos(4*pi/16), multiplicand added back after pmulhw
+xC5S3 QWORD 08e3a8e3a8e3a8e3ah ; 36410 = cos(5*pi/16), multiplicand added back after pmulhw
+xC6S2 QWORD 061f861f861f861f8h ; 25080 = cos(6*pi/16), positive as a signed word: no add-back
+xC7S1 QWORD 031f131f131f131f1h ; 12785 = cos(7*pi/16), positive as a signed word: no add-back
+TIRX QWORD 00000000000000000h ; scratch slot; no use visible in FDct1D4Mmx - presumably for Fdct_new, confirm there
+TIRY QWORD 00000000000000000h ; scratch: FDct1D4Mmx parks is07 - is34 here (module-global, shared by all callers)
+TIC1 QWORD 00000000000000000h ; scratch slot; no use visible in FDct1D4Mmx - presumably for Fdct_new, confirm there
+TIC2 QWORD 00000000000000000h ; scratch slot; no use visible in FDct1D4Mmx - presumably for Fdct_new, confirm there
+TID07 QWORD 00000000000000000h ; scratch slot; no use visible in FDct1D4Mmx - presumably for Fdct_new, confirm there
+TID34 QWORD 00000000000000000h ; scratch slot; no use visible in FDct1D4Mmx - presumably for Fdct_new, confirm there
+
+; data goes here
+
+ .CODE
+
+NAME fdct
+
+PUBLIC fdct_MMX_
+PUBLIC _fdct_MMX
+
+; includes go here
+
+
+;------------------------------------------------
+; local vars: none (LOCAL_SPACE is not referenced by the code below)
+LOCAL_SPACE EQU 0
+
+
+;------------------------------------------------
+; void fdct_MMX ( INT16 * InputData, INT16 * OutputData )
+; Runs four Transpose + Fdct_new passes over a 128-byte block (8 rows of 16 bytes); arguments stay on the stack (plain ret).
+fdct_MMX_:
+_fdct_MMX:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+; The six pushes above are what fdctParams assumes; keep both in step when changing either.
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = Free MM4 = Free
+; ECX = Free MM5 = Free
+; EDX = Free MM6 = Free
+; EAX = Free MM7 = Free
+; Only EAX (input pointer) and EDX (output pointer) are used as general registers below.
+
+ mov eax,(fdctParams PTR [esp]).InputPtr ; eax = InputData (read from the stack above the saved registers)
+ mov edx,(fdctParams PTR [esp]).OutputPtr ; edx = OutputData
+
+ ; Input data is an 8x8 block of 16-bit values. Each Transpose call takes four 16-byte rows
+ ; as two 4x4 word tiles; Fdct_new (defined earlier in this file) then works on the eight
+ ; transposed qwords and, in the part of the macro seen here, stores results back into its operands.
+ ; Pass 1: rows 0-3 of the input (bytes 0..63) are transposed into the output buffer.
+
+ Transpose [eax], [eax+16], [eax+32], [eax+48], [eax+8], [eax+24], [eax+40], [eax+56], [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+ Fdct_new [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+ ; Pass 2: rows 4-7 of the input (bytes 64..127), same operand pattern shifted by 64 bytes.
+ Transpose [eax+64], [eax+80], [eax+96], [eax+112], [eax+72], [eax+88], [eax+104], [eax+120], [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+ Fdct_new [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+ ; Pass 3: in place on the output, using the low qword of each of the eight rows.
+ Transpose [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112], [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+ Fdct_new [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+ ; Pass 4: in place on the output, using the high qword of each of the eight rows.
+ Transpose [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120], [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+ Fdct_new [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+
+
+theExit:
+
+ emms ; leave MMX state so the caller can use x87 floating point again
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+
+ ret
+
+
+NAME FDct1D4Mmx
+
+PUBLIC FDct1D4Mmx_
+PUBLIC _FDct1D4Mmx
+
+; includes go here
+
+
+;------------------------------------------------
+; local vars: none (LOCAL_SPACE is not referenced by the code below)
+LOCAL_SPACE EQU 0
+
+;------------------------------------------------
+; void FDct1D4Mmx ( INT16 * InputData, INT16 * OutputData )
+; One 1-D forward DCT over four 16-bit lanes: ip[k] is the qword at [InputData + 8*k], op[k] goes to [OutputData + 8*k].
+FDct1D4Mmx_:
+_FDct1D4Mmx:
+ push esi
+ push edi
+ push ebp
+ push ebx
+ push ecx
+ push edx
+
+; The six pushes above are what fdctParams assumes; keep both in step when changing either.
+; ESP = Stack Pointer MM0 = Free
+; ESI = Free MM1 = Free
+; EDI = Free MM2 = Free
+; EBP = Free MM3 = Free
+; EBX = Free MM4 = Free
+; ECX = Free MM5 = Free
+; EDX = Free MM6 = Free
+; EAX = Free MM7 = Free
+; Scratch: the module-global TIRY holds is07 - is34, so concurrent calls would share that slot.
+
+ mov eax,(fdctParams PTR [esp]).InputPtr ; eax = InputData (read from the stack above the saved registers)
+ mov edx,(fdctParams PTR [esp]).OutputPtr ; edx = OutputData
+
+
+ movq mm0,[eax] ; mm0 = ip0
+ movq mm1,[eax + 8] ; mm1 = ip1
+ movq mm2,[eax + 24] ; mm2 = ip3
+ movq mm3,[eax + 40] ; mm3 = ip5
+ movq mm4,[eax] ; mm4 = ip0 (second copy, for the differences)
+ movq mm5,[eax + 8] ; mm5 = ip1 (second copy)
+ movq mm6,[eax + 24] ; mm6 = ip3 (second copy)
+ movq mm7,[eax + 40] ; mm7 = ip5 (second copy)
+
+
+ paddsw mm0,[eax + 56] ; mm0 = ip0 + ip7 = is07
+ paddsw mm1,[eax + 16] ; mm1 = ip1 + ip2 = is12
+ paddsw mm2,[eax + 32] ; mm2 = ip3 + ip4 = is34
+ paddsw mm3,[eax + 48] ; mm3 = ip5 + ip6 = is56
+ psubsw mm4,[eax + 56] ; mm4 = ip0 - ip7 = id07
+ psubsw mm5,[eax + 16] ; mm5 = ip1 - ip2 = id12
+
+ psubsw mm0,mm2 ; mm0 = is07 - is34
+
+ paddsw mm2,mm2 ; mm2 = 2 * is34
+
+ psubsw mm6,[eax + 32] ; mm6 = ip3 - ip4 = id34
+
+ paddsw mm2,mm0 ; mm2 = is07 + is34 = is0734
+ psubsw mm1,mm3 ; mm1 = is12 - is56
+ movq TIRY,mm0 ; park is07 - is34 in TIRY to free mm0
+ paddsw mm3,mm3 ; mm3 = 2 * is56
+ paddsw mm3,mm1 ; mm3 = is12 + is56 = is1256
+
+ psubsw mm7,[eax + 48] ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+; Difference terms feeding the odd outputs and op[2]/op[6]
+
+ psubsw mm5,mm7 ; mm5 = id12 - id56
+ paddsw mm7,mm7 ; mm7 = 2 * id56
+ paddsw mm7,mm5 ; mm7 = id12 + id56
+
+ ; mm4 = id07
+
+ ; mm6 = id34
+;---------------------------------------------------------------------
+; op[0], op[4]
+; mm0 Free
+; mm2 is0734
+; mm3 is1256
+
+
+ psubsw mm2,mm3 ; mm2 = is0734 - is1256
+ paddsw mm3,mm3 ; mm3 = 2 * is1256
+
+ movq mm0,mm2 ; mm0 = is0734 - is1256 (copy to multiply)
+ paddsw mm3,mm2 ; mm3 = is0734 + is1256
+
+ pmulhw mm0,xC4S4 ; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+ paddw mm0,mm2 ; mm0 = xC4S4 * ( is0734 - is1256 )
+ psrlw mm2,15 ; mm2 = sign bit of the multiplicand (1 when negative)
+ paddw mm0,mm2 ; add 1 for negative inputs (compensates pmulhw rounding down); mm0 = op[4]
+
+ movq mm2,mm3 ; mm2 = is0734 + is1256 (reduced to its sign bit below)
+ movq [edx + 32],mm0 ; store op[4]; mm0 is free again
+
+ movq mm0,mm3 ; mm0 = is0734 + is1256 (kept for the add-back)
+ pmulhw mm3,xC4S4 ; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+ psrlw mm2,15 ; mm2 = sign bit of the multiplicand
+ paddw mm3,mm0 ; mm3 = xC4S4 * ( is0734 +is1256 )
+ paddw mm3,mm2 ; add 1 for negative inputs; mm3 = op[0]
+
+ movq [edx],mm3 ; store op[0]
+
+;----------------------------------------------------------------------
+; op[2], op[6]
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 id12 - id56 irot_input_x
+; TIRY is07 - is34 irot_input_y
+
+ movq mm3,TIRY ; mm3 = irot_input_y
+ pmulhw mm3,xC2S6 ; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+ movq mm2,TIRY ; mm2 = irot_input_y (reduced to its sign bit below)
+ movq mm0,mm2 ; mm0 = irot_input_y (kept for the add-back)
+
+ psrlw mm2,15 ; mm2 = sign bit of irot_input_y
+ paddw mm3,mm0 ; mm3 = xC2S6 * irot_input_y
+
+ paddw mm3,mm2 ; add 1 for negative inputs
+ movq mm0, mm5; mm0 = irot_input_x
+
+
+ movq mm2, mm5; mm2 = irot_input_x (reduced to its sign bit below)
+ pmulhw mm0, xC6S2 ; mm0 = xC6S2 * irot_input_x (no add-back: xC6S2 is positive as a signed word)
+
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x
+ paddw mm0, mm2 ; add 1 for negative inputs
+
+ paddsw mm3, mm0 ; mm3 = xC2S6 * irot_input_y + xC6S2 * irot_input_x = op[2]
+ movq [edx + 16], mm3 ; store op[2]
+
+
+ movq mm0, mm5 ; mm0 = irot_input_x (kept for the add-back)
+ movq mm2, mm5 ; mm2 = irot_input_x (reduced to its sign bit below)
+
+ pmulhw mm5, xC2S6 ; mm5 = xC2S6 * irot_input_x - irot_input_x
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x
+
+ movq mm3, TIRY ; mm3 = irot_input_y, reloaded from the scratch slot
+ paddw mm5, mm0 ; mm5 = xC2S6 * irot_input_x
+
+ paddw mm5, mm2 ; add 1 for negative inputs
+ movq mm2, mm3 ; mm2 = irot_input_y (reduced to its sign bit below)
+
+ pmulhw mm3, xC6S2 ; mm3 = xC6S2 * irot_input_y
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_y
+
+ paddw mm3, mm2 ; add 1 for negative inputs
+ psubsw mm3, mm5 ; mm3 = xC6S2 * irot_input_y - xC2S6 * irot_input_x = op[6]
+
+ movq [edx + 48], mm3 ; store op[6]
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 is12 - is56
+; mm7 id12 + id56
+
+ movq mm0, xC4S4 ; mm0 = xC4S4, reused for both products
+ movq mm2, mm1 ; mm2 = is12 - is56 (reduced to its sign bit below)
+ movq mm3, mm1 ; mm3 = is12 - is56 (kept for the add-back)
+
+ pmulhw mm1, mm0 ; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+ psrlw mm2, 15 ; mm2 = sign bit of the multiplicand
+
+ paddw mm1, mm3 ; mm1 = xC4S4 * ( is12 - is56 )
+ paddw mm1, mm2 ; add 1 for negative inputs; mm1 = icommon_product1
+
+ movq mm2, mm7 ; mm2 = id12 + id56 (reduced to its sign bit below)
+ movq mm3, mm7 ; mm3 = id12 + id56 (kept for the add-back)
+
+ pmulhw mm7, mm0 ; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+ psrlw mm2, 15 ; mm2 = sign bit of the multiplicand
+
+ paddw mm7, mm3 ; mm7 = xC4S4 * ( id12 + id56 )
+ paddw mm7, mm2 ; add 1 for negative inputs; mm7 = icommon_product2
+
+;------------------------------------------------------------------------
+; mm0 Free
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm1 icommon_product1
+; mm7 icommon_product2
+; mm4 id07
+; mm6 id34
+
+
+ pxor mm0, mm0 ; mm0 = 0
+ psubsw mm0, mm6 ; mm0 = - id34
+
+ psubsw mm0, mm7 ; mm0 = - ( id34 + icommon_product2 ) = irot_input_y for op[1]/op[7]
+ paddsw mm6, mm6 ; mm6 = 2 * id34
+ paddsw mm6, mm0 ; mm6 = id34 - icommon_product2 = irot_input_y for op[3]/op[5]
+
+ psubsw mm4, mm1 ; mm4 = id07 - icommon_product1 = irot_input_x for op[3]/op[5]
+ paddsw mm1, mm1 ; mm1 = 2 * icommon_product1
+ paddsw mm1, mm4 ; mm1 = id07 + icommon_product1 = irot_input_x for op[1]/op[7]
+
+
+;-------------------------------------------------------------------------
+; op[1], op[7]
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 irot_input_x
+; mm0 irot_input_y
+
+ movq mm7, xC1S7 ; mm7 = xC1S7
+ movq mm2, mm1 ; mm2 = irot_input_x (reduced to its sign bit below)
+
+ movq mm3, mm1; mm3 = irot_input_x (second copy)
+ pmulhw mm1, mm7 ; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+ movq mm7, xC7S1 ; mm7 = xC7S1
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x
+
+ paddw mm1, mm3 ; mm1 = xC1S7 * irot_input_x
+ paddw mm1, mm2 ; add 1 for negative inputs
+
+ pmulhw mm3, mm7 ; mm3 = xC7S1 * irot_input_x (no add-back: xC7S1 is positive as a signed word)
+ paddw mm3, mm2 ; add 1 for negative inputs
+
+ movq mm5, mm0 ; mm5 = irot_input_y (second copy)
+ movq mm2, mm0 ; mm2 = irot_input_y (reduced to its sign bit below)
+
+ movq mm7, xC1S7 ; mm7 = xC1S7
+ pmulhw mm0, mm7 ; mm0 = xC1S7 * irot_input_y - irot_input_y
+
+ movq mm7, xC7S1 ; mm7 = xC7S1
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_y
+
+ paddw mm0, mm5 ; mm0 = xC1S7 * irot_input_y
+ paddw mm0, mm2 ; add 1 for negative inputs
+
+ pmulhw mm5, mm7 ; mm5 = xC7S1 * irot_input_y
+ paddw mm5, mm2 ; add 1 for negative inputs
+
+ psubsw mm1, mm5 ; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1]
+ paddsw mm3, mm0 ; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = op[7]
+
+ movq [edx + 8], mm1 ; store op[1]
+ movq [edx + 56], mm3 ; store op[7]
+;-----------------------------------------------------------------------------
+; op[3], op[5]
+; mm2 Free
+; mm3 Free
+; mm5 Free
+; mm7 Free
+; mm1 Free
+; mm0 Free
+; mm4 id07 - icommon_product1 = irot_input_x
+; mm6 id34 - icommon_product2 = irot_input_y
+
+ movq mm0, xC3S5 ; mm0 = xC3S5
+ movq mm1, xC5S3 ; mm1 = xC5S3
+
+ movq mm5, mm6 ; mm5 = irot_input_y (reduced to its sign bit below)
+ movq mm7, mm6 ; mm7 = irot_input_y (kept for the add-back and second product)
+
+ movq mm2, mm4 ; mm2 = irot_input_x (reduced to its sign bit below)
+ movq mm3, mm4 ; mm3 = irot_input_x (kept for the add-back and second product)
+
+ pmulhw mm4, mm0 ; mm4 = xC3S5 * irot_input_x - irot_input_x
+ pmulhw mm6, mm1 ; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+ psrlw mm2, 15 ; mm2 = sign bit of irot_input_x
+ psrlw mm5, 15 ; mm5 = sign bit of irot_input_y
+
+ paddw mm4, mm3 ; mm4 = xC3S5 * irot_input_x
+ paddw mm6, mm7 ; mm6 = xC5S3 * irot_input_y
+
+ paddw mm4, mm2 ; add 1 for negative inputs
+ paddw mm6, mm5 ; add 1 for negative inputs
+
+ psubsw mm4, mm6 ; mm4 = xC3S5 * irot_input_x - xC5S3 * irot_input_y = op[3]
+ movq [edx + 24], mm4 ; store op[3]
+
+ movq mm4, mm3 ; mm4 = irot_input_x
+ movq mm6, mm7 ; mm6 = irot_input_y
+
+ pmulhw mm3, mm1 ; mm3 = xC5S3 * irot_input_x - irot_input_x
+ pmulhw mm7, mm0 ; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+ paddw mm4, mm2 ; mm4 = irot_input_x + its sign bit (add-back and correction combined)
+ paddw mm6, mm5 ; mm6 = irot_input_y + its sign bit (add-back and correction combined)
+
+ paddw mm3, mm4 ; mm3 = xC5S3 * irot_input_x
+ paddw mm7, mm6 ; mm7 = xC3S5 * irot_input_y
+
+ paddw mm3, mm7 ; mm3 = xC5S3 * irot_input_x + xC3S5 * irot_input_y = op[5] (wrapping add, unlike op[3])
+ movq [edx + 40], mm3 ; store op[5]
+
+
+ emms ; leave MMX state so the caller can use x87 floating point again
+
+ pop edx
+ pop ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+
+ ret
+
+
+;************************************************
+ END
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
new file mode 100644
index 00000000..dceb1982
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
@@ -0,0 +1,1398 @@
+/****************************************************************************
+ *
+ * Module Title : fdctmmx.c
+ *
+ * Description : Forward DCT optimized specifically for mmx or compatible
+ * processor
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.00 YWX 07/11/11 Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+
+
+__declspec(align(16)) static unsigned short TIRY[8]; /* scratch: fdct_MMX's inline asm parks is07 - is34 here; one static slot shared by every call */
+/* Row i is read by the inline asm as C(i) = [edx + 8 * i]; row k holds round(65536 * cos(k * pi / 16)) in all four lanes. */
+__declspec(align(16)) static unsigned short MmxIdctConst[8 * 4] =
+{
+ 0, 0, 0, 0, /* C(0): not referenced in the part of fdct_MMX seen here */
+ 64277,64277,64277,64277, /* C(1) = xC1S7 */
+ 60547,60547,60547,60547, /* C(2) = xC2S6 */
+ 54491,54491,54491,54491, /* C(3) = xC3S5 */
+ 46341,46341,46341,46341, /* C(4) = xC4S4 */
+ 36410,36410,36410,36410, /* C(5) = xC5S3 */
+ 25080,25080,25080,25080, /* C(6) = xC6S2; below 32768, so pmulhw needs no add-back */
+ 12785,12785,12785,12785 /* C(7) = xC7S1; below 32768, so pmulhw needs no add-back */
+}; /* named "Idct" but consumed here by the forward DCT */
+
+
+/**************************************************************************************
+ *
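+ * Function: fdct_MMX
+ *
+ * Description: Forward DCT of the 8x8 block of shorts at InputData, written to OutputData.
+ *
+ * Input: InputData - source block; also used as scratch and overwritten by the transpose step
+ *
+ * Output: OutputData - receives the transformed block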
+ *
+ * Return: None
+ *
+ * Special Note: The inputdata is limited to 9 bits [-256, 255]
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+void fdct_MMX(short *InputData, short *OutputData)
+{
+
+ __asm
+ {
+ mov eax, InputData
+ mov ebx, OutputData
+ lea ecx, [eax+8]
+ lea edi, [ebx+8]
+
+ lea edx, MmxIdctConst
+
+#define IL(i) [eax + 16 * i]
+#define IH(i) [ecx + 16 * i]
+#define OL(i) [ebx + 16 * i]
+#define OH(i) [edi + 16 * i]
+#define C(i) [edx + 8 * i]
+
+/******************************************************/
+/* Do 4x8 Transpose is done through 2 4x4 Transpose */
+/******************************************************/
+
+ movq mm4, IH(0) /* mm4=e3e2e1e0 */
+ movq mm0, IH(1) /* mm4=f3f2f1f0 */
+
+ psllw mm4, 1 /* up precision */
+ psllw mm0, 1 /* up precision */
+
+ movq mm5, mm4 /* make a copy */
+ punpcklwd mm4, mm0 /* mm4=f1e1f0e0 */
+
+ punpckhwd mm5, mm0 /* mm5=f3e3f2e2 */
+ movq mm6, IH(2) /* mm6=g3g2g1g0 */
+
+ movq mm0, IH(3) /* mm0=h3h2h1h0 */
+ psllw mm6, 1 /* up precision */
+
+ psllw mm0, 1 /* up precision */
+ movq mm7, mm6 /* mm7=g3g2g1g0 */
+
+ punpcklwd mm6, mm0 /* mm6=h1g1h0g0 */
+ punpckhwd mm7, mm0 /* mm7=h3g3h2g2 */
+
+ movq mm3, mm4 /* mm4=f1e1f0e0 */
+ punpckldq mm4, mm6 /* mm4=h0g0f0e0 */
+
+ punpckhdq mm3, mm6 /* mm3=h1g1f1e1 */
+ movq mm6, mm5 /* mm5=f3e3f2e2 */
+
+ punpckldq mm5, mm7 /* mm5=h2g2f2e2 */
+ movq IH(0), mm4 /* saveh0g0f0e0 */
+
+ punpckhdq mm6, mm7 /* mm6=h3g3f3e3 */
+ movq IH(2), mm5 /* saveh2g2f2e2 */
+
+ movq IH(3), mm6 /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/
+/* mm3 in use for IH(1) */
+/*----------------------------------------------------*/
+
+ movq mm4, IL(0) /* mm4=a3a2a1a0 */
+ movq mm0, IL(1) /* mm0=b3b2b1b0 */
+
+ psllw mm4, 1 /* up precision */
+ psllw mm0, 1 /* up precision */
+
+ movq mm5, mm4 /* mm5=a3a2a1a0 */
+ punpcklwd mm4, mm0 /* mm4=b1a1b0a0 */
+
+ punpckhwd mm5, mm0 /* mm5=b3a3b2a2 */
+ movq mm6, IL(2) /* mm6=c3c2c1c0 */
+
+
+ movq mm0, IL(3) /* mm0=d3d2d1d0 */
+ psllw mm6, 1 /* up precision */
+
+ psllw mm0, 1 /* up precision */
+ movq mm7, mm6 /* mm7=c3c2c1c0 */
+
+ punpcklwd mm6, mm0 /* mm6=d1c1d0c0 */
+ punpckhwd mm7, mm0 /* mm7=c3c3d2c2 */
+
+ movq mm1, mm4 /* mm4=b1a1b0a0 */
+ punpckldq mm4, mm6 /* mm4=d0c0b0a0 */
+
+ punpckhdq mm1, mm6 /* mm1=d1c1b1a1 */
+ movq mm2, mm5 /* mm5=b3a3b2a2 */
+
+ punpckldq mm5, mm7 /* mm5=d2c2b2a2 */
+ punpckhdq mm2, mm7 /* mm6=d3c3b3a3 */
+
+ movq IL(2), mm5 /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/
+/* mm1 in use for IL(1) */
+/* mm2 in use for IL(3) */
+/* mm3 in use for IH(1) */
+/* mm4 in use for IH(0) */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT */
+/******************************************************/
+ movq mm0, mm4 /* mm4 = ip0 */
+ movq mm5, mm1 /* mm5 = ip1 */
+
+ movq mm6, mm2 /* mm6 = ip3 */
+ movq mm7, mm3 /* mm7 = ip5 */
+
+ paddsw mm0, IH(3) /* mm0 = ip0 + ip7 */
+ paddsw mm1, IL(2) /* mm1 = ip1 + ip2 */
+
+ paddsw mm2, IH(0) /* mm2 = ip3 + ip4 */
+ paddsw mm3, IH(2) /* mm3 = ip5 + ip6 */
+
+ psubsw mm4, IH(3) /* mm4 = ip0 - ip7 */
+ psubsw mm5, IL(2) /* mm5 = ip1 - ip2 */
+
+ psubsw mm0, mm2 /* mm0 = is07 - is34 */
+ paddsw mm2, mm2 /* mm2 = is34 * 2 */
+
+ psubsw mm6, IH(0) /* mm6 = ip3 - ip4 */
+ paddsw mm2, mm0 /* mm2 = is07 + is34 */
+
+ psubsw mm1, mm3 /* mm1 = is12 - is56 */
+ movq TIRY, mm0 /* save is07-is34 */
+
+ paddsw mm3, mm3 /* mm3 = is56 * 2 */
+ paddsw mm3, mm1 /* mm3 = is12 + is56 */
+
+ psubsw mm7, IH(2) /* mm7 = ip5 -ip6 */
+ psubsw mm5, mm7 /* mm5 = id12 - id56 */
+
+ paddsw mm7, mm7 /* mm7 = id56 * 2 */
+ paddsw mm7, mm5 /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4
+/*---------------------------------------------------------*/
+ psubsw mm2, mm3 /* mm2 = is0734 - is1256 */
+ paddsw mm3, mm3 /* mm3 = is1256 * 2 */
+
+ movq mm0, mm2 /* mm0 = is0734 - is1256 */
+ paddsw mm3, mm2 /* mm3 = is0734 + is1256 */
+
+ pmulhw mm0, C(4) /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw mm0, mm2 /* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncate mm0, now it is op[4] */
+
+ movq mm2, mm3 /* mm2 = is0734 + is1256 */
+ movq OH(0), mm0 /* op4, now mm0,mm2 are free */
+
+ movq mm0, mm3 /* mm0 = is0734 + is1256 */
+ pmulhw mm3, C(4) /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw mm3, mm2 /* Truncate mm3, now it is op[0] */
+ movq OL(0), mm3 /* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6
+/*---------------------------------------------------------*/
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ pmulhw mm3, C(2) /* mm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movq mm2, TIRY /* mm2 = irot_input_y */
+ movq mm0, mm2 /* mm0 = irot_input_y */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC2S6 * irot_input_y */
+
+ paddw mm3, mm2 /* Truncated */
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+
+
+ movq mm2, mm5 /* mm2 = id12 - id56 */
+ pmulhw mm0, C(6) /* mm0 = xC6S2 * irot_input_x */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncated */
+
+ paddsw mm3, mm0 /* op[2] */
+ movq OL(2), mm3 /* save op[2] */
+
+
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+ movq mm2, mm5 /* mm0 = id12 - id56 */
+
+ pmulhw mm5, C(2) /* mm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw mm2, 15
+
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ paddw mm5, mm0 /* mm5 = xC2S6 * irot_input_x */
+
+ paddw mm5, mm2 /* Truncated */
+ movq mm2, mm3 /* mm2 = irot_input_y */
+
+ pmulhw mm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw mm2, 15
+
+ paddw mm3, mm2 /* Truncated */
+ psubsw mm3, mm5 /* mm3 = op[6] */
+
+ movq OH(2), mm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movq mm0, C(4) /* mm0 = xC4s4 */
+ movq mm2, mm1 /* mm2 = is12 - is56 */
+
+ movq mm3, mm1 /* mm3 = is12 - is56 */
+ pmulhw mm1, mm0 /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw mm2, 15
+ paddw mm1, mm3 /* mm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw mm1, mm2 /* Truncate mm1, now it is icommon_product1 */
+ movq mm2, mm7 /* mm2 = id12 + id56 */
+
+ movq mm3, mm7 /* mm3 = id12 + id56 */
+ pmulhw mm7, mm0 /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw mm2, 15 /* For trucation */
+ paddw mm7, mm3 /* mm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw mm7, mm2 /* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor mm0, mm0 /* Clear mm0 */
+ psubsw mm0, mm6 /* mm0 = - id34 */
+
+ psubsw mm0, mm7 /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw mm6, mm6 /* mm6 = id34 * 2 */
+
+ paddsw mm6, mm0 /* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw mm4, mm1 /* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw mm1, mm1 /* mm1 = icommon_product1 * 2 */
+ paddsw mm1, mm4 /* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7
+/*---------------------------------------------------------*/
+
+ movq mm7, C(1) /* xC1S7 */
+ movq mm2, mm1 /* mm2 = irot_input_x */
+
+ movq mm3, mm1; /* mm3 = irot_input_x */
+ pmulhw mm1, mm7 /* mm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm1, mm3 /* mm1 = xC1S7 * irot_input_x */
+ paddw mm1, mm2 /* Trucated */
+
+ pmulhw mm3, mm7 /* mm3 = xC7S1 * irot_input_x */
+ paddw mm3, mm2 /* Truncated */
+
+ movq mm5, mm0 /* mm5 = irot_input_y */
+ movq mm2, mm0 /* mm2 = irot_input_y */
+
+ movq mm7, C(1) /* xC1S7 */
+ pmulhw mm0, mm7 /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm0, mm5 /* mm0 = xC1S7 * irot_input_y */
+ paddw mm0, mm2 /* Truncated */
+
+ pmulhw mm5, mm7 /* mm5 = xC7S1 * irot_input_y */
+ paddw mm5, mm2 /* Truncated */
+
+ psubsw mm1, mm5 /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw mm3, mm0 /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movq OL(1), mm1
+ movq OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5
+/*---------------------------------------------------------*/
+ movq mm0, C(3) /* xC3S5 */
+ movq mm1, C(5) /* xC5S3 */
+
+ movq mm5,mm6 /* irot_input_x */
+ movq mm7,mm6 /* irot_input_x */
+
+ movq mm2,mm4 /* irot_input_y */
+ movq mm3,mm4 /* irot_input_y */
+
+ pmulhw mm4,mm0 /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw mm6,mm1 /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw mm2,15 /* for trucation */
+ psrlw mm5,15 /* for trucation */
+
+ paddw mm4,mm3 /* mm4 = xC3S5 * irot_input_x */
+ paddw mm6,mm7 /* mm6 = xC5S3 * irot_input_y */
+
+ paddw mm4,mm2 /* Truncated */
+ paddw mm6,mm5 /* Truncated */
+
+ psubsw mm4,mm6 /* op [3] */
+ movq OL(3),mm4 /* Save Op[3] */
+
+ movq mm4,mm3 /* irot_input_y */
+ movq mm6,mm7 /* irot_input_x */
+
+ pmulhw mm3,mm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw mm7,mm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw mm4,mm2 /* Trucated */
+ paddw mm6,mm5 /* Trucated */
+
+ paddw mm3,mm4 /* mm3 = xC5S3 * irot_input_x */
+ paddw mm7,mm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw mm3,mm7 /* Op[5] */
+ movq OH(1),mm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT */
+/*---------------------------------------------------------*/
+
+/******************************************************/
+/* Do 4x8 Transpose is done through 2 4x4 Transpose */
+/******************************************************/
+
+ lea eax, [eax+64]
+ lea ecx, [ecx+64]
+ lea ebx, [ebx+64]
+ lea edi, [edi+64]
+
+ movq mm4, IH(0) /* mm4=e3e2e1e0 */
+ movq mm0, IH(1) /* mm4=f3f2f1f0 */
+
+ psllw mm4, 1 /* up precision */
+ psllw mm0, 1 /* up precision */
+
+ movq mm5, mm4 /* make a copy */
+ punpcklwd mm4, mm0 /* mm4=f1e1f0e0 */
+
+ punpckhwd mm5, mm0 /* mm5=f3e3f2e2 */
+ movq mm6, IH(2) /* mm6=g3g2g1g0 */
+
+ movq mm0, IH(3) /* mm0=h3h2h1h0 */
+ psllw mm6, 1 /* up precision */
+
+ psllw mm0, 1 /* up precision */
+ movq mm7, mm6 /* mm7=g3g2g1g0 */
+
+ punpcklwd mm6, mm0 /* mm6=h1g1h0g0 */
+ punpckhwd mm7, mm0 /* mm7=h3g3h2g2 */
+
+ movq mm3, mm4 /* mm4=f1e1f0e0 */
+ punpckldq mm4, mm6 /* mm4=h0g0f0e0 */
+
+ punpckhdq mm3, mm6 /* mm3=h1g1f1e1 */
+ movq mm6, mm5 /* mm5=f3e3f2e2 */
+
+ punpckldq mm5, mm7 /* mm5=h2g2f2e2 */
+ movq IH(0), mm4 /* saveh0g0f0e0 */
+
+ punpckhdq mm6, mm7 /* mm6=h3g3f3e3 */
+ movq IH(2), mm5 /* saveh2g2f2e2 */
+
+ movq IH(3), mm6 /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/
+/* mm3 in use for IH(1) */
+/*----------------------------------------------------*/
+
+ movq mm4, IL(0) /* mm4=a3a2a1a0 */
+ movq mm0, IL(1) /* mm0=b3b2b1b0 */
+
+ psllw mm4, 1 /* up precision */
+ psllw mm0, 1 /* up precision */
+
+ movq mm5, mm4 /* mm5=a3a2a1a0 */
+ punpcklwd mm4, mm0 /* mm4=b1a1b0a0 */
+
+ punpckhwd mm5, mm0 /* mm5=b3a3b2a2 */
+ movq mm6, IL(2) /* mm6=c3c2c1c0 */
+
+
+ movq mm0, IL(3) /* mm0=d3d2d1d0 */
+ psllw mm6, 1 /* up precision */
+
+ psllw mm0, 1 /* up precision */
+ movq mm7, mm6 /* mm7=c3c2c1c0 */
+
+ punpcklwd mm6, mm0 /* mm6=d1c1d0c0 */
+ punpckhwd mm7, mm0 /* mm7=c3c3d2c2 */
+
+ movq mm1, mm4 /* mm4=b1a1b0a0 */
+ punpckldq mm4, mm6 /* mm4=d0c0b0a0 */
+
+ punpckhdq mm1, mm6 /* mm1=d1c1b1a1 */
+ movq mm2, mm5 /* mm5=b3a3b2a2 */
+
+ punpckldq mm5, mm7 /* mm5=d2c2b2a2 */
+ punpckhdq mm2, mm7 /* mm6=d3c3b3a3 */
+
+ movq IL(2), mm5 /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/
+/* mm1 in use for IL(1) */
+/* mm2 in use for IL(3) */
+/* mm3 in use for IH(1) */
+/* mm4 in use for IH(0) */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT */
+/******************************************************/
+ movq mm0, mm4 /* mm4 = ip0 */
+ movq mm5, mm1 /* mm5 = ip1 */
+
+ movq mm6, mm2 /* mm6 = ip3 */
+ movq mm7, mm3 /* mm7 = ip5 */
+
+ paddsw mm0, IH(3) /* mm0 = ip0 + ip7 */
+ paddsw mm1, IL(2) /* mm1 = ip1 + ip2 */
+
+ paddsw mm2, IH(0) /* mm2 = ip3 + ip4 */
+ paddsw mm3, IH(2) /* mm3 = ip5 + ip6 */
+
+ psubsw mm4, IH(3) /* mm4 = ip0 - ip7 */
+ psubsw mm5, IL(2) /* mm5 = ip1 - ip2 */
+
+ psubsw mm0, mm2 /* mm0 = is07 - is34 */
+ paddsw mm2, mm2 /* mm2 = is34 * 2 */
+
+ psubsw mm6, IH(0) /* mm6 = ip3 - ip4 */
+ paddsw mm2, mm0 /* mm2 = is07 + is34 */
+
+ psubsw mm1, mm3 /* mm1 = is12 - is56 */
+ movq TIRY, mm0 /* save is07-is34 */
+
+ paddsw mm3, mm3 /* mm3 = is56 * 2 */
+ paddsw mm3, mm1 /* mm3 = is12 + is56 */
+
+ psubsw mm7, IH(2) /* mm7 = ip5 -ip6 */
+ psubsw mm5, mm7 /* mm5 = id12 - id56 */
+
+ paddsw mm7, mm7 /* mm7 = id56 * 2 */
+ paddsw mm7, mm5 /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4
+/*---------------------------------------------------------*/
+ psubsw mm2, mm3 /* mm2 = is0734 - is1256 */
+ paddsw mm3, mm3 /* mm3 = is1256 * 2 */
+
+ movq mm0, mm2 /* mm0 = is0734 - is1256 */
+ paddsw mm3, mm2 /* mm3 = is0734 + is1256 */
+
+ pmulhw mm0, C(4) /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw mm0, mm2 /* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncate mm0, now it is op[4] */
+
+ movq mm2, mm3 /* mm2 = is0734 + is1256 */
+ movq OH(0), mm0 /* op4, now mm0,mm2 are free */
+
+ movq mm0, mm3 /* mm0 = is0734 + is1256 */
+ pmulhw mm3, C(4) /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw mm3, mm2 /* Truncate mm3, now it is op[0] */
+ movq OL(0), mm3 /* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6
+/*---------------------------------------------------------*/
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ pmulhw mm3, C(2) /* mm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movq mm2, TIRY /* mm2 = irot_input_y */
+ movq mm0, mm2 /* mm0 = irot_input_y */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC2S6 * irot_input_y */
+
+ paddw mm3, mm2 /* Truncated */
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+
+
+ movq mm2, mm5 /* mm2 = id12 - id56 */
+ pmulhw mm0, C(6) /* mm0 = xC6S2 * irot_input_x */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncated */
+
+ paddsw mm3, mm0 /* op[2] */
+ movq OL(2), mm3 /* save op[2] */
+
+
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+ movq mm2, mm5 /* mm0 = id12 - id56 */
+
+ pmulhw mm5, C(2) /* mm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw mm2, 15
+
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ paddw mm5, mm0 /* mm5 = xC2S6 * irot_input_x */
+
+ paddw mm5, mm2 /* Truncated */
+ movq mm2, mm3 /* mm2 = irot_input_y */
+
+ pmulhw mm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw mm2, 15
+
+ paddw mm3, mm2 /* Truncated */
+ psubsw mm3, mm5 /* mm3 = op[6] */
+
+ movq OH(2), mm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movq mm0, C(4) /* mm0 = xC4s4 */
+ movq mm2, mm1 /* mm2 = is12 - is56 */
+
+ movq mm3, mm1 /* mm3 = is12 - is56 */
+ pmulhw mm1, mm0 /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw mm2, 15
+ paddw mm1, mm3 /* mm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw mm1, mm2 /* Truncate mm1, now it is icommon_product1 */
+ movq mm2, mm7 /* mm2 = id12 + id56 */
+
+ movq mm3, mm7 /* mm3 = id12 + id56 */
+ pmulhw mm7, mm0 /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw mm2, 15 /* For trucation */
+ paddw mm7, mm3 /* mm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw mm7, mm2 /* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor mm0, mm0 /* Clear mm0 */
+ psubsw mm0, mm6 /* mm0 = - id34 */
+
+ psubsw mm0, mm7 /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw mm6, mm6 /* mm6 = id34 * 2 */
+
+ paddsw mm6, mm0 /* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw mm4, mm1 /* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw mm1, mm1 /* mm1 = icommon_product1 * 2 */
+ paddsw mm1, mm4 /* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7
+/*---------------------------------------------------------*/
+
+ movq mm7, C(1) /* xC1S7 */
+ movq mm2, mm1 /* mm2 = irot_input_x */
+
+ movq mm3, mm1; /* mm3 = irot_input_x */
+ pmulhw mm1, mm7 /* mm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm1, mm3 /* mm1 = xC1S7 * irot_input_x */
+ paddw mm1, mm2 /* Trucated */
+
+ pmulhw mm3, mm7 /* mm3 = xC7S1 * irot_input_x */
+ paddw mm3, mm2 /* Truncated */
+
+ movq mm5, mm0 /* mm5 = irot_input_y */
+ movq mm2, mm0 /* mm2 = irot_input_y */
+
+ movq mm7, C(1) /* xC1S7 */
+ pmulhw mm0, mm7 /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm0, mm5 /* mm0 = xC1S7 * irot_input_y */
+ paddw mm0, mm2 /* Truncated */
+
+ pmulhw mm5, mm7 /* mm5 = xC7S1 * irot_input_y */
+ paddw mm5, mm2 /* Truncated */
+
+ psubsw mm1, mm5 /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw mm3, mm0 /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movq OL(1), mm1
+ movq OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5
+/*---------------------------------------------------------*/
+ movq mm0, C(3) /* xC3S5 */
+ movq mm1, C(5) /* xC5S3 */
+
+ movq mm5,mm6 /* irot_input_x */
+ movq mm7,mm6 /* irot_input_x */
+
+ movq mm2,mm4 /* irot_input_y */
+ movq mm3,mm4 /* irot_input_y */
+
+ pmulhw mm4,mm0 /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw mm6,mm1 /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw mm2,15 /* for trucation */
+ psrlw mm5,15 /* for trucation */
+
+ paddw mm4,mm3 /* mm4 = xC3S5 * irot_input_x */
+ paddw mm6,mm7 /* mm6 = xC5S3 * irot_input_y */
+
+ paddw mm4,mm2 /* Truncated */
+ paddw mm6,mm5 /* Truncated */
+
+ psubsw mm4,mm6 /* op [3] */
+ movq OL(3),mm4 /* Save Op[3] */
+
+ movq mm4,mm3 /* irot_input_y */
+ movq mm6,mm7 /* irot_input_x */
+
+ pmulhw mm3,mm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw mm7,mm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw mm4,mm2 /* Trucated */
+ paddw mm6,mm5 /* Trucated */
+
+ paddw mm3,mm4 /* mm3 = xC5S3 * irot_input_x */
+ paddw mm7,mm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw mm3,mm7 /* Op[5] */
+ movq OH(1),mm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of Horizontal FDCT */
+/*---------------------------------------------------------*/
+ lea eax, [ebx-64]
+ lea esi, [edi-64]
+
+#undef IL
+#undef IH
+#undef OL
+#undef OH
+#define IL(i) [eax + 16 * i]
+#define IH(i) [ebx + 16 * i]
+#define OL(i) [eax + 16 * i]
+#define OH(i) [ebx + 16 * i]
+
+/******************************************************/
+/* The 4x8 transpose is done as two 4x4 transposes */
+/******************************************************/
+ movq mm4, IH(0) /* mm4=e3e2e1e0 */
+ movq mm0, IH(1) /* mm4=f3f2f1f0 */
+
+ movq mm5, mm4 /* make a copy */
+ punpcklwd mm4, mm0 /* mm4=f1e1f0e0 */
+
+ punpckhwd mm5, mm0 /* mm5=f3e3f2e2 */
+ movq mm6, IH(2) /* mm6=g3g2g1g0 */
+
+ movq mm0, IH(3) /* mm0=h3h2h1h0 */
+ movq mm7, mm6 /* mm7=g3g2g1g0 */
+
+ punpcklwd mm6, mm0 /* mm6=h1g1h0g0 */
+ punpckhwd mm7, mm0 /* mm7=h3g3h2g2 */
+
+ movq mm3, mm4 /* mm4=f1e1f0e0 */
+ punpckldq mm4, mm6 /* mm4=h0g0f0e0 */
+
+ punpckhdq mm3, mm6 /* mm3=h1g1f1e1 */
+ movq mm6, mm5 /* mm5=f3e3f2e2 */
+
+ punpckldq mm5, mm7 /* mm5=h2g2f2e2 */
+ movq IH(0), mm4 /* saveh0g0f0e0 */
+
+ punpckhdq mm6, mm7 /* mm6=h3g3f3e3 */
+ movq IH(2), mm5 /* saveh2g2f2e2 */
+
+ movq IH(3), mm6 /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/
+/* mm3 in use for IH(1) */
+/*----------------------------------------------------*/
+
+ movq mm4, IL(0) /* mm4=a3a2a1a0 */
+ movq mm0, IL(1) /* mm0=b3b2b1b0 */
+
+ movq mm5, mm4 /* mm5=a3a2a1a0 */
+ punpcklwd mm4, mm0 /* mm4=b1a1b0a0 */
+
+ punpckhwd mm5, mm0 /* mm5=b3a3b2a2 */
+ movq mm6, IL(2) /* mm6=c3c2c1c0 */
+
+ movq mm0, IL(3) /* mm0=d3d2d1d0 */
+ movq mm7, mm6 /* mm7=c3c2c1c0 */
+
+ punpcklwd mm6, mm0 /* mm6=d1c1d0c0 */
+ punpckhwd mm7, mm0 /* mm7=c3c3d2c2 */
+
+ movq mm1, mm4 /* mm4=b1a1b0a0 */
+ punpckldq mm4, mm6 /* mm4=d0c0b0a0 */
+
+ punpckhdq mm1, mm6 /* mm1=d1c1b1a1 */
+ movq mm2, mm5 /* mm5=b3a3b2a2 */
+
+ punpckldq mm5, mm7 /* mm5=d2c2b2a2 */
+ punpckhdq mm2, mm7 /* mm6=d3c3b3a3 */
+
+ movq IL(2), mm5 /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/
+/* mm1 in use for IL(1) */
+/* mm2 in use for IL(3) */
+/* mm3 in use for IH(1) */
+/* mm4 in use for IH(0) */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT */
+/******************************************************/
+ movq mm0, mm4 /* mm4 = ip0 */
+ movq mm5, mm1 /* mm5 = ip1 */
+
+ movq mm6, mm2 /* mm6 = ip3 */
+ movq mm7, mm3 /* mm7 = ip5 */
+
+ paddsw mm0, IH(3) /* mm0 = ip0 + ip7 */
+ paddsw mm1, IL(2) /* mm1 = ip1 + ip2 */
+
+ paddsw mm2, IH(0) /* mm2 = ip3 + ip4 */
+ paddsw mm3, IH(2) /* mm3 = ip5 + ip6 */
+
+ psubsw mm4, IH(3) /* mm4 = ip0 - ip7 */
+ psubsw mm5, IL(2) /* mm5 = ip1 - ip2 */
+
+ psubsw mm0, mm2 /* mm0 = is07 - is34 */
+ paddsw mm2, mm2 /* mm2 = is34 * 2 */
+
+ psubsw mm6, IH(0) /* mm6 = ip3 - ip4 */
+ paddsw mm2, mm0 /* mm2 = is07 + is34 */
+
+ psubsw mm1, mm3 /* mm1 = is12 - is56 */
+ movq TIRY, mm0 /* save is07-is34 */
+
+ paddsw mm3, mm3 /* mm3 = is56 * 2 */
+ paddsw mm3, mm1 /* mm3 = is12 + is56 */
+
+ psubsw mm7, IH(2) /* mm7 = ip5 -ip6 */
+ psubsw mm5, mm7 /* mm5 = id12 - id56 */
+
+ paddsw mm7, mm7 /* mm7 = id56 * 2 */
+ paddsw mm7, mm5 /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+ psubsw mm2, mm3 /* mm2 = is0734 - is1256 */
+ paddsw mm3, mm3 /* mm3 = is1256 * 2 */
+
+ movq mm0, mm2 /* mm0 = is0734 - is1256 */
+ paddsw mm3, mm2 /* mm3 = is0734 + is1256 */
+
+ pmulhw mm0, C(4) /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw mm0, mm2 /* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncate mm0, now it is op[4] */
+
+ movq mm2, mm0
+ psrlw mm0, 15
+
+ paddw mm0, mm2
+ psraw mm0, 1
+
+ movq OH(0), mm0 /* op4, now mm0,mm2 are free */
+ movq mm2, mm3 /* mm2 = is0734 + is1256 */
+
+
+ movq mm0, mm3 /* mm0 = is0734 + is1256 */
+ pmulhw mm3, C(4) /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw mm3, mm2 /* Truncate mm3, now it is op[0] */
+ movq mm2, mm3
+
+ psrlw mm3, 15
+ paddw mm3, mm2
+
+ psraw mm3, 1
+ movq OL(0), mm3 /* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ pmulhw mm3, C(2) /* mm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movq mm2, TIRY /* mm2 = irot_input_y */
+ movq mm0, mm2 /* mm0 = irot_input_y */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC2S6 * irot_input_y */
+
+ paddw mm3, mm2 /* Truncated */
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+
+
+ movq mm2, mm5 /* mm2 = id12 - id56 */
+ pmulhw mm0, C(6) /* mm0 = xC6S2 * irot_input_x */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncated */
+
+ paddsw mm3, mm0 /* op[2] */
+ movq mm0, mm3
+
+ psrlw mm3, 15
+ paddw mm3, mm0
+
+ psraw mm3, 1
+ movq OL(2), mm3 /* save op[2] */
+
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+ movq mm2, mm5 /* mm0 = id12 - id56 */
+
+ pmulhw mm5, C(2) /* mm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw mm2, 15
+
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ paddw mm5, mm0 /* mm5 = xC2S6 * irot_input_x */
+
+ paddw mm5, mm2 /* Truncated */
+ movq mm2, mm3 /* mm2 = irot_input_y */
+
+ pmulhw mm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw mm2, 15
+
+ paddw mm3, mm2 /* Truncated */
+ psubsw mm3, mm5 /* mm3 = op[6] */
+
+ movq mm5, mm3
+ psrlw mm3, 15
+
+ paddw mm3, mm5
+ psraw mm3, 1
+
+ movq OH(2), mm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movq mm0, C(4) /* mm0 = xC4s4 */
+ movq mm2, mm1 /* mm2 = is12 - is56 */
+
+ movq mm3, mm1 /* mm3 = is12 - is56 */
+ pmulhw mm1, mm0 /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw mm2, 15
+ paddw mm1, mm3 /* mm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw mm1, mm2 /* Truncate mm1, now it is icommon_product1 */
+ movq mm2, mm7 /* mm2 = id12 + id56 */
+
+ movq mm3, mm7 /* mm3 = id12 + id56 */
+ pmulhw mm7, mm0 /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw mm2, 15 /* For trucation */
+ paddw mm7, mm3 /* mm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw mm7, mm2 /* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor mm0, mm0 /* Clear mm0 */
+ psubsw mm0, mm6 /* mm0 = - id34 */
+
+ psubsw mm0, mm7 /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw mm6, mm6 /* mm6 = id34 * 2 */
+
+ paddsw mm6, mm0 /* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw mm4, mm1 /* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw mm1, mm1 /* mm1 = icommon_product1 * 2 */
+ paddsw mm1, mm4 /* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7 */
+/*---------------------------------------------------------*/
+ movq mm7, C(1) /* xC1S7 */
+ movq mm2, mm1 /* mm2 = irot_input_x */
+
+ movq mm3, mm1; /* mm3 = irot_input_x */
+ pmulhw mm1, mm7 /* mm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm1, mm3 /* mm1 = xC1S7 * irot_input_x */
+ paddw mm1, mm2 /* Trucated */
+
+ pmulhw mm3, mm7 /* mm3 = xC7S1 * irot_input_x */
+ paddw mm3, mm2 /* Truncated */
+
+ movq mm5, mm0 /* mm5 = irot_input_y */
+ movq mm2, mm0 /* mm2 = irot_input_y */
+
+ movq mm7, C(1) /* xC1S7 */
+ pmulhw mm0, mm7 /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm0, mm5 /* mm0 = xC1S7 * irot_input_y */
+ paddw mm0, mm2 /* Truncated */
+
+ pmulhw mm5, mm7 /* mm5 = xC7S1 * irot_input_y */
+ paddw mm5, mm2 /* Truncated */
+
+ psubsw mm1, mm5 /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw mm3, mm0 /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movq mm5, mm1
+ movq mm0, mm3
+
+ psrlw mm1, 15
+ psrlw mm3, 15
+
+ paddw mm1, mm5
+ paddw mm3, mm0
+
+ psraw mm1, 1
+ psraw mm3, 1
+
+ movq OL(1), mm1
+ movq OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 */
+/*---------------------------------------------------------*/
+ movq mm0, C(3) /* xC3S5 */
+ movq mm1, C(5) /* xC5S3 */
+
+ movq mm5,mm6 /* irot_input_x */
+ movq mm7,mm6 /* irot_input_x */
+
+ movq mm2,mm4 /* irot_input_y */
+ movq mm3,mm4 /* irot_input_y */
+
+ pmulhw mm4,mm0 /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw mm6,mm1 /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw mm2,15 /* for trucation */
+ psrlw mm5,15 /* for trucation */
+
+ paddw mm4,mm3 /* mm4 = xC3S5 * irot_input_x */
+ paddw mm6,mm7 /* mm6 = xC5S3 * irot_input_y */
+
+ paddw mm4,mm2 /* Truncated */
+ paddw mm6,mm5 /* Truncated */
+
+ psubsw mm4,mm6 /* op [3] */
+ movq mm6,mm4
+
+ psrlw mm4,15
+ paddw mm4,mm6
+
+ psraw mm4,1
+ movq OL(3),mm4 /* Save Op[3] */
+
+ movq mm4,mm3 /* irot_input_y */
+ movq mm6,mm7 /* irot_input_x */
+
+ pmulhw mm3,mm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw mm7,mm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw mm4,mm2 /* Trucated */
+ paddw mm6,mm5 /* Trucated */
+
+ paddw mm3,mm4 /* mm3 = xC5S3 * irot_input_x */
+ paddw mm7,mm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw mm3,mm7 /* Op[5] */
+ movq mm7,mm3
+
+ psrlw mm3,15
+ paddw mm3,mm7
+
+ psraw mm3,1
+ movq OH(1),mm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT */
+/*---------------------------------------------------------*/
+ lea eax, [eax+8]
+ lea ebx, [ebx+8]
+
+/******************************************************/
+/* The 4x8 transpose is done as two 4x4 transposes */
+/******************************************************/
+ movq mm4, IH(0) /* mm4=e3e2e1e0 */
+ movq mm0, IH(1) /* mm4=f3f2f1f0 */
+
+ movq mm5, mm4 /* make a copy */
+ punpcklwd mm4, mm0 /* mm4=f1e1f0e0 */
+
+ punpckhwd mm5, mm0 /* mm5=f3e3f2e2 */
+ movq mm6, IH(2) /* mm6=g3g2g1g0 */
+
+ movq mm0, IH(3) /* mm0=h3h2h1h0 */
+ movq mm7, mm6 /* mm7=g3g2g1g0 */
+
+ punpcklwd mm6, mm0 /* mm6=h1g1h0g0 */
+ punpckhwd mm7, mm0 /* mm7=h3g3h2g2 */
+
+ movq mm3, mm4 /* mm4=f1e1f0e0 */
+ punpckldq mm4, mm6 /* mm4=h0g0f0e0 */
+
+ punpckhdq mm3, mm6 /* mm3=h1g1f1e1 */
+ movq mm6, mm5 /* mm5=f3e3f2e2 */
+
+ punpckldq mm5, mm7 /* mm5=h2g2f2e2 */
+ movq IH(0), mm4 /* saveh0g0f0e0 */
+
+ punpckhdq mm6, mm7 /* mm6=h3g3f3e3 */
+ movq IH(2), mm5 /* saveh2g2f2e2 */
+
+ movq IH(3), mm6 /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/
+/* mm3 in use for IH(1) */
+/*----------------------------------------------------*/
+
+ movq mm4, IL(0) /* mm4=a3a2a1a0 */
+ movq mm0, IL(1) /* mm0=b3b2b1b0 */
+
+ movq mm5, mm4 /* mm5=a3a2a1a0 */
+ punpcklwd mm4, mm0 /* mm4=b1a1b0a0 */
+
+ punpckhwd mm5, mm0 /* mm5=b3a3b2a2 */
+ movq mm6, IL(2) /* mm6=c3c2c1c0 */
+
+ movq mm0, IL(3) /* mm0=d3d2d1d0 */
+ movq mm7, mm6 /* mm7=c3c2c1c0 */
+
+ punpcklwd mm6, mm0 /* mm6=d1c1d0c0 */
+ punpckhwd mm7, mm0 /* mm7=c3c3d2c2 */
+
+ movq mm1, mm4 /* mm4=b1a1b0a0 */
+ punpckldq mm4, mm6 /* mm4=d0c0b0a0 */
+
+ punpckhdq mm1, mm6 /* mm1=d1c1b1a1 */
+ movq mm2, mm5 /* mm5=b3a3b2a2 */
+
+ punpckldq mm5, mm7 /* mm5=d2c2b2a2 */
+ punpckhdq mm2, mm7 /* mm6=d3c3b3a3 */
+
+ movq IL(2), mm5 /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/
+/* mm1 in use for IL(1) */
+/* mm2 in use for IL(3) */
+/* mm3 in use for IH(1) */
+/* mm4 in use for IH(0) */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT */
+/******************************************************/
+ movq mm0, mm4 /* mm4 = ip0 */
+ movq mm5, mm1 /* mm5 = ip1 */
+
+ movq mm6, mm2 /* mm6 = ip3 */
+ movq mm7, mm3 /* mm7 = ip5 */
+
+ paddsw mm0, IH(3) /* mm0 = ip0 + ip7 */
+ paddsw mm1, IL(2) /* mm1 = ip1 + ip2 */
+
+ paddsw mm2, IH(0) /* mm2 = ip3 + ip4 */
+ paddsw mm3, IH(2) /* mm3 = ip5 + ip6 */
+
+ psubsw mm4, IH(3) /* mm4 = ip0 - ip7 */
+ psubsw mm5, IL(2) /* mm5 = ip1 - ip2 */
+
+ psubsw mm0, mm2 /* mm0 = is07 - is34 */
+ paddsw mm2, mm2 /* mm2 = is34 * 2 */
+
+ psubsw mm6, IH(0) /* mm6 = ip3 - ip4 */
+ paddsw mm2, mm0 /* mm2 = is07 + is34 */
+
+ psubsw mm1, mm3 /* mm1 = is12 - is56 */
+ movq TIRY, mm0 /* save is07-is34 */
+
+ paddsw mm3, mm3 /* mm3 = is56 * 2 */
+ paddsw mm3, mm1 /* mm3 = is12 + is56 */
+
+ psubsw mm7, IH(2) /* mm7 = ip5 -ip6 */
+ psubsw mm5, mm7 /* mm5 = id12 - id56 */
+
+ paddsw mm7, mm7 /* mm7 = id56 * 2 */
+ paddsw mm7, mm5 /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+ psubsw mm2, mm3 /* mm2 = is0734 - is1256 */
+ paddsw mm3, mm3 /* mm3 = is1256 * 2 */
+
+ movq mm0, mm2 /* mm0 = is0734 - is1256 */
+ paddsw mm3, mm2 /* mm3 = is0734 + is1256 */
+
+ pmulhw mm0, C(4) /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw mm0, mm2 /* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncate mm0, now it is op[4] */
+
+ movq mm2, mm0
+ psrlw mm0, 15
+
+ paddw mm0, mm2
+ psraw mm0, 1
+
+ movq OH(0), mm0 /* op4, now mm0,mm2 are free */
+ movq mm2, mm3 /* mm2 = is0734 + is1256 */
+
+
+ movq mm0, mm3 /* mm0 = is0734 + is1256 */
+ pmulhw mm3, C(4) /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw mm3, mm2 /* Truncate mm3, now it is op[0] */
+ movq mm2, mm3
+
+ psrlw mm3, 15
+ paddw mm3, mm2
+
+ psraw mm3, 1
+ movq OL(0), mm3 /* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ pmulhw mm3, C(2) /* mm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movq mm2, TIRY /* mm2 = irot_input_y */
+ movq mm0, mm2 /* mm0 = irot_input_y */
+
+ psrlw mm2, 15
+ paddw mm3, mm0 /* mm3 = xC2S6 * irot_input_y */
+
+ paddw mm3, mm2 /* Truncated */
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+
+
+ movq mm2, mm5 /* mm2 = id12 - id56 */
+ pmulhw mm0, C(6) /* mm0 = xC6S2 * irot_input_x */
+
+ psrlw mm2, 15
+ paddw mm0, mm2 /* Truncated */
+
+ paddsw mm3, mm0 /* op[2] */
+ movq mm0, mm3
+
+ psrlw mm3, 15
+ paddw mm3, mm0
+
+ psraw mm3, 1
+ movq OL(2), mm3 /* save op[2] */
+
+ movq mm0, mm5 /* mm0 = id12 - id56 */
+ movq mm2, mm5 /* mm0 = id12 - id56 */
+
+ pmulhw mm5, C(2) /* mm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw mm2, 15
+
+ movq mm3, TIRY /* mm3 = irot_input_y */
+ paddw mm5, mm0 /* mm5 = xC2S6 * irot_input_x */
+
+ paddw mm5, mm2 /* Truncated */
+ movq mm2, mm3 /* mm2 = irot_input_y */
+
+ pmulhw mm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw mm2, 15
+
+ paddw mm3, mm2 /* Truncated */
+ psubsw mm3, mm5 /* mm3 = op[6] */
+
+ movq mm5, mm3
+ psrlw mm3, 15
+
+ paddw mm3, mm5
+ psraw mm3, 1
+
+ movq OH(2), mm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movq mm0, C(4) /* mm0 = xC4s4 */
+ movq mm2, mm1 /* mm2 = is12 - is56 */
+
+ movq mm3, mm1 /* mm3 = is12 - is56 */
+ pmulhw mm1, mm0 /* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw mm2, 15
+ paddw mm1, mm3 /* mm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw mm1, mm2 /* Truncate mm1, now it is icommon_product1 */
+ movq mm2, mm7 /* mm2 = id12 + id56 */
+
+ movq mm3, mm7 /* mm3 = id12 + id56 */
+ pmulhw mm7, mm0 /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw mm2, 15 /* For trucation */
+ paddw mm7, mm3 /* mm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw mm7, mm2 /* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor mm0, mm0 /* Clear mm0 */
+ psubsw mm0, mm6 /* mm0 = - id34 */
+
+ psubsw mm0, mm7 /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw mm6, mm6 /* mm6 = id34 * 2 */
+
+ paddsw mm6, mm0 /* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw mm4, mm1 /* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw mm1, mm1 /* mm1 = icommon_product1 * 2 */
+ paddsw mm1, mm4 /* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7 */
+/*---------------------------------------------------------*/
+ movq mm7, C(1) /* xC1S7 */
+ movq mm2, mm1 /* mm2 = irot_input_x */
+
+ movq mm3, mm1; /* mm3 = irot_input_x */
+ pmulhw mm1, mm7 /* mm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm1, mm3 /* mm1 = xC1S7 * irot_input_x */
+ paddw mm1, mm2 /* Trucated */
+
+ pmulhw mm3, mm7 /* mm3 = xC7S1 * irot_input_x */
+ paddw mm3, mm2 /* Truncated */
+
+ movq mm5, mm0 /* mm5 = irot_input_y */
+ movq mm2, mm0 /* mm2 = irot_input_y */
+
+ movq mm7, C(1) /* xC1S7 */
+ pmulhw mm0, mm7 /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movq mm7, C(7) /* xC7S1 */
+ psrlw mm2, 15 /* for trucation */
+
+ paddw mm0, mm5 /* mm0 = xC1S7 * irot_input_y */
+ paddw mm0, mm2 /* Truncated */
+
+ pmulhw mm5, mm7 /* mm5 = xC7S1 * irot_input_y */
+ paddw mm5, mm2 /* Truncated */
+
+ psubsw mm1, mm5 /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw mm3, mm0 /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movq mm5, mm1
+ movq mm0, mm3
+
+ psrlw mm1, 15
+ psrlw mm3, 15
+
+ paddw mm1, mm5
+ paddw mm3, mm0
+
+ psraw mm1, 1
+ psraw mm3, 1
+
+ movq OL(1), mm1
+ movq OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 */
+/*---------------------------------------------------------*/
+ movq mm0, C(3) /* xC3S5 */
+ movq mm1, C(5) /* xC5S3 */
+
+ movq mm5,mm6 /* irot_input_x */
+ movq mm7,mm6 /* irot_input_x */
+
+ movq mm2,mm4 /* irot_input_y */
+ movq mm3,mm4 /* irot_input_y */
+
+ pmulhw mm4,mm0 /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw mm6,mm1 /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw mm2,15 /* for trucation */
+ psrlw mm5,15 /* for trucation */
+
+ paddw mm4,mm3 /* mm4 = xC3S5 * irot_input_x */
+ paddw mm6,mm7 /* mm6 = xC5S3 * irot_input_y */
+
+ paddw mm4,mm2 /* Truncated */
+ paddw mm6,mm5 /* Truncated */
+
+ psubsw mm4,mm6 /* op [3] */
+ movq mm6,mm4
+
+ psrlw mm4,15
+ paddw mm4,mm6
+
+ psraw mm4,1
+ movq OL(3),mm4 /* Save Op[3] */
+
+ movq mm4,mm3 /* irot_input_y */
+ movq mm6,mm7 /* irot_input_x */
+
+ pmulhw mm3,mm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw mm7,mm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw mm4,mm2 /* Trucated */
+ paddw mm6,mm5 /* Trucated */
+
+ paddw mm3,mm4 /* mm3 = xC5S3 * irot_input_x */
+ paddw mm7,mm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw mm3,mm7 /* Op[5] */
+ movq mm7,mm3
+
+ psrlw mm3,15
+ paddw mm3,mm7
+
+ psraw mm3,1
+ movq OH(1),mm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT */
+/*---------------------------------------------------------*/
+
+
+ }/* end of _asm code section */
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
new file mode 100644
index 00000000..13a67fa7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
@@ -0,0 +1,810 @@
+/****************************************************************************
+ *
+ * Module Title : Fdctwmt.c
+ *
+ * Description : Forward DCT optimized specifically for Intel P4
+ * processor
+ *
+ * AUTHOR : YaoWu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.00 YWX 03/11/02 Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+
+
+__declspec(align(16)) static unsigned short TIRY[8]; /* 16-byte spill slot: fdct_WMT stores (is07 - is34) here with movdqa and reloads it as irot_input_y. NOTE(review): file-scope, so every call shares it */
+
+__declspec(align(16)) static unsigned short WmtIdctConst[8 * 8] = /* row i is read by the asm as C(i) = [edx + 16 * i]: one 16-bit multiplier replicated in all 8 lanes */
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, /* C(0): keeps row index equal to i; not read in the part of fdct_WMT reviewed */
+ 64277,64277,64277,64277,64277,64277,64277,64277, /* C(1) xC1S7 ~ cos(1*pi/16) * 65536; >= 32768, so pmulhw treats it as c - 65536 and the asm adds the input back */
+ 60547,60547,60547,60547,60547,60547,60547,60547, /* C(2) xC2S6 ~ cos(2*pi/16) * 65536; input added back after pmulhw */
+ 54491,54491,54491,54491,54491,54491,54491,54491, /* C(3) xC3S5 ~ cos(3*pi/16) * 65536; input added back after pmulhw */
+ 46341,46341,46341,46341,46341,46341,46341,46341, /* C(4) xC4S4 ~ cos(4*pi/16) * 65536; input added back after pmulhw */
+ 36410,36410,36410,36410,36410,36410,36410,36410, /* C(5) xC5S3 ~ cos(5*pi/16) * 65536; input added back after pmulhw */
+ 25080,25080,25080,25080,25080,25080,25080,25080, /* C(6) xC6S2 ~ cos(6*pi/16) * 65536; < 32768, so the pmulhw result is used without add-back */
+ 12785,12785,12785,12785,12785,12785,12785,12785 /* C(7) xC7S1 ~ cos(7*pi/16) * 65536; < 32768, so the pmulhw result is used without add-back */
+};
+
+
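+/**************************************************************************************
+ *
+ * Routine: fdct_WMT
+ *
+ * Description: Forward DCT on an 8x8 block of 16-bit values using SSE2 (xmm) inline assembly.
+ *
+ * Input: InputData - 8 rows of 8 shorts; overwritten in place (doubled and transposed).
+ *
+ * Output: OutputData - 8 rows of 8 shorts written by the transform.
+ *
+ * Return: None
+ *
+ * Special Note: Both buffers are accessed with movdqa, so they must be 16-byte aligned.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */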
+void fdct_WMT(short *InputData, short *OutputData)
+{
+
+ __asm
+ {
+ mov eax, InputData
+ mov ebx, OutputData
+ lea edx, WmtIdctConst
+
+#define I(i) [eax + 16 * i ]
+#define O(i) [ebx + 16 * i ]
+#define C(i) [edx + 16 * i ]
+
+/******************************************************/
+/* Do 8x8 Transpose */
+/******************************************************/
+
+ movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */
+ movdqa xmm0, I(5) /* xmm4=f7f6f5f4f3f2f1f0 */
+
+ psllw xmm4, 1
+ psllw xmm0, 1
+
+ movdqa xmm5, xmm4 /* make a copy */
+ punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */
+
+ punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */
+ movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */
+
+ movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */
+
+ psllw xmm6, 1
+ psllw xmm0, 1
+
+ movdqa xmm7, xmm6 /* make a copy */
+
+ punpcklwd xmm6, xmm0 /* xmm6=h3g3h3g2h1g1h0g0 */
+ punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */
+
+ movdqa xmm3, xmm4 /* make a copy */
+ punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */
+
+ punpckhdq xmm3, xmm6 /* xmm3=h3g3g3e3h2g2f2e2 */
+ movdqa I(6), xmm3 /* save h3g3g3e3h2g2f2e2 */
+ /* Free xmm6 */
+ movdqa xmm6, xmm5 /* make a copy */
+ punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */
+
+ punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */
+ movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */
+ /* Free xmm7 */
+ movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */
+
+ psllw xmm0, 1
+ psllw xmm1, 1
+
+ movdqa xmm7, xmm0 /* make a copy */
+
+ punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */
+ punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */
+ /* Free xmm1 */
+ movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */
+ movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */
+
+ psllw xmm2, 1
+ psllw xmm3, 1
+
+ movdqa xmm1, xmm2 /* make a copy */
+ punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */
+
+ punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */
+ movdqa xmm3, xmm0 /* make a copy */
+
+ punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */
+ punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */
+ /* Free xmm2 */
+ movdqa xmm2, xmm7 /* make a copy */
+ punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */
+
+ punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */
+ movdqa xmm1, xmm0 /* make a copy */
+
+ punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */
+ punpckhqdq xmm1, xmm4 /* xmm1=h1g1g1e1d1c1b1a1 */
+
+ movdqa I(0), xmm0 /* save I(0) */
+ movdqa I(1), xmm1 /* save I(1) */
+
+ movdqa xmm0, I(6) /* load h3g3g3e3h2g2f2e2 */
+ movdqa xmm1, xmm3 /* make a copy */
+
+ punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */
+ punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */
+
+ movdqa xmm4, xmm2 /* make a copy */
+ punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */
+
+ punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */
+ movdqa I(2), xmm1 /* save I(2) */
+
+ movdqa I(3), xmm3 /* save I(3) */
+ movdqa I(4), xmm4 /* save I(4) */
+
+ movdqa I(5), xmm2 /* save I(5) */
+ movdqa xmm5, xmm7 /* make a copy */
+
+ punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */
+ punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */
+
+ movdqa I(6), xmm5 /* save I(6) */
+ movdqa I(7), xmm7 /* save I(7) */
+
+/******************************************************/
+/* Done with transpose - Let's do the forward DCT */
+/******************************************************/
+
+ movdqa xmm0, I(0) /* xmm0 = ip0 */
+ movdqa xmm1, I(1) /* xmm1 = ip1 */
+
+ movdqa xmm2, I(3) /* xmm2 = ip3 */
+ movdqa xmm3, I(5) /* xmm3 = ip5 */
+
+ movdqa xmm4, xmm0 /* xmm4 = ip0 */
+ movdqa xmm5, xmm1 /* xmm5 = ip1 */
+
+ movdqa xmm6, xmm2 /* xmm6 = ip3 */
+ movdqa xmm7, xmm3 /* xmm7 = ip5 */
+
+ paddsw xmm0, I(7) /* xmm0 = ip0 + ip7 */
+ paddsw xmm1, I(2) /* xmm1 = ip1 + ip2 */
+
+ paddsw xmm2, I(4) /* xmm2 = ip3 + ip4 */
+ paddsw xmm3, I(6) /* xmm3 = ip5 + ip6 */
+
+ psubsw xmm4, I(7) /* xmm4 = ip0 - ip7 */
+ psubsw xmm5, I(2) /* xmm5 = ip1 - ip2 */
+
+ psubsw xmm0, xmm2 /* xmm0 = is07 - is34 */
+ paddsw xmm2, xmm2 /* xmm2 = is34 * 2 */
+
+ psubsw xmm6, I(4) /* xmm6 = ip3 - ip4 */
+ paddsw xmm2, xmm0 /* xmm2 = is07 + is34 */
+
+ psubsw xmm1, xmm3 /* xmm1 = is12 - is56 */
+ movdqa TIRY, xmm0 /* save is07-is34 */
+
+ paddsw xmm3, xmm3 /* xmm3 = is56 * 2 */
+ paddsw xmm3, xmm1 /* xmm3 = is12 + is56 */
+
+ psubsw xmm7, I(6) /* xmm7 = ip5 -ip6 */
+ psubsw xmm5, xmm7 /* xmm5 = id12 - id56 */
+
+ paddsw xmm7, xmm7 /* xmm7 = id56 * 2 */
+ paddsw xmm7, xmm5 /* xmm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+ psubsw xmm2, xmm3 /* xmm2 = is0734 - is1256 */
+ paddsw xmm3, xmm3 /* xmm3 = is1256 * 2 */
+
+ movdqa xmm0, xmm2 /* xmm0 = is0734 - is1256 */
+ paddsw xmm3, xmm2 /* xmm3 = is0734 + is1256 */
+
+ pmulhw xmm0, C(4) /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw xmm0, xmm2 /* xmm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw xmm2, 15
+ paddw xmm0, xmm2 /* Truncate xmm0, now it is op[4] */
+
+ movdqa xmm2, xmm3 /* xmm2 = is0734 + is1256 */
+ movdqa O(4), xmm0 /* op4, now xmm0,xmm2 are free */
+
+ movdqa xmm0, xmm3 /* xmm0 = is0734 + is1256 */
+ pmulhw xmm3, C(4) /* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw xmm2, 15
+ paddw xmm3, xmm0 /* xmm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw xmm3, xmm2 /* Truncate xmm3, now it is op[0] */
+ movdqa O(0), xmm3 /* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ movdqa xmm3, TIRY /* xmm3 = irot_input_y */
+ pmulhw xmm3, C(2) /* xmm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movdqa xmm2, TIRY /* xmm2 = irot_input_y */
+ movdqa xmm0, xmm2 /* xmm0 = irot_input_y */
+
+ psrlw xmm2, 15
+ paddw xmm3, xmm0 /* xmm3 = xC2S6 * irot_input_y */
+
+ paddw xmm3, xmm2 /* Truncated */
+ movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
+
+
+ movdqa xmm2, xmm5 /* xmm2 = id12 - id56 */
+ pmulhw xmm0, C(6) /* xmm0 = xC6S2 * irot_input_x */
+
+ psrlw xmm2, 15
+ paddw xmm0, xmm2 /* Truncated */
+
+ paddsw xmm3, xmm0 /* op[2] */
+ movdqa O(2), xmm3 /* save op[2] */
+
+
+ movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
+ movdqa xmm2, xmm5 /* xmm0 = id12 - id56 */
+
+ pmulhw xmm5, C(2) /* xmm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw xmm2, 15
+
+ movdqa xmm3, TIRY /* xmm3 = irot_input_y */
+ paddw xmm5, xmm0 /* xmm5 = xC2S6 * irot_input_x */
+
+ paddw xmm5, xmm2 /* Truncated */
+ movdqa xmm2, xmm3 /* xmm2 = irot_input_y */
+
+ pmulhw xmm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw xmm2, 15
+
+ paddw xmm3, xmm2 /* Truncated */
+ psubsw xmm3, xmm5 /* xmm3 = op[6] */
+
+ movdqa O(6), xmm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movdqa xmm0, C(4) /* xmm0 = xC4s4 */
+ movdqa xmm2, xmm1 /* xmm2 = is12 - is56 */
+
+ movdqa xmm3, xmm1 /* xmm3 = is12 - is56 */
+ pmulhw xmm1, xmm0 /* xmm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw xmm2, 15
+ paddw xmm1, xmm3 /* xmm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw xmm1, xmm2 /* Truncate xmm1, now it is icommon_product1 */
+ movdqa xmm2, xmm7 /* xmm2 = id12 + id56 */
+
+ movdqa xmm3, xmm7 /* xmm3 = id12 + id56 */
+ pmulhw xmm7, xmm0 /* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw xmm2, 15 /* For trucation */
+ paddw xmm7, xmm3 /* xmm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw xmm7, xmm2 /* Truncate xmm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor xmm0, xmm0 /* Clear xmm0 */
+ psubsw xmm0, xmm6 /* xmm0 = - id34 */
+
+ psubsw xmm0, xmm7 /* xmm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw xmm6, xmm6 /* xmm6 = id34 * 2 */
+
+ paddsw xmm6, xmm0 /* xmm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw xmm4, xmm1 /* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw xmm1, xmm1 /* xmm1 = icommon_product1 * 2 */
+ paddsw xmm1, xmm4 /* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7 */
+/*---------------------------------------------------------*/
+
+ movdqa xmm7, C(1) /* xC1S7 */
+ movdqa xmm2, xmm1 /* xmm2 = irot_input_x */
+
+ movdqa xmm3, xmm1; /* xmm3 = irot_input_x */
+ pmulhw xmm1, xmm7 /* xmm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movdqa xmm7, C(7) /* xC7S1 */
+ psrlw xmm2, 15 /* for trucation */
+
+ paddw xmm1, xmm3 /* xmm1 = xC1S7 * irot_input_x */
+ paddw xmm1, xmm2 /* Trucated */
+
+ pmulhw xmm3, xmm7 /* xmm3 = xC7S1 * irot_input_x */
+ paddw xmm3, xmm2 /* Truncated */
+
+ movdqa xmm5, xmm0 /* xmm5 = irot_input_y */
+ movdqa xmm2, xmm0 /* xmm2 = irot_input_y */
+
+ movdqa xmm7, C(1) /* xC1S7 */
+ pmulhw xmm0, xmm7 /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movdqa xmm7, C(7) /* xC7S1 */
+ psrlw xmm2, 15 /* for trucation */
+
+ paddw xmm0, xmm5 /* xmm0 = xC1S7 * irot_input_y */
+ paddw xmm0, xmm2 /* Truncated */
+
+ pmulhw xmm5, xmm7 /* xmm5 = xC7S1 * irot_input_y */
+ paddw xmm5, xmm2 /* Truncated */
+
+ psubsw xmm1, xmm5 /* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw xmm3, xmm0 /* xmm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movdqa O(1), xmm1
+ movdqa O(7), xmm3
+/*---------------------------------------------------------*/
+/* op3 and op5 */
+/*---------------------------------------------------------*/
+ movdqa xmm0, C(3) /* xC3S5 */
+ movdqa xmm1, C(5) /* xC5S3 */
+
+ movdqa xmm5,xmm6 /* irot_input_x */
+ movdqa xmm7,xmm6 /* irot_input_x */
+
+ movdqa xmm2,xmm4 /* irot_input_y */
+ movdqa xmm3,xmm4 /* irot_input_y */
+
+ pmulhw xmm4,xmm0 /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw xmm6,xmm1 /* xmm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw xmm2,15 /* for trucation */
+ psrlw xmm5,15 /* for trucation */
+
+ paddw xmm4,xmm3 /* xmm4 = xC3S5 * irot_input_x */
+ paddw xmm6,xmm7 /* xmm6 = xC5S3 * irot_input_y */
+
+ paddw xmm4,xmm2 /* Truncated */
+ paddw xmm6,xmm5 /* Truncated */
+
+ psubsw xmm4,xmm6 /* op [3] */
+ movdqa O(3),xmm4 /* Save Op[3] */
+
+ movdqa xmm4,xmm3 /* irot_input_y */
+ movdqa xmm6,xmm7 /* irot_input_x */
+
+ pmulhw xmm3,xmm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw xmm7,xmm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw xmm4,xmm2 /* Trucated */
+ paddw xmm6,xmm5 /* Trucated */
+
+ paddw xmm3,xmm4 /* xmm3 = xC5S3 * irot_input_x */
+ paddw xmm7,xmm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw xmm3,xmm7 /* Op[5] */
+ movdqa O(5),xmm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 8 1-D FDCT */
+/*---------------------------------------------------------*/
+#undef I
+#undef O
+#define I(i) [ebx + 16 * i ]
+#define O(i) [ebx + 16 * i ]
+
+/******************************************************/
+/* Do 8x8 Transpose */
+/******************************************************/
+
+ movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */
+ movdqa xmm0, I(5) /* xmm4=f7f6f5f4f3f2f1f0 */
+
+ movdqa xmm5, xmm4 /* make a copy */
+ punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */
+
+ punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */
+ movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */
+
+ movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */
+ movdqa xmm7, xmm6 /* make a copy */
+
+ punpcklwd xmm6, xmm0 /* xmm6=h3g3h3g2h1g1h0g0 */
+ punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */
+
+ movdqa xmm3, xmm4 /* make a copy */
+ punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */
+
+ punpckhdq xmm3, xmm6 /* xmm3=h3g3g3e3h2g2f2e2 */
+ movdqa I(6), xmm3 /* save h3g3g3e3h2g2f2e2 */
+ /* Free xmm6 */
+ movdqa xmm6, xmm5 /* make a copy */
+ punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */
+
+ punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */
+ movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */
+ /* Free xmm7 */
+ movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */
+ movdqa xmm7, xmm0 /* make a copy */
+
+ punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */
+ punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */
+ /* Free xmm1 */
+ movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */
+ movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */
+
+ movdqa xmm1, xmm2 /* make a copy */
+ punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */
+
+ punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */
+ movdqa xmm3, xmm0 /* make a copy */
+
+ punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */
+ punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */
+ /* Free xmm2 */
+ movdqa xmm2, xmm7 /* make a copy */
+ punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */
+
+ punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */
+ movdqa xmm1, xmm0 /* make a copy */
+
+ punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */
+ punpckhqdq xmm1, xmm4 /* xmm1=h1g1g1e1d1c1b1a1 */
+
+ movdqa I(0), xmm0 /* save I(0) */
+ movdqa I(1), xmm1 /* save I(1) */
+
+ movdqa xmm0, I(6) /* load h3g3g3e3h2g2f2e2 */
+ movdqa xmm1, xmm3 /* make a copy */
+
+ punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */
+ punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */
+
+ movdqa xmm4, xmm2 /* make a copy */
+ punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */
+
+ punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */
+ movdqa I(2), xmm1 /* save I(2) */
+
+ movdqa I(3), xmm3 /* save I(3) */
+ movdqa I(4), xmm4 /* save I(4) */
+
+ movdqa I(5), xmm2 /* save I(5) */
+ movdqa xmm5, xmm7 /* make a copy */
+
+ punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */
+ punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */
+
+ movdqa I(6), xmm5 /* save I(6) */
+ movdqa I(7), xmm7 /* save I(7) */
+
+/******************************************************/
+/* Done with transpose - Let's do the forward DCT */
+/******************************************************/
+
+ movdqa xmm0, I(0) /* xmm0 = ip0 */
+ movdqa xmm1, I(1) /* xmm1 = ip1 */
+
+ movdqa xmm2, I(3) /* xmm2 = ip3 */
+ movdqa xmm3, I(5) /* xmm3 = ip5 */
+
+ movdqa xmm4, xmm0 /* xmm4 = ip0 */
+ movdqa xmm5, xmm1 /* xmm5 = ip1 */
+
+ movdqa xmm6, xmm2 /* xmm6 = ip3 */
+ movdqa xmm7, xmm3 /* xmm7 = ip5 */
+
+ paddsw xmm0, I(7) /* xmm0 = ip0 + ip7 */
+ paddsw xmm1, I(2) /* xmm1 = ip1 + ip2 */
+
+ paddsw xmm2, I(4) /* xmm2 = ip3 + ip4 */
+ paddsw xmm3, I(6) /* xmm3 = ip5 + ip6 */
+
+ psubsw xmm4, I(7) /* xmm4 = ip0 - ip7 */
+ psubsw xmm5, I(2) /* xmm5 = ip1 - ip2 */
+
+ psubsw xmm0, xmm2 /* xmm0 = is07 - is34 */
+ paddsw xmm2, xmm2 /* xmm2 = is34 * 2 */
+
+ psubsw xmm6, I(4) /* xmm6 = ip3 - ip4 */
+ paddsw xmm2, xmm0 /* xmm2 = is07 + is34 */
+
+ psubsw xmm1, xmm3 /* xmm1 = is12 - is56 */
+ movdqa TIRY, xmm0 /* save is07-is34 */
+
+ paddsw xmm3, xmm3 /* xmm3 = is56 * 2 */
+ paddsw xmm3, xmm1 /* xmm3 = is12 + is56 */
+
+ psubsw xmm7, I(6) /* xmm7 = ip5 -ip6 */
+ psubsw xmm5, xmm7 /* xmm5 = id12 - id56 */
+
+ paddsw xmm7, xmm7 /* xmm7 = id56 * 2 */
+ paddsw xmm7, xmm5 /* xmm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4
+/*---------------------------------------------------------*/
+#if 0
+ movdqa xmm0, xmm2 /* xmm0 =xmm2= is0734 */
+ pmulhw xmm2, C(4) /* xC4S4 * is0734 - is0734 */
+
+ paddw xmm2, xmm0 /* XC4S4 * is0734 */
+ movdqa xmm0, xmm3 /* xmm0 =xmm3= is1256 */
+
+ pmulhw xmm3, C(4) /* xC4S4 * is1256 - is1256 */
+ paddw xmm3, xmm0 /* xC4S4 * is1256 */
+
+
+ movdqa xmm0, xmm2
+ paddsw xmm2, xmm3 /* xC4S4 * ( is0734 +is1256 ) */
+
+ psubsw xmm0, xmm3 /* xC4S4 * ( is0734 -is1256 ) */
+ movdqa xmm3, xmm2
+
+ psrlw xmm2, 15
+ paddsw xmm3, xmm2
+
+ movdqa xmm2, xmm0
+ movdqa O(0), xmm3
+
+ psrlw xmm0, 15
+ paddsw xmm2, xmm0
+
+ movdqa O(4), xmm2
+
+
+#else
+
+
+ psubsw xmm2, xmm3 /* xmm2 = is0734 - is1256 */
+ paddsw xmm3, xmm3 /* xmm3 = is1256 * 2 */
+
+ movdqa xmm0, xmm2 /* xmm0 = is0734 - is1256 */
+ paddsw xmm3, xmm2 /* xmm3 = is0734 + is1256 */
+
+ pmulhw xmm0, C(4) /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw xmm0, xmm2 /* xmm0 = xC4S4 * ( is0734 - is1256 ) */
+
+ psrlw xmm2, 15
+ paddw xmm0, xmm2 /* Truncate xmm0, now it is op[4] */
+
+ movdqa xmm2, xmm0
+ psrlw xmm0, 15
+
+ paddw xmm0, xmm2
+ psraw xmm0, 1
+
+ movdqa O(4), xmm0 /* op4, now xmm0,xmm2 are free */
+ movdqa xmm2, xmm3 /* xmm2 = is0734 + is1256 */
+
+
+ movdqa xmm0, xmm3 /* xmm0 = is0734 + is1256 */
+ pmulhw xmm3, C(4) /* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+
+ psrlw xmm2, 15
+ paddw xmm3, xmm0 /* xmm3 = xC4S4 * ( is0734 +is1256 ) */
+
+ paddw xmm3, xmm2 /* Truncate xmm3, now it is op[0] */
+ movdqa xmm2, xmm3
+
+ psrlw xmm3, 15
+ paddw xmm3, xmm2
+
+ psraw xmm3, 1
+ movdqa O(0), xmm3 /* save op0 */
+#endif
+/*---------------------------------------------------------*/
+/* op2 and op6
+/*---------------------------------------------------------*/
+ movdqa xmm3, TIRY /* xmm3 = irot_input_y */
+ pmulhw xmm3, C(2) /* xmm3 = xC2S6 * irot_input_y - irot_input_y */
+
+ movdqa xmm2, TIRY /* xmm2 = irot_input_y */
+ movdqa xmm0, xmm2 /* xmm0 = irot_input_y */
+
+ psrlw xmm2, 15
+ paddw xmm3, xmm0 /* xmm3 = xC2S6 * irot_input_y */
+
+ paddw xmm3, xmm2 /* Truncated */
+ movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
+
+
+ movdqa xmm2, xmm5 /* xmm2 = id12 - id56 */
+ pmulhw xmm0, C(6) /* xmm0 = xC6S2 * irot_input_x */
+
+ psrlw xmm2, 15
+ paddw xmm0, xmm2 /* Truncated */
+
+ paddsw xmm3, xmm0 /* op[2] */
+ movdqa xmm0, xmm3
+
+ psrlw xmm3, 15
+ paddw xmm3, xmm0
+
+ psraw xmm3, 1
+ movdqa O(2), xmm3 /* save op[2] */
+
+
+ movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
+ movdqa xmm2, xmm5 /* xmm0 = id12 - id56 */
+
+ pmulhw xmm5, C(2) /* xmm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw xmm2, 15
+
+ movdqa xmm3, TIRY /* xmm3 = irot_input_y */
+ paddw xmm5, xmm0 /* xmm5 = xC2S6 * irot_input_x */
+
+ paddw xmm5, xmm2 /* Truncated */
+ movdqa xmm2, xmm3 /* xmm2 = irot_input_y */
+
+ pmulhw xmm3, C(6) /* mm3 = xC6S2 * irot_input_y */
+ psrlw xmm2, 15
+
+ paddw xmm3, xmm2 /* Truncated */
+ psubsw xmm3, xmm5 /* xmm3 = op[6] */
+
+ movdqa xmm5, xmm3
+ psrlw xmm3, 15
+
+ paddw xmm3, xmm5
+ psraw xmm3, 1
+
+ movdqa O(6), xmm3
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2 */
+/*-----------------------------------------------------------------------*/
+ movdqa xmm0, C(4) /* xmm0 = xC4s4 */
+ movdqa xmm2, xmm1 /* xmm2 = is12 - is56 */
+
+ movdqa xmm3, xmm1 /* xmm3 = is12 - is56 */
+ pmulhw xmm1, xmm0 /* xmm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+
+ psrlw xmm2, 15
+ paddw xmm1, xmm3 /* xmm1 = xC4S4 * ( is12 - is56 ) */
+
+ paddw xmm1, xmm2 /* Truncate xmm1, now it is icommon_product1 */
+ movdqa xmm2, xmm7 /* xmm2 = id12 + id56 */
+
+ movdqa xmm3, xmm7 /* xmm3 = id12 + id56 */
+ pmulhw xmm7, xmm0 /* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+
+ psrlw xmm2, 15 /* For trucation */
+ paddw xmm7, xmm3 /* xmm7 = xC4S4 * ( id12 + id56 ) */
+
+ paddw xmm7, xmm2 /* Truncate xmm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+ pxor xmm0, xmm0 /* Clear xmm0 */
+ psubsw xmm0, xmm6 /* xmm0 = - id34 */
+
+ psubsw xmm0, xmm7 /* xmm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+ paddsw xmm6, xmm6 /* xmm6 = id34 * 2 */
+
+ paddsw xmm6, xmm0 /* xmm6 = id34 - icommon_product2 = irot_input_x for 35 */
+ psubsw xmm4, xmm1 /* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+ paddsw xmm1, xmm1 /* xmm1 = icommon_product1 * 2 */
+ paddsw xmm1, xmm4 /* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7
+/*---------------------------------------------------------*/
+
+ movdqa xmm7, C(1) /* xC1S7 */
+ movdqa xmm2, xmm1 /* xmm2 = irot_input_x */
+
+ movdqa xmm3, xmm1; /* xmm3 = irot_input_x */
+ pmulhw xmm1, xmm7 /* xmm1 = xC1S7 * irot_input_x - irot_input_x */
+
+ movdqa xmm7, C(7) /* xC7S1 */
+ psrlw xmm2, 15 /* for trucation */
+
+ paddw xmm1, xmm3 /* xmm1 = xC1S7 * irot_input_x */
+ paddw xmm1, xmm2 /* Trucated */
+
+ pmulhw xmm3, xmm7 /* xmm3 = xC7S1 * irot_input_x */
+ paddw xmm3, xmm2 /* Truncated */
+
+ movdqa xmm5, xmm0 /* xmm5 = irot_input_y */
+ movdqa xmm2, xmm0 /* xmm2 = irot_input_y */
+
+ movdqa xmm7, C(1) /* xC1S7 */
+ pmulhw xmm0, xmm7 /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
+
+ movdqa xmm7, C(7) /* xC7S1 */
+ psrlw xmm2, 15 /* for trucation */
+
+ paddw xmm0, xmm5 /* xmm0 = xC1S7 * irot_input_y */
+ paddw xmm0, xmm2 /* Truncated */
+
+ pmulhw xmm5, xmm7 /* xmm5 = xC7S1 * irot_input_y */
+ paddw xmm5, xmm2 /* Truncated */
+
+ psubsw xmm1, xmm5 /* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+ paddsw xmm3, xmm0 /* xmm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+ movdqa xmm5, xmm1
+ movdqa xmm0, xmm3
+
+ psrlw xmm1, 15
+ psrlw xmm3, 15
+
+ paddw xmm1, xmm5
+ paddw xmm3, xmm0
+
+ psraw xmm1, 1
+ psraw xmm3, 1
+
+
+ movdqa O(1), xmm1
+ movdqa O(7), xmm3
+/*---------------------------------------------------------*/
+/* op3 and op5
+/*---------------------------------------------------------*/
+ movdqa xmm0, C(3) /* xC3S5 */
+ movdqa xmm1, C(5) /* xC5S3 */
+
+ movdqa xmm5,xmm6 /* irot_input_x */
+ movdqa xmm7,xmm6 /* irot_input_x */
+
+ movdqa xmm2,xmm4 /* irot_input_y */
+ movdqa xmm3,xmm4 /* irot_input_y */
+
+ pmulhw xmm4,xmm0 /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw xmm6,xmm1 /* xmm6 = xC5S3 * irot_input_y - irot_input_y */
+
+ psrlw xmm2,15 /* for trucation */
+ psrlw xmm5,15 /* for trucation */
+
+ paddw xmm4,xmm3 /* xmm4 = xC3S5 * irot_input_x */
+ paddw xmm6,xmm7 /* xmm6 = xC5S3 * irot_input_y */
+
+ paddw xmm4,xmm2 /* Truncated */
+ paddw xmm6,xmm5 /* Truncated */
+
+ psubsw xmm4,xmm6 /* op [3] */
+ movdqa xmm6,xmm4
+
+ psrlw xmm4,15
+ paddw xmm4,xmm6
+
+ psraw xmm4,1
+ movdqa O(3),xmm4 /* Save Op[3] */
+
+ movdqa xmm4,xmm3 /* irot_input_y */
+ movdqa xmm6,xmm7 /* irot_input_x */
+
+ pmulhw xmm3,xmm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw xmm7,xmm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+ paddw xmm4,xmm2 /* Trucated */
+ paddw xmm6,xmm5 /* Trucated */
+
+ paddw xmm3,xmm4 /* xmm3 = xC5S3 * irot_input_x */
+ paddw xmm7,xmm6 /* mm7 = xC3S5 * irot_input_y */
+
+ paddw xmm3,xmm7 /* Op[5] */
+ movdqa xmm7,xmm3
+
+ psrlw xmm3,15
+ paddw xmm3,xmm7
+
+ psraw xmm3,1
+ movdqa O(5),xmm3 /* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 8 1-D FDCT */
+/*---------------------------------------------------------*/
+
+ }/* end of _asm code section */
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
new file mode 100644
index 00000000..f424c13e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
@@ -0,0 +1,1053 @@
+/****************************************************************************
+ *
+ * Module Title : filtmmx.c
+ *
+ * Description : Codec specific functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.02 YWX 03-Nov-00 Changed confusing variable name
+ * 1.01 YWX 02-Nov-00 Added the set of functions
+ * 1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+
+#define STRICT /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* NOTE: the selected argument is evaluated twice; do not pass expressions with side effects */
+#define FILTER_WEIGHT 128 /* fixed-point scale: the taps in every row of the filter tables in this file sum to this value */
+#define FILTER_SHIFT 7 /* log2(FILTER_WEIGHT); arithmetic right shift that removes the scale after filtering */
+
+extern void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine); /* declared here only; the definition is not in this file chunk */
+
+static __declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64}; /* rounding term, FILTER_WEIGHT/2 in each 16-bit lane; added to the tap sum before the FILTER_SHIFT shift */
+
+
+__declspec(align(16)) INT16 BilinearFilters_mmx[8][16] = /* 2-tap weights: row i holds (128 - 16*i) repeated 8 times, then (16*i) repeated 8 times, so the two taps always sum to 128; the 8-fold repetition presumably gives one weight per 16-bit SIMD lane -- TODO confirm against the users of this table */
+{
+{ 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0 },
+{ 112,112,112,112,112,112,112,112, 16, 16, 16, 16, 16, 16, 16, 16 },
+{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
+{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
+{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
+{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
+{ 16, 16, 16, 16, 16, 16, 16, 16, 112,112,112,112,112,112,112,112 }
+};
+
+__declspec(align(16)) INT16 BicubicFilters_mmx[17][8][32] = /* 4-tap kernels laid out as [17 kernel sets][8 rows][4 taps, each repeated 8 times]; the four taps of every row sum to 128 and row 0 of every set is the pass-through kernel {0,128,0,0}; rows presumably index the fractional sample position -- TODO confirm against the callers */
+{
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 122,122,122,122,122,122,122,122, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 109,109,109,109,109,109,109,109, 24, 24, 24, 24, 24, 24, 24, 24, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 91, 91, 91, 91, 91, 91, 91, 91, 45, 45, 45, 45, 45, 45, 45, 45, -3, -3, -3, -3, -3, -3, -3, -3, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 45, 45, 45, 45, 45, 45, 45, 45, 91, 91, 91, 91, 91, 91, 91, 91, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 24, 24, 24, 24, 24, 24, 24, 24, 109,109,109,109,109,109,109,109, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 122,122,122,122,122,122,122,122, -3, -3, -3, -3, -3, -3, -3, -3, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 124,124,124,124,124,124,124,124, 9, 9, 9, 9, 9, 9, 9, 9, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 110,110,110,110,110,110,110,110, 25, 25, 25, 25, 25, 25, 25, 25, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 91, 91, 91, 91, 91, 91, 91, 91, 46, 46, 46, 46, 46, 46, 46, 46, -3, -3, -3, -3, -3, -3, -3, -3, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 46, 46, 46, 46, 46, 46, 46, 46, 91, 91, 91, 91, 91, 91, 91, 91, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 25, 25, 25, 25, 25, 25, 25, 25, 110,110,110,110,110,110,110,110, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 9, 9, 9, 9, 9, 9, 9, 9, 124,124,124,124,124,124,124,124, -4, -4, -4, -4, -4, -4, -4, -4, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 123,123,123,123,123,123,123,123, 10, 10, 10, 10, 10, 10, 10, 10, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 110,110,110,110,110,110,110,110, 26, 26, 26, 26, 26, 26, 26, 26, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 92, 92, 92, 92, 92, 92, 92, 92, 47, 47, 47, 47, 47, 47, 47, 47, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 47, 47, 47, 47, 47, 47, 47, 47, 92, 92, 92, 92, 92, 92, 92, 92, -7, -7, -7, -7, -7, -7, -7, -7, },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 26, 26, 26, 26, 26, 26, 26, 26, 110,110,110,110,110,110,110,110, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 10, 10, 10, 10, 10, 10, 10, 10, 123,123,123,123,123,123,123,123, -4, -4, -4, -4, -4, -4, -4, -4, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 124,124,124,124,124,124,124,124, 10, 10, 10, 10, 10, 10, 10, 10, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 110,110,110,110,110,110,110,110, 27, 27, 27, 27, 27, 27, 27, 27, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 91, 91, 91, 91, 91, 91, 91, 91, 48, 48, 48, 48, 48, 48, 48, 48, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 48, 48, 48, 48, 48, 48, 48, 48, 92, 92, 92, 92, 92, 92, 92, 92, -8, -8, -8, -8, -8, -8, -8, -8, }, /* NOTE(review): not the mirror of the row two lines up (92/-8 here vs 91/-7 there); still sums to 128, presumably deliberate -- confirm against the reference tables before changing */
+ { -2, -2, -2, -2, -2, -2, -2, -2, 27, 27, 27, 27, 27, 27, 27, 27, 110,110,110,110,110,110,110,110, -7, -7, -7, -7, -7, -7, -7, -7, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 10, 10, 10, 10, 10, 10, 10, 10, 124,124,124,124,124,124,124,124, -5, -5, -5, -5, -5, -5, -5, -5, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 124,124,124,124,124,124,124,124, 11, 11, 11, 11, 11, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 111,111,111,111,111,111,111,111, 28, 28, 28, 28, 28, 28, 28, 28, -3, -3, -3, -3, -3, -3, -3, -3, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 92, 92, 92, 92, 92, 92, 92, 92, 49, 49, 49, 49, 49, 49, 49, 49, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, -7, -7, -7, -7, -7, -7, -7, -7, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 49, 49, 49, 49, 49, 49, 49, 49, 92, 92, 92, 92, 92, 92, 92, 92, -8, -8, -8, -8, -8, -8, -8, -8, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 28, 28, 28, 28, 28, 28, 28, 28, 111,111,111,111,111,111,111,111, -8, -8, -8, -8, -8, -8, -8, -8, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 11, 11, 11, 11, 11, 11, 11, 11, 124,124,124,124,124,124,124,124, -6, -6, -6, -6, -6, -6, -6, -6, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 123,123,123,123,123,123,123,123, 12, 12, 12, 12, 12, 12, 12, 12, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 111,111,111,111,111,111,111,111, 29, 29, 29, 29, 29, 29, 29, 29, -3, -3, -3, -3, -3, -3, -3, -3, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 93, 93, 93, 93, 93, 93, 93, 93, 50, 50, 50, 50, 50, 50, 50, 50, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, -8, -8, -8, -8, -8, -8, -8, -8, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 50, 50, 50, 50, 50, 50, 50, 50, 93, 93, 93, 93, 93, 93, 93, 93, -9, -9, -9, -9, -9, -9, -9, -9, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 29, 29, 29, 29, 29, 29, 29, 29, 111,111,111,111,111,111,111,111, -9, -9, -9, -9, -9, -9, -9, -9, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, 12, 12, 12, 12, 12, 12, 123,123,123,123,123,123,123,123, -6, -6, -6, -6, -6, -6, -6, -6, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 124,124,124,124,124,124,124,124, 12, 12, 12, 12, 12, 12, 12, 12, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 111,111,111,111,111,111,111,111, 30, 30, 30, 30, 30, 30, 30, 30, -3, -3, -3, -3, -3, -3, -3, -3, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 93, 93, 93, 93, 93, 93, 93, 93, 51, 51, 51, 51, 51, 51, 51, 51, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, -9, -9, -9, -9, -9, -9, -9, -9, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 51, 51, 51, 51, 51, 51, 51, 51, 93, 93, 93, 93, 93, 93, 93, 93, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -3, -3, -3, -3, -3, -3, -3, -3, 30, 30, 30, 30, 30, 30, 30, 30, 111,111,111,111,111,111,111,111, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, 12, 12, 12, 12, 12, 12, 124,124,124,124,124,124,124,124, -7, -7, -7, -7, -7, -7, -7, -7, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 123,123,123,123,123,123,123,123, 13, 13, 13, 13, 13, 13, 13, 13, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 112,112,112,112,112,112,112,112, 31, 31, 31, 31, 31, 31, 31, 31, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 94, 94, 94, 94, 94, 94, 94, 94, 52, 52, 52, 52, 52, 52, 52, 52, -7, -7, -7, -7, -7, -7, -7, -7, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 52, 52, 52, 52, 52, 52, 52, 52, 94, 94, 94, 94, 94, 94, 94, 94, -11,-11,-11,-11,-11,-11,-11,-11, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 31, 31, 31, 31, 31, 31, 31, 31, 112,112,112,112,112,112,112,112, -11,-11,-11,-11,-11,-11,-11,-11, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 13, 13, 13, 13, 13, 13, 13, 13, 123,123,123,123,123,123,123,123, -7, -7, -7, -7, -7, -7, -7, -7, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 124,124,124,124,124,124,124,124, 13, 13, 13, 13, 13, 13, 13, 13, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -12,-12,-12,-12,-12,-12,-12,-12, 112,112,112,112,112,112,112,112, 32, 32, 32, 32, 32, 32, 32, 32, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -12,-12,-12,-12,-12,-12,-12,-12, 94, 94, 94, 94, 94, 94, 94, 94, 53, 53, 53, 53, 53, 53, 53, 53, -7, -7, -7, -7, -7, -7, -7, -7, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 53, 53, 53, 53, 53, 53, 53, 53, 94, 94, 94, 94, 94, 94, 94, 94, -12,-12,-12,-12,-12,-12,-12,-12, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 32, 32, 32, 32, 32, 32, 32, 32, 112,112,112,112,112,112,112,112, -12,-12,-12,-12,-12,-12,-12,-12, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 13, 13, 13, 13, 13, 13, 13, 13, 124,124,124,124,124,124,124,124, -8, -8, -8, -8, -8, -8, -8, -8, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 124,124,124,124,124,124,124,124, 14, 14, 14, 14, 14, 14, 14, 14, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -13,-13,-13,-13,-13,-13,-13,-13, 112,112,112,112,112,112,112,112, 33, 33, 33, 33, 33, 33, 33, 33, -4, -4, -4, -4, -4, -4, -4, -4, },
+ { -13,-13,-13,-13,-13,-13,-13,-13, 95, 95, 95, 95, 95, 95, 95, 95, 54, 54, 54, 54, 54, 54, 54, 54, -8, -8, -8, -8, -8, -8, -8, -8, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, -11,-11,-11,-11,-11,-11,-11,-11, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 54, 54, 54, 54, 54, 54, 54, 54, 95, 95, 95, 95, 95, 95, 95, 95, -13,-13,-13,-13,-13,-13,-13,-13, },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 33, 33, 33, 33, 33, 33, 33, 33, 112,112,112,112,112,112,112,112, -13,-13,-13,-13,-13,-13,-13,-13, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 14, 14, 14, 14, 14, 14, 14, 14, 124,124,124,124,124,124,124,124, -9, -9, -9, -9, -9, -9, -9, -9, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 123,123,123,123,123,123,123,123, 15, 15, 15, 15, 15, 15, 15, 15, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -14,-14,-14,-14,-14,-14,-14,-14, 113,113,113,113,113,113,113,113, 34, 34, 34, 34, 34, 34, 34, 34, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -14,-14,-14,-14,-14,-14,-14,-14, 95, 95, 95, 95, 95, 95, 95, 95, 55, 55, 55, 55, 55, 55, 55, 55, -8, -8, -8, -8, -8, -8, -8, -8, },
+ { -12,-12,-12,-12,-12,-12,-12,-12, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, -12,-12,-12,-12,-12,-12,-12,-12, },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 55, 55, 55, 55, 55, 55, 55, 55, 95, 95, 95, 95, 95, 95, 95, 95, -14,-14,-14,-14,-14,-14,-14,-14, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 34, 34, 34, 34, 34, 34, 34, 34, 112,112,112,112,112,112,112,112, -13,-13,-13,-13,-13,-13,-13,-13, }, /* NOTE(review): not the mirror of the third row of this set (112/-13 here vs 113/-14 there); still sums to 128, presumably deliberate -- confirm against the reference tables before changing */
+ { -1, -1, -1, -1, -1, -1, -1, -1, 15, 15, 15, 15, 15, 15, 15, 15, 123,123,123,123,123,123,123,123, -9, -9, -9, -9, -9, -9, -9, -9, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 124,124,124,124,124,124,124,124, 15, 15, 15, 15, 15, 15, 15, 15, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -14,-14,-14,-14,-14,-14,-14,-14, 113,113,113,113,113,113,113,113, 34, 34, 34, 34, 34, 34, 34, 34, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -15,-15,-15,-15,-15,-15,-15,-15, 96, 96, 96, 96, 96, 96, 96, 96, 56, 56, 56, 56, 56, 56, 56, 56, -9, -9, -9, -9, -9, -9, -9, -9, },
+ { -13,-13,-13,-13,-13,-13,-13,-13, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, -13,-13,-13,-13,-13,-13,-13,-13, },
+ { -9, -9, -9, -9, -9, -9, -9, -9, 56, 56, 56, 56, 56, 56, 56, 56, 96, 96, 96, 96, 96, 96, 96, 96, -15,-15,-15,-15,-15,-15,-15,-15, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 34, 34, 34, 34, 34, 34, 34, 34, 113,113,113,113,113,113,113,113, -14,-14,-14,-14,-14,-14,-14,-14, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 15, 15, 15, 15, 15, 15, 15, 15, 124,124,124,124,124,124,124,124, -10,-10,-10,-10,-10,-10,-10,-10, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 123,123,123,123,123,123,123,123, 16, 16, 16, 16, 16, 16, 16, 16, -1, -1, -1, -1, -1, -1, -1, -1, },
+ { -15,-15,-15,-15,-15,-15,-15,-15, 113,113,113,113,113,113,113,113, 35, 35, 35, 35, 35, 35, 35, 35, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -16,-16,-16,-16,-16,-16,-16,-16, 98, 98, 98, 98, 98, 98, 98, 98, 56, 56, 56, 56, 56, 56, 56, 56, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -14,-14,-14,-14,-14,-14,-14,-14, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, -14,-14,-14,-14,-14,-14,-14,-14, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 56, 56, 56, 56, 56, 56, 56, 56, 98, 98, 98, 98, 98, 98, 98, 98, -16,-16,-16,-16,-16,-16,-16,-16, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 35, 35, 35, 35, 35, 35, 35, 35, 113,113,113,113,113,113,113,113, -15,-15,-15,-15,-15,-15,-15,-15, },
+ { -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 16, 16, 16, 16, 16, 16, 123,123,123,123,123,123,123,123, -10,-10,-10,-10,-10,-10,-10,-10, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 124,124,124,124,124,124,124,124, 17, 17, 17, 17, 17, 17, 17, 17, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -16,-16,-16,-16,-16,-16,-16,-16, 113,113,113,113,113,113,113,113, 36, 36, 36, 36, 36, 36, 36, 36, -5, -5, -5, -5, -5, -5, -5, -5, },
+ { -17,-17,-17,-17,-17,-17,-17,-17, 98, 98, 98, 98, 98, 98, 98, 98, 57, 57, 57, 57, 57, 57, 57, 57, -10,-10,-10,-10,-10,-10,-10,-10, },
+ { -14,-14,-14,-14,-14,-14,-14,-14, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, -14,-14,-14,-14,-14,-14,-14,-14, },
+ { -10,-10,-10,-10,-10,-10,-10,-10, 57, 57, 57, 57, 57, 57, 57, 57, 98, 98, 98, 98, 98, 98, 98, 98, -17,-17,-17,-17,-17,-17,-17,-17, },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 36, 36, 36, 36, 36, 36, 36, 36, 113,113,113,113,113,113,113,113, -16,-16,-16,-16,-16,-16,-16,-16, },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 17, 17, 17, 17, 17, 17, 17, 17, 124,124,124,124,124,124,124,124, -11,-11,-11,-11,-11,-11,-11,-11, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -12,-12,-12,-12,-12,-12,-12,-12, 125,125,125,125,125,125,125,125, 17, 17, 17, 17, 17, 17, 17, 17, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -17,-17,-17,-17,-17,-17,-17,-17, 114,114,114,114,114,114,114,114, 37, 37, 37, 37, 37, 37, 37, 37, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -18,-18,-18,-18,-18,-18,-18,-18, 99, 99, 99, 99, 99, 99, 99, 99, 58, 58, 58, 58, 58, 58, 58, 58, -11,-11,-11,-11,-11,-11,-11,-11, },
+ { -15,-15,-15,-15,-15,-15,-15,-15, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, -15,-15,-15,-15,-15,-15,-15,-15, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 58, 58, 58, 58, 58, 58, 58, 58, 99, 99, 99, 99, 99, 99, 99, 99, -18,-18,-18,-18,-18,-18,-18,-18, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 37, 37, 37, 37, 37, 37, 37, 37, 114,114,114,114,114,114,114,114, -17,-17,-17,-17,-17,-17,-17,-17, },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 17, 17, 17, 17, 17, 17, 17, 17, 125,125,125,125,125,125,125,125, -12,-12,-12,-12,-12,-12,-12,-12, },
+ },
+
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
+ { -12,-12,-12,-12,-12,-12,-12,-12, 124,124,124,124,124,124,124,124, 18, 18, 18, 18, 18, 18, 18, 18, -2, -2, -2, -2, -2, -2, -2, -2, },
+ { -18,-18,-18,-18,-18,-18,-18,-18, 114,114,114,114,114,114,114,114, 38, 38, 38, 38, 38, 38, 38, 38, -6, -6, -6, -6, -6, -6, -6, -6, },
+ { -19,-19,-19,-19,-19,-19,-19,-19, 99, 99, 99, 99, 99, 99, 99, 99, 59, 59, 59, 59, 59, 59, 59, 59, -11,-11,-11,-11,-11,-11,-11,-11, },
+ { -16,-16,-16,-16,-16,-16,-16,-16, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, -16,-16,-16,-16,-16,-16,-16,-16, },
+ { -11,-11,-11,-11,-11,-11,-11,-11, 59, 59, 59, 59, 59, 59, 59, 59, 99, 99, 99, 99, 99, 99, 99, 99, -19,-19,-19,-19,-19,-19,-19,-19, },
+ { -6, -6, -6, -6, -6, -6, -6, -6, 38, 38, 38, 38, 38, 38, 38, 38, 114,114,114,114,114,114,114,114, -18,-18,-18,-18,-18,-18,-18,-18, },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 18, 18, 18, 18, 18, 18, 18, 18, 124,124,124,124,124,124,124,124, -12,-12,-12,-12,-12,-12,-12,-12, },
+ },
+
+ // Dummy entry (index 16) for VP61 support
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -4, -4, -4, -4, -4, -4, -4, -4, 118,118,118,118,118,118,118,118, 16, 16, 16, 16, 16, 16, 16, 16, -2, -2, -2, -2, -2, -2, -2, -2 },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 106,106,106,106,106,106,106,106, 34, 34, 34, 34, 34, 34, 34, 34, -5, -5, -5, -5, -5, -5, -5, -5 },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 90, 90, 90, 90, 90, 90, 90, 90, 53, 53, 53, 53, 53, 53, 53, 53, -7, -7, -7, -7, -7, -7, -7, -7 },
+ { -8, -8, -8, -8, -8, -8, -8, -8, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, -8, -8, -8, -8, -8, -8, -8, -8 },
+ { -7, -7, -7, -7, -7, -7, -7, -7, 53, 53, 53, 53, 53, 53, 53, 53, 90, 90, 90, 90, 90, 90, 90, 90, -8, -8, -8, -8, -8, -8, -8, -8 },
+ { -5, -5, -5, -5, -5, -5, -5, -5, 34, 34, 34, 34, 34, 34, 34, 34, 106,106,106,106,106,106,106,106, -7, -7, -7, -7, -7, -7, -7, -7 },
+ { -2, -2, -2, -2, -2, -2, -2, -2, 16, 16, 16, 16, 16, 16, 16, 16, 118,118,118,118,118,118,118,118, -4, -4, -4, -4, -4, -4, -4, -4 }
+ }
+
+};
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_h_mmx
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. On every row the bytes
+ * SrcPtr-1 through SrcPtr+11 are loaded.
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Tap vectors of four INT16 each, read from byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3).
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Horizontal 4-tap filter over 8 pixels per row (MMX):
+ * out[x] = pack8( ( tap0*p[x-1] + tap1*p[x] + tap2*p[x+1]
+ * + tap3*p[x+2] + rd ) >> FILTER_SHIFT )
+ * Products are accumulated with saturating adds (paddsw) in the
+ * order tap 0, tap 3, tap 1, tap 2; packuswb clamps the result
+ * to an unsigned byte.
+ *
+ * SPECIAL NOTES : The comments on the four filter loads all say "kernel 0";
+ * the loads fetch taps 0, 1, 2 and 3 respectively.
+ * All MMX registers are modified and no emms is executed
+ * here; presumably the caller clears the MMX state - TODO
+ * confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_h_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi+ 16] ; mm3 *= kernel 0 modifiers.
+ movq mm6, [edi + 32] ; mm3 *= kernel 0 modifiers.
+ movq mm7, [edi + 48] ; mm3 *= kernel 0 modifiers.
+
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ dec esi
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p-1..p6
+ movq mm4, mm3 ; mm4 = p-1..p6
+ punpcklbw mm3, mm0 ; mm3 = p-1..p2
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 24 ; mm4 = p2..p6
+ movq mm5, mm4 ; mm5 = p2..p6
+ punpcklbw mm5, mm0 ; mm5 = p2..p5
+ pmullw mm5, mm7 ; mm5 *= kernel 3 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ movq mm4, [esi+1] ; mm4 = p0..p6
+ movq mm5, mm4 ; mm5 = p0..p6
+ punpcklbw mm5, mm0 ; mm5 = p0..p3
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p1..p6
+ movq mm5, mm4 ; mm5 = p1..p6
+ punpcklbw mm5, mm0 ; mm5 = p1..p4
+ pmullw mm5, mm6 ; mm5 *= kernel 2 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+
+ paddsw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+
+ movq mm3, [esi+4] ; mm3 = p-1..p6
+ movq mm4, mm3 ; mm4 = p-1..p6
+ punpcklbw mm3, mm0 ; mm3 = p-1..p2
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 24 ; mm4 = p2..p6
+ movq mm5, mm4 ; mm5 = p2..p6
+ punpcklbw mm5, mm0 ; mm5 = p2..p5
+ pmullw mm5, mm7 ; mm5 *= kernel 3 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ movq mm4, [esi+5] ; mm4 = p0..p6
+ movq mm5, mm4 ; mm5 = p0..p6
+ punpcklbw mm5, mm0 ; mm5 = p0..p3
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p1..p6
+ movq mm5, mm4 ; mm5 = p1..p6
+ punpcklbw mm5, mm0 ; mm5 = p1..p4
+ pmullw mm5, mm6 ; mm5 *= kernel 2 modifiers
+ paddsw mm3, mm5 ; mm3 += mm5
+
+
+ paddsw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_v_mmx
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. Loads start one line
+ * above it (SrcPtr - PixelsPerLine) and reach two lines below
+ * the row being produced.
+ * PixelsPerLine
+ * Source pitch; also the distance between the four taps.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Tap vectors of four INT16 each, read from byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3).
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Vertical 4-tap filter over 8 pixels per row (MMX):
+ * out[y] = pack8( ( tap0*p[y-1] + tap1*p[y] + tap2*p[y+1]
+ * + tap3*p[y+2] + rd ) >> FILTER_SHIFT )
+ * Products are accumulated with saturating adds (paddsw) in the
+ * order tap 0, tap 3, tap 1, tap 2. Each source load is 8 bytes
+ * wide at column offsets 0 and 4, so 12 bytes of every source
+ * row are touched.
+ *
+ * SPECIAL NOTES : esi is advanced by one line, stepped back, and advanced
+ * again inside the loop body; the net effect is one line
+ * per iteration. The "-1 column" remark on the sub
+ * instruction refers to the row above the current one.
+ * MMX registers are modified and no emms is executed here;
+ * presumably the caller clears the MMX state - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_v_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; mm3 *= kernel 0 modifiers.
+ movq mm6, [edi + 32] ; mm3 *= kernel 0 modifiers.
+ movq mm7, [edi + 48] ; mm3 *= kernel 0 modifiers.
+
+ mov edx, PixelsPerLine
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ sub esi, PixelsPerLine
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p0..p8
+ punpcklbw mm3, mm0 ; mm3 = p0..p3
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ add esi, edx ; move source forward 1 line to avoid 3 * pitch
+
+ movq mm4, [esi+2*edx] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm7 ; mm4 *= kernel 3 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi ] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi +edx] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm6 ; mm4 *= kernel 2 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+
+ paddsw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+ sub esi, edx ; subtract edx to get back to -1 column
+
+ movq mm3, [esi+4] ; mm3 = p4..p12
+ punpcklbw mm3, mm0 ; mm3 = p4..p7
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ add esi, edx ; move source forward 1 line to avoid 3 * pitch
+
+ movq mm4, [esi+2*edx+4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm7 ; mm4 *= kernel 3 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi +4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi +edx+4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm6 ; mm4 *= kernel 2 modifiers.
+ paddsw mm3, mm4 ; mm3 += mm4
+
+
+ paddsw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+
+
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_h_mmxa
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. On every row the bytes
+ * SrcPtr-1 through SrcPtr+10 are loaded.
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Tap vectors of four INT16 each, read from byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3).
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Horizontal 4-tap filter over 8 pixels per row (MMX). Same
+ * tap-to-pixel mapping as FilterBlock1d_h_mmx, but the source
+ * is loaded once per four outputs and shifted one byte per tap,
+ * and the products are summed in tap order 0, 1, 2, 3 with
+ * wrapping adds (paddw) instead of saturating adds.
+ *
+ * SPECIAL NOTES : The comments on the four filter loads all say "kernel 0";
+ * the loads fetch taps 0, 1, 2 and 3 respectively.
+ * All MMX registers are modified and no emms is executed
+ * here; presumably the caller clears the MMX state - TODO
+ * confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_h_mmxa( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi+ 16] ; mm3 *= kernel 0 modifiers.
+ movq mm6, [edi + 32] ; mm3 *= kernel 0 modifiers.
+ movq mm7, [edi + 48] ; mm3 *= kernel 0 modifiers.
+
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ dec esi
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p-1..p6
+ movq mm4, mm3 ; mm4 = p-1..p6
+ punpcklbw mm3, mm0 ; mm3 = p-1..p2
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 8 ; mm4 = p0..p6
+ movq mm5, mm4 ; mm5 = p0..p6
+ punpcklbw mm5, mm0 ; mm5 = p0..p3
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p1..p6
+ movq mm5, mm4 ; mm5 = p1..p6
+ punpcklbw mm5, mm0 ; mm5 = p1..p4
+ pmullw mm5, mm6 ; mm5 *= kernel 2 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p2..p6
+ movq mm5, mm4 ; mm5 = p2..p6
+ punpcklbw mm5, mm0 ; mm5 = p2..p5
+ pmullw mm5, mm7 ; mm5 *= kernel 3 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+
+ movq mm3, [esi+4] ; mm3 = p-1..p6
+ movq mm4, mm3 ; mm4 = p-1..p6
+ punpcklbw mm3, mm0 ; mm3 = p-1..p2
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 8 ; mm4 = p0..p6
+ movq mm5, mm4 ; mm5 = p0..p6
+ punpcklbw mm5, mm0 ; mm5 = p0..p3
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p1..p6
+ movq mm5, mm4 ; mm5 = p1..p6
+ punpcklbw mm5, mm0 ; mm5 = p1..p4
+ pmullw mm5, mm6 ; mm5 *= kernel 2 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ psrlq mm4, 8 ; mm4 = p2..p6
+ movq mm5, mm4 ; mm5 = p2..p6
+ punpcklbw mm5, mm0 ; mm5 = p2..p5
+ pmullw mm5, mm7 ; mm5 *= kernel 3 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_v_mmxa
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. Loads start one line
+ * above it (SrcPtr - PixelsPerLine) and reach two lines below
+ * the row being produced.
+ * PixelsPerLine
+ * Source pitch; also the distance between the four taps.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Tap vectors of four INT16 each, read from byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3).
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Vertical 4-tap filter over 8 pixels per row (MMX). Same
+ * tap-to-row mapping as FilterBlock1d_v_mmx, but the products
+ * are summed in tap order 0, 1, 2, 3 with wrapping adds (paddw)
+ * instead of saturating adds.
+ *
+ * SPECIAL NOTES : esi is advanced by one line, stepped back, and advanced
+ * again inside the loop body; the net effect is one line
+ * per iteration.
+ * MMX registers are modified and no emms is executed here;
+ * presumably the caller clears the MMX state - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_v_mmxa( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; mm3 *= kernel 0 modifiers.
+ movq mm6, [edi + 32] ; mm3 *= kernel 0 modifiers.
+ movq mm7, [edi + 48] ; mm3 *= kernel 0 modifiers.
+
+ mov edx, PixelsPerLine
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ sub esi, PixelsPerLine
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p0..p8
+ punpcklbw mm3, mm0 ; mm3 = p0..p3
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ movq mm4, [esi +edx ] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi +2*edx] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm6 ; mm4 *= kernel 2 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ add esi, edx ; move source forward 1 line to avoid 3 * pitch
+
+ movq mm4, [esi+2*edx] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm7 ; mm4 *= kernel 3 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+ sub esi, edx ; subtract edx to get back to -1 column
+
+ movq mm3, [esi+4] ; mm3 = p4..p12
+ punpcklbw mm3, mm0 ; mm3 = p4..p7
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ movq mm4, [esi +edx +4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ movq mm4, [esi +2*edx+4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm6 ; mm4 *= kernel 2 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ add esi, edx ; move source forward 1 line to avoid 3 * pitch
+
+ movq mm4, [esi+2*edx+4] ; mm4 = p0..p8
+ punpcklbw mm4, mm0 ; mm4 = p0..p3
+ pmullw mm4, mm7 ; mm4 *= kernel 3 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+
+
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_hb8_mmx
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. On every row the bytes
+ * SrcPtr through SrcPtr+11 are loaded.
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Two tap vectors of four INT16 each, read from byte offsets 0
+ * and 16.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Horizontal 2-tap (bilinear) filter over 8 pixels per row:
+ * out[x] = pack8( ( tap0*p[x] + tap1*p[x+1] + rd )
+ * >> FILTER_SHIFT )
+ * Unlike the 4-tap routines the source pointer is not moved
+ * back by one pixel, so the "p-1" labels in the instruction
+ * comments actually denote p0.
+ *
+ * SPECIAL NOTES : MMX registers are modified and no emms is executed here;
+ * presumably the caller clears the MMX state - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_hb8_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; mm3 *= kernel 0 modifiers.
+
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p-1..p14
+ movq mm4, mm3 ; mm4 = p-1..p14
+ punpcklbw mm3, mm0 ; mm3 = p-1..p6
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 8 ; mm4 = p0..p13
+ movq mm5, mm4 ; mm5 = p0..p13
+ punpcklbw mm5, mm0 ; mm5 = p0..p7
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+ movq mm3, [esi+4] ; mm3 = p-1..p14
+ movq mm4, mm3 ; mm4 = p-1..p14
+ punpcklbw mm3, mm0 ; mm3 = p-1..p6
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ psrlq mm4, 8 ; mm4 = p0..p13
+ movq mm5, mm4 ; mm5 = p0..p13
+ punpcklbw mm5, mm0 ; mm5 = p0..p7
+ pmullw mm5, mm2 ; mm5 *= kernel 1 modifiers
+ paddw mm3, mm5 ; mm3 += mm5
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_vb8_mmx
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. Each output row loads
+ * from the current line and the line below it.
+ * PixelsPerLine
+ * Source pitch; added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Two tap vectors of four INT16 each, read from byte offsets 0
+ * and 16.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Vertical 2-tap (bilinear) filter over 8 pixels per row:
+ * out[y] = pack8( ( tap0*p[y] + tap1*p[y+1] + rd )
+ * >> FILTER_SHIFT )
+ * Each source load is 8 bytes wide at column offsets 0 and 4,
+ * so 12 bytes of every source row are touched.
+ *
+ * SPECIAL NOTES : MMX registers are modified and no emms is executed here;
+ * presumably the caller clears the MMX state - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_vb8_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movq mm1, [edi] ; mm3 *= kernel 0 modifiers.
+ movq mm2, [edi + 16] ; mm3 *= kernel 0 modifiers.
+ mov edx, PixelsPerLine
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor mm0, mm0 ; mm0 = 00000000
+
+
+nextrow:
+ movq mm3, [esi] ; mm3 = p0..p16
+ punpcklbw mm3, mm0 ; mm3 = p0..p8
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ movq mm4, [esi +edx ] ; mm4 = p0..p16
+ punpcklbw mm4, mm0 ; mm4 = p0..p8
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi],mm3 ; store the results in the destination
+
+ movq mm3, [esi+4] ; mm3 = p0..p16
+ punpcklbw mm3, mm0 ; mm3 = p0..p8
+ pmullw mm3, mm1 ; mm3 *= kernel 0 modifiers.
+
+ movq mm4, [esi +edx +4] ; mm4 = p0..p16
+ punpcklbw mm4, mm0 ; mm4 = p0..p8
+ pmullw mm4, mm2 ; mm4 *= kernel 1 modifiers.
+ paddw mm3, mm4 ; mm3 += mm4
+
+ paddw mm3, rd ; mm3 += round value
+ psraw mm3, FILTER_SHIFT ; mm3 /= 128
+ packuswb mm3, mm0 ; pack and unpack to saturate
+
+ movd [edi+4],mm3 ; store the results in the destination
+
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add esi,edx
+ add edi,eax
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_mmx
+ *
+ * INPUTS : SrcPtr
+ * Top-left source pixel. Nine source rows are read (one before
+ * the loop plus one per output row), 9 bytes from each.
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * HFilter
+ * Horizontal taps: four INT16 at byte offset 0 and four at
+ * byte offset 16.
+ * VFilter
+ * Vertical taps, laid out the same way.
+ *
+ * OUTPUTS : OutputPtr
+ * 64 bytes are written: 8 rows of 8 packed UINT8 values with a
+ * fixed pitch of 8. The loop ends when the destination pointer
+ * reaches OutputPtr+64.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Applies a 2-tap horizontal filter followed by a 2-tap
+ * vertical filter (bilinear) to produce an 8x8 UINT8 predictor
+ * block. Each horizontally filtered row is rounded, shifted and
+ * packed to bytes in mm7 so the next iteration can reuse it as
+ * the upper row of the vertical filter.
+ *
+ * SPECIAL NOTES : All MMX registers are modified and no emms is executed
+ * here; presumably the caller clears the MMX state - TODO
+ * confirm. The "xmm3" wording in the instruction comments
+ * refers to mm3.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock2dBil_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+
+ __asm
+ {
+ mov eax, HFilter ;
+ mov edi, OutputPtr ;
+ mov esi, SrcPtr ;
+ lea ecx, [edi+64] ;
+ mov edx, SrcPixelsPerLine ;
+
+ movq mm1, [eax] ;
+ movq mm2, [eax+16] ;
+
+ mov eax, VFilter ;
+ pxor mm0, mm0 ;
+
+ // get the first horizontal line done ;
+ movq mm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ;
+ pmullw mm4, mm1 ;
+
+ movq mm5, [esi+1] ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ;
+
+ add esi, edx ; next line
+NextRow:
+ movq mm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movq mm4, mm3 ; make a copy of current line
+
+ punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, mm1 ;
+ pmullw mm4, mm1 ;
+
+ movq mm5, [esi+1] ;
+ movq mm6, mm5 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0 ;
+
+ pmullw mm5, mm2 ;
+ pmullw mm6, mm2 ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+ movq mm5, mm7 ;
+ movq mm6, mm7 ;
+
+ punpcklbw mm5, mm0 ;
+ punpckhbw mm6, mm0
+
+ pmullw mm5, [eax] ;
+ pmullw mm6, [eax] ;
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ movq mm7, mm3 ;
+ packuswb mm7, mm4 ;
+
+
+ pmullw mm3, [eax+16] ;
+ pmullw mm4, [eax+16] ;
+
+ paddw mm3, mm5 ;
+ paddw mm4, mm6 ;
+
+
+ paddw mm3, rd ; xmm3 += round value
+ psraw mm3, FILTER_SHIFT ; xmm3 /= 128
+
+ paddw mm4, rd ;
+ psraw mm4, FILTER_SHIFT ;
+
+ packuswb mm3, mm4
+
+ movq [edi], mm3 ; store the results in the destination
+
+ add esi, edx ; next line
+ add edi, 8 ;
+
+ cmp edi, ecx ;
+ jne NextRow
+
+ }
+
+ // First filter 1d Horizontal
+ //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+ // Now filter Verticaly
+ //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+
+
+}
+
+
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlockBil_8_mmx
+ *
+ * INPUTS : ReconPtr1, ReconPtr2
+ * The two source positions that bracket the fractional pel
+ * position. They may be supplied in either order.
+ * PixelsPerLine
+ * Line pitch of the buffer both pointers refer to.
+ * ModX, ModY
+ * Fractional pel bits; each one indexes BilinearFilters_mmx.
+ *
+ * OUTPUTS : ReconRefPtr
+ * Receives the filtered UINT8 block, written with a pitch of 8.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Picks the horizontal, vertical or 2-D bilinear MMX filter
+ * from the distance between the two pointers and runs it.
+ * Nothing is written when that distance matches none of the
+ * four recognised layouts.
+ *
+ * SPECIAL NOTES :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlockBil_8_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
+{
+    UINT8 *Lower = ReconPtr1;
+    UINT8 *Upper = ReconPtr2;
+    int Span = Upper - Lower;
+
+    // ModX and ModY are the bottom three bits of the signed motion vector
+    // components (1/8th pel precision), so they already select the right
+    // filter regardless of the pointer ordering fixed up below:
+    // for a +ve component Mod = X%8, for a -ve one Mod = 8+(X%8).
+
+    // Order the pair so that Lower is the above / left position.
+    if ( Span < 0 )
+    {
+        Lower = ReconPtr2;
+        Upper = ReconPtr1;
+        Span = (int)(Upper - Lower);
+    }
+
+    // Fractional pixel in horizontal only
+    if ( Span == 1 )
+    {
+        FilterBlock1d_hb8_mmx(Lower, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_mmx[ModX] );
+        return;
+    }
+
+    // Fractional pixel in vertical only
+    if ( Span == (int)(PixelsPerLine) )
+    {
+        FilterBlock1d_vb8_mmx(Lower, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_mmx[ModY]);
+        return;
+    }
+
+    // Lower is the top-right corner: start one pixel to its left
+    if ( Span == (int)(PixelsPerLine - 1) )
+    {
+        FilterBlock2dBil_mmx( Lower-1, ReconRefPtr, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+        return;
+    }
+
+    // Lower is the top-left corner
+    if ( Span == (int)(PixelsPerLine + 1) )
+    {
+        FilterBlock2dBil_mmx( Lower, ReconRefPtr, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+    }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2d_mmx
+ *
+ * INPUTS : SrcPtr
+ * Top-left source pixel of the block. The horizontal pass
+ * starts one line above it.
+ * SrcPixelsPerLine
+ * Line pitch of the source buffer.
+ * HFilter, VFilter
+ * Tap tables handed to the horizontal and vertical passes.
+ *
+ * OUTPUTS : OutputPtr
+ * Receives the result of the vertical pass (UINT8, pitch 8).
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Applies a separable 2-D 4-tap filter: 11 rows are filtered
+ * horizontally into a local scratch buffer, then 8 rows are
+ * filtered vertically out of it.
+ *
+ * SPECIAL NOTES : The scratch rows are written with a pitch of 8 and read
+ * back with a pitch of BLOCK_HEIGHT_WIDTH, so that constant
+ * is presumably 8 - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock2d_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+    UINT8 HorizPass[256];
+    UINT8 *FirstSrcRow = SrcPtr-SrcPixelsPerLine;
+    UINT8 *BlockTop = HorizPass+BLOCK_HEIGHT_WIDTH;
+
+    // Horizontal pass: one row above the block through two rows below it.
+    FilterBlock1d_h_mmx(FirstSrcRow, HorizPass, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+    // Vertical pass: begins on the second scratch row, since the vertical
+    // filter itself reaches one row up.
+    FilterBlock1d_v_mmx(BlockTop, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock_mmx
+ *
+ * INPUTS : ReconPtr1, ReconPtr2
+ * Two pointers into the block of data to be filtered.
+ * These pointers bound the fractional pel position and may be
+ * supplied in either order.
+ * PixelsPerLine
+ * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ * ModX, ModY
+ * The fractional pel bits used to select a filter.
+ * UseBicubic
+ * Whether to use the bicubic filter set or the bilinear set
+ * BicubicAlpha
+ * First index into BicubicFilters_mmx; only read when
+ * UseBicubic is set.
+ *
+ * OUTPUTS : ReconRefPtr
+ * A pointer to an 8x8 buffer into which the filtered data is
+ * written (expanded to UINT16 by UnpackBlock_MMX).
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Produces a filtered fractional pel prediction block
+ * using bilinear or bicubic filters. The filter direction is
+ * chosen from the distance between the two pointers.
+ *
+ * SPECIAL NOTES : ReconRefPtr is left untouched when the two pointers are
+ * equal or when their distance matches none of the four
+ * recognised layouts; in that case the scratch buffer has
+ * not been filled and must not be unpacked.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
+{
+    int diff;
+    UINT8 Intermediate[256];
+
+    // swap pointers so ReconPtr1 smaller (above, left, above-right or above-left )
+    diff = (int)(ReconPtr2-ReconPtr1);
+
+    // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+    // This works out to be what we want... despite the pointer swapping that goes on below.
+    // For example... if the X component of the vector is a +ve ModX = X%8.
+    // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+
+    if(diff<0)
+    { // swap pointers so ReconPtr1 smaller
+        UINT8 *temp=ReconPtr1;
+        ReconPtr1=ReconPtr2;
+        ReconPtr2=temp;
+        diff= (int)(ReconPtr2-ReconPtr1);
+    }
+
+    if(!diff)
+    {
+        return;
+    }
+
+    if( diff==1 )
+    { // Fractional pixel in horizontal only
+        if ( UseBicubic )
+        {
+            FilterBlock1d_h_mmx(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] );
+        }
+        else
+        {
+            FilterBlock1d_hb8_mmx(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BilinearFilters_mmx[ModX] );
+        }
+    }
+    else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
+    {
+        if ( UseBicubic )
+        {
+            FilterBlock1d_v_mmx(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]);
+        }
+        else
+        {
+            FilterBlock1d_vb8_mmx(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_mmx[ModY]);
+        }
+    }
+    else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
+    {
+        if ( UseBicubic )
+        {
+            FilterBlock2d_mmx( ReconPtr1-1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+        }
+        else
+        {
+            FilterBlock2dBil_mmx( ReconPtr1-1, Intermediate, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+        }
+    }
+    else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
+    {
+        if ( UseBicubic )
+        {
+            FilterBlock2d_mmx( ReconPtr1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+        }
+        else
+        {
+            FilterBlock2dBil_mmx( ReconPtr1, Intermediate, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+        }
+    }
+    else
+    {
+        // No filter ran, so Intermediate holds indeterminate stack bytes.
+        // Unpacking it would copy garbage into the prediction block.
+        return;
+    }
+
+    UnpackBlock_MMX( Intermediate, ReconRefPtr, 8 );
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
new file mode 100644
index 00000000..7b5f0486
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
@@ -0,0 +1,790 @@
+/****************************************************************************
+ *
+ * Module Title : newLoopTest_asm.c
+ *
+ * Description : Codec specific functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.02 YWX 03-Nov-00 Changed confusing variable name
+ * 1.01 YWX 02-Nov-00 Added the set of functions
+ * 1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Header Frames
+ *****************************************************************************
+ */
+
+
+#define STRICT /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+/* Fixed-point filter constants: results are shifted right by FILTER_SHIFT (7) after adding rd, which holds 64 in each of its eight 16-bit lanes. */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT 7
+__declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64};
+
+/* Two-tap bilinear weights, one row per table index: tap 0 replicated eight times, then tap 1 replicated eight times. In every row the two taps add up to 128. */
+__declspec(align(16)) INT16 BilinearFilters_wmt[8][16] =
+{
+{ 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0 },
+{ 112,112,112,112,112,112,112,112, 16, 16, 16, 16, 16, 16, 16, 16 },
+{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
+{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
+{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
+{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
+{ 16, 16, 16, 16, 16, 16, 16, 16, 112,112,112,112,112,112,112,112 }
+};
+/* Four-tap table defined outside this file; presumably indexed [alpha][fraction][tap lanes] - TODO confirm against its definition. */
+extern __declspec(align(16)) INT16 BicubicFilters_mmx[17][8][32];
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_h_wmt
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. On every row the bytes
+ * SrcPtr-1 through SrcPtr+15 are loaded (two unaligned 16-byte
+ * loads).
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Four tap vectors of eight INT16 each at byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3). They are fetched with movdqa,
+ * so the table has to be 16-byte aligned.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Horizontal 4-tap filter over 8 pixels per row (SSE2):
+ * out[x] = pack8( ( tap0*p[x-1] + tap1*p[x] + tap2*p[x+1]
+ * + tap3*p[x+2] + rd ) >> FILTER_SHIFT )
+ * Products are accumulated with saturating adds (paddsw) in the
+ * order tap 0, tap 3, tap 1, tap 2.
+ *
+ * SPECIAL NOTES : The comments on the four filter loads all say "kernel 0";
+ * the loads fetch taps 0, 1, 2 and 3 respectively.
+ * The result is stored through mm0 (movdq2q), so MMX state
+ * is touched and no emms is executed here; presumably the
+ * caller clears it - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock1d_h_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm2, [edi+ 16] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
+
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ dec esi
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+
+nextrow:
+
+ // kernel 0 and 3 are potentially negative taps. These negative tap filters
+ // must be done first or we could have problems saturating our high value
+ // tap filters
+ movdqu xmm3, [esi] ; xmm3 = p-1..p14
+ movdqu xmm4, xmm3 ; xmm4 = p-1..p14
+ punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
+ pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
+
+ psrldq xmm4, 3 ; xmm4 = p2..p13
+ movdqa xmm5, xmm4 ; xmm5 = p2..p13
+ punpcklbw xmm5, xmm0 ; xmm5 = p2..p7
+ pmullw xmm5, xmm7 ; xmm5 *= kernel 3 modifiers
+ paddsw xmm3, xmm5 ; xmm3 += xmm5
+
+ movdqu xmm4, [esi+1] ; xmm4 = p0..p13
+ movdqa xmm5, xmm4 ; xmm5 = p0..p13
+ punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
+ pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
+ paddsw xmm3, xmm5 ; xmm3 += xmm5
+
+ psrldq xmm4, 1 ; xmm4 = p1..p13
+ movdqa xmm5, xmm4 ; xmm5 = p1..p13
+ punpcklbw xmm5, xmm0 ; xmm5 = p1..p7
+ pmullw xmm5, xmm6 ; xmm5 *= kernel 2 modifiers
+ paddsw xmm3, xmm5 ; xmm3 += xmm5
+
+ paddsw xmm3, rd ; xmm3 += round value
+ psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
+ packuswb xmm3, xmm0 ; pack and saturate
+
+ movdq2q mm0, xmm3
+ movq [edi],mm0 ; store the results in the destination
+
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_v_wmt
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. Loads start one line
+ * above it (SrcPtr - PixelsPerLine) and reach two lines below
+ * the row being produced; each load is 16 bytes wide.
+ * PixelsPerLine
+ * Source pitch; also the distance between the four taps.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Four tap vectors of eight INT16 each at byte offsets 0, 16,
+ * 32 and 48 (taps 0, 1, 2 and 3). They are fetched with movdqa,
+ * so the table has to be 16-byte aligned.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Vertical 4-tap filter over 8 pixels per row (SSE2):
+ * out[y] = pack8( ( tap0*p[y-1] + tap1*p[y] + tap2*p[y+1]
+ * + tap3*p[y+2] + rd ) >> FILTER_SHIFT )
+ * Products are accumulated with saturating adds (paddsw) in the
+ * order tap 0, tap 3, tap 1, tap 2. The single "add esi, edx"
+ * inside the loop is also what advances the source by one line
+ * per iteration.
+ *
+ * SPECIAL NOTES : The result is stored through mm0 (movdq2q), so MMX state
+ * is touched and no emms is executed here; presumably the
+ * caller clears it - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock1d_v_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
+
+ mov edx, PixelsPerLine
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ sub esi, PixelsPerLine
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+
+
+nextrow:
+ movdqu xmm3, [esi] ; xmm3 = p0..p16
+ punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
+ pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
+
+ add esi, edx ; move source forward 1 line to avoid 3 * pitch
+
+ movdqu xmm4, [esi+2*edx] ; xmm4 = p0..p16
+ punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
+ pmullw xmm4, xmm7 ; xmm4 *= kernel 3 modifiers.
+ paddsw xmm3, xmm4 ; xmm3 += xmm4
+
+ movdqu xmm4, [esi ] ; xmm4 = p0..p16
+ punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
+ pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
+ paddsw xmm3, xmm4 ; xmm3 += xmm4
+
+ movdqu xmm4, [esi +edx] ; xmm4 = p0..p16
+ punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
+ pmullw xmm4, xmm6 ; xmm4 *= kernel 2 modifiers.
+ paddsw xmm3, xmm4 ; xmm3 += xmm4
+
+
+
+ paddsw xmm3, rd ; xmm3 += round value
+ psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
+ packuswb xmm3, xmm0 ; pack and unpack to saturate
+
+ movdq2q mm0, xmm3
+ movq [edi],mm0 ; store the results in the destination
+
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_hb8_wmt
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. On every row the bytes
+ * SrcPtr through SrcPtr+15 are loaded (one unaligned 16-byte
+ * load).
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Two tap vectors of eight INT16 each at byte offsets 0 and 16,
+ * fetched with movdqa, so the table has to be 16-byte aligned.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Horizontal 2-tap (bilinear) filter over 8 pixels per row:
+ * out[x] = pack8( ( tap0*p[x] + tap1*p[x+1] + rd )
+ * >> FILTER_SHIFT )
+ * The source pointer is not moved back by one pixel here, so
+ * the "p-1" labels in the instruction comments denote p0.
+ *
+ * SPECIAL NOTES : The result is stored through mm0 (movdq2q), so MMX state
+ * is touched and no emms is executed here; presumably the
+ * caller clears it - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock1d_hb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
+
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+
+nextrow:
+ movdqu xmm3, [esi] ; xmm3 = p-1..p14
+ movdqu xmm5, xmm3 ; xmm4 = p-1..p14
+ punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
+ pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
+
+ psrldq xmm5, 1 ; xmm4 = p0..p13
+ punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
+ pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
+ paddw xmm3, xmm5 ; xmm3 += xmm5
+
+ paddw xmm3, rd ; xmm3 += round value
+ psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
+ packuswb xmm3, xmm0 ; pack and unpack to saturate
+
+ movdq2q mm0, xmm3
+ movq [edi],mm0 ; store the results in the destination
+
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock1d_vb8_wmt
+ *
+ * INPUTS : SrcPtr
+ * First pixel of the first source row. Each output row loads
+ * 16 bytes from the current line and 16 from the line below.
+ * PixelsPerLine
+ * Source pitch; added to the source pointer after each row.
+ * PixelStep
+ * Not referenced by this routine.
+ * OutputHeight
+ * Row count. It is decremented and tested only after a row has
+ * been produced, so it must not be 0.
+ * OutputWidth
+ * Added to the destination pointer after each row, i.e. it is
+ * used as the destination pitch.
+ * Filter
+ * Two tap vectors of eight INT16 each at byte offsets 0 and 16,
+ * fetched with movdqa, so the table has to be 16-byte aligned.
+ *
+ * OUTPUTS : OutputPtr
+ * Eight filtered bytes are stored per row.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Vertical 2-tap (bilinear) filter over 8 pixels per row:
+ * out[y] = pack8( ( tap0*p[y] + tap1*p[y+1] + rd )
+ * >> FILTER_SHIFT )
+ *
+ * SPECIAL NOTES : The result is stored through mm0 (movdq2q), so MMX state
+ * is touched and no emms is executed here; presumably the
+ * caller clears it - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock1d_vb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+
+ mov edi, Filter
+ movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
+ movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
+ mov edx, PixelsPerLine
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ mov ecx, DWORD PTR OutputHeight
+ mov eax, OutputWidth ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+
+
+nextrow:
+ movdqu xmm3, [esi] ; xmm3 = p0..p16
+ punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
+ pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
+
+ movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
+ punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
+ pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
+ paddw xmm3, xmm4 ; xmm3 += xmm4
+
+ paddw xmm3, rd ; xmm3 += round value
+ psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
+ packuswb xmm3, xmm0 ; pack and unpack to saturate
+
+ movdq2q mm0, xmm3
+ movq [edi],mm0 ; store the results in the destination
+
+ // the subsequent iterations repeat 3 out of 4 of these reads. Since the
+ // recon block should be in cache this shouldn't cost much. Its obviously
+ // avoidable!!!.
+ add esi,edx
+ add edi,eax
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2dBil_wmt
+ *
+ * INPUTS : SrcPtr
+ * Top-left source pixel. Nine source rows are read (one before
+ * the loop plus one per output row), 16 bytes from each.
+ * SrcPixelsPerLine
+ * Added to the source pointer after each row.
+ * HFilter
+ * Horizontal taps: eight INT16 at byte offset 0 and eight at
+ * byte offset 16, fetched with movdqa (16-byte aligned).
+ * VFilter
+ * Vertical taps, laid out the same way and used directly as
+ * pmullw memory operands.
+ *
+ * OUTPUTS : OutputPtr
+ * 64 bytes are written: 8 rows of 8 packed UINT8 values with a
+ * fixed pitch of 8. The loop ends when the destination pointer
+ * reaches OutputPtr+64.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Applies a 2-tap horizontal filter followed by a 2-tap
+ * vertical filter (bilinear) to produce an 8x8 UINT8 predictor
+ * block. The horizontally filtered row is kept as eight 16-bit
+ * words in xmm5 and reused as the upper row of the vertical
+ * filter on the next iteration.
+ *
+ * SPECIAL NOTES : The result is stored through mm0 (movdq2q), so MMX state
+ * is touched and no emms is executed here; presumably the
+ * caller clears it - TODO confirm.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock2dBil_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+
+ __asm
+ {
+ mov eax, HFilter ;
+ mov edi, OutputPtr ;
+ mov esi, SrcPtr ;
+ lea ecx, [edi+64] ;
+ mov edx, SrcPixelsPerLine ;
+
+ movdqa xmm1, [eax] ;
+ movdqa xmm2, [eax+16] ;
+
+ mov eax, VFilter ;
+ pxor xmm0, xmm0 ;
+
+ // get the first horizontal line done ;
+ movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movdqa xmm4, xmm3 ; make a copy of current line
+
+ punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
+ psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
+
+ pmullw xmm3, xmm1 ;
+ punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
+
+ pmullw xmm4, xmm2 ;
+ paddw xmm3, xmm4 ;
+
+ paddw xmm3, rd ;
+ psraw xmm3, FILTER_SHIFT ; ready for output
+
+ movdqa xmm5, xmm3 ;
+
+ add esi, edx ; next line
+NextRow:
+ pmullw xmm5, [eax] ;
+ movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+
+ movdqa xmm4, xmm3 ; make a copy of current line
+ punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
+
+ psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
+ pmullw xmm3, xmm1 ;
+ punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
+
+ movdqa xmm6, xmm5 ;
+ pmullw xmm4, xmm2 ;
+
+ paddw xmm3, xmm4 ;
+ paddw xmm3, rd ;
+
+ psraw xmm3, FILTER_SHIFT ; ready for output
+ movdqa xmm5, xmm3 ; make a copy for the next row
+
+ pmullw xmm3, [eax+16] ;
+ paddw xmm6, xmm3 ;
+
+
+ paddw xmm6, rd ; xmm6 += round value
+ psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
+
+ packuswb xmm6, xmm0 ; pack and unpack to saturate
+ movdq2q mm0, xmm6
+
+ movq [edi], mm0 ; store the results in the destination
+ add esi, edx ; next line
+ add edi, 8 ;
+
+ cmp edi, ecx ;
+ jne NextRow
+
+ }
+
+ // First filter 1d Horizontal
+ //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+ // Now filter Verticaly
+ //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+
+
+}
+
+_inline
+void FilterUnpackBlock2dBil_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+ // Fused 2-D bilinear filter: same arithmetic as FilterBlock2dBil_wmt, but the 8 results of each row are stored as 16-bit words (no pack/saturate), 8 rows at a 16-byte pitch.
+ __asm
+ {
+ mov eax, HFilter ; // eax -> horizontal taps (two 16-byte rows)
+ mov edi, OutputPtr ; // edi -> destination
+ mov esi, SrcPtr ; // esi -> first source row
+ lea ecx, [edi+128] ; // ecx = one past the 8 rows x 16 bytes of output (loop sentinel)
+ mov edx, SrcPixelsPerLine ; // edx = source pitch in bytes
+
+ movdqa xmm1, [eax] ; // xmm1 = horizontal tap 0 (movdqa: HFilter must be 16-byte aligned)
+ movdqa xmm2, [eax+16] ; // xmm2 = horizontal tap 1
+
+ mov eax, VFilter ; // eax -> vertical taps, used as memory operands in the loop
+ pxor xmm0, xmm0 ; // xmm0 = 0, used to zero-extend bytes to words
+
+ // Horizontal pass on the first source row; the result is carried into the loop in xmm5.
+ movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+ movdqa xmm4, xmm3 ; make a copy of current line
+
+ punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
+ psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
+
+ pmullw xmm3, xmm1 ; // xmm3 *= horizontal tap 0
+ punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
+
+ pmullw xmm4, xmm2 ; // xmm4 *= horizontal tap 1
+ paddw xmm3, xmm4 ; // xmm3 = 2-tap horizontal sum
+
+ paddw xmm3, rd ; // add the rounding term (rd is defined outside this routine)
+ psraw xmm3, FILTER_SHIFT ; ready for output
+
+ movdqa xmm5, xmm3 ; // xmm5 = horizontally filtered previous row
+
+ add esi, edx ; next line
+NextRow:
+ pmullw xmm5, [eax] ; // xmm5 = previous filtered row * vertical tap 0
+ movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+
+ movdqa xmm4, xmm3 ; make a copy of current line
+ punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
+
+ psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
+ pmullw xmm3, xmm1 ; // xmm3 *= horizontal tap 0
+ punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
+
+ movdqa xmm6, xmm5 ; // xmm6 = vertical tap 0 contribution
+ pmullw xmm4, xmm2 ; // xmm4 *= horizontal tap 1
+
+ paddw xmm3, xmm4 ; // xmm3 = 2-tap horizontal sum
+ paddw xmm3, rd ; // add the rounding term
+
+ psraw xmm3, FILTER_SHIFT ; ready for output
+ movdqa xmm5, xmm3 ; make a copy for the next row
+
+ pmullw xmm3, [eax+16] ; // xmm3 = current filtered row * vertical tap 1
+ paddw xmm6, xmm3 ; // xmm6 = 2-tap vertical sum
+
+ // Round and scale the vertical sum the same way as the horizontal pass.
+ paddw xmm6, rd ; xmm6 += round value
+ psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
+
+ movdqu [edi], xmm6; // store 8 words unpacked (unaligned store, no saturation step)
+
+ /*
+ Unlike FilterBlock2dBil_wmt, this variant does not pack the
+ words down to bytes: the caller wants 16-bit output, so the
+ packuswb / 8-byte store pair used there is replaced by the
+ 16-byte store above.
+ */
+ add esi, edx ; next line
+ add edi, 16 ;
+
+ cmp edi, ecx ; // all 8 output rows written?
+ jne NextRow
+
+ }
+
+ // The loop above fuses what would otherwise be two passes through an
+ // intermediate buffer: a 9-row horizontal filter followed by an 8-row
+ // vertical filter. Only one filtered row (xmm5) is kept between
+ // iterations, so no scratch memory is needed.
+
+
+}
+_inline
+void FilterUnpackBlock1d_hb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // Horizontal 2-tap filter over OutputHeight rows of 8 pixels; results stay as 16-bit words. OutputWidth is used as the destination pitch in BYTES. PixelStep is not used.
+        mov         edi, Filter
+        movdqa      xmm1, [edi]                 ; xmm1 = tap 0 multipliers (movdqa: Filter must be 16-byte aligned)
+        movdqa      xmm2, [edi + 16]            ; xmm2 = tap 1 multipliers
+        mov         edx, SrcPixelsPerLine       ; edx = source pitch, loaded once instead of re-read every row
+        mov         edi, OutputPtr
+        mov         esi, SrcPtr
+        mov         ecx, DWORD PTR OutputHeight ; ecx = row counter (must be non-zero on entry)
+        mov         eax, OutputWidth            ; eax = destination pitch in bytes
+        pxor        xmm0, xmm0                  ; xmm0 = 0, used to zero-extend bytes to words
+
+nextrow:
+        movdqu      xmm3, [esi]                 ; xmm3 = 16 source bytes (unaligned load)
+        movdqa      xmm5, xmm3                  ; xmm5 = copy of the row (register copy, as in the 2-D siblings)
+        punpcklbw   xmm3, xmm0                  ; xmm3 = bytes 0..7 widened to words
+        pmullw      xmm3, xmm1                  ; xmm3 *= tap 0
+
+        psrldq      xmm5, 1                     ; shift the copy down by one byte
+        punpcklbw   xmm5, xmm0                  ; xmm5 = bytes 1..8 widened to words
+        pmullw      xmm5, xmm2                  ; xmm5 *= tap 1
+        paddw       xmm3, xmm5                  ; xmm3 = tap0*p[x] + tap1*p[x+1]
+
+        paddw       xmm3, rd                    ; xmm3 += round value (rd is defined outside this routine)
+        psraw       xmm3, FILTER_SHIFT          ; xmm3 >>= FILTER_SHIFT
+
+        // No packuswb here: this is the "unpack" variant, so the 8 results
+        // are written as 16-bit words and are not saturated to bytes.
+        // NOTE(review): presumably the taps are non-negative and sum to
+        // 1 << FILTER_SHIFT, which keeps every result within 0..255 - confirm.
+
+        movdqu      [edi], xmm3                 ; store 8 words (unaligned store)
+
+        add         esi, edx                    ; next source row
+        add         edi, eax                    ; next destination row
+
+        dec         ecx                         ; decrement row count
+        jnz         nextrow                     ; next row
+    }
+}
+
+_inline
+void FilterUnpackBlock1d_vb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+ __asm
+ {
+ // Vertical 2-tap filter over OutputHeight rows of 8 pixels; results stay as 16-bit words. OutputWidth is used as the destination pitch in BYTES. PixelStep is not used.
+ mov edi, Filter
+ movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. // (this is only the load: xmm1 = tap 0 multipliers, Filter must be 16-byte aligned)
+ movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. // (this is only the load: xmm2 = tap 1 multipliers)
+ mov edx, PixelsPerLine // edx = source pitch in bytes
+ mov edi, OutputPtr
+ mov esi, SrcPtr
+ mov ecx, DWORD PTR OutputHeight // ecx = row counter (must be non-zero on entry)
+ mov eax, OutputWidth ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+
+ // Each iteration blends source row [esi] (tap 0) with the row below it, [esi+edx] (tap 1).
+nextrow:
+ movdqu xmm3, [esi] ; xmm3 = p0..p16
+ punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
+ pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
+
+ movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
+ punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
+ pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
+ paddw xmm3, xmm4 ; xmm3 += xmm4
+
+ paddw xmm3, rd ; xmm3 += round value
+ psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
+
+ /* No pack/saturate step in this "unpack" variant:
+
+ the 8 results are stored below as 16-bit words.
+ */
+ movdqu [edi],xmm3 ; store the results in the destination
+
+ // With two taps, each iteration re-reads the row that the previous
+ // iteration loaded from [esi+edx]. Since the recon block should be in
+ // cache this should not cost much, but it is avoidable.
+ add esi,edx
+ add edi,eax
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+
+ }
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlockBil_8
+ *
+ * INPUTS : ReconPtr1, ReconPtr2
+ * Two pointers into the block of data to be filtered
+ * These pointers bound the fractional pel position
+ * PixelsPerLine
+ * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ * ModX, ModY
+ * The fractional pel bits used to select a filter.
+ *
+ *
+ * OUTPUTS : ReconRefPtr
+ * A pointer to an 8x8 buffer into which UINT8 filtered data is written.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Produces a bilinear filtered fractional pel prediction block
+ * with UINT8 output
+ *
+ * SPECIAL NOTES :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
+{
+    // Signed byte distance from ReconPtr1 to ReconPtr2; once normalised, its value selects the filter direction.
+    int Offset = (int)(ReconPtr2 - ReconPtr1);
+
+    // ModX and ModY are the low three bits of the signed 1/8th-pel motion vector components and index
+    // BilinearFilters_wmt directly. For a negative component that is 8+(X%8), with X%8 in -7..-1, so the
+    // value is still the wanted filter index after the pointers are exchanged below.
+
+    if( Offset < 0 )
+    {   // make ReconPtr1 the lower address (above, left, above-right or above-left)
+        UINT8 *Lower = ReconPtr2;
+        ReconPtr2 = ReconPtr1;
+        ReconPtr1 = Lower;
+        Offset = (int)(ReconPtr2 - ReconPtr1);
+    }
+
+    if( Offset == 1 )                               // Fractional pixel in horizontal only
+    {
+        FilterBlock1d_hb8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
+        return;
+    }
+    if( Offset == (int)(PixelsPerLine) )            // Fractional pixel in vertical only
+    {
+        FilterBlock1d_vb8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY] );
+        return;
+    }
+    if( Offset == (int)(PixelsPerLine - 1) )        // ReconPtr1 is Top right: filter from one pixel to its left
+    {
+        FilterBlock2dBil_wmt( ReconPtr1 - 1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+        return;
+    }
+    if( Offset == (int)(PixelsPerLine + 1) )        // ReconPtr1 is Top left
+    {
+        FilterBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+    }
+
+    // Any other distance (including 0) leaves ReconRefPtr untouched.
+}
+
+_inline void UnpackBlock_wmt( UINT8 *SrcPtr, UINT16 *OutputPtr, UINT32 SrcPixelsPerLine )
+{ // Widens an 8x8 block of bytes to 16-bit words: 8 rows are read at SrcPixelsPerLine pitch and written at a fixed 16-byte pitch.
+ __asm
+ {
+ mov edi,OutputPtr
+ mov esi,SrcPtr
+ // ecx counts the 8 rows; eax holds the destination pitch (8 words = 16 bytes).
+ mov ecx, 8
+ mov eax, 16 ; destination pitch?
+ pxor xmm0, xmm0 ; xmm0 = 00000000
+ // Per row: load 16 bytes (only the low 8 are used), zero-extend them, store 8 words.
+nextrow:
+ movdqu xmm3, [esi] ; xmm3 = p-1..p14
+ punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
+ movdqu [edi],xmm3 ; store the results in the destination
+ // Note: the 16-byte load reads 8 bytes beyond the 8 source pixels actually used.
+ add esi,SrcPixelsPerLine ; next line
+ add edi,eax;
+
+ dec ecx ; decrement count
+ jnz nextrow ; next row
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock2d
+ *
+ * INPUTS : Pointer to source data
+ *
+ * OUTPUTS : Filtered data
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Applies a 2d 4 tap filter on the input data to produce
+ * a predictor block (UINT8)
+ *
+ * SPECIAL NOTES :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock2d_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+    // Scratch rows produced by the horizontal pass and consumed by the vertical pass.
+    UINT8 HPass[256];
+    UINT8 *FirstRow = SrcPtr - SrcPixelsPerLine;    // the horizontal pass starts one source row above the block
+    UINT8 *VStart = HPass + BLOCK_HEIGHT_WIDTH;     // the vertical pass starts BLOCK_HEIGHT_WIDTH bytes into the scratch
+
+    // Pass 1: horizontal filter (the 11, 8 arguments are presumably rows and width, as in the sibling filters - confirm).
+    FilterBlock1d_h_wmt( FirstRow, HPass, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+    // Pass 2: vertical filter over the scratch rows, writing the final block to OutputPtr.
+    FilterBlock1d_v_wmt( VStart, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : FilterBlock
+ *
+ * INPUTS : ReconPtr1, ReconPtr2
+ * Two pointers into the block of data to be filtered
+ * These pointers bound the fractional pel position
+ * PixelsPerLine
+ * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ * ModX, ModY
+ * The fractional pel bits used to select a filter.
+ * UseBicubic
+ * Whether to use the bicubic filter set or the bilinear set
+ *
+ *
+ * OUTPUTS : ReconRefPtr
+ * A pointer to an 8x8 buffer into which the filtered data is written.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Produces a filtered fractional pel prediction block
+ * using bilinear or bicubic filters
+ *
+ * SPECIAL NOTES :
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
+{
+    // Builds an 8x8 block of 16-bit prediction values at ReconRefPtr from the two
+    // reference positions that bound the fractional-pel location.
+    //   ReconPtr1, ReconPtr2 : the bounding positions, in either order
+    //   PixelsPerLine        : pitch of the buffer both pointers address
+    //   ModX, ModY           : fractional-pel bits, used as filter indices
+    //   UseBicubic           : non-zero selects BicubicFilters_mmx[BicubicAlpha],
+    //                          zero selects BilinearFilters_wmt
+    //   BicubicAlpha         : bicubic filter set index (bicubic path only)
+    int Offset = (int)(ReconPtr2 - ReconPtr1);      // signed byte distance between the two positions
+    UINT8 Packed[256];                              // byte-wide bicubic result, widened at the end
+
+    // ModX and ModY are the low three bits of the signed 1/8th-pel motion vector components.
+    // For a negative component that is 8+(X%8), with X%8 in -7..-1, so the value is still
+    // the wanted filter index after the pointers are exchanged below.
+
+    if( Offset < 0 )
+    {   // make ReconPtr1 the lower address (above, left, above-right or above-left)
+        UINT8 *Lower = ReconPtr2;
+        ReconPtr2 = ReconPtr1;
+        ReconPtr1 = Lower;
+        Offset = (int)(ReconPtr2 - ReconPtr1);
+    }
+
+    // Both pointers name the same position: nothing is written to ReconRefPtr.
+    if( Offset == 0 )
+    {
+        return;
+    }
+
+    if( !UseBicubic )
+    {
+        // Bilinear path: each helper writes its 16-bit results straight into ReconRefPtr.
+        // The 1-D helpers are given 16 as the destination pitch in bytes (8 words per row).
+
+        if( Offset == 1 )
+        {   // Fractional pixel in horizontal only
+            FilterUnpackBlock1d_hb8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 16,
+                                         BilinearFilters_wmt[ModX] );
+        }
+        else if( Offset == (int)(PixelsPerLine) )
+        {   // Fractional pixel in vertical only
+            FilterUnpackBlock1d_vb8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 16,
+                                         BilinearFilters_wmt[ModY] );
+        }
+        else if( Offset == (int)(PixelsPerLine - 1) )
+        {   // ReconPtr1 is Top right: filter from one pixel to its left
+            FilterUnpackBlock2dBil_wmt( ReconPtr1 - 1, ReconRefPtr, PixelsPerLine,
+                                        BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+        }
+        else if( Offset == (int)(PixelsPerLine + 1) )
+        {   // ReconPtr1 is Top left
+            FilterUnpackBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine,
+                                        BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+        }
+
+        // Any other distance leaves ReconRefPtr untouched.
+        return;
+    }
+
+    // Bicubic path: the filters produce bytes in the local scratch buffer, which is then
+    // widened to 16-bit words in ReconRefPtr (scratch pitch 8).
+
+    if( Offset == 1 )
+    {   // Fractional pixel in horizontal only
+        FilterBlock1d_h_wmt( ReconPtr1, Packed, PixelsPerLine, 1, 8, 8,
+                             BicubicFilters_mmx[BicubicAlpha][ModX] );
+    }
+    else if( Offset == (int)(PixelsPerLine) )
+    {   // Fractional pixel in vertical only
+        FilterBlock1d_v_wmt( ReconPtr1, Packed, PixelsPerLine, PixelsPerLine, 8, 8,
+                             BicubicFilters_mmx[BicubicAlpha][ModY] );
+    }
+    else if( Offset == (int)(PixelsPerLine - 1) )
+    {   // ReconPtr1 is Top right: filter from one pixel to its left
+        FilterBlock2d_wmt( ReconPtr1 - 1, Packed, PixelsPerLine,
+                           BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+    }
+    else if( Offset == (int)(PixelsPerLine + 1) )
+    {   // ReconPtr1 is Top left
+        FilterBlock2d_wmt( ReconPtr1, Packed, PixelsPerLine,
+                           BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+    }
+
+    // NOTE(review): if Offset matched none of the four cases the scratch buffer was never
+    // written, yet it is still unpacked below - callers presumably never do that; confirm.
+    UnpackBlock_wmt( Packed, ReconRefPtr, 8 );
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
new file mode 100644
index 00000000..50d8749d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
@@ -0,0 +1,2156 @@
+/****************************************************************************
+*
+* Module Title : IDCTPart.c
+*
+* Description : IDCT with multiple versions based on # of non 0 coeffs
+*
+* AUTHOR : Scott Lavarnway, Tim Murphy
+*
+*****************************************************************************
+* Revision History
+*
+* 1.02 JBB 15 Nov 00 Cleaned out unused ifdefs
+* 1.01 YWX 15/05/00 Added MMX_idct3 for use in PostProcesser
+* 1.00 YWX 14/05/00 Configuration baseline from Scott Lavarnway
+*
+*****************************************************************************
+*/
+
+// Dequantization + inverse discrete cosine transform.
+// Timothy S. Murphy 14 July 1999.
+
+#pragma warning(disable:4005)
+#include "codec_common.h"
+#include <math.h>
+#include <memory.h>
+#undef PI
+#define PI 3.14159265358979323846
+
+
+// Constants used in MMX implementation of dequantization and idct.
+// All the MMX stuff works with 4 16-bit quantities at a time and
+// we create 11 constants of size 4 x 16 bits (a 12th group, at EightOffset, holds the rounding term added before the final shift).
+// The first 4 are used to mask the individual 16-bit words within a group
+// and are used in the address-shuffling part of the dequantization.
+// The last 7 are fixed-point approximations to the cosines of angles
+// occurring in the DCT; each of these contains 4 copies of the same value.
+
+// There is only one (statically initialized) instance of this object
+// wrapped in an allocator object that forces its starting address
+// to be evenly divisible by 32. Hence the actual object occupies 2.75
+// cache lines on a Pentium processor.
+
+// Offsets in bytes used by the assembler code below
+// must of course agree with the idctConstants constructor.
+
+#define MaskOffset 0 // 4 masks come in order low word to high
+#define CosineOffset 32 // 7 cosines come in order pi/16 * (1 ... 7)
+#define EightOffset 88 // byte offset just past the 7 cosine groups (32 + 7*8); read through the Eight operand in the IDCT macros
+#define IdctAdjustBeforeShift 8 // NOTE(review): presumably the value stored at EightOffset (the bias added before the >>4 in ColumnIDCT) - confirm where idctconstants is filled
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+ // idctconstants holds (4 masks + 7 cosines + 1 rounding group) x 4 words; it is not initialised here, so it must be filled before the IDCT runs.
+UINT16 idctconstants[(4+7+1) * 4];
+UINT16 idctcosTbl[ 7] =
+{
+ 64277, 60547, 54491, 46341, 36410, 25080, 12785 // round( 65536 * cos( k * pi/16)) for k = 1 ... 7
+};
+
+
+/* Dequantization + inverse DCT.
+
+ Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
+ by unsigned 16-bit quantization table entries.
+ These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
+ Result is scaled signed DCT coefficients (abs value < 2^15).
+
+ In the data stream, the coefficients are sent in order of increasing
+ total (horizontal + vertical) frequency. The exact picture is as follows:
+
+ 00 01 05 06 16 17 33 34
+ 02 04 07 15 20 32 35 52
+ 03 10 14 21 31 36 51 53
+ 11 13 22 30 37 50 54 65
+
+ 12 23 27 40 47 55 64 66
+ 24 26 41 46 56 63 67 74
+ 25 42 45 57 62 70 73 75
+ 43 44 60 61 71 72 76 77
+
+ Here the position in the matrix corresponds to the (horiz,vert)
+ frequency indices and the octal entry in the matrix is the position
+ of the coefficient in the data stream. Thus the coefficients are sent
+ in sort of a diagonal "snake".
+
+ The dequantization stage "uncurls the snake" and stores the expanded
+ coefficients in more convenient positions. These are not exactly the
+ natural positions given above but take into account our implementation
+ of the idct, which basically requires two one-dimensional idcts and
+ two transposes.
+
+ We fold the first transpose into the storage of the expanded coefficients.
+ We don't actually do a full transpose because this would require doubling
+ the size of the idct buffer; rather, we just transpose each of the 4x4
+ subblocks. Using slightly varying addressing schemes in each of the
+ four 4x8 idcts then allows these transforms to be done in place.
+
+ Transposing the 4x4 subblocks in the matrix above gives
+
+ 00 02 03 11 16 20 31 37
+ 01 04 10 13 17 32 36 50
+ 05 07 14 22 33 35 51 54
+ 06 15 21 30 34 52 53 65
+
+ 12 24 25 43 47 56 62 71
+ 23 26 42 44 55 63 70 72
+ 27 41 45 60 64 67 73 76
+ 40 46 57 61 66 74 75 77
+
+ Finally, we reverse the words in each 4 word group to clarify
+ direction of shifts.
+
+ 11 03 02 00 37 31 20 16
+ 13 10 04 01 50 36 32 17
+ 22 14 07 05 54 51 35 33
+ 30 21 15 06 65 53 52 34
+
+ 43 25 24 12 71 62 56 47
+ 44 42 26 23 72 70 63 55
+ 60 45 41 27 76 73 67 64
+ 61 57 46 40 77 75 74 66
+
+ This matrix then shows the 16 4x16 destination words in terms of
+ the 16 4x16 input words.
+
+ We implement this algorithm by manipulation of mmx registers,
+ which seems to be the fastest way to proceed. It is completely
+ hand-written; there does not seem to be enough recurrence to
+ reasonably compartmentalize any of it. Hence the resulting
+ program is ugly and bloated. Furthermore, due to the absence of
+ register pressure, it is boring and artless. I hate it.
+
+ The idct itself is more interesting. Since the two-dimensional dct
+ basis functions are products of the one-dimensional dct basis functions,
+ we can compute an inverse (or forward) dct via two 1-D transforms,
+ on rows then on columns. To exploit MMX parallelism, we actually do
+ both operations on columns, interposing a (partial) transpose between
+ the two 1-D transforms, the first transpose being done by the expansion
+ described above.
+
+ The 8-sample one-dimensional DCT is a standard orthogonal expansion using
+ the (unnormalized) basis functions
+
+ b[k]( i) = cos( pi * k * (2i + 1) / 16);
+
+ here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
+ To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
+ should be multiplied by 1/2.
+
+ The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
+ in each direction. The (unnormalized) basis functions are
+
+ B[k,l]( i, j) = b[k]( i) * b[l]( j);
+
+ this time k and l are the horizontal and vertical frequencies,
+ i and j are the horizontal and vertical spatial coordinates;
+ all indices vary from 0 ... 7 (as above)
+ and there are now 4 cases of normalization.
+
+ Our 1-D idct expansion uses constants C1 ... C7 given by
+
+ (*) Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16)
+
+ and the following 1-D algorithm transforming I0 ... I7 to R0 ... R7 :
+
+ A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
+ C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * (I0 + I4) F = C4 * (I0 - I4)
+ G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+ This algorithm was also used by Paul Wilkins in his C implementation;
+ it is due to Vetterli and Ligtenberg and may be found in the JPEG
+ reference book by Pennebaker and Mitchell.
+
+ Correctness of the algorithm follows from (*) together with the
+ addition formulas for sine and cosine:
+
+ cos( A + B) = cos( A) * cos( B) - sin( A) * sin( B)
+ sin( A + B) = sin( A) * cos( B) + cos( A) * sin( B)
+
+ Note that this implementation absorbs the difference in normalization
+ between the 0th and higher frequencies, although the results produced
+ are actually twice as big as they should be. Since we do this for each
+ dimension, the 2-D idct results are 4x the desired results. Finally,
+ taking into account that the dequantization multiplies by 4 as well,
+ our actual results are 16x too big. We fix this by shifting the final
+ results right by 4 bits.
+
+ High precision version approximates C1 ... C7 to 16 bits.
+ Since MMX only provides a signed multiply, C1 ... C5 appear to be
+ negative and multiplies involving them must be adjusted to compensate
+ for this. C6 and C7 do not require this adjustment since
+ they are < 1/2 and are correctly treated as positive numbers.
+
+ Following macro does four 8-sample one-dimensional idcts in parallel.
+ This is actually not such a difficult program to write once you
+ make a couple of observations (I of course was unable to make these
+ observations until I'd half-written a couple of other versions).
+
+ 1. Everything is easy once you are done with the multiplies.
+ This is because, given X and Y in registers, one may easily
+ calculate X+Y and X-Y using just those 2 registers.
+
+ 2. You always need at least 2 extra registers to calculate products,
+ so storing 2 temporaries is inevitable. C. and D. seem to be
+ the best candidates.
+
+ 3. The products should be calculated in decreasing order of complexity
+ (which translates into register pressure). Since C1 ... C5 require
+ adjustment (and C6, C7 do not), we begin by calculating C and D.
+*/
+
+/**************************************************************************************
+ *
+ * Routine: BeginIDCT
+ *
+ * Description: The Macro does IDct on 4 1-D Dcts
+ *
+ * Input: None
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: None
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+#define Dump __asm call MMX_dump // debug aid; NOTE: the only MMX_dump visible in this file is commented out below, so this macro cannot be used unless that routine is restored
+
+#define BeginIDCT __asm { \
+ /* Common front half of four parallel 8-point 1-D IDCTs. For C1..C5 pmulhw yields c*x - x (constant looks negative), so x is added back with paddw. */ \
+ __asm movq r2, I(3) /* r2 = i3 */ \
+ /* On exit: r0 = C., r1 = H., r2 = R2, r4 = E, r5 = B.., r6 = F., r7 = G, I(1) = C., I(2) = D. */ \
+ __asm movq r6, C(3) /* r6 = c3 */ \
+ __asm movq r4, r2 /* r4 = i3 */ \
+ __asm movq r7, J(5) /* r7 = i5 */ \
+ __asm pmulhw r4, r6 /* r4 = c3*i3 - i3 */ \
+ __asm movq r1, C(5) /* r1 = c5 */ \
+ __asm pmulhw r6, r7 /* r6 = c3*i5 - i5 */ \
+ __asm movq r5, r1 /* r5 = c5 */ \
+ __asm pmulhw r1, r2 /* r1 = c5*i3 - i3 */ \
+ __asm movq r3, I(1) /* r3 = i1 */ \
+ __asm pmulhw r5, r7 /* r5 = c5*i5 - i5 */ \
+ __asm movq r0, C(1) /* (all registers are in use) */ \
+ __asm paddw r4, r2 /* r4 = c3*i3 */ \
+ __asm paddw r6, r7 /* r6 = c3*i5 */ \
+ __asm paddw r2, r1 /* r2 = c5*i3 */ \
+ __asm movq r1, J(7) /* r1 = i7 */ \
+ __asm paddw r7, r5 /* r7 = c5*i5 */ \
+ __asm movq r5, r0 /* r5 = c1 */ \
+ __asm pmulhw r0, r3 /* r0 = c1*i1 - i1 */ \
+ __asm paddsw r4, r7 /* r4 = C = c3*i3 + c5*i5 */ \
+ __asm pmulhw r5, r1 /* r5 = c1*i7 - i7 */ \
+ __asm movq r7, C(7) /* r7 = c7 */ \
+ __asm psubsw r6, r2 /* r6 = D = c3*i5 - c5*i3 (done w/r2) */ \
+ __asm paddw r0, r3 /* r0 = c1*i1 */ \
+ __asm pmulhw r3, r7 /* r3 = c7*i1 */ \
+ __asm movq r2, I(2) /* r2 = i2 */ \
+ __asm pmulhw r7, r1 /* r7 = c7*i7 */ \
+ __asm paddw r5, r1 /* r5 = c1*i7 */ \
+ __asm movq r1, r2 /* r1 = i2 */ \
+ __asm pmulhw r2, C(2) /* r2 = c2*i2 - i2 */ \
+ __asm psubsw r3, r5 /* r3 = B = c7*i1 - c1*i7 */ \
+ __asm movq r5, J(6) /* r5 = i6 */ \
+ __asm paddsw r0, r7 /* r0 = A = c1*i1 + c7*i7 */ \
+ __asm movq r7, r5 /* r7 = i6 */ \
+ __asm psubsw r0, r4 /* r0 = A - C */ \
+ __asm pmulhw r5, C(2) /* r5 = c2*i6 - i6 */ \
+ __asm paddw r2, r1 /* r2 = c2*i2 */ \
+ __asm pmulhw r1, C(6) /* r1 = c6*i2 */ \
+ __asm paddsw r4, r4 /* r4 = C + C */ \
+ __asm paddsw r4, r0 /* r4 = C. = A + C */ \
+ __asm psubsw r3, r6 /* r3 = B - D */ \
+ __asm paddw r5, r7 /* r5 = c2*i6 */ \
+ __asm paddsw r6, r6 /* r6 = D + D */ \
+ __asm pmulhw r7, C(6) /* r7 = c6*i6 */ \
+ __asm paddsw r6, r3 /* r6 = D. = B + D */ \
+ __asm movq I(1), r4 /* save C. at I(1) */ \
+ __asm psubsw r1, r5 /* r1 = H = c6*i2 - c2*i6 */ \
+ __asm movq r4, C(4) /* r4 = c4 */ \
+ __asm movq r5, r3 /* r5 = B - D */ \
+ __asm pmulhw r3, r4 /* r3 = (c4 - 1) * (B - D) */ \
+ __asm paddsw r7, r2 /* r7 = G = c6*i6 + c2*i2 */ \
+ __asm movq I(2), r6 /* save D. at I(2) */ \
+ __asm movq r2, r0 /* r2 = A - C */ \
+ __asm movq r6, I(0) /* r6 = i0 */ \
+ __asm pmulhw r0, r4 /* r0 = (c4 - 1) * (A - C) */ \
+ __asm paddw r5, r3 /* r5 = B. = c4 * (B - D) */ \
+ /* second half: even-indexed inputs i0, i4 and the final butterflies */ \
+ __asm movq r3, J(4) /* r3 = i4 */ \
+ __asm psubsw r5, r1 /* r5 = B.. = B. - H */ \
+ __asm paddw r2, r0 /* r2 = A. = c4 * (A - C) */ \
+ __asm psubsw r6, r3 /* r6 = i0 - i4 */ \
+ __asm movq r0, r6 /* r0 = i0 - i4 */ \
+ __asm pmulhw r6, r4 /* r6 = (c4 - 1) * (i0 - i4) */ \
+ __asm paddsw r3, r3 /* r3 = i4 + i4 */ \
+ __asm paddsw r1, r1 /* r1 = H + H */ \
+ __asm paddsw r3, r0 /* r3 = i0 + i4 */ \
+ __asm paddsw r1, r5 /* r1 = H. = B. + H */ \
+ __asm pmulhw r4, r3 /* r4 = (c4 - 1) * (i0 + i4) */ \
+ __asm paddsw r6, r0 /* r6 = F = c4 * (i0 - i4) */ \
+ __asm psubsw r6, r2 /* r6 = F. = F - A. */ \
+ __asm paddsw r2, r2 /* r2 = A. + A. */ \
+ __asm movq r0, I(1) /* r0 = C. */ \
+ __asm paddsw r2, r6 /* r2 = A.. = F + A. */ \
+ __asm paddw r4, r3 /* r4 = E = c4 * (i0 + i4) */ \
+ __asm psubsw r2, r1 /* r2 = R2 = A.. - H. */ \
+}
+// end BeginIDCT macro (38 cycles).
+
+
+// Two versions of the end of the idct depending on whether we're feeding
+// into a transpose or dividing the final results by 16 and storing them.
+
+/**************************************************************************************
+ *
+ * Routine: RowIDCT
+ *
+ * Description: The Macro does 1-D IDct on 4 Rows
+ *
+ * Input: None
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: None
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+// RowIDCT gets ready to transpose.
+
+#define RowIDCT __asm { \
+ /* 1-D IDCT whose results stay in registers for Transpose: on exit r0 = R0, I(1) = R1, r2 = R2, r3 = R3, r4 = R4, r5 = R5, r6 = R6, r7 = R7 (no rounding, no shift). */ \
+ BeginIDCT \
+ /* BeginIDCT leaves r0 = C., r1 = H., r2 = R2, r4 = E, r5 = B.., r6 = F., r7 = G, I(2) = D. */ \
+ __asm movq r3, I(2) /* r3 = D. */ \
+ __asm psubsw r4, r7 /* r4 = E. = E - G */ \
+ __asm paddsw r1, r1 /* r1 = H. + H. */ \
+ __asm paddsw r7, r7 /* r7 = G + G */ \
+ __asm paddsw r1, r2 /* r1 = R1 = A.. + H. */ \
+ __asm paddsw r7, r4 /* r7 = G. = E + G */ \
+ __asm psubsw r4, r3 /* r4 = R4 = E. - D. */ \
+ __asm paddsw r3, r3 /* r3 = D. + D. */ \
+ __asm psubsw r6, r5 /* r6 = R6 = F. - B.. */ \
+ __asm paddsw r5, r5 /* r5 = B.. + B.. */ \
+ __asm paddsw r3, r4 /* r3 = R3 = E. + D. */ \
+ __asm paddsw r5, r6 /* r5 = R5 = F. + B.. */ \
+ __asm psubsw r7, r0 /* r7 = R7 = G. - C. */ \
+ __asm paddsw r0, r0 /* r0 = C. + C. */ \
+ __asm movq I(1), r1 /* save R1 */ \
+ __asm paddsw r0, r7 /* r0 = R0 = G. + C. */ \
+}
+// end RowIDCT macro (8 + 38 = 46 cycles)
+
+
+/**************************************************************************************
+ *
+ * Routine: ColumnIDCT
+ *
+ * Description: The Macro does 1-D IDct on 4 columns
+ *
+ * Input: None
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: None
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+// Column IDCT normalizes and stores final results.
+
+#define ColumnIDCT __asm { \
+ /* Final 1-D IDCT: each result gets the Eight operand added and is shifted right by 4 before being stored to I(0..3) / J(4..7). */ \
+ BeginIDCT \
+ /* The bias is added to the difference term of each pair first, so the sum formed from it (X + X + diff) carries the same bias. */ \
+ __asm paddsw r2, Eight /* adjust R2 (and R1) for shift */ \
+ __asm paddsw r1, r1 /* r1 = H. + H. */ \
+ __asm paddsw r1, r2 /* r1 = R1 = A.. + H. */ \
+ __asm psraw r2, 4 /* r2 = NR2 */ \
+ __asm psubsw r4, r7 /* r4 = E. = E - G */ \
+ __asm psraw r1, 4 /* r1 = NR1 */ \
+ __asm movq r3, I(2) /* r3 = D. */ \
+ __asm paddsw r7, r7 /* r7 = G + G */ \
+ __asm movq I(2), r2 /* store NR2 at I2 */ \
+ __asm paddsw r7, r4 /* r7 = G. = E + G */ \
+ __asm movq I(1), r1 /* store NR1 at I1 */ \
+ __asm psubsw r4, r3 /* r4 = R4 = E. - D. */ \
+ __asm paddsw r4, Eight /* adjust R4 (and R3) for shift */ \
+ __asm paddsw r3, r3 /* r3 = D. + D. */ \
+ __asm paddsw r3, r4 /* r3 = R3 = E. + D. */ \
+ __asm psraw r4, 4 /* r4 = NR4 */ \
+ __asm psubsw r6, r5 /* r6 = R6 = F. - B.. */ \
+ __asm psraw r3, 4 /* r3 = NR3 */ \
+ __asm paddsw r6, Eight /* adjust R6 (and R5) for shift */ \
+ __asm paddsw r5, r5 /* r5 = B.. + B.. */ \
+ __asm paddsw r5, r6 /* r5 = R5 = F. + B.. */ \
+ __asm psraw r6, 4 /* r6 = NR6 */ \
+ __asm movq J(4), r4 /* store NR4 at J4 */ \
+ __asm psraw r5, 4 /* r5 = NR5 */ \
+ __asm movq I(3), r3 /* store NR3 at I3 */ \
+ __asm psubsw r7, r0 /* r7 = R7 = G. - C. */ \
+ __asm paddsw r7, Eight /* adjust R7 (and R0) for shift */ \
+ __asm paddsw r0, r0 /* r0 = C. + C. */ \
+ __asm paddsw r0, r7 /* r0 = R0 = G. + C. */ \
+ __asm psraw r7, 4 /* r7 = NR7 */ \
+ __asm movq J(6), r6 /* store NR6 at J6 */ \
+ __asm psraw r0, 4 /* r0 = NR0 */ \
+ __asm movq J(5), r5 /* store NR5 at J5 */ \
+ /* remaining stores: NR7 and NR0 */ \
+ __asm movq J(7), r7 /* store NR7 at J7 */ \
+ /* I(0) is written last */ \
+ __asm movq I(0), r0 /* store NR0 at I0 */ \
+ /* all eight normalized rows are now in memory */ \
+}
+// end ColumnIDCT macro (38 + 19 = 57 cycles)
+
+/**************************************************************************************
+ *
+ * Routine: Transpose
+ *
+ * Description: The Macro does two 4x4 transposes in place.
+ *
+ * Input: None
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: None
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/* Following macro does two 4x4 transposes in place.
+
+ At entry (we assume):
+
+ r0 = a3 a2 a1 a0
+ I(1) = b3 b2 b1 b0
+ r2 = c3 c2 c1 c0
+ r3 = d3 d2 d1 d0
+
+ r4 = e3 e2 e1 e0
+ r5 = f3 f2 f1 f0
+ r6 = g3 g2 g1 g0
+ r7 = h3 h2 h1 h0
+
+ At exit, we have:
+
+ I(0) = d0 c0 b0 a0
+ I(1) = d1 c1 b1 a1
+ I(2) = d2 c2 b2 a2
+ I(3) = d3 c3 b3 a3
+
+ J(4) = h0 g0 f0 e0
+ J(5) = h1 g1 f1 e1
+ J(6) = h2 g2 f2 e2
+ J(7) = h3 g3 f3 e3
+
+ I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
+ J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
+
+ Since r1 is free at entry, we calculate the Js first. */
+
+
+#define Transpose __asm { \
+ /* Lower 4x4 (r4..r7 -> J(4..7)) is done first because r1 is the only free register on entry; I(0) is borrowed to park r0 meanwhile. */ \
+ __asm movq r1, r4 /* r1 = e3 e2 e1 e0 */ \
+ __asm punpcklwd r4, r5 /* r4 = f1 e1 f0 e0 */ \
+ __asm movq I(0), r0 /* save a3 a2 a1 a0 */ \
+ __asm punpckhwd r1, r5 /* r1 = f3 e3 f2 e2 */ \
+ __asm movq r0, r6 /* r0 = g3 g2 g1 g0 */ \
+ __asm punpcklwd r6, r7 /* r6 = h1 g1 h0 g0 */ \
+ __asm movq r5, r4 /* r5 = f1 e1 f0 e0 */ \
+ __asm punpckldq r4, r6 /* r4 = h0 g0 f0 e0 = R4 */ \
+ __asm punpckhdq r5, r6 /* r5 = h1 g1 f1 e1 = R5 */ \
+ __asm movq r6, r1 /* r6 = f3 e3 f2 e2 */ \
+ __asm movq J(4), r4 \
+ __asm punpckhwd r0, r7 /* r0 = h3 g3 h2 g2 */ \
+ __asm movq J(5), r5 \
+ __asm punpckhdq r6, r0 /* r6 = h3 g3 f3 e3 = R7 */ \
+ __asm movq r4, I(0) /* r4 = a3 a2 a1 a0 */ \
+ __asm punpckldq r1, r0 /* r1 = h2 g2 f2 e2 = R6 */ \
+ __asm movq r5, I(1) /* r5 = b3 b2 b1 b0 */ \
+ __asm movq r0, r4 /* r0 = a3 a2 a1 a0 */ \
+ __asm movq J(7), r6 \
+ __asm punpcklwd r0, r5 /* r0 = b1 a1 b0 a0 */ \
+ __asm movq J(6), r1 \
+ __asm punpckhwd r4, r5 /* r4 = b3 a3 b2 a2 */ \
+ __asm movq r5, r2 /* r5 = c3 c2 c1 c0 */ \
+ __asm punpcklwd r2, r3 /* r2 = d1 c1 d0 c0 */ \
+ __asm movq r1, r0 /* r1 = b1 a1 b0 a0 */ \
+ __asm punpckldq r0, r2 /* r0 = d0 c0 b0 a0 = R0 */ \
+ __asm punpckhdq r1, r2 /* r1 = d1 c1 b1 a1 = R1 */ \
+ __asm movq r2, r4 /* r2 = b3 a3 b2 a2 */ \
+ __asm movq I(0), r0 \
+ __asm punpckhwd r5, r3 /* r5 = d3 c3 d2 c2 */ \
+ __asm movq I(1), r1 \
+ __asm punpckhdq r4, r5 /* r4 = d3 c3 b3 a3 = R3 */ \
+ __asm punpckldq r2, r5 /* r2 = d2 c2 b2 a2 = R2 */ \
+ /* remaining stores of the upper 4x4 */ \
+ __asm movq I(3), r4 \
+ /* r2 still holds R2 after this store */ \
+ __asm movq I(2), r2 \
+ /* both 4x4 sub-blocks are now transposed in memory */ \
+}
+// end Transpose macro (19 cycles).
+
+/*
+__declspec( naked) static void MMX_dump()
+{
+ __asm
+ {
+ movq [edi], mm0
+ movq [edi+8], mm1
+ movq [edi+16], mm2
+ movq [edi+24], mm3
+ movq [edi+32], mm4
+ movq [edi+40], mm5
+ movq [edi+48], mm6
+ movq [edi+56], mm7
+ ret
+ }
+}
+*/
+/**************************************************************************************
+ *
+ * Routine: MMX_idct
+ *
+ * Description: Perform IDCT on a 8x8 block: dequantize and descramble the
+ *              coefficients into the output buffer, then run two row passes
+ *              (each followed by Transpose) and two column passes in place.
+ *
+ * Input: input  - quantized coefficients, read as sixteen 8-byte loads (128 bytes)
+ *        qtbl   - quantization table, multiplied lane by lane with input (128 bytes)
+ *        output - work/result buffer; bytes 0..127 are both read and written
+ *
+ * Output: output is overwritten in place
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in ZigZag order
+ *               Naked function: arguments are fetched from the stack via Arg().
+ *               edx, ecx and ebx are pushed and popped; the visible code does
+ *               not preserve eax or the MMX registers.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+__declspec ( naked ) void MMX_idct ( INT16 * input, INT16 * qtbl, INT16 * output)
+{
+
+// uINT16 *constants = idctconstants;
+// M(), C() and Eight address tables relative to ecx, which is loaded with the
+// address of idctconstants below.  MaskOffset, CosineOffset and EightOffset are
+// defined outside this function.
+# define M(I) [ecx + MaskOffset + I*8]
+# define C(I) [ecx + CosineOffset + (I-1)*8]
+# define Eight [ecx + EightOffset]
+# undef Arg
+# define Arg(I) [esp + 1*4 + 3*4 + I*4] // 1 return address + 3 pushes prior to args
+
+# define r0 mm0
+# define r1 mm1
+# define r2 mm2
+# define r3 mm3
+# define r4 mm4
+# define r5 mm5
+# define r6 mm6
+# define r7 mm7
+ // The parameters are only reached from the asm through Arg(); these casts
+ // reference them from C (presumably to silence unused-parameter warnings).
+ (void) output;
+ (void) qtbl;
+ (void) input;
+
+
+ __asm {
+
+ push edx
+ push ecx
+ push ebx
+
+;; Label:
+ mov eax, Arg( 0) ; eax = quantized input
+ mov edx, Arg( 2) ; edx = destination (= idct buffer)
+
+ mov ecx, [edx] ; (+1 at least) preload the cache before writing
+ mov ebx, [edx+28] ; in case proc doesn't cache on writes
+ mov ecx, [edx+56] ; gets all the cache lines
+ mov ebx, [edx+84] ; regardless of alignment (beyond 32-bit)
+ mov ecx, [edx+112] ; also avoids address contention stalls
+ mov ebx, [edx+124]
+
+ mov ebx, Arg( 1) ; ebx = quantization table
+ lea ecx, idctconstants ;;[0];
+
+ // Dequantize (pmullw against qtbl) and descramble.  In the comments below a
+ // two-digit tag such as "13" names one 16-bit lane of the product; R0..R15
+ // are the sixteen descrambled qwords written to the output buffer.
+ movq r0, [eax]
+ ;
+ pmullw r0, [ebx] ; r0 = 03 02 01 00
+ ;
+ movq r1, [eax+16]
+ ;
+ pmullw r1, [ebx+16] ; r1 = 13 12 11 10
+ ;
+ movq r2, M(0) ; r2 = __ __ __ FF
+ movq r3, r0 ; r3 = 03 02 01 00
+ movq r4, [eax+8]
+ psrlq r0, 16 ; r0 = __ 03 02 01
+ pmullw r4, [ebx+8] ; r4 = 07 06 05 04
+ pand r3, r2 ; r3 = __ __ __ 00
+ movq r5, r0 ; r5 = __ 03 02 01
+ movq r6, r1 ; r6 = 13 12 11 10
+ pand r5, r2 ; r5 = __ __ __ 01
+ psllq r6, 32 ; r6 = 11 10 __ __
+ movq r7, M(3) ; r7 = FF __ __ __
+ pxor r0, r5 ; r0 = __ 03 02 __
+ pand r7, r6 ; r7 = 11 __ __ __
+ por r0, r3 ; r0 = __ 03 02 00
+ pxor r6, r7 ; r6 = __ 10 __ __
+ por r0, r7 ; r0 = 11 03 02 00 = R0
+ movq r7, M(3) ; r7 = FF __ __ __
+ movq r3, r4 ; r3 = 07 06 05 04
+ movq [edx], r0 ; write R0 = r0
+ pand r3, r2 ; r3 = __ __ __ 04
+ movq r0, [eax+32]
+ psllq r3, 16 ; r3 = __ __ 04 __
+ pmullw r0, [ebx+32] ; r0 = 23 22 21 20
+ pand r7, r1 ; r7 = 13 __ __ __
+ por r5, r3 ; r5 = __ __ 04 01
+ por r7, r6 ; r7 = 13 10 __ __
+ movq r3, [eax+24]
+ por r7, r5 ; r7 = 13 10 04 01 = R1
+ pmullw r3, [ebx+24] ; r3 = 17 16 15 14
+ psrlq r4, 16 ; r4 = __ 07 06 05
+ movq [edx+16], r7 ; write R1 = r7
+ movq r5, r4 ; r5 = __ 07 06 05
+ movq r7, r0 ; r7 = 23 22 21 20
+ psrlq r4, 16 ; r4 = __ __ 07 06
+ psrlq r7, 48 ; r7 = __ __ __ 23
+ movq r6, r2 ; r6 = __ __ __ FF
+ pand r5, r2 ; r5 = __ __ __ 05
+ pand r6, r4 ; r6 = __ __ __ 06
+ // [edx+80] and [edx+64] are used as scratch for the partial R9 / R8 values;
+ // they are OR-ed back in and overwritten with the final R9 / R8 at the end.
+ movq [edx+80], r7 ; partial R9 = __ __ __ 23
+ pxor r4, r6 ; r4 = __ __ 07 __
+ psrlq r1, 32 ; r1 = __ __ 13 12
+ por r4, r5 ; r4 = __ __ 07 05
+ movq r7, M(3) ; r7 = FF __ __ __
+ pand r1, r2 ; r1 = __ __ __ 12
+ movq r5, [eax+48]
+ psllq r0, 16 ; r0 = 22 21 20 __
+ pmullw r5, [ebx+48] ; r5 = 33 32 31 30
+ pand r7, r0 ; r7 = 22 __ __ __
+ movq [edx+64], r1 ; partial R8 = __ __ __ 12
+ por r7, r4 ; r7 = 22 __ 07 05
+ movq r4, r3 ; r4 = 17 16 15 14
+ pand r3, r2 ; r3 = __ __ __ 14
+ movq r1, M(2) ; r1 = __ FF __ __
+ psllq r3, 32 ; r3 = __ 14 __ __
+ por r7, r3 ; r7 = 22 14 07 05 = R2
+ movq r3, r5 ; r3 = 33 32 31 30
+ psllq r3, 48 ; r3 = 30 __ __ __
+ pand r1, r0 ; r1 = __ 21 __ __
+ movq [edx+32], r7 ; write R2 = r7
+ por r6, r3 ; r6 = 30 __ __ 06
+ movq r7, M(1) ; r7 = __ __ FF __
+ por r6, r1 ; r6 = 30 21 __ 06
+ movq r1, [eax+56]
+ pand r7, r4 ; r7 = __ __ 15 __
+ pmullw r1, [ebx+56] ; r1 = 37 36 35 34
+ por r7, r6 ; r7 = 30 21 15 06 = R3
+ pand r0, M(1) ; r0 = __ __ 20 __
+ psrlq r4, 32 ; r4 = __ __ 17 16
+ movq [edx+48], r7 ; write R3 = r7
+ movq r6, r4 ; r6 = __ __ 17 16
+ movq r7, M(3) ; r7 = FF __ __ __
+ pand r4, r2 ; r4 = __ __ __ 16
+ movq r3, M(1) ; r3 = __ __ FF __
+ pand r7, r1 ; r7 = 37 __ __ __
+ pand r3, r5 ; r3 = __ __ 31 __
+ por r0, r4 ; r0 = __ __ 20 16
+ psllq r3, 16 ; r3 = __ 31 __ __
+ por r7, r0 ; r7 = 37 __ 20 16
+ movq r4, M(2) ; r4 = __ FF __ __
+ por r7, r3 ; r7 = 37 31 20 16 = R4
+ movq r0, [eax+80]
+ // NOTE: the next instruction copies M(2), so r3 actually holds __ FF __ __
+ // here; it only becomes __ __ FF __ after the "psrlq r3, 16" further down.
+ movq r3, r4 ; r3 = __ __ FF __
+ pmullw r0, [ebx+80] ; r0 = 53 52 51 50
+ pand r4, r5 ; r4 = __ 32 __ __
+ movq [edx+8], r7 ; write R4 = r7
+ por r6, r4 ; r6 = __ 32 17 16
+ movq r4, r3 ; r4 = __ FF __ __
+ psrlq r6, 16 ; r6 = __ __ 32 17
+ movq r7, r0 ; r7 = 53 52 51 50
+ pand r4, r1 ; r4 = __ 36 __ __
+ psllq r7, 48 ; r7 = 50 __ __ __
+ por r6, r4 ; r6 = __ 36 32 17
+ movq r4, [eax+88]
+ por r7, r6 ; r7 = 50 36 32 17 = R5
+ pmullw r4, [ebx+88] ; r4 = 57 56 55 54
+ psrlq r3, 16 ; r3 = __ __ FF __
+ movq [edx+24], r7 ; write R5 = r7
+ pand r3, r1 ; r3 = __ __ 35 __
+ psrlq r5, 48 ; r5 = __ __ __ 33
+ pand r1, r2 ; r1 = __ __ __ 34
+ movq r6, [eax+104]
+ por r5, r3 ; r5 = __ __ 35 33
+ pmullw r6, [ebx+104] ; r6 = 67 66 65 64
+ psrlq r0, 16 ; r0 = __ 53 52 51
+ movq r7, r4 ; r7 = 57 56 55 54
+ movq r3, r2 ; r3 = __ __ __ FF
+ psllq r7, 48 ; r7 = 54 __ __ __
+ pand r3, r0 ; r3 = __ __ __ 51
+ pxor r0, r3 ; r0 = __ 53 52 __
+ psllq r3, 32 ; r3 = __ 51 __ __
+ por r7, r5 ; r7 = 54 __ 35 33
+ movq r5, r6 ; r5 = 67 66 65 64
+ pand r6, M(1) ; r6 = __ __ 65 __
+ por r7, r3 ; r7 = 54 51 35 33 = R6
+ psllq r6, 32 ; r6 = 65 __ __ __
+ por r0, r1 ; r0 = __ 53 52 34
+ movq [edx+40], r7 ; write R6 = r7
+ por r0, r6 ; r0 = 65 53 52 34 = R7
+ movq r7, [eax+120]
+ movq r6, r5 ; r6 = 67 66 65 64
+ pmullw r7, [ebx+120] ; r7 = 77 76 75 74
+ psrlq r5, 32 ; r5 = __ __ 67 66
+ pand r6, r2 ; r6 = __ __ __ 64
+ movq r1, r5 ; r1 = __ __ 67 66
+ movq [edx+56], r0 ; write R7 = r0
+ pand r1, r2 ; r1 = __ __ __ 66
+ movq r0, [eax+112]
+ movq r3, r7 ; r3 = 77 76 75 74
+ pmullw r0, [ebx+112] ; r0 = 73 72 71 70
+ psllq r3, 16 ; r3 = 76 75 74 __
+ pand r7, M(3) ; r7 = 77 __ __ __
+ pxor r5, r1 ; r5 = __ __ 67 __
+ por r6, r5 ; r6 = __ __ 67 64
+ movq r5, r3 ; r5 = 76 75 74 __
+ pand r5, M(3) ; r5 = 76 __ __ __
+ por r7, r1 ; r7 = 77 __ __ 66
+ movq r1, [eax+96]
+ pxor r3, r5 ; r3 = __ 75 74 __
+ pmullw r1, [ebx+96] ; r1 = 63 62 61 60
+ por r7, r3 ; r7 = 77 75 74 66 = R15
+ por r6, r5 ; r6 = 76 __ 67 64
+ movq r5, r0 ; r5 = 73 72 71 70
+ movq [edx+120], r7 ; store R15 = r7
+ psrlq r5, 16 ; r5 = __ 73 72 71
+ pand r5, M(2) ; r5 = __ 73 __ __
+ movq r7, r0 ; r7 = 73 72 71 70
+ por r6, r5 ; r6 = 76 73 67 64 = R14
+ pand r0, r2 ; r0 = __ __ __ 70
+ pxor r7, r0 ; r7 = 73 72 71 __
+ psllq r0, 32 ; r0 = __ 70 __ __
+ movq [edx+104], r6 ; write R14 = r6
+ psrlq r4, 16 ; r4 = __ 57 56 55
+ movq r5, [eax+72]
+ psllq r7, 16 ; r7 = 72 71 __ __
+ pmullw r5, [ebx+72] ; r5 = 47 46 45 44
+ movq r6, r7 ; r6 = 72 71 __ __
+ movq r3, M(2) ; r3 = __ FF __ __
+ psllq r6, 16 ; r6 = 71 __ __ __
+ pand r7, M(3) ; r7 = 72 __ __ __
+ pand r3, r1 ; r3 = __ 62 __ __
+ por r7, r0 ; r7 = 72 70 __ __
+ movq r0, r1 ; r0 = 63 62 61 60
+ pand r1, M(3) ; r1 = 63 __ __ __
+ por r6, r3 ; r6 = 71 62 __ __
+ movq r3, r4 ; r3 = __ 57 56 55
+ psrlq r1, 32 ; r1 = __ __ 63 __
+ pand r3, r2 ; r3 = __ __ __ 55
+ por r7, r1 ; r7 = 72 70 63 __
+ por r7, r3 ; r7 = 72 70 63 55 = R13
+ movq r3, r4 ; r3 = __ 57 56 55
+ pand r3, M(1) ; r3 = __ __ 56 __
+ movq r1, r5 ; r1 = 47 46 45 44
+ movq [edx+88], r7 ; write R13 = r7
+ psrlq r5, 48 ; r5 = __ __ __ 47
+ movq r7, [eax+64]
+ por r6, r3 ; r6 = 71 62 56 __
+ pmullw r7, [ebx+64] ; r7 = 43 42 41 40
+ por r6, r5 ; r6 = 71 62 56 47 = R12
+ pand r4, M(2) ; r4 = __ 57 __ __
+ psllq r0, 32 ; r0 = 61 60 __ __
+ movq [edx+72], r6 ; write R12 = r6
+ movq r6, r0 ; r6 = 61 60 __ __
+ pand r0, M(3) ; r0 = 61 __ __ __
+ psllq r6, 16 ; r6 = 60 __ __ __
+ movq r5, [eax+40]
+ movq r3, r1 ; r3 = 47 46 45 44
+ pmullw r5, [ebx+40] ; r5 = 27 26 25 24
+ psrlq r1, 16 ; r1 = __ 47 46 45
+ pand r1, M(1) ; r1 = __ __ 46 __
+ por r0, r4 ; r0 = 61 57 __ __
+ pand r2, r7 ; r2 = __ __ __ 40
+ por r0, r1 ; r0 = 61 57 46 __
+ por r0, r2 ; r0 = 61 57 46 40 = R11
+ psllq r3, 16 ; r3 = 46 45 44 __
+ movq r4, r3 ; r4 = 46 45 44 __
+ movq r2, r5 ; r2 = 27 26 25 24
+ movq [edx+112], r0 ; write R11 = r0
+ psrlq r2, 48 ; r2 = __ __ __ 27
+ pand r4, M(2) ; r4 = __ 45 __ __
+ por r6, r2 ; r6 = 60 __ __ 27
+ movq r2, M(1) ; r2 = __ __ FF __
+ por r6, r4 ; r6 = 60 45 __ 27
+ pand r2, r7 ; r2 = __ __ 41 __
+ psllq r3, 32 ; r3 = 44 __ __ __
+ por r3, [edx+80] ; r3 = 44 __ __ 23
+ por r6, r2 ; r6 = 60 45 41 27 = R10
+ movq r2, M(3) ; r2 = FF __ __ __
+ psllq r5, 16 ; r5 = 26 25 24 __
+ movq [edx+96], r6 ; store R10 = r6
+ pand r2, r5 ; r2 = 26 __ __ __
+ movq r6, M(2) ; r6 = __ FF __ __
+ pxor r5, r2 ; r5 = __ 25 24 __
+ pand r6, r7 ; r6 = __ 42 __ __
+ psrlq r2, 32 ; r2 = __ __ 26 __
+ pand r7, M(3) ; r7 = 43 __ __ __
+ por r3, r2 ; r3 = 44 __ 26 23
+ por r7, [edx+64] ; r7 = 43 __ __ 12
+ por r6, r3 ; r6 = 44 42 26 23 = R9
+ por r7, r5 ; r7 = 43 25 24 12 = R8
+ ;
+ movq [edx+80], r6 ; store R9 = r6
+ ;
+ movq [edx+64], r7 ; store R8 = r7
+ ;
+ ; 123c ( / 64 coeffs < 2c / coeff)
+# undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+ // Row pass 1: I(0..3) = [edx+0], +16, +32, +48; J(4..7) = [edx+8], +24, +40, +56.
+ // RowIDCT is a macro defined earlier in the file.
+# define I( K) [edx + ( K * 16)]
+# define J( K) [edx + ( (K - 4) * 16) + 8]
+
+ RowIDCT ; 46 c
+ Transpose ; 19 c
+
+ // Row pass 2: the same layout shifted by 64 bytes (second half of the buffer).
+# undef I
+# undef J
+# define I( K) [edx + ( K * 16) + 64]
+# define J( K) [edx + ( (K - 4) * 16) + 72]
+
+ RowIDCT ; 46 c
+ Transpose ; 19 c
+
+ // Column pass 1: first qword of each 16-byte row; J is the same mapping as I.
+ // ColumnIDCT is a macro defined earlier in the file.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16)]
+# define J( K) I( K)
+
+ ColumnIDCT ; 57 c
+
+ // Column pass 2: second qword of each 16-byte row.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16) + 8]
+# define J( K) I( K)
+
+ ColumnIDCT ; 57 c
+
+# undef I
+# undef J
+ pop ebx
+ pop ecx
+ pop edx
+ ret
+ ; 368 cycles ( / 64 coeff < 6 c / coeff)
+ }
+}
+
+/**************************************************************************************
+ *
+ * Routine: MMX_idct10
+ *
+ * Description: Perform IDCT on a 8x8 block with at most 10 nonzero coefficients
+ *
+ * Input: Pointer to input and output buffer
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in transposed ZigZag order
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+/* --------------------------------------------------------------- */
+// This macro does four 4-sample one-dimensional idcts in parallel. Inputs
+// 4 thru 7 are assumed to be zero.
+//
+// Requirements at the expansion site: I(K), C(K) and r0..r7 must be defined.
+// Only I(0)..I(3) are loaded.  I(1) and I(2) are reused as scratch and hold
+// C. and D. respectively when the macro finishes.
+//
+// Register contents on exit (as established by the instructions below):
+//   r0 = C.   r1 = H.   r2 = R2 = A.. - H.   r4 = E
+//   r5 = B..  r6 = F.   r7 = G               r3 = dead (D. lives in I(2))
+#define BeginIDCT_10 __asm { \
+\
+ __asm movq r2, I(3) \
+__asm nop \
+\
+ __asm movq r6, C(3) \
+ __asm movq r4, r2 \
+\
+ __asm movq r1, C(5) \
+ __asm pmulhw r4, r6 /* r4 = c3*i3 - i3 */ \
+\
+ __asm movq r3, I(1) \
+ __asm pmulhw r1, r2 /* r1 = c5*i3 - i3 */ \
+\
+ __asm movq r0, C(1) /* (all registers are in use) */ \
+ __asm paddw r4, r2 /* r4 = C = c3*i3 */ \
+\
+ __asm pxor r6,r6 /* used to get -(c5*i3) */ \
+ __asm paddw r2, r1 /* r2 = c5*i3 */ \
+\
+ __asm movq r5, I(2) \
+ __asm pmulhw r0, r3 /* r0 = c1*i1 - i1 */ \
+\
+ __asm movq r1, r5 \
+ __asm paddw r0, r3 /* r0 = A = c1*i1 */ \
+\
+ __asm pmulhw r3, C(7) /* r3 = B = c7*i1 */ \
+ __asm psubsw r6, r2 /* r6 = D = -c5*i3 */ \
+\
+ __asm pmulhw r5, C(2) /* r5 = c2*i2 - i2 */ \
+ __asm psubsw r0, r4 /* r0 = A - C */ \
+\
+ __asm movq r7,I(2) \
+ __asm paddsw r4, r4 /* r4 = C + C */ \
+\
+ __asm paddw r7, r5 /* r7 = G = c2*i2 */ \
+ __asm paddsw r4, r0 /* r4 = C. = A + C */ \
+\
+ __asm pmulhw r1, C(6) /* r1 = H = c6*i2 */ \
+ __asm psubsw r3, r6 /* r3 = B - D */ \
+\
+ __asm movq I(1), r4 /* save C. at I(1) */ \
+ __asm paddsw r6, r6 /* r6 = D + D */ \
+\
+ __asm movq r4, C(4) \
+ __asm paddsw r6, r3 /* r6 = D. = B + D */ \
+\
+ __asm movq r5, r3 /* r5 = B - D */ \
+ __asm pmulhw r3, r4 /* r3 = (c4 - 1) * (B - D) */ \
+\
+ __asm movq I(2), r6 /* save D. at I(2) */ \
+ __asm movq r2, r0 /* r2 = A - C */ \
+\
+ __asm movq r6, I(0) \
+ __asm pmulhw r0, r4 /* r0 = (c4 - 1) * (A - C) */ \
+\
+ __asm paddw r5, r3 /* r5 = B. = c4 * (B - D) */ \
+ __asm paddw r2, r0 /* r2 = A. = c4 * (A - C) */ \
+\
+ __asm psubsw r5, r1 /* r5 = B.. = B. - H */ \
+ __asm pmulhw r6, r4 /* r6 = c4*i0 - i0 */ \
+\
+ __asm paddw r6, I(0) /* r6 = E = c4*i0 */ \
+ __asm paddsw r1, r1 /* r1 = H + H */ \
+\
+ __asm movq r4, r6 /* r4 = E */ \
+ __asm paddsw r1, r5 /* r1 = H. = B. + H */ \
+\
+ __asm psubsw r6, r2 /* r6 = F. = E - A. */ \
+ __asm paddsw r2, r2 /* r2 = A. + A. */ \
+\
+ __asm movq r0, I(1) /* r0 = C. */ \
+ __asm paddsw r2, r6 /* r2 = A.. = E + A. */ \
+\
+ __asm psubsw r2, r1 /* r2 = R2 = A.. - H. */ \
+__asm nop \
+}
+// end BeginIDCT_10 macro (25 cycles).
+
+// RowIDCT_10 = BeginIDCT_10 plus the final butterflies of the row pass.
+// On exit R0 is in r0, R2..R7 are in r2..r7 and R1 has been stored to I(1)
+// (it is also still in r1).  That is the register/memory layout the
+// Transpose macro documents as its entry state.
+#define RowIDCT_10 __asm { \
+ \
+ BeginIDCT_10 \
+ \
+ __asm movq r3, I(2) /* r3 = D. */ \
+ __asm psubsw r4, r7 /* r4 = E. = E - G */ \
+ __asm paddsw r1, r1 /* r1 = H. + H. */ \
+ __asm paddsw r7, r7 /* r7 = G + G */ \
+ __asm paddsw r1, r2 /* r1 = R1 = A.. + H. */ \
+ __asm paddsw r7, r4 /* r7 = G. = E + G */ \
+ __asm psubsw r4, r3 /* r4 = R4 = E. - D. */ \
+ __asm paddsw r3, r3 \
+ __asm psubsw r6, r5 /* r6 = R6 = F. - B.. */ \
+ __asm paddsw r5, r5 \
+ __asm paddsw r3, r4 /* r3 = R3 = E. + D. */ \
+ __asm paddsw r5, r6 /* r5 = R5 = F. + B.. */ \
+ __asm psubsw r7, r0 /* r7 = R7 = G. - C. */ \
+ __asm paddsw r0, r0 \
+ __asm movq I(1), r1 /* save R1 */ \
+ __asm paddsw r0, r7 /* r0 = R0 = G. + C. */ \
+}
+// end RowIDCT_10 macro (8 + 25 = 33 cycles)
+
+// Column IDCT normalizes and stores final results.
+//
+// ColumnIDCT_10 = BeginIDCT_10 plus the final butterflies.  Each result has
+// the constant at Eight added before an arithmetic right shift by 4
+// (presumably 8 in every lane, i.e. round-to-nearest for the /16 - confirm
+// against the idctconstants table).  NR0..NR3 are stored to I(0)..I(3) and
+// NR4..NR7 to J(4)..J(7).
+
+#define ColumnIDCT_10 __asm { \
+ \
+ BeginIDCT_10 \
+ \
+ __asm paddsw r2, Eight /* adjust R2 (and R1) for shift */ \
+ __asm paddsw r1, r1 /* r1 = H. + H. */ \
+ __asm paddsw r1, r2 /* r1 = R1 = A.. + H. */ \
+ __asm psraw r2, 4 /* r2 = NR2 */ \
+ __asm psubsw r4, r7 /* r4 = E. = E - G */ \
+ __asm psraw r1, 4 /* r1 = NR1 */ \
+ __asm movq r3, I(2) /* r3 = D. */ \
+ __asm paddsw r7, r7 /* r7 = G + G */ \
+ __asm movq I(2), r2 /* store NR2 at I2 */ \
+ __asm paddsw r7, r4 /* r7 = G. = E + G */ \
+ __asm movq I(1), r1 /* store NR1 at I1 */ \
+ __asm psubsw r4, r3 /* r4 = R4 = E. - D. */ \
+ __asm paddsw r4, Eight /* adjust R4 (and R3) for shift */ \
+ __asm paddsw r3, r3 /* r3 = D. + D. */ \
+ __asm paddsw r3, r4 /* r3 = R3 = E. + D. */ \
+ __asm psraw r4, 4 /* r4 = NR4 */ \
+ __asm psubsw r6, r5 /* r6 = R6 = F. - B.. */ \
+ __asm psraw r3, 4 /* r3 = NR3 */ \
+ __asm paddsw r6, Eight /* adjust R6 (and R5) for shift */ \
+ __asm paddsw r5, r5 /* r5 = B.. + B.. */ \
+ __asm paddsw r5, r6 /* r5 = R5 = F. + B.. */ \
+ __asm psraw r6, 4 /* r6 = NR6 */ \
+ __asm movq J(4), r4 /* store NR4 at J4 */ \
+ __asm psraw r5, 4 /* r5 = NR5 */ \
+ __asm movq I(3), r3 /* store NR3 at I3 */ \
+ __asm psubsw r7, r0 /* r7 = R7 = G. - C. */ \
+ __asm paddsw r7, Eight /* adjust R7 (and R0) for shift */ \
+ __asm paddsw r0, r0 /* r0 = C. + C. */ \
+ __asm paddsw r0, r7 /* r0 = R0 = G. + C. */ \
+ __asm psraw r7, 4 /* r7 = NR7 */ \
+ __asm movq J(6), r6 /* store NR6 at J6 */ \
+ __asm psraw r0, 4 /* r0 = NR0 */ \
+ __asm movq J(5), r5 /* store NR5 at J5 */ \
+ \
+ __asm movq J(7), r7 /* store NR7 at J7 */ \
+ \
+ __asm movq I(0), r0 /* store NR0 at I0 */ \
+ \
+}
+// end ColumnIDCT_10 macro (25 + 19 = 44 cycles)
+/* --------------------------------------------------------------- */
+
+
+/* --------------------------------------------------------------- */
+/* IDCT 10 */
+/*
+   MMX_idct10: IDCT of an 8x8 block using the reduced *_10 macros.
+
+   input  - quantized coefficients, read as sixteen 8-byte loads (128 bytes)
+   qtbl   - quantization table, multiplied lane by lane with input (128 bytes)
+   output - work/result buffer; bytes 0..127 are both read and written
+
+   Naked function: arguments are fetched from the stack via Arg().  edx, ecx
+   and ebx are pushed and popped; eax and mm0-mm7 are not preserved, and no
+   EMMS is issued here or in the macros this function expands.
+
+   Only one row pass is run (rows held in the first 64 bytes), and
+   ColumnIDCT_10 loads only I(0)..I(3), so the qwords descrambled into
+   [edx+64 .. edx+127] are never read before the column passes overwrite them.
+
+   NOTE(review): the dequant/descramble sequence is instruction-for-instruction
+   the same as in MMX_idct, whose header says the input is in plain ZigZag
+   order, while the header for this routine says "transposed ZigZag order".
+   One of the two notes is presumably wrong - confirm against the caller.
+*/
+__declspec ( naked ) void MMX_idct10 ( INT16 * input, INT16 * qtbl, INT16 * output)
+{
+
+// M(), C() and Eight address tables relative to ecx, which is loaded with the
+// address of idctconstants below.
+# define M(I) [ecx + MaskOffset + I*8]
+# define C(I) [ecx + CosineOffset + (I-1)*8]
+# define Eight [ecx + EightOffset]
+# undef Arg
+// 16 = 4 bytes of return address + 3 pushed registers (same value as in MMX_idct).
+# define Arg(I) [esp + 16 + I*4]
+
+# define r0 mm0
+# define r1 mm1
+# define r2 mm2
+# define r3 mm3
+# define r4 mm4
+# define r5 mm5
+# define r6 mm6
+# define r7 mm7
+ // The parameters are only reached from the asm through Arg(); these casts
+ // reference them from C (presumably to silence unused-parameter warnings).
+ (void) output;
+ (void) qtbl;
+ (void) input;
+
+ __asm {
+ push edx
+ push ecx
+ push ebx
+
+// Label:
+ mov eax, Arg( 0) ; eax = quantized input
+ mov edx, Arg( 2) ; edx = destination (= idct buffer)
+
+ mov ecx, [edx] ; (+1 at least) preload the cache before writing
+ mov ebx, [edx+28] ; in case proc doesn't cache on writes
+ mov ecx, [edx+56] ; gets all the cache lines
+ mov ebx, [edx+84] ; regardless of alignment (beyond 32-bit)
+ mov ecx, [edx+112] ; also avoids address contention stalls
+ mov ebx, [edx+124]
+
+ mov ebx, Arg( 1) ; ebx = quantization table
+ lea ecx, idctconstants ;; [0];
+
+ // Dequantize (pmullw against qtbl) and descramble.  In the comments below a
+ // two-digit tag such as "13" names one 16-bit lane of the product; R0..R15
+ // are the sixteen descrambled qwords written to the output buffer.
+ movq r0, [eax]
+ ;
+ pmullw r0, [ebx] ; r0 = 03 02 01 00
+ ;
+ movq r1, [eax+16]
+ ;
+ pmullw r1, [ebx+16] ; r1 = 13 12 11 10
+ ;
+ movq r2, M(0) ; r2 = __ __ __ FF
+ movq r3, r0 ; r3 = 03 02 01 00
+ movq r4, [eax+8]
+ psrlq r0, 16 ; r0 = __ 03 02 01
+ pmullw r4, [ebx+8] ; r4 = 07 06 05 04
+ pand r3, r2 ; r3 = __ __ __ 00
+ movq r5, r0 ; r5 = __ 03 02 01
+ movq r6, r1 ; r6 = 13 12 11 10
+ pand r5, r2 ; r5 = __ __ __ 01
+ psllq r6, 32 ; r6 = 11 10 __ __
+ movq r7, M(3) ; r7 = FF __ __ __
+ pxor r0, r5 ; r0 = __ 03 02 __
+ pand r7, r6 ; r7 = 11 __ __ __
+ por r0, r3 ; r0 = __ 03 02 00
+ pxor r6, r7 ; r6 = __ 10 __ __
+ por r0, r7 ; r0 = 11 03 02 00 = R0
+ movq r7, M(3) ; r7 = FF __ __ __
+ movq r3, r4 ; r3 = 07 06 05 04
+ movq [edx], r0 ; write R0 = r0
+ pand r3, r2 ; r3 = __ __ __ 04
+ movq r0, [eax+32]
+ psllq r3, 16 ; r3 = __ __ 04 __
+ pmullw r0, [ebx+32] ; r0 = 23 22 21 20
+ pand r7, r1 ; r7 = 13 __ __ __
+ por r5, r3 ; r5 = __ __ 04 01
+ por r7, r6 ; r7 = 13 10 __ __
+ movq r3, [eax+24]
+ por r7, r5 ; r7 = 13 10 04 01 = R1
+ pmullw r3, [ebx+24] ; r3 = 17 16 15 14
+ psrlq r4, 16 ; r4 = __ 07 06 05
+ movq [edx+16], r7 ; write R1 = r7
+ movq r5, r4 ; r5 = __ 07 06 05
+ movq r7, r0 ; r7 = 23 22 21 20
+ psrlq r4, 16 ; r4 = __ __ 07 06
+ psrlq r7, 48 ; r7 = __ __ __ 23
+ movq r6, r2 ; r6 = __ __ __ FF
+ pand r5, r2 ; r5 = __ __ __ 05
+ pand r6, r4 ; r6 = __ __ __ 06
+ // [edx+80] and [edx+64] are used as scratch for the partial R9 / R8 values;
+ // they are OR-ed back in and overwritten with the final R9 / R8 at the end.
+ movq [edx+80], r7 ; partial R9 = __ __ __ 23
+ pxor r4, r6 ; r4 = __ __ 07 __
+ psrlq r1, 32 ; r1 = __ __ 13 12
+ por r4, r5 ; r4 = __ __ 07 05
+ movq r7, M(3) ; r7 = FF __ __ __
+ pand r1, r2 ; r1 = __ __ __ 12
+ movq r5, [eax+48]
+ psllq r0, 16 ; r0 = 22 21 20 __
+ pmullw r5, [ebx+48] ; r5 = 33 32 31 30
+ pand r7, r0 ; r7 = 22 __ __ __
+ movq [edx+64], r1 ; partial R8 = __ __ __ 12
+ por r7, r4 ; r7 = 22 __ 07 05
+ movq r4, r3 ; r4 = 17 16 15 14
+ pand r3, r2 ; r3 = __ __ __ 14
+ movq r1, M(2) ; r1 = __ FF __ __
+ psllq r3, 32 ; r3 = __ 14 __ __
+ por r7, r3 ; r7 = 22 14 07 05 = R2
+ movq r3, r5 ; r3 = 33 32 31 30
+ psllq r3, 48 ; r3 = 30 __ __ __
+ pand r1, r0 ; r1 = __ 21 __ __
+ movq [edx+32], r7 ; write R2 = r7
+ por r6, r3 ; r6 = 30 __ __ 06
+ movq r7, M(1) ; r7 = __ __ FF __
+ por r6, r1 ; r6 = 30 21 __ 06
+ movq r1, [eax+56]
+ pand r7, r4 ; r7 = __ __ 15 __
+ pmullw r1, [ebx+56] ; r1 = 37 36 35 34
+ por r7, r6 ; r7 = 30 21 15 06 = R3
+ pand r0, M(1) ; r0 = __ __ 20 __
+ psrlq r4, 32 ; r4 = __ __ 17 16
+ movq [edx+48], r7 ; write R3 = r7
+ movq r6, r4 ; r6 = __ __ 17 16
+ movq r7, M(3) ; r7 = FF __ __ __
+ pand r4, r2 ; r4 = __ __ __ 16
+ movq r3, M(1) ; r3 = __ __ FF __
+ pand r7, r1 ; r7 = 37 __ __ __
+ pand r3, r5 ; r3 = __ __ 31 __
+ por r0, r4 ; r0 = __ __ 20 16
+ psllq r3, 16 ; r3 = __ 31 __ __
+ por r7, r0 ; r7 = 37 __ 20 16
+ movq r4, M(2) ; r4 = __ FF __ __
+ por r7, r3 ; r7 = 37 31 20 16 = R4
+ movq r0, [eax+80]
+ // NOTE: the next instruction copies M(2), so r3 actually holds __ FF __ __
+ // here; it only becomes __ __ FF __ after the "psrlq r3, 16" further down.
+ movq r3, r4 ; r3 = __ __ FF __
+ pmullw r0, [ebx+80] ; r0 = 53 52 51 50
+ pand r4, r5 ; r4 = __ 32 __ __
+ movq [edx+8], r7 ; write R4 = r7
+ por r6, r4 ; r6 = __ 32 17 16
+ movq r4, r3 ; r4 = __ FF __ __
+ psrlq r6, 16 ; r6 = __ __ 32 17
+ movq r7, r0 ; r7 = 53 52 51 50
+ pand r4, r1 ; r4 = __ 36 __ __
+ psllq r7, 48 ; r7 = 50 __ __ __
+ por r6, r4 ; r6 = __ 36 32 17
+ movq r4, [eax+88]
+ por r7, r6 ; r7 = 50 36 32 17 = R5
+ pmullw r4, [ebx+88] ; r4 = 57 56 55 54
+ psrlq r3, 16 ; r3 = __ __ FF __
+ movq [edx+24], r7 ; write R5 = r7
+ pand r3, r1 ; r3 = __ __ 35 __
+ psrlq r5, 48 ; r5 = __ __ __ 33
+ pand r1, r2 ; r1 = __ __ __ 34
+ movq r6, [eax+104]
+ por r5, r3 ; r5 = __ __ 35 33
+ pmullw r6, [ebx+104] ; r6 = 67 66 65 64
+ psrlq r0, 16 ; r0 = __ 53 52 51
+ movq r7, r4 ; r7 = 57 56 55 54
+ movq r3, r2 ; r3 = __ __ __ FF
+ psllq r7, 48 ; r7 = 54 __ __ __
+ pand r3, r0 ; r3 = __ __ __ 51
+ pxor r0, r3 ; r0 = __ 53 52 __
+ psllq r3, 32 ; r3 = __ 51 __ __
+ por r7, r5 ; r7 = 54 __ 35 33
+ movq r5, r6 ; r5 = 67 66 65 64
+ pand r6, M(1) ; r6 = __ __ 65 __
+ por r7, r3 ; r7 = 54 51 35 33 = R6
+ psllq r6, 32 ; r6 = 65 __ __ __
+ por r0, r1 ; r0 = __ 53 52 34
+ movq [edx+40], r7 ; write R6 = r7
+ por r0, r6 ; r0 = 65 53 52 34 = R7
+ movq r7, [eax+120]
+ movq r6, r5 ; r6 = 67 66 65 64
+ pmullw r7, [ebx+120] ; r7 = 77 76 75 74
+ psrlq r5, 32 ; r5 = __ __ 67 66
+ pand r6, r2 ; r6 = __ __ __ 64
+ movq r1, r5 ; r1 = __ __ 67 66
+ movq [edx+56], r0 ; write R7 = r0
+ pand r1, r2 ; r1 = __ __ __ 66
+ movq r0, [eax+112]
+ movq r3, r7 ; r3 = 77 76 75 74
+ pmullw r0, [ebx+112] ; r0 = 73 72 71 70
+ psllq r3, 16 ; r3 = 76 75 74 __
+ pand r7, M(3) ; r7 = 77 __ __ __
+ pxor r5, r1 ; r5 = __ __ 67 __
+ por r6, r5 ; r6 = __ __ 67 64
+ movq r5, r3 ; r5 = 76 75 74 __
+ pand r5, M(3) ; r5 = 76 __ __ __
+ por r7, r1 ; r7 = 77 __ __ 66
+ movq r1, [eax+96]
+ pxor r3, r5 ; r3 = __ 75 74 __
+ pmullw r1, [ebx+96] ; r1 = 63 62 61 60
+ por r7, r3 ; r7 = 77 75 74 66 = R15
+ por r6, r5 ; r6 = 76 __ 67 64
+ movq r5, r0 ; r5 = 73 72 71 70
+ movq [edx+120], r7 ; store R15 = r7
+ psrlq r5, 16 ; r5 = __ 73 72 71
+ pand r5, M(2) ; r5 = __ 73 __ __
+ movq r7, r0 ; r7 = 73 72 71 70
+ por r6, r5 ; r6 = 76 73 67 64 = R14
+ pand r0, r2 ; r0 = __ __ __ 70
+ pxor r7, r0 ; r7 = 73 72 71 __
+ psllq r0, 32 ; r0 = __ 70 __ __
+ movq [edx+104], r6 ; write R14 = r6
+ psrlq r4, 16 ; r4 = __ 57 56 55
+ movq r5, [eax+72]
+ psllq r7, 16 ; r7 = 72 71 __ __
+ pmullw r5, [ebx+72] ; r5 = 47 46 45 44
+ movq r6, r7 ; r6 = 72 71 __ __
+ movq r3, M(2) ; r3 = __ FF __ __
+ psllq r6, 16 ; r6 = 71 __ __ __
+ pand r7, M(3) ; r7 = 72 __ __ __
+ pand r3, r1 ; r3 = __ 62 __ __
+ por r7, r0 ; r7 = 72 70 __ __
+ movq r0, r1 ; r0 = 63 62 61 60
+ pand r1, M(3) ; r1 = 63 __ __ __
+ por r6, r3 ; r6 = 71 62 __ __
+ movq r3, r4 ; r3 = __ 57 56 55
+ psrlq r1, 32 ; r1 = __ __ 63 __
+ pand r3, r2 ; r3 = __ __ __ 55
+ por r7, r1 ; r7 = 72 70 63 __
+ por r7, r3 ; r7 = 72 70 63 55 = R13
+ movq r3, r4 ; r3 = __ 57 56 55
+ pand r3, M(1) ; r3 = __ __ 56 __
+ movq r1, r5 ; r1 = 47 46 45 44
+ movq [edx+88], r7 ; write R13 = r7
+ psrlq r5, 48 ; r5 = __ __ __ 47
+ movq r7, [eax+64]
+ por r6, r3 ; r6 = 71 62 56 __
+ pmullw r7, [ebx+64] ; r7 = 43 42 41 40
+ por r6, r5 ; r6 = 71 62 56 47 = R12
+ pand r4, M(2) ; r4 = __ 57 __ __
+ psllq r0, 32 ; r0 = 61 60 __ __
+ movq [edx+72], r6 ; write R12 = r6
+ movq r6, r0 ; r6 = 61 60 __ __
+ pand r0, M(3) ; r0 = 61 __ __ __
+ psllq r6, 16 ; r6 = 60 __ __ __
+ movq r5, [eax+40]
+ movq r3, r1 ; r3 = 47 46 45 44
+ pmullw r5, [ebx+40] ; r5 = 27 26 25 24
+ psrlq r1, 16 ; r1 = __ 47 46 45
+ pand r1, M(1) ; r1 = __ __ 46 __
+ por r0, r4 ; r0 = 61 57 __ __
+ pand r2, r7 ; r2 = __ __ __ 40
+ por r0, r1 ; r0 = 61 57 46 __
+ por r0, r2 ; r0 = 61 57 46 40 = R11
+ psllq r3, 16 ; r3 = 46 45 44 __
+ movq r4, r3 ; r4 = 46 45 44 __
+ movq r2, r5 ; r2 = 27 26 25 24
+ movq [edx+112], r0 ; write R11 = r0
+ psrlq r2, 48 ; r2 = __ __ __ 27
+ pand r4, M(2) ; r4 = __ 45 __ __
+ por r6, r2 ; r6 = 60 __ __ 27
+ movq r2, M(1) ; r2 = __ __ FF __
+ por r6, r4 ; r6 = 60 45 __ 27
+ pand r2, r7 ; r2 = __ __ 41 __
+ psllq r3, 32 ; r3 = 44 __ __ __
+ por r3, [edx+80] ; r3 = 44 __ __ 23
+ por r6, r2 ; r6 = 60 45 41 27 = R10
+ movq r2, M(3) ; r2 = FF __ __ __
+ psllq r5, 16 ; r5 = 26 25 24 __
+ movq [edx+96], r6 ; store R10 = r6
+ pand r2, r5 ; r2 = 26 __ __ __
+ movq r6, M(2) ; r6 = __ FF __ __
+ pxor r5, r2 ; r5 = __ 25 24 __
+ pand r6, r7 ; r6 = __ 42 __ __
+ psrlq r2, 32 ; r2 = __ __ 26 __
+ pand r7, M(3) ; r7 = 43 __ __ __
+ por r3, r2 ; r3 = 44 __ 26 23
+ por r7, [edx+64] ; r7 = 43 __ __ 12
+ por r6, r3 ; r6 = 44 42 26 23 = R9
+ por r7, r5 ; r7 = 43 25 24 12 = R8
+ ;
+ movq [edx+80], r6 ; store R9 = r6
+ ;
+ movq [edx+64], r7 ; store R8 = r7
+ ;
+ ; 123c ( / 64 coeffs < 2c / coeff)
+
+# undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+ // Row pass: I(0..3) = [edx+0], +16, +32, +48; J(4..7) = [edx+8], +24, +40, +56.
+# define I( K) [edx + ( K * 16)]
+# define J( K) [edx + ( (K - 4) * 16) + 8]
+
+ RowIDCT_10 ; 33 c
+ Transpose ; 19 c
+
+ // The I/J definitions below are dead: the second row pass is commented out
+ // and they are undefined again before anything expands them.
+# undef I
+# undef J
+# define I( K) [edx + ( K * 16) + 64]
+# define J( K) [edx + ( (K - 4) * 16) + 72]
+
+// RowIDCT ; 46 c
+// Transpose ; 19 c
+
+ // Column pass 1: first qword of each 16-byte row; J is the same mapping as I.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16)]
+# define J( K) I( K)
+
+ ColumnIDCT_10 ; 44 c
+
+ // Column pass 2: second qword of each 16-byte row.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16) + 8]
+# define J( K) I( K)
+
+ ColumnIDCT_10 ; 44 c
+
+# undef I
+# undef J
+
+
+
+ pop ebx
+ pop ecx
+ pop edx
+ ret
+ }
+}
+
+/**************************************************************************************
+ *
+ * Routine: MMX_idct1
+ *
+ * Description: Perform IDCT on a 8x8 block with at most 1 nonzero coefficient
+ *              (the DC term).  Every one of the 64 output samples receives the
+ *              same value: the low 16 bits of (input[0] * qtbl[0] + 15) >> 5.
+ *
+ * Input: input  - coefficient block; only input[0] is read
+ *        qtbl   - quantization table; only qtbl[0] is read
+ *        output - destination for 64 INT16 samples (128 bytes)
+ *
+ * Output: output[0..63] is overwritten
+ *
+ * Return: None
+ *
+ * Special Note: The result is written as 16-bit samples.  Earlier revisions
+ *               packed two samples into an INT32 with "temp << 16" and stored
+ *               through an INT32 alias of output; that shift overflows a signed
+ *               32-bit value for any sample >= 0x8000 and the alias breaks the
+ *               C aliasing rules.  The bytes written are the same.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/* --------------------------------------------------------------- */
+/* IDCT 1 */
+void MMX_idct1 (INT16 * input, INT16 * qtbl, INT16 * output)
+{
+    enum { IDCT1_SAMPLES = 64 };    /* 8x8 block */
+
+    if (input[0])
+    {
+        int i;
+        INT16 dc;
+        INT32 temp = (INT32)input[0];
+
+        temp *= qtbl[0];
+
+        //necessary in order to match tim's
+        temp += 15;
+
+        temp >>= 5;
+
+        /* Keep only the low 16 bits, exactly as the packed store used to. */
+        dc = (INT16)(temp & 0xffff);
+
+        for (i = 0; i < IDCT1_SAMPLES; i++)
+        {
+            output[i] = dc;
+        }
+    }
+    else
+    {
+        /* special case where there is only a 0 dc coeff */
+        memset( output, 0, IDCT1_SAMPLES * sizeof(output[0]));
+    }
+
+}
+
+/* --------------------------------------------------------------- */
+/*
+ The following functions (MMX_idct_DX and MMX_idct10_DX) are only
+ used by the dxer. The coeffs are written into a transposed order
+ during the unpack stage.
+*/
+/* --------------------------------------------------------------- */
+/**************************************************************************************
+ *
+ * Routine: MMX_idct_DX
+ *
+ * Description: Perform IDCT on a 8x8 block.  Unlike MMX_idct this variant only
+ *              dequantizes: each product is stored at the same offset it was
+ *              loaded from, then the row/column passes run in place.
+ *
+ * Input: input  - quantized coefficients, read as sixteen 8-byte loads (128 bytes)
+ *        qtbl   - quantization table, multiplied lane by lane with input (128 bytes)
+ *        output - work/result buffer; bytes 0..127 are both read and written
+ *
+ * Output: output is overwritten in place
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in transposed ZigZag order
+ *               Naked function: arguments are fetched from the stack via Arg().
+ *               edx, ecx and ebx are pushed and popped; the visible code does
+ *               not preserve eax or the MMX registers.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+__declspec ( naked ) void MMX_idct_DX ( INT16 * input, INT16 * qtbl, INT16 * output)
+{
+
+// uINT16 *constants = idctconstants;
+// M() is defined here but never used in this function.  C() and Eight address
+// tables relative to ecx, which is loaded with the address of idctconstants.
+# define M(I) [ecx + MaskOffset + I*8]
+# define C(I) [ecx + CosineOffset + (I-1)*8]
+# define Eight [ecx + EightOffset]
+# undef Arg
+# define Arg(I) [esp + 1*4 + 3*4 + I*4] // 1 return address + 3 pushes prior to args
+
+# define r0 mm0
+# define r1 mm1
+# define r2 mm2
+# define r3 mm3
+# define r4 mm4
+# define r5 mm5
+# define r6 mm6
+# define r7 mm7
+ // The parameters are only reached from the asm through Arg(); these casts
+ // reference them from C (presumably to silence unused-parameter warnings).
+ (void) output;
+ (void) qtbl;
+ (void) input;
+
+ __asm {
+
+ push edx
+ push ecx
+ push ebx
+
+;; Label:
+ mov eax, Arg( 0) ; eax = quantized input
+ mov edx, Arg( 2) ; edx = destination (= idct buffer)
+
+ mov ecx, [edx] ; (+1 at least) preload the cache before writing
+ mov ebx, [edx+28] ; in case proc doesn't cache on writes
+ mov ecx, [edx+56] ; gets all the cache lines
+ mov ebx, [edx+84] ; regardless of alignment (beyond 32-bit)
+ mov ecx, [edx+112] ; also avoids address contention stalls
+ mov ebx, [edx+124]
+
+ mov ebx, Arg( 1) ; ebx = quantization table
+ lea ecx, idctconstants ;;[0];
+
+//dequantization
+//try to optimize better
+// First half: bytes 0..63 of input * qtbl -> bytes 0..63 of output.
+ movq r0, [eax+0]
+ ;
+ pmullw r0, [ebx+0] ; r0 = 03 02 01 00
+ ;
+ movq r1, [eax+8]
+ ;
+ pmullw r1, [ebx+8]
+ ;
+ movq r2, [eax+16]
+ ;
+ pmullw r2, [ebx+16]
+ ;
+ movq r3, [eax+24]
+ ;
+ pmullw r3, [ebx+24]
+ ;
+ movq r4, [eax+32]
+ ;
+ pmullw r4, [ebx+32]
+ ;
+ movq r5, [eax+40]
+ ;
+ pmullw r5, [ebx+40]
+ ;
+ movq r6, [eax+48]
+ ;
+ pmullw r6, [ebx+48]
+ ;
+ movq r7, [eax+56]
+ ;
+ pmullw r7, [ebx+56]
+ ;
+ movq [edx+0],r0
+ ;
+ movq [edx+8],r1
+ ;
+ movq [edx+16],r2
+ ;
+ movq [edx+24],r3
+ ;
+ movq [edx+32],r4
+ ;
+ movq [edx+40],r5
+ ;
+ movq [edx+48],r6
+ ;
+ movq [edx+56],r7
+ ;
+;;;;;;;;;;;
+// Second half: bytes 64..127 of input * qtbl -> bytes 64..127 of output.
+// NOTE: the "r0 = 03 02 01 00" tag on the first multiply below repeats the one
+// from the first half; this load is from offset 64, not offset 0.
+ movq r0, [eax+64]
+ ;
+ pmullw r0, [ebx+64] ; r0 = 03 02 01 00
+ ;
+ movq r1, [eax+72]
+ ;
+ pmullw r1, [ebx+72]
+ ;
+ movq r2, [eax+80]
+ ;
+ pmullw r2, [ebx+80]
+ ;
+ movq r3, [eax+88]
+ ;
+ pmullw r3, [ebx+88]
+ ;
+ movq r4, [eax+96]
+ ;
+ pmullw r4, [ebx+96]
+ ;
+ movq r5, [eax+104]
+ ;
+ pmullw r5, [ebx+104]
+ ;
+ movq r6, [eax+112]
+ ;
+ pmullw r6, [ebx+112]
+ ;
+ movq r7, [eax+120]
+ ;
+ pmullw r7, [ebx+120]
+ ;
+ movq [edx+64],r0
+ ;
+ movq [edx+72],r1
+ ;
+ movq [edx+80],r2
+ ;
+ movq [edx+88],r3
+ ;
+ movq [edx+96],r4
+ ;
+ movq [edx+104],r5
+ ;
+ movq [edx+112],r6
+ ;
+ movq [edx+120],r7
+ ;
+
+# undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+// NOTE: in this _DX variant the code above only dequantizes; nothing is
+// descrambled or transposed before the row pass.
+
+ // Row pass 1: I(0..3) = [edx+0], +16, +32, +48; J(4..7) = [edx+8], +24, +40, +56.
+ // RowIDCT is a macro defined earlier in the file.
+# define I( K) [edx + ( K * 16)]
+# define J( K) [edx + ( (K - 4) * 16) + 8]
+
+ RowIDCT ; 46 c
+ Transpose ; 19 c
+
+ // Row pass 2: the same layout shifted by 64 bytes (second half of the buffer).
+# undef I
+# undef J
+# define I( K) [edx + ( K * 16) + 64]
+# define J( K) [edx + ( (K - 4) * 16) + 72]
+
+ RowIDCT ; 46 c
+ Transpose ; 19 c
+
+ // Column pass 1: first qword of each 16-byte row; J is the same mapping as I.
+ // ColumnIDCT is a macro defined earlier in the file.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16)]
+# define J( K) I( K)
+
+ ColumnIDCT ; 57 c
+
+ // Column pass 2: second qword of each 16-byte row.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16) + 8]
+# define J( K) I( K)
+
+ ColumnIDCT ; 57 c
+
+# undef I
+# undef J
+ pop ebx
+ pop ecx
+ pop edx
+ ret
+ ; 368 cycles ( / 64 coeff < 6 c / coeff)
+ }
+}
+
+/**************************************************************************************
+ *
+ * Routine: MMX_idct10_DX
+ *
+ * Description: Perform IDCT on a 8x8 block with at most 10 nonzero coefficients.
+ *              Only four qwords of input (byte offsets 0, 16, 32 and 48) are
+ *              dequantized; the other twelve qwords of the work buffer are
+ *              zero-filled before the reduced row/column passes run.
+ *
+ * Input: input  - quantized coefficients; 8 bytes are read at offsets 0, 16, 32, 48
+ *        qtbl   - quantization table; read at the same four offsets
+ *        output - work/result buffer; bytes 0..127 are both read and written
+ *
+ * Output: output is overwritten in place
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in transposed ZigZag order
+ *               Naked function: arguments are fetched from the stack via Arg().
+ *               edx, ecx and ebx are pushed and popped; eax and mm0-mm7 are not
+ *               preserved, and no EMMS is issued here or in the macros this
+ *               function expands.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+/* --------------------------------------------------------------- */
+/* IDCT 10 */
+__declspec ( naked ) void MMX_idct10_DX ( INT16 * input, INT16 * qtbl, INT16 * output)
+{
+
+// M() is defined here but never used in this function.  C() and Eight address
+// tables relative to ecx, which is loaded with the address of idctconstants.
+# define M(I) [ecx + MaskOffset + I*8]
+# define C(I) [ecx + CosineOffset + (I-1)*8]
+# define Eight [ecx + EightOffset]
+# undef Arg
+// 16 = 4 bytes of return address + 3 pushed registers.
+# define Arg(I) [esp + 16 + I*4]
+
+# define r0 mm0
+# define r1 mm1
+# define r2 mm2
+# define r3 mm3
+# define r4 mm4
+# define r5 mm5
+# define r6 mm6
+# define r7 mm7
+ // The parameters are only reached from the asm through Arg(); these casts
+ // reference them from C (presumably to silence unused-parameter warnings).
+ (void) output;
+ (void) qtbl;
+ (void) input;
+
+ __asm {
+ push edx
+ push ecx
+ push ebx
+
+// Label:
+ mov eax, Arg( 0) ; eax = quantized input
+ mov edx, Arg( 2) ; edx = destination (= idct buffer)
+
+ mov ecx, [edx] ; (+1 at least) preload the cache before writing
+ mov ebx, [edx+28] ; in case proc doesn't cache on writes
+ mov ecx, [edx+56] ; gets all the cache lines
+ mov ebx, [edx+84] ; regardless of alignment (beyond 32-bit)
+ mov ecx, [edx+112] ; also avoids address contention stalls
+ mov ebx, [edx+124]
+
+ mov ebx, Arg( 1) ; ebx = quantization table
+ lea ecx, idctconstants ;; [0];
+
+//dequantization
+// Only the first qword of each of the first four 16-byte rows is multiplied.
+ movq r0, [eax+0]
+ ;
+ pmullw r0, [ebx+0]
+ ;
+ movq r1, [eax+16]
+ ;
+ pmullw r1, [ebx+16]
+ ;
+ movq r2, [eax+32]
+ ;
+ pmullw r2, [ebx+32]
+ ;
+ movq r3, [eax+48]
+ ;
+ pmullw r3, [ebx+48]
+ ;
+ movq [edx+0],r0
+ pxor r5,r5
+
+// r5 is zero from here on; it clears every qword that was not dequantized.
+ movq [edx+8],r5
+ ;
+ movq [edx+16],r1
+ ;
+ movq [edx+24],r5
+ ;
+ movq [edx+32],r2
+ ;
+ movq [edx+40],r5
+ ;
+ movq [edx+48],r3
+ ;
+ movq [edx+56],r5
+ ;
+ movq [edx+64],r5
+ ;
+ movq [edx+72],r5
+ ;
+ movq [edx+80],r5
+ ;
+ movq [edx+88],r5
+ ;
+ movq [edx+96],r5
+ ;
+ movq [edx+104],r5
+ ;
+ movq [edx+112],r5
+ ;
+ movq [edx+120],r5
+ ;
+
+# undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+// NOTE: in this _DX variant the code above only dequantizes and zero-fills;
+// nothing is descrambled or transposed before the row pass.
+
+ // Row pass: I(0..3) = [edx+0], +16, +32, +48; J(4..7) = [edx+8], +24, +40, +56.
+# define I( K) [edx + ( K * 16)]
+# define J( K) [edx + ( (K - 4) * 16) + 8]
+
+ RowIDCT_10 ; 33 c
+ Transpose ; 19 c
+
+ // The I/J definitions below are dead: the second row pass is commented out
+ // and they are undefined again before anything expands them.
+# undef I
+# undef J
+# define I( K) [edx + ( K * 16) + 64]
+# define J( K) [edx + ( (K - 4) * 16) + 72]
+
+// RowIDCT ; 46 c
+// Transpose ; 19 c
+
+ // Column pass 1: first qword of each 16-byte row; J is the same mapping as I.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16)]
+# define J( K) I( K)
+
+ ColumnIDCT_10 ; 44 c
+
+ // Column pass 2: second qword of each 16-byte row.
+# undef I
+# undef J
+# define I( K) [edx + (K * 16) + 8]
+# define J( K) I( K)
+
+ ColumnIDCT_10 ; 44 c
+
+# undef I
+# undef J
+
+
+
+ pop ebx
+ pop ecx
+ pop edx
+ ret
+ }
+}
+
+
+
+/**************************************************************************************
+ *
+ * Routine: MMX_idct3
+ *
+ * Description: Perform IDCT on a 8x8 block with at most 3 nonzero coefficients
+ *
+ * Input: input - coefficient buffer (only the first four INT16 values are read)
+ *
+ * Output: output - 128-byte idct buffer, overwritten with the result
+ *
+ * Return: None
+ *
+ * Special Note: Only works for three nonzero coefficients.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+/***************************************************************************************
+ In IDCT 3, we are dealing with only three non-zero coefficients in the 8x8 block.
+ When we work in the order RowIDCT -> ColumnIDCT, we only have to do 1-D row
+ IDCTs on the first two rows; the remaining six rows stay zero anyway.
+ After the row IDCTs, since every column could have nonzero coefficients, we need to
+ do eight 1-D column IDCTs. However, for each column, there are at most two nonzero
+ coefficients, coefficient 0 and coefficient 1. The same holds for the coefficients of
+ the two 1-D row IDCTs. For this reason, the 1-D IDCT can be simplified
+
+ from a full version:
+
+ A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
+ C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * (I0 + I4) F = C4 * (I0 - I4)
+ G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+ To:
+
+
+ A = (C1 * I1) B = (C7 * I1)
+ C = 0 D = 0
+ A. = C4 * A B. = C4 * B
+ C. = A D. = B
+
+ E = C4 * I0 F = E
+ G = 0 H = 0
+ E. = E
+ G. = E
+
+ A.. = E + A. B.. = B.
+ F. = E - A. H. = B.
+
+ R0 = E + A R1 = E + A. + B. R3 = E + B R5 = E - A. + B.
+ R7 = E - A R2 = E + A. - B. R4 = E - B R6 = F - A. - B.
+
+******************************************************************************************/
+
+#define RowIDCT_3 __asm { \
+/* Simplified 1-D IDCT that reads only I(0) and I(1); leaves R0..R7 in r0..r7 and also stores R1 to I(1). */ \
+ __asm movq r7, I(1) /* r7 = I1 */ \
+ __asm movq r0, C(1) /* r0 = C1 */ \
+\
+ __asm movq r3, C(7) /* r3 = C7 */ \
+ __asm pmulhw r0, r7 /* r0 = C1 * I1 - I1 */ \
+\
+ __asm pmulhw r3, r7 /* r3 = C7 * I1 = B, D. */ \
+ __asm movq r6, I(0) /* r6 = I0 */ \
+\
+ __asm movq r4, C(4) /* r4 = C4 */ \
+ __asm paddw r0, r7 /* r0 = C1 * I1 = A, C. */ \
+\
+ __asm movq r1, r6 /* make a copy of I0 */ \
+ __asm pmulhw r6, r4 /* r6 = C4 * I0 - I0 */ \
+\
+ __asm movq r2, r0 /* make a copy of A */ \
+ __asm movq r5, r3 /* make a copy of B */ \
+\
+ __asm pmulhw r2, r4 /* r2 = C4 * A - A */ \
+ __asm pmulhw r5, r4 /* r5 = C4 * B - B */ \
+\
+ __asm paddw r6, r1 /* r6 = C4 * I0 = E, F */ \
+ __asm movq r4, r6 /* r4 = E */ \
+\
+ __asm paddw r2, r0 /* r2 = A. */ \
+ __asm paddw r5, r3 /* r5 = B. */ \
+\
+ __asm movq r7, r6 /* r7 = E */ \
+ __asm movq r1, r5 /* r1 = B. */ \
+\
+ /* r0 = A */ \
+ /* r3 = B */ \
+ /* r2 = A. */ \
+ /* r5 = B. */ \
+ /* r6 = E */ \
+ /* r4 = E */ \
+ /* r7 = E */ \
+ /* r1 = B. */ \
+\
+ __asm psubw r6, r2 /* r6 = E - A. */ \
+ __asm psubw r4, r3 /* r4 = E - B ----R4 */ \
+\
+ __asm psubw r7, r0 /* r7 = E - A ----R7 */ \
+ __asm paddw r2, r2 /* r2 = A. + A. */ \
+\
+ __asm paddw r3, r3 /* r3 = B + B */ \
+ __asm paddw r0, r0 /* r0 = A + A */ \
+\
+ __asm paddw r2, r6 /* r2 = E + A. */ \
+ __asm paddw r3, r4 /* r3 = E + B ----R3 */ \
+\
+ __asm psubw r2, r1 /* r2 = E + A. - B. ----R2 */ \
+ __asm psubw r6, r5 /* r6 = E - A. - B. ----R6 */ \
+\
+ __asm paddw r1, r1 /* r1 = B. + B. */ \
+ __asm paddw r5, r5 /* r5 = B. + B. */ \
+\
+ __asm paddw r0, r7 /* r0 = E + A ----R0 */ \
+ __asm paddw r1, r2 /* r1 = E + A. + B. -----R1 */ \
+\
+ __asm movq I(1), r1 /* save r1 */ \
+ __asm paddw r5, r6 /* r5 = E - A. + B. -----R5 */ \
+\
+}
+//End of RowIDCT_3
+
+#define ColumnIDCT_3 __asm { \
+/* Same simplified 1-D IDCT as RowIDCT_3, plus the Eight rounding term and a >>4 on every result, stored to I(0..3) and J(4..7). */ \
+ __asm movq r7, I(1) /* r7 = I1 */ \
+ __asm movq r0, C(1) /* r0 = C1 */ \
+\
+ __asm movq r3, C(7) /* r3 = C7 */ \
+ __asm pmulhw r0, r7 /* r0 = C1 * I1 - I1 */ \
+\
+ __asm pmulhw r3, r7 /* r3 = C7 * I1 = B, D. */ \
+ __asm movq r6, I(0) /* r6 = I0 */ \
+\
+ __asm movq r4, C(4) /* r4 = C4 */ \
+ __asm paddw r0, r7 /* r0 = C1 * I1 = A, C. */ \
+\
+ __asm movq r1, r6 /* make a copy of I0 */ \
+ __asm pmulhw r6, r4 /* r6 = C4 * I0 - I0 */ \
+\
+ __asm movq r2, r0 /* make a copy of A */ \
+ __asm movq r5, r3 /* make a copy of B */ \
+\
+ __asm pmulhw r2, r4 /* r2 = C4 * A - A */ \
+ __asm pmulhw r5, r4 /* r5 = C4 * B - B */ \
+\
+ __asm paddw r6, r1 /* r6 = C4 * I0 = E, F */ \
+ __asm movq r4, r6 /* r4 = E */ \
+\
+ __asm paddw r6, Eight /* +8 for shift */ \
+ __asm Paddw r4, Eight /* +8 for shift */ \
+\
+ __asm paddw r2, r0 /* r2 = A. */ \
+ __asm paddw r5, r3 /* r5 = B. */ \
+\
+ __asm movq r7, r6 /* r7 = E (rounding term already added) */ \
+ __asm movq r1, r5 /* r1 = B. */ \
+\
+/* r0 = A */ \
+/* r3 = B */ \
+/* r2 = A. */ \
+/* r5 = B. */ \
+/* r6 = E */ \
+/* r4 = E */ \
+/* r7 = E */ \
+/* r1 = B. */ \
+\
+ __asm psubw r6, r2 /* r6 = E - A. */ \
+ __asm psubw r4, r3 /* r4 = E - B ----R4 */ \
+\
+ __asm psubw r7, r0 /* r7 = E - A ----R7 */ \
+ __asm paddw r2, r2 /* r2 = A. + A. */ \
+\
+ __asm paddw r3, r3 /* r3 = B + B */ \
+ __asm paddw r0, r0 /* r0 = A + A */ \
+\
+ __asm paddw r2, r6 /* r2 = E + A. */ \
+ __asm paddw r3, r4 /* r3 = E + B ----R3 */ \
+\
+ __asm psraw r4, 4 /* shift */ \
+ __asm movq J(4), r4 /* store R4 at J4 */ \
+\
+ __asm psraw r3, 4 /* shift */ \
+ __asm movq I(3), r3 /* store R3 at I3 */ \
+\
+ __asm psubw r2, r1 /* r2 = E + A. - B. ----R2 */ \
+ __asm psubw r6, r5 /* r6 = E - A. - B. ----R6 */ \
+\
+ __asm paddw r1, r1 /* r1 = B. + B. */ \
+ __asm paddw r5, r5 /* r5 = B. + B. */ \
+\
+ __asm paddw r0, r7 /* r0 = E + A ----R0 */ \
+ __asm paddw r1, r2 /* r1 = E + A. + B. -----R1 */ \
+\
+ __asm psraw r7, 4 /* shift */ \
+ __asm psraw r2, 4 /* shift */ \
+\
+ __asm psraw r0, 4 /* shift */ \
+ __asm psraw r1, 4 /* shift */ \
+\
+ __asm movq J(7), r7 /* store R7 to J7 */ \
+ __asm movq I(0), r0 /* store R0 to I0 */ \
+\
+ __asm movq I(1), r1 /* store R1 to I1 */ \
+ __asm movq I(2), r2 /* store R2 to I2 */ \
+\
+ __asm movq I(1), r1 /* redundant: same r1 was stored to I1 two instructions earlier */ \
+ __asm paddw r5, r6 /* r5 = E - A. + B. -----R5 */ \
+\
+ __asm psraw r5, 4 /* shift */ \
+ __asm movq J(5), r5 /* store R5 at J5 */ \
+\
+ __asm psraw r6, 4 /* shift */ \
+ __asm movq J(6), r6 /* store R6 at J6 */ \
+\
+}
+//End of ColumnIDCT_3
+
+__declspec ( naked ) void MMX_idct3 ( INT16 * input, INT16 * output )
+{
+// Naked function: Arg(I) skips the return address plus the three registers pushed below (16 bytes).
+# define M(I) [ecx + MaskOffset + I*8]
+# define C(I) [ecx + CosineOffset + (I-1)*8]
+# define Eight [ecx + EightOffset]
+# undef Arg
+# define Arg(I) [esp + 16 + I*4]
+
+# define r0 mm0
+# define r1 mm1
+# define r2 mm2
+# define r3 mm3
+# define r4 mm4
+# define r5 mm5
+# define r6 mm6
+# define r7 mm7
+ (void) output;
+ (void) input;
+
+ __asm {
+ push edx
+ push ecx
+ push ebx
+// Reads only [eax]..[eax+7]; [edx] and [edx+16] receive the rearranged coefficients, every other qword of the buffer gets r4 (zero).
+// Label:
+ mov eax, Arg( 0) ; eax = quantized input
+ mov edx, Arg( 1) ; edx = destination (= idct buffer)
+
+ mov ecx, [edx] ; (+1 at least) preload the cache before writing
+ mov ebx, [edx+28] ; in case proc doesn't cache on writes
+ mov ecx, [edx+56] ; gets all the cache lines
+ mov ebx, [edx+84] ; regardless of alignment (beyond 32-bit)
+ mov ecx, [edx+112] ; also avoids address contention stalls
+ mov ebx, [edx+124]
+
+ lea ecx, idctconstants ;; [0];
+// NOTE(review): r1 and r4 are zeroed by pxor below, so the "13 12 11 10" / "07 06 05 04" style annotations describe data this routine never loads.
+ movq r0, [eax] ; r0 = 03 02 01 00
+ ;
+ pxor r1, r1 ; r1 = 13 12 11 10; all zero
+ ;
+ movq r2, M(0) ; r2 = __ __ __ FF
+ movq r3, r0 ; r3 = 03 02 01 00
+ pxor r4, r4
+ psrlq r0, 16 ; r0 = __ 03 02 01
+ pand r3, r2 ; r3 = __ __ __ 00
+ movq r5, r0 ; r5 = __ 03 02 01
+ movq r6, r1 ; r6 = 13 12 11 10;all zero
+ pand r5, r2 ; r5 = __ __ __ 01
+ ;psllq r6, 32 ; r6 = 11 10 __ __
+ movq r7, M(3) ; r7 = FF __ __ __
+ pxor r0, r5 ; r0 = __ 03 02 __
+ pand r7, r6 ; r7 = 11 __ __ __
+ por r0, r3 ; r0 = __ 03 02 00
+ pxor r6, r7 ; r6 = __ 10 __ __
+ por r0, r7 ; r0 = 11 03 02 00 = R0
+ movq r7, M(3) ; r7 = FF __ __ __
+ movq r3, r4 ; r3 = 07 06 05 04
+ movq [edx], r0 ; write R0 = r0
+ pand r3, r2 ; r3 = __ __ __ 04
+ psllq r3, 16 ; r3 = __ __ 04 __
+ pand r7, r1 ; r7 = 13 __ __ __
+ por r5, r3 ; r5 = __ __ 04 01
+ por r7, r6 ; r7 = 13 10 __ __
+ por r7, r5 ; r7 = 13 10 04 01 = R1
+ psrlq r4, 16 ; r4 = __ 07 06 05
+ movq [edx+16], r7 ; write R1 = r7
+ movq [edx+32], r4 ; write R2 = r7
+ movq [edx+48], r4 ; write R3 = r7
+ movq [edx+8], r4 ; write R4 = r7
+ movq [edx+24], r4 ; write R5 = r7
+ movq [edx+40], r4 ; write R6 = r7
+ movq [edx+56], r4 ; write R7 = r0
+ movq [edx+120], r4 ; store R15 = r7
+ movq [edx+104], r4 ; write R14 = r6
+ movq [edx+88], r4 ; write R13 = r7
+ movq [edx+72], r4 ; write R12 = r6
+ movq [edx+112], r4 ; write R12 = r6
+ movq [edx+96], r4 ; store R10 = r6
+ movq [edx+80], r4 ; store R9 = r6
+ movq [edx+64], r4 ; store R8 = r7
+ ;
+ ; 123c ( / 64 coeffs < 2c / coeff)
+
+# undef M
+
+; Donepartial transpose; now do the idct itself.
+
+# define I( K) [edx + ( K * 16)]
+# define J( K) [edx + ( (K - 4) * 16) + 8]
+
+ RowIDCT_3 ; 33 c
+ Transpose ; 19 c
+
+# undef I
+# undef J
+# define I( K) [edx + ( K * 16) + 64]
+# define J( K) [edx + ( (K - 4) * 16) + 72]
+// The second row pass is commented out below, so the I/J definitions just above are never expanded.
+// RowIDCT ; 46 c
+// Transpose ; 19 c
+
+# undef I
+# undef J
+# define I( K) [edx + (K * 16)]
+# define J( K) I( K)
+
+ ColumnIDCT_3 ; 44 c
+
+# undef I
+# undef J
+# define I( K) [edx + (K * 16) + 8]
+# define J( K) I( K)
+
+ ColumnIDCT_3 ; 44 c
+
+# undef I
+# undef J
+
+ pop ebx
+ pop ecx
+ pop edx
+ ret
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
new file mode 100644
index 00000000..7875112b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
@@ -0,0 +1,856 @@
+/****************************************************************************
+*
+* Module Title : mmxrecon.c
+*
+* Description : MMX or otherwise processor specific
+* optimised versions of functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.07 JBB 26/01/01 Removed unused function
+* 1.06 YWX 23/05/00 Remove the clamping in MmxReconPostProcess()
+* 1.05 YWX 15/05/00 Added MmxReconPostProcess()
+* 1.04 SJL 03/14/00 Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2.
+* 1.03 PGW 12/10/99 Changes to reduce unnecessary dependencies.
+* 1.02 PGW 30/08/99 Minor changes to MmxReconInterHalfPixel2().
+* 1.01 PGW 13/07/99 Changes to keep reconstruction data to 16 bit
+* 1.00 PGW 14/06/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/*
+ Use Tim's optimized version.
+*/
+#define USING_TIMS 1
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+
+#define STRICT // Strict type checking.
+
+#include "codec_common.h"
+
+#include "reconstruct.h"
+
+/****************************************************************************
+* Module constants.
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Imports.
+*****************************************************************************
+*/
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+* Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Exported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Module Statics
+*****************************************************************************
+*/
+
+INT16 Ones[4] = {1,1,1,1}; // four 16-bit words, each 1; no use visible in this section of the file
+INT16 OneTwoEight[4] = {128,128,128,128}; // four 16-bit words, each 128; no use visible in this section of the file
+UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128}; // 0x80 in every byte; MMXReconIntra XORs its packed bytes with this
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+/****************************************************************************
+ *
+ * ROUTINE : MMXReconIntra
+ *
+ * INPUTS : UINT16 * idct
+ * Pointer to the output from the idct for this block
+ * (64 values, consumed as signed 16-bit words)
+ * UINT32 stride
+ * Line Length in pixels in recon and reference images
+ *
+ * INT16 * TmpDataBuffer
+ * Unused
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - MMX version
+ *
+ * SPECIAL NOTES : Tim Murphy's optimized version
+ * Each row is packed with signed saturation, then XORed with 0x80 (adds 128).
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
+{
+ (void) TmpDataBuffer;
+ __asm
+ {
+ // u pipe / v pipe pairing. The loop ends once eax reaches idct + 128 bytes:
+ // cmp sets carry while eax < ecx, and the movq/lea before jc leave the flags alone.
+ mov eax,[idct] ; Signed 16 bit inputs
+ mov edx,[dest] ; Signed 8 bit outputs
+ movq mm0,[Eight128s] ; Set mm0 to 0x8080808080808080
+ ;
+ mov ebx,[stride] ; Line stride in output buffer
+ lea ecx,[eax+128] ; Endpoint in input buffer
+loop_label: ;
+ movq mm2,[eax] ; First four input values
+ ;
+ packsswb mm2,[eax+8] ; pack with next(high) four values
+ por mm0,mm0 ; stall
+ pxor mm2,mm0 ; Convert result to unsigned (same as add 128)
+ lea eax,[eax + 16] ; Step source buffer
+ cmp eax,ecx ; are we done
+ ;
+ movq [edx],mm2 ; store results
+ ;
+ lea edx,[edx+ebx] ; Step output buffer
+ jc loop_label ; Loop back if we are not done
+ }
+ // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxReconInter
+ *
+ * INPUTS : UINT8 * RefPtr
+ * The last frame reference
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change
+ *
+ * SPECIAL NOTES : TmpDataBuffer is unused. Eight rows of eight pixels are produced;
+ * each is ref + change with a signed-saturating add, then packed to unsigned bytes.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#if USING_TIMS
+void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ (void) TmpDataBuffer;
+// edi marks the end of the 128-byte change block; the loop repeats while ecx is below it.
+ _asm {
+ push edi
+;; mov ebx, [ref]
+;; mov ecx, [diff]
+;; mov eax, [dest]
+;; mov edx, [stride]
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+ pxor mm0, mm0
+ lea edi, [ecx + 128]
+ ;
+ L:
+ movq mm2, [ebx] ; (+3 misaligned) 8 reference pixels
+ ;
+ movq mm4, [ecx] ; first 4 changes
+ movq mm3, mm2
+ movq mm5, [ecx + 8] ; last 4 changes
+ punpcklbw mm2, mm0 ; turn first 4 refs into positive 16-bit #s
+ paddsw mm2, mm4 ; add in first 4 changes
+ punpckhbw mm3, mm0 ; turn last 4 refs into positive 16-bit #s
+ paddsw mm3, mm5 ; add in last 4 changes
+ add ebx, edx ; next row of reference pixels
+ packuswb mm2, mm3 ; pack result to unsigned 8-bit values
+ lea ecx, [ecx + 16] ; next row of changes
+ cmp ecx, edi ; are we done?
+ ;
+ movq [eax], mm2 ; store result
+ ;
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+ pop edi
+ }
+}
+#else
+void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+// Fully unrolled eight-row variant; only compiled when USING_TIMS is 0. TmpDataBuffer is not used.
+ // Note that the line step for the change data is 16 bytes (8 * 16 bits), see "add ecx,16".
+__asm
+ {
+ // Set up data pointers
+ mov eax,dword ptr [ReconPtr]
+ mov ebx,dword ptr [RefPtr]
+ mov ecx,dword ptr [ChangePtr]
+ mov edx,dword ptr [LineStep]
+ pxor mm6, mm6 ; Blank mmx6
+
+ // Row 1
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 2
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 3
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 4
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 5
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 6
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 7
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ add ebx,edx ; Step the reference pointer.
+ add ecx,16 ; Step the change pointer.
+ add eax,edx ; Step the reconstruction pointer
+
+ // Row 8
+ // Load the data values. The reference data needs to be unpacked to words
+ movq mm0,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data
+ paddsw mm0, mm2 ; First 4 values
+ paddsw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [eax],mm0 ; Write the data out to the results buffer
+
+ //emms ; Clear the MMX state.
+ }
+}
+#endif
+
+/****************************************************************************
+ *
+ * ROUTINE : MmxReconInterHalfPixel2
+ *
+ * INPUTS : UINT8 * RefPtr1, RefPtr2
+ * The last frame reference
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images
+ *
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from half pixel reference data and change.
+ * Half pixel data interpolated from 2 references.
+ *
+ * SPECIAL NOTES :
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+#if USING_TIMS
+
+#define A 0
+
+void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
+ UINT8 * RefPtr1, UINT8 * RefPtr2,
+ INT16 * ChangePtr, UINT32 LineStep )
+{
+# if A
+ static culong FourOnes[2] = { 65537, 65537}; // only read once
+# endif
+ (void) TmpDataBuffer;
+// Averages RefPtr1 and RefPtr2 per pixel, adds ChangePtr, and stores eight rows of eight unsigned-saturated bytes to ReconPtr.
+ _asm {
+ push esi
+ push edi
+
+;; mov ecx, [diff]
+;; mov esi, [ref1]
+;; mov edi, [ref2]
+;; mov ebx, [dest]
+;; mov edx, [stride]
+
+ mov ecx, [ChangePtr]
+ mov esi, [RefPtr1]
+ mov edi, [RefPtr2]
+ mov ebx, [ReconPtr]
+ mov edx, [LineStep]
+
+ lea eax, [ecx+128]
+// A is defined as 0 just before this function, so the rounding constant is not loaded and the average truncates.
+# if A
+ movq mm1, [FourOnes]
+# endif
+// Note: the changes are added with paddw (wrapping), not paddsw as in MmxReconInter.
+ pxor mm0, mm0
+ L:
+ movq mm2, [esi] ; (+3 misaligned) mm2 = row from ref1
+ ;
+ movq mm4, [edi] ; (+3 misaligned) mm4 = row from ref2
+ movq mm3, mm2
+ punpcklbw mm2, mm0 ; mm2 = start ref1 as positive 16-bit #s
+ movq mm5, mm4
+ movq mm6, [ecx] ; mm6 = first 4 changes
+ punpckhbw mm3, mm0 ; mm3 = end ref1 as positive 16-bit #s
+ movq mm7, [ecx+8] ; mm7 = last 4 changes
+ punpcklbw mm4, mm0 ; mm4 = start ref2 as positive 16-bit #s
+ punpckhbw mm5, mm0 ; mm5 = end ref2 as positive 16-bit #s
+ paddw mm2, mm4 ; mm2 = start (ref1 + ref2)
+ paddw mm3, mm5 ; mm3 = end (ref1 + ref2)
+
+# if A
+ paddw mm2, mm1 ; rounding adjustment
+ paddw mm3, mm1
+# endif
+
+ psrlw mm2, 1 ; mm2 = start (ref1 + ref2)/2
+ psrlw mm3, 1 ; mm3 = end (ref1 + ref2)/2
+ paddw mm2, mm6 ; add changes to start
+ paddw mm3, mm7 ; add changes to end
+ lea ecx, [ecx+16] ; next row idct
+ packuswb mm2, mm3 ; pack start|end to unsigned 8-bit
+ add esi, edx ; next row ref1
+ add edi, edx ; next row ref2
+ cmp ecx, eax
+ movq [ebx], mm2 ; store result
+ ;
+ lea ebx, [ebx+edx]
+ jc L ; 22c / 8 elts = 33c / 8 pixels = 4.125 c/pix
+
+ pop edi
+ pop esi
+ }
+}
+
+#undef A
+
+#else
+void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
+ UINT8 * RefPtr1, UINT8 * RefPtr2,
+ INT16 * ChangePtr, UINT32 LineStep )
+{
+ UINT8 * TmpDataPtr = (UINT8 *)TmpDataBuffer->TmpReconBuffer;
+
+ // Note that the line step for the change data is assumed to be 8 * 32 bits.
+ __asm
+ {
+ pxor mm6, mm6 ; Blank mmx6
+
+ // Set up data pointers
+ mov eax,dword ptr [RefPtr1]
+ mov ebx,dword ptr [RefPtr2]
+ mov edx,dword ptr [LineStep]
+
+ // Row 1
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+
+ // Load the data values (Ref1 and Ref2) and unpack to signed 16 bit values
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 2
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,16
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm3, mm2 ; Copy data
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+8],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 3
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,32
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+16],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 4
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,48
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+24],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 5
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,64
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+32],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 6
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,80
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+40],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 7
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,96
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+48],mm0 ; Write the data out to the temporary results buffer
+ add eax,edx ; Step the reference pointers
+ add ebx,edx
+
+ // Row 8
+ // Load the change pointer
+ mov ecx,dword ptr [ChangePtr]
+ add ecx,112
+
+ // Load the data values (Ref1 and Ref2).
+ movq mm0,dword ptr [eax] ; Load 8 elements of source data
+ movq mm2,dword ptr [ebx] ; Load 8 elements of source data
+ movq mm1, mm0 ; Copy data
+ movq mm3, mm2 ; Copy data
+
+ punpcklbw mm0, mm6 ; Low bytes to words
+ punpckhbw mm1, mm6 ; High bytes to words
+ punpcklbw mm2, mm6 ; Low bytes to words
+ punpckhbw mm3, mm6 ; High bytes to words
+
+ // Average Ref1 and Ref2
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm3 ; Second 4 values
+ psrlw mm0, 1
+ psrlw mm1, 1
+
+ // Load 8 elements of 16 bit change data
+ movq mm2,dword ptr [ecx] ; Load 4 elements of change data
+ movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
+
+ // Sum the data reference and difference data
+ paddw mm0, mm2 ; First 4 values
+ paddw mm1, mm4 ; Second 4 values
+
+ // Pack and store
+ mov ecx,dword ptr [TmpDataPtr]
+ packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
+ movq dword ptr [ecx+56],mm0 ; Write the data out to the temporary results buffer
+
+
+ // Now copy the results back to the reconstruction buffer.
+ mov eax,dword ptr [ReconPtr] ; Load the reconstruction Pointer
+ mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
+ // Row 1
+ movq mm0,dword ptr [ecx] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 2
+ movq mm0,dword ptr [ecx+8] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 3
+ movq mm0,dword ptr [ecx+16] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 4
+ movq mm0,dword ptr [ecx+24] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 5
+ movq mm0,dword ptr [ecx+32] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 6
+ movq mm0,dword ptr [ecx+40] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 7
+ movq mm0,dword ptr [ecx+48] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+ // Row 8
+ movq mm0,dword ptr [ecx+56] ; Load 8 elements of results data
+ movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
+ add eax,edx ; Step the reconstruction pointer
+
+ //emms
+ }
+}
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
new file mode 100644
index 00000000..bfcc194a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
@@ -0,0 +1,351 @@
+/****************************************************************************
+*
+* Module Title : SystemDependant.c
+*
+* Description : Miscellaneous system dependant functions
+*
+* AUTHOR : Paul Wilkins
+*
+*****************************************************************************
+* Revision History
+*
+* 1.20 YWX 06-Nov-02 Added forward DCT function optimized for Pentium 4
+* 1.19 YWX 15-Jun-01 added function pointer setups for new deblocking filter
+* 1.18 YWX 26-Apr-01 Fixed the cpu frequency detection bug caused by Sleep()
+* 1.17 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
+* 1.16 JBB 26-Jan-01 Cleaned out unused function
+* 1.15 YWX 08-dec-00 Added WMT PostProcessor and
+* moved function declarations into _head files
+* 1.14 JBB 30 NOV 00 Version number changes
+* 1.13 YWX 03-Nov-00 Optimized postprocessor filters
+* 1.12 YWX 02-Nov-00 Added new loopfilter function pointers
+* 1.11 YWX 19-Oct-00 Added 1-2 Scaling functions pointers
+* 1.10 jbb 16 oct 00 added ifdefs to insure version code
+* 1.09 YWX 04-Oct-00 Added function pointers for scaling
+* 1.08 YWX 06 Sep 00 Added function pointers for new deringing filter
+* using frag based Q Value.
+* 1.07 JBB 21 Aug 00 New More Blurry in high variance area deringer
+* 1.06 YWX 2 Aug 00 Added function pointers for postprocess
+* 1.05 YWX 15/05/00 Added functions to check processor frequency
+* and more function pointers for postprocessor
+* 1.04 YWX 08/05/00 Added function pointers setup for postprocess
+* 1.03 SJL 20/04/00 Added ability to enable the new dequant code.
+* 1.02 SJL 22/03/00 Function pointers for the loop filter.
+* 1.01 JBB 21/03/00 More Function Pointers for optimized playback
+* 1.00 PGW 12/10/99 Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+* Header Files
+*****************************************************************************
+*/
+#include "codec_common.h"
+#include "vputil_if.h"
+#include "cpuidlib.h"
+
+// Global debugging aids, read when the CPU type is mapped to flags and function pointers.
+int fastIDCTDisabled = 0; // non-zero: UtilMachineSpecificConfig installs a single IDCT routine in every table slot
+int forceCPUID = 0; // non-zero: GetProcessorFlags uses CPUID in place of the findCPUId() result
+int CPUID = 0; // CPU type substituted for the detected one while forceCPUID is set
+
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+// Scalar (no mmx) reconstruction functions
+extern void ClearSysState_C(void);
+extern void IDctSlow( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void IDct10( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void IDct1( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
+extern void SubtractBlock_C( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_C( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void AverageBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void CopyBlock_C(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void Copy12x12_C(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+// MMx versions
+extern void fdct_MMX ( INT16 * InputData, INT16 * OutputData );
+extern void ClearMmx(void);
+extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void MMX_idct( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct10( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct1( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct_DX( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct10_DX( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void ReconBlock_MMX(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
+extern void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void Copy12x12_MMX(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
+extern void FilterBlockBil_8_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+// WMT versions
+extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void Wmt_idct1( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void Wmt_IDct_Dx( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void Wmt_IDct10_Dx( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void fdct_WMT(short *InputData, short *OutputData);
+extern void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+
+#define IdctAdjustBeforeShift 8
+extern UINT16 idctconstants[(4+7+1) * 4];
+extern UINT16 idctcosTbl[ 7];
+
+void fillidctconstants(void)
+{
+ /* idctconstants is filled in groups of four UINT16 words:
+ * groups 0-3 (words 0..15) : zero, except word g of group g which is 65535
+ * groups 4-10 (words 16..43) : idctcosTbl[0..6], each value repeated four times
+ * group 11 (words 44..47) : IdctAdjustBeforeShift repeated four times
+ */
+ UINT16 *group = idctconstants;
+ int g;
+ int w;
+
+ // Groups 0-3: a 4x4 pattern with 65535 on the diagonal and 0 elsewhere.
+ for( g = 0; g < 4; g++ )
+ {
+ for( w = 0; w < 4; w++ )
+ group[w] = (UINT16)( (w == g) ? 65535 : 0 );
+ group += 4;
+ }
+
+ // Groups 4-10: replicate each cosine table entry across its group.
+ for( g = 0; g < 7; g++ )
+ {
+ const UINT16 c = idctcosTbl[g];
+
+ group[0] = c;
+ group[1] = c;
+ group[2] = c;
+ group[3] = c;
+ group += 4;
+ }
+
+ // Group 11: the adjustment constant, replicated in the same way.
+ for( w = 0; w < 4; w++ )
+ group[w] = IdctAdjustBeforeShift;
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : GetProcessorFlags
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : INT32 *MmxEnabled : TRUE for the PII, AMDK63D, AMDK6, PMMX,
+ * XMM and WMT CPU types, otherwise FALSE.
+ * INT32 *XmmEnabled : TRUE for the XMM and WMT CPU types,
+ * otherwise FALSE.
+ * INT32 *WmtEnabled : TRUE for the WMT CPU type only.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Maps the CPU type returned by findCPUId() (or the global
+ * CPUID when forceCPUID is non-zero) onto three capability
+ * flags.
+ *
+ * SPECIAL NOTES : All three flags are written on every call. A CPU type
+ * that is not listed below is reported with every flag
+ * FALSE so that callers never read an unset value.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void GetProcessorFlags
+(
+ INT32 *MmxEnabled,
+ INT32 *XmmEnabled,
+ INT32 *WmtEnabled
+)
+{
+ PROCTYPE CPUType = findCPUId();
+
+ // Debugging override: trust the global CPUID rather than the detected type.
+ if(forceCPUID)
+ CPUType = (PROCTYPE)CPUID;
+
+ switch(CPUType)
+ {
+ case PII :
+ case AMDK63D:
+ case AMDK6 :
+ case PMMX :
+ *MmxEnabled = TRUE;
+ *XmmEnabled = FALSE;
+ *WmtEnabled = FALSE;
+ break;
+ case XMM :
+ *MmxEnabled = TRUE;
+ *XmmEnabled = TRUE;
+ *WmtEnabled = FALSE;
+ break;
+ case WMT :
+ *MmxEnabled = TRUE;
+ *XmmEnabled = TRUE;
+ *WmtEnabled = TRUE;
+ break;
+ case X86 :
+ case PPRO :
+ case C6X86 :
+ case C6X86MX:
+ case AMDK5 :
+ case MACG3 :
+ case MAC68K :
+ default : // unrecognised (or forced, out of range) type: claim no SIMD support
+ *MmxEnabled = FALSE;
+ *XmmEnabled = FALSE;
+ *WmtEnabled = FALSE;
+ break;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : UtilMachineSpecificConfig
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Queries GetProcessorFlags() and points the idct[] / idctc[]
+ * tables and the block-level function pointers (fdct_short,
+ * ReconIntra, ReconInter, ReconInterHalfPixel2, ClearSysState,
+ * AverageBlock, UnpackBlock, ReconBlock, SubtractBlock,
+ * CopyBlock, Copy12x12, FilterBlockBil_8, FilterBlock) at the
+ * WMT, MMX or scalar implementations.
+ *
+ * SPECIAL NOTES : Table entries 0..64 inclusive are written, so idct[] and
+ * idctc[] must hold at least 65 pointers (they are declared
+ * outside this file - TODO confirm). Unless fastIDCTDisabled
+ * is set, entries 0-1 get the "1" routine, entries 2-10 the
+ * "10" routine and entries 11-64 the full routine.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void UtilMachineSpecificConfig
+(
+ void
+)
+{
+ UINT32 i;
+ // Start from "no SIMD" so the flags are defined even if GetProcessorFlags
+ // leaves one of them untouched.
+ INT32 MmxEnabled = FALSE;
+ INT32 XmmEnabled = FALSE;
+ INT32 WmtEnabled = FALSE;
+
+ GetProcessorFlags( &MmxEnabled,&XmmEnabled,&WmtEnabled);
+
+ if(WmtEnabled) //Willamette
+ {
+ for(i=0;i<=64;i++)
+ {
+ if(fastIDCTDisabled)
+ {
+ idct[i]=Wmt_IDct_Dx;
+ idctc[i]=MMX_idct;
+ }
+ else if(i<=1)
+ {
+ idct[i]=Wmt_idct1;
+ idctc[i]=Wmt_idct1;
+ }
+ else if(i<=10)
+ {
+ idct[i]=Wmt_IDct10_Dx;
+ idctc[i]=MMX_idct10;
+ }
+ else
+ {
+ idct[i]=Wmt_IDct_Dx;
+ idctc[i]=MMX_idct;
+ }
+ }
+ fdct_short=fdct_WMT;
+
+ ReconIntra = WmtReconIntra;
+ ReconInter = WmtReconInter;
+ ReconInterHalfPixel2 = WmtReconInterHalfPixel2;
+ ClearSysState = ClearMmx;
+ AverageBlock = AverageBlock_MMX;
+ UnpackBlock = UnpackBlock_MMX;
+ ReconBlock = ReconBlock_MMX;
+ SubtractBlock = SubtractBlock_MMX;
+ CopyBlock = CopyBlockMMX;
+ Copy12x12 = Copy12x12_MMX;
+ FilterBlockBil_8 = FilterBlockBil_8_wmt;
+ FilterBlock=FilterBlock_wmt;
+ }
+ else if ( MmxEnabled )
+ {
+ for(i=0;i<=64;i++)
+ {
+ if(fastIDCTDisabled)
+ {
+ idct[i]=MMX_idct_DX;
+ // idctc[] takes the non-DX routine on every other path (see the
+ // i>10 case below and the WMT branch); the original installed
+ // MMX_idct_DX here, which did not match them.
+ idctc[i]=MMX_idct;
+ }
+ else if(i<=1)
+ {
+ idct[i]=MMX_idct1;
+ idctc[i]=MMX_idct1;
+ }
+ else if(i<=10)
+ {
+ idct[i]=MMX_idct10_DX;
+ idctc[i]=MMX_idct10;
+ }
+ else
+ {
+ idct[i]=MMX_idct_DX;
+ idctc[i]=MMX_idct;
+ }
+ }
+ fdct_short=fdct_MMX;
+
+ ReconIntra = MMXReconIntra;
+ ReconInter = MmxReconInter;
+ ReconInterHalfPixel2 = MmxReconInterHalfPixel2;
+ ClearSysState = ClearMmx;
+ AverageBlock = AverageBlock_MMX;
+ UnpackBlock = UnpackBlock_MMX;
+ ReconBlock = ReconBlock_MMX;
+ SubtractBlock = SubtractBlock_MMX;
+ CopyBlock = CopyBlockMMX;
+ Copy12x12 = Copy12x12_MMX;
+ FilterBlockBil_8 = FilterBlockBil_8_mmx;
+ FilterBlock=FilterBlock_mmx;
+ }
+ else
+ {
+ // Scalar fallback: both tables share the same C routines.
+ for(i=0;i<=64;i++)
+ {
+ if(fastIDCTDisabled)
+ {
+ idct[i]=IDctSlow;
+ idctc[i]=IDctSlow;
+ }
+ else if(i<=1)
+ {
+ idct[i]=IDct1;
+ idctc[i]=IDct1;
+ }
+ else if(i<=10)
+ {
+ idct[i]=IDct10;
+ idctc[i]=IDct10;
+ }
+ else
+ {
+ idct[i]=IDctSlow;
+ idctc[i]=IDctSlow;
+ }
+ }
+ fdct_short=fdct_short_C ;
+
+ ClearSysState = ClearSysState_C;
+ ReconIntra = ScalarReconIntra;
+ ReconInter = ScalarReconInter;
+ ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
+ AverageBlock = AverageBlock_C;
+ UnpackBlock = UnpackBlock_C;
+ ReconBlock = ReconBlock_C;
+ SubtractBlock = SubtractBlock_C;
+ CopyBlock = CopyBlock_C;
+ // NOTE(review): Copy12x12_MMX (vputilasm.c) is written in plain C with no
+ // MMX instructions, so it is usable here. Copy12x12_C is declared above
+ // but never installed - confirm which one is intended.
+ Copy12x12 = Copy12x12_MMX;
+ FilterBlockBil_8 = FilterBlockBil_8_C;
+ FilterBlock=FilterBlock_C;
+ }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
new file mode 100644
index 00000000..3d173913
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
@@ -0,0 +1,507 @@
+/****************************************************************************
+ *
+ * Module Title : newLoopTest_asm.c
+ *
+ * Description : Codec specific functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.02 YWX 03-Nov-00 Changed confusing variable name
+ * 1.01 YWX 02-Nov-00 Added the set of functions
+ * 1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+
+#define STRICT /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+
+/****************************************************************************
+ * Explicit Imports
+ *****************************************************************************
+ */
+extern void SatUnsigned8( UINT8 * ResultPtr, INT16 * DataBlock,
+ UINT32 ResultLineStep, UINT32 DataLineStep );
+
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Forward References
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ *
+ * ROUTINE : ClearMmx()
+ *
+ *
+ * INPUTS : None
+ *
+ * OUTPUTS : None
+ *
+ * RETURNS : None
+ *
+ *
+ * FUNCTION : Clears down the MMX state by executing a single EMMS.
+ *
+ * SPECIAL NOTES : None of the other MMX routines in this file execute EMMS
+ * themselves. UtilMachineSpecificConfig installs this
+ * routine as ClearSysState in its WMT and MMX branches.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+ __asm
+ {
+ emms ; Clear the MMX state.
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : CopyBlockMMX
+ *
+ * INPUTS : unsigned char *src : first row of the source block
+ * unsigned char *dest : first row of the destination block
+ * unsigned int srcstride : byte distance between rows, applied
+ * to BOTH src and dest
+ *
+ * OUTPUTS : Eight rows of eight bytes written through dest.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies an 8x8 byte block from source to destination, four
+ * rows at a time, using 64-bit MMX loads and stores.
+ *
+ * SPECIAL NOTES : Does not execute EMMS. Uses eax, ebx, ecx, edx and
+ * mm0-mm3.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+ unsigned char *s = src;
+ unsigned char *d = dest;
+ unsigned int stride = srcstride;
+ // recon copy
+ _asm
+ {
+ mov ecx, [stride] // ecx = row stride
+ mov eax, [s] // eax = source row pointer
+ mov ebx, [d] // ebx = destination row pointer
+ lea edx, [ecx + ecx * 2] // edx = 3 * stride
+
+ movq mm0, [eax] // load rows 0-3
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx*2]
+ movq mm3, [eax + edx]
+
+ lea eax, [eax + ecx*4] // source pointer -> row 4
+
+ movq [ebx], mm0 // store rows 0-3
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx*2], mm2
+ movq [ebx + edx], mm3
+
+ lea ebx, [ebx + ecx * 4] // destination pointer -> row 4
+
+ movq mm0, [eax] // load rows 4-7
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx*2]
+ movq mm3, [eax + edx]
+
+ movq [ebx], mm0 // store rows 4-7
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx*2], mm2
+ movq [ebx + edx], mm3
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : Copy12x12_MMX
+ *
+ * INPUTS : const unsigned char *src : first row of the source block
+ * unsigned char *dest : first row of the destination
+ * unsigned int srcstride : bytes between source rows
+ * unsigned int deststride : bytes between destination rows
+ *
+ * OUTPUTS : Twelve rows of twelve bytes written through dest.
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Copies a 12x12 byte block from source to destination as
+ * three 32-bit words per row.
+ *
+ * SPECIAL NOTES : Despite the name this is plain C; no MMX instructions
+ * are used.
+ *
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void Copy12x12_MMX(
+ const unsigned char *src,
+ unsigned char *dest,
+ unsigned int srcstride,
+ unsigned int deststride)
+{
+ int row;
+
+ for( row = 0; row < 12; row++ )
+ {
+ const UINT32 *in = (const UINT32*)src;
+ UINT32 *out = (UINT32*)dest;
+
+ // 3 x 4 bytes = the 12 bytes of this row
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+
+ src += srcstride;
+ dest += deststride;
+ }
+}
+
+/****************************************************************************
+
+/****************************************************************************
+ *
+ * ROUTINE : AverageBlock_MMX
+ *
+ * INPUTS : UINT8 *ReconPtr1 : first reference block (8-bit pixels)
+ * UINT8 *ReconPtr2 : second reference block (8-bit pixels)
+ * UINT32 ReconPixelsPerLine : byte distance between rows of both
+ * reference blocks
+ *
+ * OUTPUTS : UINT16 *ReconRefPtr : BLOCK_HEIGHT_WIDTH rows of eight
+ * 16-bit averages, rows packed 16 bytes
+ * apart
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Do pixel averages on two reference blocks: each output is
+ * (pixel1 + pixel2) >> 1.
+ *
+ * SPECIAL NOTES : Does not execute EMMS. The commented-out C at the end of
+ * the function is the scalar equivalent kept for reference.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
+{
+
+ __asm
+ {
+ mov esi, ReconPtr1
+ mov eax, ReconPtr2
+
+ mov edi, ReconRefPtr
+ mov ecx, BLOCK_HEIGHT_WIDTH // ecx = rows remaining
+
+ mov edx, ReconPixelsPerLine
+ pxor mm7, mm7 // mm7 = 0, used to widen bytes to words
+
+AverageBlock_Loop:
+
+ movq mm0, [esi] // 8 pixels from reference 1
+ movq mm1, [eax] // 8 pixels from reference 2
+
+ movq mm2, mm0
+ punpcklbw mm0, mm7 // reference 1, pixels 0-3 as words
+
+ movq mm3, mm1
+ punpcklbw mm1, mm7 // reference 2, pixels 0-3 as words
+
+ paddw mm0, mm1
+ punpckhbw mm2, mm7 // reference 1, pixels 4-7 as words
+
+ psraw mm0, 1 // halve the sum of pixels 0-3
+ punpckhbw mm3, mm7 // reference 2, pixels 4-7 as words
+
+ paddw mm2, mm3
+ movq [edi], mm0 // store averages 0-3
+
+ psraw mm2, 1 // halve the sum of pixels 4-7
+ add esi, edx
+
+ add eax, edx
+ add edi, 16 // next output row (8 words)
+
+ movq [edi-8], mm2 // store averages 4-7 in the row just completed
+ dec ecx
+
+ jnz AverageBlock_Loop
+ }
+ /*
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+ ReconRefPtr[0] = (INT16)((INT32)(ReconPtr1[0])+ ((INT32)ReconPtr2[0]))>>1;
+ ReconRefPtr[1] = (INT16)((INT32)(ReconPtr1[1])+ ((INT32)ReconPtr2[1]))>>1;
+ ReconRefPtr[2] = (INT16)((INT32)(ReconPtr1[2])+ ((INT32)ReconPtr2[2]))>>1;
+ ReconRefPtr[3] = (INT16)((INT32)(ReconPtr1[3])+ ((INT32)ReconPtr2[3]))>>1;
+ ReconRefPtr[4] = (INT16)((INT32)(ReconPtr1[4])+ ((INT32)ReconPtr2[4]))>>1;
+ ReconRefPtr[5] = (INT16)((INT32)(ReconPtr1[5])+ ((INT32)ReconPtr2[5]))>>1;
+ ReconRefPtr[6] = (INT16)((INT32)(ReconPtr1[6])+ ((INT32)ReconPtr2[6]))>>1;
+ ReconRefPtr[7] = (INT16)((INT32)(ReconPtr1[7])+ ((INT32)ReconPtr2[7]))>>1;
+
+ // Start next row
+ ReconPtr1 += ReconPixelsPerLine;
+ ReconPtr2 += ReconPixelsPerLine;
+
+ ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+ }
+ */
+}
+
+
+/****************************************************************************
+ *
+ * ROUTINE : UnpackBlock_MMX
+ *
+ * INPUTS : UINT8 *ReconPtr : block of 8-bit data to be widened
+ * UINT32 ReconPixelsPerLine : byte distance between input rows
+ *
+ * OUTPUTS : INT16 *ReconRefPtr : BLOCK_HEIGHT_WIDTH rows of eight
+ * 16-bit values, rows packed 16 bytes
+ * apart
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Converts char block data to short by zero-extending each
+ * byte to a 16-bit word.
+ *
+ * SPECIAL NOTES : Does not execute EMMS. The commented-out C at the end of
+ * the function is the scalar equivalent kept for reference.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
+{
+
+ __asm
+ {
+ mov esi, ReconPtr
+ mov edi, ReconRefPtr
+
+ mov ecx, BLOCK_HEIGHT_WIDTH // ecx = rows remaining
+ mov edx, ReconPixelsPerLine
+
+ pxor mm7, mm7 // mm7 = 0, supplies the high byte of each word
+
+UnpackBlock_Loop:
+
+ movq mm0, [esi] // 8 input bytes
+ movq mm2, mm0
+
+ punpcklbw mm0, mm7 // bytes 0-3 as words
+ movq [edi], mm0
+
+ punpckhbw mm2, mm7 // bytes 4-7 as words
+ add esi, edx
+
+ movq [edi+8], mm2
+ add edi, 16 // next output row (8 words)
+
+ dec ecx
+ jnz UnpackBlock_Loop
+ }
+
+ /*
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+
+ ReconRefPtr[0] = (INT16)(ReconPtr[0]);
+ ReconRefPtr[1] = (INT16)(ReconPtr[1]);
+ ReconRefPtr[2] = (INT16)(ReconPtr[2]);
+ ReconRefPtr[3] = (INT16)(ReconPtr[3]);
+ ReconRefPtr[4] = (INT16)(ReconPtr[4]);
+ ReconRefPtr[5] = (INT16)(ReconPtr[5]);
+ ReconRefPtr[6] = (INT16)(ReconPtr[6]);
+ ReconRefPtr[7] = (INT16)(ReconPtr[7]);
+
+ // Start next row
+ ReconPtr += ReconPixelsPerLine;
+ ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+ }
+ */
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : SubtractBlock_MMX
+ *
+ * INPUTS : UINT8 *SrcBlock : source block (8-bit pixels), rows LineStep
+ * bytes apart
+ * INT16 *DestPtr : on entry, eight rows of eight 16-bit values
+ * to subtract from the source, rows packed
+ * 16 bytes apart
+ * UINT32 LineStep : byte distance between source rows
+ *
+ * OUTPUTS : INT16 *DestPtr : overwritten in place with the residue,
+ * DestPtr[i] = SrcBlock[i] - DestPtr[i]
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Do pixel subtraction of the block held in DestPtr from the
+ * source block, leaving the residue in DestPtr.
+ *
+ * SPECIAL NOTES : Does not execute EMMS. The commented-out C at the end of
+ * the function is the scalar equivalent kept for reference.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep )
+{
+
+ __asm
+ {
+
+ mov esi, SrcBlock
+ mov edi, DestPtr
+
+ mov edx, LineStep
+ mov ecx, 8 // ecx = rows remaining
+
+ pxor mm7, mm7 // mm7 = 0, used to widen bytes to words
+
+SubtractBlock_Loop:
+
+ movq mm0, [esi] // 8 source pixels
+ movq mm1, [edi] // values 0-3 to subtract
+
+ movq mm2, mm0
+ punpcklbw mm0, mm7 // source pixels 0-3 as words
+
+ movq mm3, [edi+8] // values 4-7 to subtract
+ psubw mm0, mm1
+
+ punpckhbw mm2, mm7 // source pixels 4-7 as words
+ movq [edi], mm0 // store residue 0-3
+
+ psubw mm2, mm3
+ add esi, edx
+
+ movq [edi+8], mm2 // store residue 4-7
+ add edi, 16 // next row of 8 words
+
+ dec ecx
+ jnz SubtractBlock_Loop
+ }
+
+ /*
+ UINT32 i;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+
+ DestPtr[0] = (INT16)((INT32)SrcBlock[0] - (INT32)DestPtr[0]);
+ DestPtr[1] = (INT16)((INT32)SrcBlock[1] - (INT32)DestPtr[1]);
+ DestPtr[2] = (INT16)((INT32)SrcBlock[2] - (INT32)DestPtr[2]);
+ DestPtr[3] = (INT16)((INT32)SrcBlock[3] - (INT32)DestPtr[3]);
+ DestPtr[4] = (INT16)((INT32)SrcBlock[4] - (INT32)DestPtr[4]);
+ DestPtr[5] = (INT16)((INT32)SrcBlock[5] - (INT32)DestPtr[5]);
+ DestPtr[6] = (INT16)((INT32)SrcBlock[6] - (INT32)DestPtr[6]);
+ DestPtr[7] = (INT16)((INT32)SrcBlock[7] - (INT32)DestPtr[7]);
+
+ // Start next row
+ SrcBlock += LineStep;
+ DestPtr += BLOCK_HEIGHT_WIDTH;
+ }
+ */
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : ReconBlock_MMX
+ *
+ * INPUTS : INT16 *SrcBlock : eight rows of eight 16-bit values, rows
+ * packed 16 bytes apart
+ * INT16 *ReconRefPtr : eight rows of eight 16-bit values in the
+ * same layout, added to SrcBlock
+ * UINT32 LineStep : byte distance between output rows
+ *
+ * OUTPUTS : UINT8 *DestBlock : eight rows of eight bytes, each the sum
+ * SrcBlock[i] + ReconRefPtr[i] saturated to
+ * an unsigned byte
+ *
+ * RETURNS : None.
+ *
+ * FUNCTION : Reconstruct a block using the reference block and change data.
+ *
+ * SPECIAL NOTES : Does not execute EMMS. Unlike the commented-out C kept
+ * at the end of the function, the assembly never writes
+ * the sums back into SrcBlock.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void ReconBlock_MMX( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep)
+{
+
+ __asm
+ {
+
+ mov esi, SrcBlock
+ mov eax, ReconRefPtr
+
+ mov edi, DestBlock
+ mov ecx, 8 // ecx = rows remaining
+
+ mov edx, LineStep
+ pxor mm7, mm7 // mm7 is cleared but not referenced again in this routine
+
+ReconBlock_Loop:
+
+ movq mm0, [esi] // SrcBlock words 0-3
+ movq mm1, [eax] // ReconRefPtr words 0-3
+
+ movq mm2, [esi+8] // SrcBlock words 4-7
+ movq mm3, [eax+8] // ReconRefPtr words 4-7
+
+ paddw mm0, mm1
+ paddw mm2, mm3
+
+ packuswb mm0, mm2 // pack the 8 sums to bytes with unsigned saturation
+ movq [edi], mm0
+
+ add esi, 16
+ add eax, 16
+
+ add edi, edx // next output row
+ dec ecx
+
+ jnz ReconBlock_Loop
+
+ }
+
+ /*
+ UINT32 i;
+ INT16 *SrcBlockPtr = SrcBlock;
+
+ // For each block row
+ for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+ {
+ SrcBlock[0] += ReconRefPtr[0];
+ SrcBlock[1] += ReconRefPtr[1];
+ SrcBlock[2] += ReconRefPtr[2];
+ SrcBlock[3] += ReconRefPtr[3];
+ SrcBlock[4] += ReconRefPtr[4];
+ SrcBlock[5] += ReconRefPtr[5];
+ SrcBlock[6] += ReconRefPtr[6];
+ SrcBlock[7] += ReconRefPtr[7];
+
+ // Start next row
+ SrcBlock += BLOCK_HEIGHT_WIDTH;
+ ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+ }
+ // Saturated the block and write to the output
+ SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
+ */
+
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
new file mode 100644
index 00000000..cec0599c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
@@ -0,0 +1,1859 @@
+/****************************************************************************
+ *
+ * Module Title : wmtidct.c
+ *
+ * Description : IDct functions optimized specifically for willamette
+ * processor
+ *
+ * Special Notes:
+ *
+ * AUTHOR : YaoWu Xu
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ * 1.02 YWX 07-dec-00 Removed code not in use and added push pop ebx
+ * 1.01 YWX 29/06/00 Added Wmt_IDCT_Dx and Wmt_IDCT10_Dx
+ * 1.00 YWX 31/05/00 Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+
+
+/* Rounding constant: eight 16-bit lanes each holding 8. Wmt_Column_IDCT adds
+ * it to its results just before the arithmetic shift right by 4. */
+__declspec(align(32)) static unsigned int Eight[]=
+{
+ (8u << 16) | 8u, (8u << 16) | 8u, (8u << 16) | 8u, (8u << 16) | 8u
+};
+/* Cosine constants in 16-bit fixed point: row k-1 holds
+ * round( 65536 * cos( k * pi / 16 ) ) for k = 1 ... 7, repeated in all eight
+ * lanes. The first five rows (C1 ... C5) are above 0x7FFF, so the signed
+ * pmulhw treats them as negative and the IDCT macros add the multiplicand
+ * back afterwards to compensate. */
+__declspec(align(32)) static unsigned short WmtIdctConst[7 * 8]=
+{
+ 0xFB15,0xFB15,0xFB15,0xFB15,0xFB15,0xFB15,0xFB15,0xFB15, /* C1 = 64277 */
+ 0xEC83,0xEC83,0xEC83,0xEC83,0xEC83,0xEC83,0xEC83,0xEC83, /* C2 = 60547 */
+ 0xD4DB,0xD4DB,0xD4DB,0xD4DB,0xD4DB,0xD4DB,0xD4DB,0xD4DB, /* C3 = 54491 */
+ 0xB505,0xB505,0xB505,0xB505,0xB505,0xB505,0xB505,0xB505, /* C4 = 46341 */
+ 0x8E3A,0x8E3A,0x8E3A,0x8E3A,0x8E3A,0x8E3A,0x8E3A,0x8E3A, /* C5 = 36410 */
+ 0x61F8,0x61F8,0x61F8,0x61F8,0x61F8,0x61F8,0x61F8,0x61F8, /* C6 = 25080 */
+ 0x31F1,0x31F1,0x31F1,0x31F1,0x31F1,0x31F1,0x31F1,0x31F1 /* C7 = 12785 */
+};
+/* Lane-select masks for dequantization. Each row is one 128-bit mask of eight
+ * 16-bit lanes (16 bytes), so Wmt_Dequant addresses row r at byte offset
+ * r * 16. Array order is lane 0 first; the trailing comments show the same
+ * mask written with lane 7 on the left. */
+__declspec(align(32)) static unsigned short WmtDequantConst[]=
+{
+ 0x0000,0xFFFF,0xFFFF,0x0000,0x0000,0x0000,0x0000,0x0000, /* row 0: -- -- -- -- -- FF FF -- */
+ 0x0000,0x0000,0x0000,0x0000,0xFFFF,0xFFFF,0x0000,0x0000, /* row 1: -- -- FF FF -- -- -- -- */
+ 0xFFFF,0xFFFF,0xFFFF,0x0000,0x0000,0x0000,0x0000,0x0000, /* row 2: -- -- -- -- -- FF FF FF */
+ 0x0000,0x0000,0x0000,0xFFFF,0x0000,0x0000,0x0000,0x0000, /* row 3: -- -- -- -- FF -- -- -- */
+ 0x0000,0x0000,0x0000,0xFFFF,0xFFFF,0x0000,0x0000,0x0000, /* row 4: -- -- -- FF FF -- -- -- */
+ 0xFFFF,0x0000,0x0000,0x0000,0x0000,0xFFFF,0x0000,0x0000, /* row 5: -- -- FF -- -- -- -- FF */
+ 0x0000,0x0000,0xFFFF,0xFFFF,0x0000,0x0000,0x0000,0x0000 /* row 6: -- -- -- -- FF FF -- -- */
+};
+
+
+/*******************************************************************************
+ * Forward Reference
+ *******************************************************************************
+ */
+
+/********************************************************************************
+ * Description of Inverse DCT algorithm.
+ ********************************************************************************
+ *
+
+ Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
+ by unsigned 16-bit quantization table entries.
+ These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
+ Result is scaled signed DCT coefficients (abs value < 2^15).
+
+ In the data stream, the coefficients are sent in order of increasing
+ total (horizontal + vertical) frequency. The exact picture is as follows:
+
+ 00 01 05 06 16 17 33 34
+ 02 04 07 15 20 32 35 52
+ 03 10 14 21 31 36 51 53
+ 11 13 22 30 37 50 54 65
+
+ 12 23 27 40 47 55 64 66
+ 24 26 41 46 56 63 67 74
+ 25 42 45 57 62 70 73 75
+ 43 44 60 61 71 72 76 77
+
+ Here the position in the matrix corresponds to the (horiz,vert)
+ frequency indices and the octal entry in the matrix is the position
+ of the coefficient in the data stream. Thus the coefficients are sent
+ in sort of a diagonal "snake".
+
+ The dequantization stage "uncurls the snake" and stores the expanded
+ coefficients in more convenient positions. These are not exactly the
+ natural positions given above but take into account our implementation
+ of the idct, which basically requires two one-dimensional idcts and
+ two transposes.
+
+
+ Transposing the 8x8 matrix above gives
+
+ 00 02 03 11 12 24 25 43
+ 01 04 10 13 23 26 42 44
+ 05 07 14 22 27 41 45 60
+ 06 15 21 30 40 46 57 61
+
+ 16 20 31 37 47 56 62 71
+ 17 32 36 50 55 63 70 72
+ 33 35 51 54 64 67 73 76
+ 34 52 53 65 66 74 75 77
+
+
+ The idct itself is more interesting. Since the two-dimensional dct
+ basis functions are products of the one-dimensional dct basis functions,
+ we can compute an inverse (or forward) dct via two 1-D transforms,
+ on rows then on columns. To exploit MMX parallelism, we actually do
+ both operations on columns, interposing a (partial) transpose between
+ the two 1-D transforms, the first transpose being done by the expansion
+ described above.
+
+ The 8-sample one-dimensional DCT is a standard orthogonal expansion using
+ the (unnormalized) basis functions
+
+ b[k]( i) = cos( pi * k * (2i + 1) / 16);
+
+ here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
+ To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
+ should be multiplied by 1/2.
+
+ The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
+ in each direction. The (unnormalized) basis functions are
+
+ B[k,l]( i, j) = b[k]( i) * b[l]( j);
+
+ this time k and l are the horizontal and vertical frequencies,
+ i and j are the horizontal and vertical spatial coordinates;
+ all indices vary from 0 ... 7 (as above)
+ and there are now 4 cases of normalization.
+
+ Our 1-D idct expansion uses constants C1 ... C7 given by
+
+ (*) Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16)
+
+ and the following 1-D algorithm transforming I0 ... I7 to R0 ... R7 :
+
+ A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
+ C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * (I0 + I4) F = C4 * (I0 - I4)
+ G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+ This algorithm was also used by Paul Wilkins in his C implementation;
+ it is due to Vetterli and Ligtenberg and may be found in the JPEG
+ reference book by Pennebaker and Mitchell.
+
+ Correctness of the algorithm follows from (*) together with the
+ addition formulas for sine and cosine:
+
+ cos( A + B) = cos( A) * cos( B) - sin( A) * sin( B)
+ sin( A + B) = sin( A) * cos( B) + cos( A) * sin( B)
+
+ Note that this implementation absorbs the difference in normalization
+ between the 0th and higher frequencies, although the results produced
+ are actually twice as big as they should be. Since we do this for each
+ dimension, the 2-D idct results are 4x the desired results. Finally,
+ taking into account that the dequantization multiplies by 4 as well,
+ our actual results are 16x too big. We fix this by shifting the final
+ results right by 4 bits.
+
+ High precision version approximates C1 ... C7 to 16 bits.
+ Since there is no multiply that takes one unsigned and one signed operand,
+ we have to use the signed multiply; therefore C1 ... C5 appear to be
+ negative, and multiplies involving them must be adjusted to compensate
+ for this. C6 and C7 do not require this adjustment since
+ they are < 1/2 and are correctly treated as positive numbers.
+
+ Following macro does Eight 8-sample one-dimensional idcts in parallel.
+ This is actually not such a difficult program to write once you
+ make a couple of observations (I of course was unable to make these
+ observations until I'd half-written a couple of other versions).
+
+ 1. Everything is easy once you are done with the multiplies.
+ This is because, given X and Y in registers, one may easily
+ calculate X+Y and X-Y using just those 2 registers.
+
+ 2. You always need at least 2 extra registers to calculate products,
+ so storing 2 temporaries is inevitable. C. and D. seem to be
+ the best candidates.
+
+ 3. The products should be calculated in decreasing order of complexity
+ (which translates into register pressure). Since C1 ... C5 require
+ adjustment (and C6, C7 do not), we begin by calculating C and D.
+
+********************************************************************************/
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Column_IDCT
+ *
+ * Description: Performs the 1-D IDCT on 8 columns at once (one 16-bit lane of each
+ * xmm register per column), then rounds (adds Eight) and shifts each
+ * result right arithmetically by 4.
+ *
+ * Input: I(0) ... I(7) : the eight input rows (128-bit memory operands).
+ * C(1) ... C(7) : the cosine constants.
+ * Eight : rounding constant.
+ * I(), C() and O() must be defined where the macro is expanded;
+ * their definitions are not part of this macro.
+ *
+ * Output: O(0) ... O(7) : the eight result rows.
+ *
+ * Return: None
+ *
+ * Special Note: Uses xmm0 ... xmm7. I(1) and I(2) are overwritten as temporary
+ * storage for C. and D. after their original values have been loaded.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/*
+ The major difference between Willamette processor and other IA32 processors is that
+ all of the simd integer instructions now support the 128 bit xmm registers instead
+ of 64 bit mmx registers. By using these instructions, we can do 8 1-D column idcts
+ that take shorts as input and output shorts at once.
+
+*/
+
+#define Wmt_Column_IDCT __asm { \
+ \
+ __asm movdqa xmm2, I(3) /* xmm2 = i3 */ \
+ __asm movdqa xmm6, C(3) /* xmm6 = c3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* xmm4 = i3 */ \
+ __asm movdqa xmm7, I(5) /* xmm7 = i5 */ \
+ \
+ __asm pmulhw xmm4, xmm6 /* xmm4 = c3 * i3 - i3 */ \
+ __asm movdqa xmm1, C(5) /* xmm1 = c5 */ \
+ \
+ __asm pmulhw xmm6, xmm7 /* xmm6 = c3 * i5 - i5 */ \
+ __asm movdqa xmm5, xmm1 /* xmm5 = c5 */ \
+ \
+ __asm pmulhw xmm1, xmm2 /* xmm1 = c5 * i3 - i3 */ \
+ __asm movdqa xmm3, I(1) /* xmm3 = i1 */ \
+ \
+ __asm pmulhw xmm5, xmm7 /* xmm5 = c5 * i5 - i5 */ \
+ __asm movdqa xmm0, C(1) /* xmm0 = c1 */ \
+ \
+ /* all registers are in use */ \
+ \
+ __asm paddw xmm4, xmm2 /* xmm4 = c3 * i3 */ \
+ __asm paddw xmm6, xmm7 /* xmm6 = c3 * i5 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = c5 * i3 */ \
+ __asm movdqa xmm1, I(7) /* xmm1 = i7 */ \
+ \
+ __asm paddw xmm7, xmm5 /* xmm7 = c5 * i5 */ \
+ __asm movdqa xmm5, xmm0 /* xmm5 = c1 */ \
+ \
+ __asm pmulhw xmm0, xmm3 /* xmm0 = c1 * i1 - i1 */ \
+ __asm paddsw xmm4, xmm7 /* xmm4 = c3 * i3 + c5 * i5 = C */ \
+ \
+ __asm pmulhw xmm5, xmm1 /* xmm5 = c1 * i7 - i7 */ \
+ __asm movdqa xmm7, C(7) /* xmm7 = c7 */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = c3 * i5 - c5 * i3 = D */ \
+ __asm paddw xmm0, xmm3 /* xmm0 = c1 * i1 */ \
+ \
+ __asm pmulhw xmm3, xmm7 /* xmm3 = c7 * i1 */ \
+ __asm movdqa xmm2, I(2) /* xmm2 = i2 */ \
+ \
+ __asm pmulhw xmm7, xmm1 /* xmm7 = c7 * i7 */ \
+ __asm paddw xmm5, xmm1 /* xmm5 = c1 * i7 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* xmm1 = i2 */ \
+ __asm pmulhw xmm2, C(2) /* xmm2 = i2 * c2 -i2 */ \
+ \
+ __asm psubsw xmm3, xmm5 /* xmm3 = c7 * i1 - c1 * i7 = B */ \
+ __asm movdqa xmm5, I(6) /* xmm5 = i6 */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = c1 * i1 + c7 * i7 = A */ \
+ __asm movdqa xmm7, xmm5 /* xmm7 = i6 */ \
+ \
+ __asm psubsw xmm0, xmm4 /* xmm0 = A - C */ \
+ __asm pmulhw xmm5, C(2) /* xmm5 = c2 * i6 - i6 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = i2 * c2 */ \
+ __asm pmulhw xmm1, C(6) /* xmm1 = c6 * i2 */ \
+ \
+ __asm paddsw xmm4, xmm4 /* xmm4 = C + C */ \
+ __asm paddsw xmm4, xmm0 /* xmm4 = A + C = C. */ \
+ \
+ __asm psubsw xmm3, xmm6 /* xmm3 = B - D */ \
+ __asm paddw xmm5, xmm7 /* xmm5 = c2 * i6 */ \
+ \
+ __asm paddsw xmm6, xmm6 /* xmm6 = D + D */ \
+ __asm pmulhw xmm7, C(6) /* xmm7 = c6 * i6 */ \
+ \
+ __asm paddsw xmm6, xmm3 /* xmm6 = B + D = D. */ \
+ __asm movdqa I(1), xmm4 /* Save C. at I(1) (i1 already loaded) */ \
+ \
+ __asm psubsw xmm1, xmm5 /* xmm1 = c6 * i2 - c2 * i6 = H */ \
+ __asm movdqa xmm4, C(4) /* xmm4 = c4 */ \
+ \
+ __asm movdqa xmm5, xmm3 /* xmm5 = B - D */ \
+ __asm pmulhw xmm3, xmm4 /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+ \
+ __asm paddsw xmm7, xmm2 /* xmm7 = c2 * i2 + c6 * i6 = G */ \
+ __asm movdqa I(2), xmm6 /* Save D. at I(2) (i2 already loaded) */ \
+ \
+ __asm movdqa xmm2, xmm0 /* xmm2 = A - C */ \
+ __asm movdqa xmm6, I(0) /* xmm6 = i0 */ \
+ \
+ __asm pmulhw xmm0, xmm4 /* xmm0 = ( c4 - 1 ) * ( A - C ) */ \
+ __asm paddw xmm5, xmm3 /* xmm5 = c4 * ( B - D ) = B. */ \
+ \
+ __asm movdqa xmm3, I(4) /* xmm3 = i4 */ \
+ __asm psubsw xmm5, xmm1 /* xmm5 = B. - H = B.. */ \
+ \
+ __asm paddw xmm2, xmm0 /* xmm2 = c4 * ( A - C) = A. */ \
+ __asm psubsw xmm6, xmm3 /* xmm6 = i0 - i4 */ \
+ \
+ __asm movdqa xmm0, xmm6 /* xmm0 = i0 - i4 */ \
+ __asm pmulhw xmm6, xmm4 /* xmm6 = (c4 - 1) * (i0 - i4) */ \
+ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = i4 + i4 */ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H + H */ \
+ \
+ __asm paddsw xmm3, xmm0 /* xmm3 = i0 + i4 */ \
+ __asm paddsw xmm1, xmm5 /* xmm1 = B. + H = H. */ \
+ \
+ __asm pmulhw xmm4, xmm3 /* xmm4 = ( c4 - 1 ) * ( i0 + i4 ) */ \
+ __asm paddw xmm6, xmm0 /* xmm6 = c4 * ( i0 - i4 ) = F */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = F - A. = F. */ \
+ __asm paddsw xmm2, xmm2 /* xmm2 = A. + A. */ \
+ \
+ __asm movdqa xmm0, I(1) /* Load C. from I(1) */ \
+ __asm paddsw xmm2, xmm6 /* xmm2 = F + A. = A.. */ \
+ \
+ __asm paddw xmm4, xmm3 /* xmm4 = c4 * ( i0 + i4 ) = E */ \
+ __asm psubsw xmm2, xmm1 /* xmm2 = A.. - H. = R2 */ \
+ \
+ __asm paddsw xmm2, Eight /* Adjust R2 and R1 before shifting */ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H. + H. */ \
+ \
+ __asm paddsw xmm1, xmm2 /* xmm1 = A.. + H. = R1 */ \
+ __asm psraw xmm2, 4 /* xmm2 = op2 */ \
+ \
+ __asm psubsw xmm4, xmm7 /* xmm4 = E - G = E. */ \
+ __asm psraw xmm1, 4 /* xmm1 = op1 */ \
+ \
+ __asm movdqa xmm3, I(2) /* Load D. from I(2) */ \
+ __asm paddsw xmm7, xmm7 /* xmm7 = G + G */ \
+ \
+ __asm movdqa O(2), xmm2 /* Write out op2 */ \
+ __asm paddsw xmm7, xmm4 /* xmm7 = E + G = G. */ \
+ \
+ __asm movdqa O(1), xmm1 /* Write out op1 */ \
+ __asm psubsw xmm4, xmm3 /* xmm4 = E. - D. = R4 */ \
+ \
+ __asm paddsw xmm4, Eight /* Adjust R4 and R3 before shifting */ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = D. + D. */ \
+ \
+ __asm paddsw xmm3, xmm4 /* xmm3 = E. + D. = R3 */ \
+ __asm psraw xmm4, 4 /* xmm4 = op4 */ \
+ \
+ __asm psubsw xmm6, xmm5 /* xmm6 = F. - B..= R6 */ \
+ __asm psraw xmm3, 4 /* xmm3 = op3 */ \
+ \
+ __asm paddsw xmm6, Eight /* Adjust R6 and R5 before shifting */ \
+ __asm paddsw xmm5, xmm5 /* xmm5 = B.. + B.. */ \
+ \
+ __asm paddsw xmm5, xmm6 /* xmm5 = F. + B.. = R5 */ \
+ __asm psraw xmm6, 4 /* xmm6 = op6 */ \
+ \
+ __asm movdqa O(4), xmm4 /* Write out op4 */ \
+ __asm psraw xmm5, 4 /* xmm5 = op5 */ \
+ \
+ __asm movdqa O(3), xmm3 /* Write out op3 */ \
+ __asm psubsw xmm7, xmm0 /* xmm7 = G. - C. = R7 */ \
+ \
+ __asm paddsw xmm7, Eight /* Adjust R7 and R0 before shifting */ \
+ __asm paddsw xmm0, xmm0 /* xmm0 = C. + C. */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = G. + C. = R0 */ \
+ __asm psraw xmm7, 4 /* xmm7 = op7 */ \
+ \
+ __asm movdqa O(6), xmm6 /* Write out op6 */ \
+ __asm psraw xmm0, 4 /* xmm0 = op0 */ \
+ \
+ __asm movdqa O(5), xmm5 /* Write out op5 */ \
+ __asm movdqa O(7), xmm7 /* Write out op7 */ \
+ \
+ __asm movdqa O(0), xmm0 /* Write out op0 */ \
+ \
+ } /* End of Wmt_Column_IDCT macro */
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Row_IDCT
+ *
+ * Description: Performs the same 8-lane 1-D IDCT butterfly as Wmt_Column_IDCT,
+ * but without the rounding add and the shift right by 4, and with
+ * the results stored back into I(0) ... I(7) instead of O(n).
+ *
+ * Input: I(0) ... I(7) : the eight input rows (128-bit memory operands).
+ * C(1) ... C(7) : the cosine constants.
+ * I() and C() must be defined where the macro is expanded; their
+ * definitions are not part of this macro.
+ *
+ * Output: I(0) ... I(7) : overwritten with the unrounded, unshifted results.
+ *
+ * Return: None
+ *
+ * Special Note: Uses xmm0 ... xmm7. I(1) and I(2) temporarily hold C. and D.
+ * before the final results are written to them.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/*
+ The major difference between Willamette processor and other IA32 processors is that
+ all of the simd integer instructions now support the 128 bit xmm registers instead
+ of 64 bit mmx registers. By using these instructions, we can do 8 1-D idcts
+ that take shorts as input and output shorts at once.
+
+*/
+
+#define Wmt_Row_IDCT __asm { \
+ \
+ __asm movdqa xmm2, I(3) /* xmm2 = i3 */ \
+ __asm movdqa xmm6, C(3) /* xmm6 = c3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* xmm4 = i3 */ \
+ __asm movdqa xmm7, I(5) /* xmm7 = i5 */ \
+ \
+ __asm pmulhw xmm4, xmm6 /* xmm4 = c3 * i3 - i3 */ \
+ __asm movdqa xmm1, C(5) /* xmm1 = c5 */ \
+ \
+ __asm pmulhw xmm6, xmm7 /* xmm6 = c3 * i5 - i5 */ \
+ __asm movdqa xmm5, xmm1 /* xmm5 = c5 */ \
+ \
+ __asm pmulhw xmm1, xmm2 /* xmm1 = c5 * i3 - i3 */ \
+ __asm movdqa xmm3, I(1) /* xmm3 = i1 */ \
+ \
+ __asm pmulhw xmm5, xmm7 /* xmm5 = c5 * i5 - i5 */ \
+ __asm movdqa xmm0, C(1) /* xmm0 = c1 */ \
+ \
+ /* all registers are in use */ \
+ \
+ __asm paddw xmm4, xmm2 /* xmm4 = c3 * i3 */ \
+ __asm paddw xmm6, xmm7 /* xmm6 = c3 * i5 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = c5 * i3 */ \
+ __asm movdqa xmm1, I(7) /* xmm1 = i7 */ \
+ \
+ __asm paddw xmm7, xmm5 /* xmm7 = c5 * i5 */ \
+ __asm movdqa xmm5, xmm0 /* xmm5 = c1 */ \
+ \
+ __asm pmulhw xmm0, xmm3 /* xmm0 = c1 * i1 - i1 */ \
+ __asm paddsw xmm4, xmm7 /* xmm4 = c3 * i3 + c5 * i5 = C */ \
+ \
+ __asm pmulhw xmm5, xmm1 /* xmm5 = c1 * i7 - i7 */ \
+ __asm movdqa xmm7, C(7) /* xmm7 = c7 */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = c3 * i5 - c5 * i3 = D */ \
+ __asm paddw xmm0, xmm3 /* xmm0 = c1 * i1 */ \
+ \
+ __asm pmulhw xmm3, xmm7 /* xmm3 = c7 * i1 */ \
+ __asm movdqa xmm2, I(2) /* xmm2 = i2 */ \
+ \
+ __asm pmulhw xmm7, xmm1 /* xmm7 = c7 * i7 */ \
+ __asm paddw xmm5, xmm1 /* xmm5 = c1 * i7 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* xmm1 = i2 */ \
+ __asm pmulhw xmm2, C(2) /* xmm2 = i2 * c2 -i2 */ \
+ \
+ __asm psubsw xmm3, xmm5 /* xmm3 = c7 * i1 - c1 * i7 = B */ \
+ __asm movdqa xmm5, I(6) /* xmm5 = i6 */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = c1 * i1 + c7 * i7 = A */ \
+ __asm movdqa xmm7, xmm5 /* xmm7 = i6 */ \
+ \
+ __asm psubsw xmm0, xmm4 /* xmm0 = A - C */ \
+ __asm pmulhw xmm5, C(2) /* xmm5 = c2 * i6 - i6 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = i2 * c2 */ \
+ __asm pmulhw xmm1, C(6) /* xmm1 = c6 * i2 */ \
+ \
+ __asm paddsw xmm4, xmm4 /* xmm4 = C + C */ \
+ __asm paddsw xmm4, xmm0 /* xmm4 = A + C = C. */ \
+ \
+ __asm psubsw xmm3, xmm6 /* xmm3 = B - D */ \
+ __asm paddw xmm5, xmm7 /* xmm5 = c2 * i6 */ \
+ \
+ __asm paddsw xmm6, xmm6 /* xmm6 = D + D */ \
+ __asm pmulhw xmm7, C(6) /* xmm7 = c6 * i6 */ \
+ \
+ __asm paddsw xmm6, xmm3 /* xmm6 = B + D = D. */ \
+ __asm movdqa I(1), xmm4 /* Save C. at I(1) (i1 already loaded) */ \
+ \
+ __asm psubsw xmm1, xmm5 /* xmm1 = c6 * i2 - c2 * i6 = H */ \
+ __asm movdqa xmm4, C(4) /* xmm4 = c4 */ \
+ \
+ __asm movdqa xmm5, xmm3 /* xmm5 = B - D */ \
+ __asm pmulhw xmm3, xmm4 /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+ \
+ __asm paddsw xmm7, xmm2 /* xmm7 = c2 * i2 + c6 * i6 = G */ \
+ __asm movdqa I(2), xmm6 /* Save D. at I(2) (i2 already loaded) */ \
+ \
+ __asm movdqa xmm2, xmm0 /* xmm2 = A - C */ \
+ __asm movdqa xmm6, I(0) /* xmm6 = i0 */ \
+ \
+ __asm pmulhw xmm0, xmm4 /* xmm0 = ( c4 - 1 ) * ( A - C ) */ \
+ __asm paddw xmm5, xmm3 /* xmm5 = c4 * ( B - D ) = B. */ \
+ \
+ __asm movdqa xmm3, I(4) /* xmm3 = i4 */ \
+ __asm psubsw xmm5, xmm1 /* xmm5 = B. - H = B.. */ \
+ \
+ __asm paddw xmm2, xmm0 /* xmm2 = c4 * ( A - C) = A. */ \
+ __asm psubsw xmm6, xmm3 /* xmm6 = i0 - i4 */ \
+ \
+ __asm movdqa xmm0, xmm6 /* xmm0 = i0 - i4 */ \
+ __asm pmulhw xmm6, xmm4 /* xmm6 = ( c4 - 1 ) * ( i0 - i4 ) */ \
+ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = i4 + i4 */ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H + H */ \
+ \
+ __asm paddsw xmm3, xmm0 /* xmm3 = i0 + i4 */ \
+ __asm paddsw xmm1, xmm5 /* xmm1 = B. + H = H. */ \
+ \
+ __asm pmulhw xmm4, xmm3 /* xmm4 = ( c4 - 1 ) * ( i0 + i4 ) */ \
+ __asm paddw xmm6, xmm0 /* xmm6 = c4 * ( i0 - i4 ) = F */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = F - A. = F. */ \
+ __asm paddsw xmm2, xmm2 /* xmm2 = A. + A. */ \
+ \
+ __asm movdqa xmm0, I(1) /* Load C. from I(1) */ \
+ __asm paddsw xmm2, xmm6 /* xmm2 = F + A. = A.. */ \
+ \
+ __asm paddw xmm4, xmm3 /* xmm4 = c4 * ( i0 + i4 ) = E */ \
+ __asm psubsw xmm2, xmm1 /* xmm2 = A.. - H. = R2 */ \
+ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H. + H. */ \
+ __asm paddsw xmm1, xmm2 /* xmm1 = A.. + H. = R1 */ \
+ \
+ __asm psubsw xmm4, xmm7 /* xmm4 = E - G = E. */ \
+ \
+ __asm movdqa xmm3, I(2) /* Load D. from I(2) */ \
+ __asm paddsw xmm7, xmm7 /* xmm7 = G + G */ \
+ \
+ __asm movdqa I(2), xmm2 /* Write R2 back to I(2) */ \
+ __asm paddsw xmm7, xmm4 /* xmm7 = E + G = G. */ \
+ \
+ __asm movdqa I(1), xmm1 /* Write R1 back to I(1) */ \
+ __asm psubsw xmm4, xmm3 /* xmm4 = E. - D. = R4 */ \
+ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = D. + D. */ \
+ \
+ __asm paddsw xmm3, xmm4 /* xmm3 = E. + D. = R3 */ \
+ \
+ __asm psubsw xmm6, xmm5 /* xmm6 = F. - B..= R6 */ \
+ \
+ __asm paddsw xmm5, xmm5 /* xmm5 = B.. + B.. */ \
+ \
+ __asm paddsw xmm5, xmm6 /* xmm5 = F. + B.. = R5 */ \
+ \
+ __asm movdqa I(4), xmm4 /* Write R4 back to I(4) */ \
+ \
+ __asm movdqa I(3), xmm3 /* Write R3 back to I(3) */ \
+ __asm psubsw xmm7, xmm0 /* xmm7 = G. - C. = R7 */ \
+ \
+ __asm paddsw xmm0, xmm0 /* xmm0 = C. + C. */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = G. + C. = R0 */ \
+ \
+ __asm movdqa I(6), xmm6 /* Write R6 back to I(6) */ \
+ \
+ __asm movdqa I(5), xmm5 /* Write R5 back to I(5) */ \
+ __asm movdqa I(7), xmm7 /* Write R7 back to I(7) */ \
+ \
+ __asm movdqa I(0), xmm0 /* Write R0 back to I(0) */ \
+ \
+ } /* End of Wmt_Row_IDCT macro */
+
+/**************************************************************************************
+ *
+ * Macro: Transpose
+ *
+ * Description: Transposes, in place, the 8x8 matrix of 16-bit words held in
+ * I(0) ... I(7) (one row of eight words per operand). In the lane
+ * comments below the rows are named a ... h and lane 7 is written
+ * on the left.
+ *
+ * Input: I(0) ... I(7) : rows a ... h. I() must be defined where the macro
+ * is expanded; its definition is not part of this macro.
+ *
+ * Output: I(0) ... I(7) : I(n) holds word n of every input row (a in lane 0).
+ *
+ * Return: None
+ *
+ * Special Note: Uses xmm0 ... xmm7. I(6) is reused as a temporary once its
+ * original contents have been loaded.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+
+#define Transpose __asm { \
+ \
+ __asm movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */ \
+ __asm movdqa xmm0, I(5) /* xmm0=f7f6f5f4f3f2f1f0 */ \
+ \
+ __asm movdqa xmm5, xmm4 /* make a copy */ \
+ __asm punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */ \
+ \
+ __asm punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */ \
+ __asm movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */ \
+ \
+ __asm movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */ \
+ __asm movdqa xmm7, xmm6 /* make a copy */ \
+ \
+ __asm punpcklwd xmm6, xmm0 /* xmm6=h3g3h2g2h1g1h0g0 */ \
+ __asm punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */ \
+ \
+ __asm movdqa xmm3, xmm4 /* make a copy */ \
+ __asm punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */ \
+ \
+ __asm punpckhdq xmm3, xmm6 /* xmm3=h3g3f3e3h2g2f2e2 */ \
+ __asm movdqa I(6), xmm3 /* save h3g3f3e3h2g2f2e2 in I(6) as a temporary */ \
+ /* Free xmm6 */ \
+ __asm movdqa xmm6, xmm5 /* make a copy */ \
+ __asm punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */ \
+ \
+ __asm punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */ \
+ __asm movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */ \
+ /* Free xmm7 */ \
+ __asm movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */ \
+ __asm movdqa xmm7, xmm0 /* make a copy */ \
+ \
+ __asm punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */ \
+ __asm punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */ \
+ /* Free xmm1 */ \
+ __asm movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */ \
+ __asm movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* make a copy */ \
+ __asm punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */ \
+ \
+ __asm punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */ \
+ __asm movdqa xmm3, xmm0 /* make a copy */ \
+ \
+ __asm punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */ \
+ __asm punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */ \
+ /* Free xmm2 */ \
+ __asm movdqa xmm2, xmm7 /* make a copy */ \
+ __asm punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */ \
+ \
+ __asm punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */ \
+ __asm movdqa xmm1, xmm0 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */ \
+ __asm punpckhqdq xmm1, xmm4 /* xmm1=h1g1f1e1d1c1b1a1 */ \
+ \
+ __asm movdqa I(0), xmm0 /* save I(0) */ \
+ __asm movdqa I(1), xmm1 /* save I(1) */ \
+ \
+ __asm movdqa xmm0, I(6) /* reload temporary h3g3f3e3h2g2f2e2 */ \
+ __asm movdqa xmm1, xmm3 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */ \
+ __asm punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* make a copy */ \
+ __asm punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */ \
+ \
+ __asm punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */ \
+ __asm movdqa I(2), xmm1 /* save I(2) */ \
+ \
+ __asm movdqa I(3), xmm3 /* save I(3) */ \
+ __asm movdqa I(4), xmm4 /* save I(4) */ \
+ \
+ __asm movdqa I(5), xmm2 /* save I(5) */ \
+ __asm movdqa xmm5, xmm7 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */ \
+ __asm punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */ \
+ \
+ __asm movdqa I(6), xmm5 /* save I(6) */ \
+ __asm movdqa I(7), xmm7 /* save I(7) */ \
+ \
+ }/* End of Transpose Macro */
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Dequant
+ *
+ * Description: The Macro does dequantization and reorders the coefficients to avoid
+ * the first transpose before Wmt_Row_IDCT
+ *
+ * Input: [eax], quantized input,
+ * [ebx], quantization table,
+ *
+ * Output: [eax]
+ *
+ * Return: None
+ *
+ * Special Note: None
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant __asm { \
+ __asm lea ecx, WmtDequantConst \
+ __asm movdqa xmm0, [eax] \
+ \
+ __asm pmullw xmm0, [ebx] /* xmm0 = 07 06 05 04 03 02 01 00 */ \
+ __asm movdqa xmm1, [eax + 16] \
+ \
+ __asm pmullw xmm1, [ebx + 16] /* xmm1 = 17 16 15 14 13 12 11 10 */ \
+ __asm pshuflw xmm3, xmm0, 078h /* xmm3 = 07 06 05 04 01 03 02 00 */ \
+ \
+ __asm movdqa xmm2, xmm1 /* xmm2 = 17 16 15 14 13 12 11 10 */ \
+ __asm movdqa xmm7, [ecx] /* xmm7 = -- -- -- -- -- FF FF -- */ \
+ \
+ __asm movdqa xmm4, [eax + 32] \
+ __asm movdqa xmm5, [eax + 64] \
+ \
+ __asm pmullw xmm4, [ebx + 32] /* xmm4 = 27 26 25 24 23 22 21 20 */ \
+ __asm pmullw xmm5, [ebx + 64] /* xmm5 = 47 46 45 44 43 42 41 40 */ \
+ \
+ __asm movdqa xmm6, [ecx + 16] /* xmm6 = -- -- FF FF -- -- -- -- */ \
+ __asm pand xmm7, xmm2 /* xmm7 = -- -- -- -- -- 12 11 -- */ \
+ \
+ __asm pand xmm6, xmm4 /* xmm6 = -- -- 25 24 -- -- -- -- */ \
+ __asm pxor xmm2, xmm7 /* xmm2 = 17 16 15 14 13 -- -- 10 */ \
+ \
+ __asm pxor xmm4, xmm6 /* xmm4 = 27 26 -- -- 23 22 21 20 */ \
+ __asm pslldq xmm7, 4 /* xmm7 = -- -- -- 12 11 -- -- -- */ \
+ \
+ __asm pslldq xmm6, 2 /* xmm6 = -- 25 24 -- -- -- -- -- */ \
+ __asm por xmm7, xmm6 /* xmm7 = -- 25 24 12 11 -- -- -- */ \
+ \
+ __asm movdqa xmm0, [ecx + 32] /* xmm0 = -- -- -- -- -- FF FF FF */ \
+ __asm movdqa xmm6, [ecx + 48] /* xmm6 = -- -- -- -- FF -- -- -- */ \
+ \
+ __asm pand xmm0, xmm3 /* xmm0 = -- -- -- -- -- 03 02 00 */ \
+ __asm pand xmm6, xmm5 /* xmm6 = -- -- -- -- 43 -- -- -- */ \
+ \
+ __asm pxor xmm3, xmm0 /* xmm3 = 07 06 05 04 01 -- -- -- */ \
+ __asm pxor xmm5, xmm6 /* xmm5 = 47 46 45 44 -- 42 41 40 */ \
+ \
+ __asm por xmm0, xmm7 /* xmm0 = -- 25 24 12 11 03 02 00 */ \
+ __asm pslldq xmm6, 8 /* xmm6 = 43 -- -- -- -- -- -- -- */ \
+ \
+ __asm por xmm0, xmm6 /* O0 =xmm0 = 43 25 24 12 11 03 02 00 */ \
+ /* 02345 in use */ \
+ \
+ __asm movdqa xmm1, [ecx + 64 ] /* xmm1 = -- -- -- FF FF -- -- -- */ \
+ __asm pshuflw xmm5, xmm5, 0B4h /* xmm5 = 47 46 45 44 42 -- 41 40 */ \
+ \
+ __asm movdqa xmm7, xmm1 /* xmm7 = -- -- -- FF FF -- -- -- */ \
+ __asm movdqa xmm6, xmm1 /* xmm6 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm movdqa [eax], xmm0 /* write 43 25 24 12 11 03 02 00 */ \
+ __asm pshufhw xmm4, xmm4, 0C2h /* xmm4 = 27 -- -- 26 23 22 21 20 */ \
+ \
+ __asm pand xmm7, xmm4 /* xmm7 = -- -- -- 26 23 -- -- -- */ \
+ __asm pand xmm1, xmm5 /* xmm1 = -- -- -- 44 42 -- -- -- */ \
+ \
+ __asm pxor xmm4, xmm7 /* xmm4 = 27 -- -- -- -- 22 21 20 */ \
+ __asm pxor xmm5, xmm1 /* xmm5 = 47 46 45 -- -- -- 41 40 */ \
+ \
+ __asm pshuflw xmm2, xmm2, 0C6h /* xmm2 = 17 16 15 14 13 10 -- -- */ \
+ __asm movdqa xmm0, xmm6 /* xmm0 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm pslldq xmm7, 2 /* xmm7 = -- -- 26 23 -- -- -- -- */ \
+ __asm pslldq xmm1, 6 /* xmm1 = 44 42 -- -- -- -- -- -- */ \
+ \
+ __asm psrldq xmm0, 2 /* xmm0 = -- -- -- -- FF FF -- -- */ \
+ __asm pand xmm6, xmm3 /* xmm6 = -- -- -- 04 01 -- -- -- */ \
+ \
+ __asm pand xmm0, xmm2 /* xmm0 = -- -- -- -- 13 10 -- -- */ \
+ __asm pxor xmm3, xmm6 /* xmm3 = 07 06 05 -- -- -- -- -- */ \
+ \
+ __asm pxor xmm2, xmm0 /* xmm2 = 17 16 15 14 -- -- -- -- */ \
+ __asm psrldq xmm6, 6 /* xmm0 = -- -- -- -- -- -- 04 01 */ \
+ \
+ __asm por xmm1, xmm7 /* xmm1 = 44 42 26 23 -- -- -- -- */ \
+ __asm por xmm0, xmm6 /* xmm1 = -- -- -- -- 13 10 04 01 */ \
+ /* 12345 in use */ \
+ __asm por xmm1, xmm0 /* o1 =xmm1 = 44 42 26 23 13 10 04 01 */ \
+ __asm pshuflw xmm4, xmm4, 093h /* xmm4 = 27 -- -- -- 22 21 20 -- */ \
+ \
+ __asm pshufhw xmm4, xmm4, 093h /* xmm4 = -- -- -- 27 22 21 20 -- */ \
+ __asm movdqa [eax + 16], xmm1 /* write 44 42 26 23 13 10 04 01 */ \
+ \
+ __asm pshufhw xmm3, xmm3, 0D2h /* xmm3 = 07 05 -- 06 -- -- -- -- */ \
+ __asm movdqa xmm0, [ecx + 64] /* xmm0 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm pand xmm0, xmm3 /* xmm0 = -- -- -- 06 -- -- -- -- */ \
+ __asm psrldq xmm3, 12 /* xmm3 = -- -- -- -- -- -- 07 05 */ \
+ \
+ __asm psrldq xmm0, 8 /* xmm0 = -- -- -- -- -- -- -- 06 */ \
+ \
+ __asm movdqa xmm6, [ecx + 64] /* xmm6 = -- -- -- FF FF -- -- -- */ \
+ __asm movdqa xmm7, [ecx + 96] /* xmm7 = -- -- -- -- FF FF -- -- */ \
+ \
+ __asm pand xmm6, xmm4 /* xmm6 = -- -- -- 27 22 -- -- -- */ \
+ __asm pxor xmm4, xmm6 /* xmm4 = -- -- -- -- -- 21 20 -- */ \
+ \
+ __asm por xmm3, xmm6 /* xmm3 = -- -- -- 27 22 -- 07 05 */ \
+ __asm pand xmm7, xmm4 /* xmm7 = -- -- -- -- -- 21 -- -- */ \
+ \
+ __asm por xmm0, xmm7 /* xmm0 = -- -- -- -- -- 21 -- 06 */ \
+ __asm pxor xmm4, xmm7 /* xmm4 = -- -- -- -- -- -- 20 -- */ \
+ \
+ __asm movdqa xmm6, [ecx + 16 ] /* xmm6 = -- -- FF FF -- -- -- -- */ \
+ __asm movdqa xmm1, [ecx + 64 ] /* xmm1 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm pand xmm6, xmm2 /* xmm6 = -- -- 15 14 -- -- -- -- */ \
+ __asm pand xmm1, xmm6 /* xmm1 = -- -- -- 14 -- -- -- -- */ \
+ \
+ __asm pxor xmm2, xmm6 /* xmm2 = 17 16 -- -- -- -- -- -- */ \
+ __asm pxor xmm6, xmm1 /* xmm6 = -- -- 15 -- -- -- -- -- */ \
+ \
+ __asm psrldq xmm1, 4 /* xmm1 = -- -- -- -- -- 14 -- -- */ \
+ \
+ __asm psrldq xmm6, 8 /* xmm6 = -- -- -- -- -- -- 15 -- */ \
+ __asm por xmm3, xmm1 /* xmm3 = -- -- -- 27 22 14 07 05 */ \
+ \
+ __asm por xmm0, xmm6 /* xmm0 = -- -- -- -- -- 21 15 06 */ \
+ __asm pshufhw xmm5, xmm5, 0E1h /* xmm5 = 47 46 -- 45 -- -- 41 40 */ \
+ \
+ __asm movdqa xmm1, [ecx + 64] /* xmm1 = -- -- -- FF FF -- -- -- */ \
+ __asm pshuflw xmm5, xmm5, 072h /* xmm5 = 47 46 -- 45 41 -- 40 -- */ \
+ \
+ __asm movdqa xmm6, xmm1 /* xmm6 = -- -- -- FF FF -- -- -- */ \
+ __asm pand xmm1, xmm5 /* xmm1 = -- -- -- 45 41 -- -- -- */ \
+ \
+ __asm pxor xmm5, xmm1 /* xmm5 = 47 46 -- -- -- -- 40 -- */ \
+ __asm pslldq xmm1, 4 /* xmm1 = -- 45 41 -- -- -- -- -- */ \
+ \
+ __asm pshufd xmm5, xmm5, 09Ch /* xmm5 = -- -- -- -- 47 46 40 -- */ \
+ __asm por xmm3, xmm1 /* xmm3 = -- 45 41 27 22 14 07 05 */ \
+ \
+ __asm movdqa xmm1, [eax + 96] /* xmm1 = 67 66 65 64 63 62 61 60 */ \
+ __asm pmullw xmm1, [ebx + 96] \
+ \
+ __asm movdqa xmm7, [ecx] /* xmm7 = -- -- -- -- -- FF FF -- */ \
+ \
+ __asm psrldq xmm6, 8 /* xmm6 = -- -- -- -- -- -- -- FF */ \
+ __asm pand xmm7, xmm5 /* xmm7 = -- -- -- -- -- 46 40 -- */ \
+ \
+ __asm pand xmm6, xmm1 /* xmm6 = -- -- -- -- -- -- -- 60 */ \
+ __asm pxor xmm5, xmm7 /* xmm5 = -- -- -- -- 47 -- -- -- */ \
+ \
+ __asm pxor xmm1, xmm6 /* xmm1 = 67 66 65 64 63 62 61 -- */ \
+ __asm pslldq xmm5, 2 /* xmm5 = -- -- -- 47 -- -- -- -- */ \
+ \
+ __asm pslldq xmm6, 14 /* xmm6 = 60 -- -- -- -- -- -- -- */ \
+ __asm por xmm4, xmm5 /* xmm4 = -- -- -- 47 -- -- 20 -- */ \
+ \
+ __asm por xmm3, xmm6 /* O2 = xmm3= 60 45 41 27 22 14 07 05 */ \
+ __asm pslldq xmm7, 6 /* xmm7 = -- -- 46 40 -- -- -- -- */ \
+ \
+ __asm movdqa [eax+32], xmm3 /* write 60 45 41 27 22 14 07 05 */ \
+ __asm por xmm0, xmm7 /* xmm0 = -- -- 46 40 -- 21 15 06 */ \
+ /* 0, 1, 2, 4 in use */ \
+ __asm movdqa xmm3, [eax + 48] /* xmm3 = 37 36 35 34 33 32 31 30 */ \
+ __asm movdqa xmm5, [eax + 80] /* xmm5 = 57 56 55 54 53 52 51 50 */ \
+ \
+ __asm pmullw xmm3, [ebx + 48] \
+ __asm pmullw xmm5, [ebx + 80] \
+ \
+ __asm movdqa xmm6, [ecx + 64] /* xmm6 = -- -- -- FF FF -- -- -- */ \
+ __asm movdqa xmm7, [ecx + 64] /* xmm7 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm psrldq xmm6, 8 /* xmm6 = -- -- -- -- -- -- -- FF */ \
+ __asm pslldq xmm7, 8 /* xmm7 = FF -- -- -- -- -- -- -- */ \
+ \
+ __asm pand xmm6, xmm3 /* xmm6 = -- -- -- -- -- -- -- 30 */ \
+ __asm pand xmm7, xmm5 /* xmm7 = 57 -- -- -- -- -- -- -- */ \
+ \
+ __asm pxor xmm3, xmm6 /* xmm3 = 37 36 35 34 33 32 31 -- */ \
+ __asm pxor xmm5, xmm7 /* xmm5 = __ 56 55 54 53 52 51 50 */ \
+ \
+ __asm pslldq xmm6, 6 /* xmm6 = -- -- -- -- 30 -- -- -- */ \
+ __asm psrldq xmm7, 2 /* xmm7 = -- 57 -- -- -- -- -- -- */ \
+ \
+ __asm por xmm6, xmm7 /* xmm6 = -- 57 -- -- 30 -- -- -- */ \
+ __asm movdqa xmm7, [ecx] /* xmm7 = -- -- -- -- -- FF FF -- */ \
+ \
+ __asm por xmm0, xmm6 /* xmm0 = -- 57 46 40 30 21 15 06 */ \
+ __asm psrldq xmm7, 2 /* xmm7 = -- -- -- -- -- -- FF FF */ \
+ \
+ __asm movdqa xmm6, xmm2 /* xmm6 = 17 16 -- -- -- -- -- -- */ \
+ __asm pand xmm7, xmm1 /* xmm7 = -- -- -- -- -- -- 61 -- */ \
+ \
+ __asm pslldq xmm6, 2 /* xmm6 = 16 -- -- -- -- -- -- -- */ \
+ __asm psrldq xmm2, 14 /* xmm2 = -- -- -- -- -- -- -- 17 */ \
+ \
+ __asm pxor xmm1, xmm7 /* xmm1 = 67 66 65 64 63 62 -- -- */ \
+ __asm pslldq xmm7, 12 /* xmm7 = 61 -- -- -- -- -- -- -- */ \
+ \
+ __asm psrldq xmm6, 14 /* xmm6 = -- -- -- -- -- -- -- 16 */ \
+ __asm por xmm4, xmm6 /* xmm4 = -- -- -- 47 -- -- 20 16 */ \
+ \
+ __asm por xmm0, xmm7 /* xmm0 = 61 57 46 40 30 21 15 06 */ \
+ __asm movdqa xmm6, [ecx] /* xmm6 = -- -- -- -- -- FF FF -- */ \
+ \
+ __asm psrldq xmm6, 2 /* xmm6 = -- -- -- -- -- -- FF FF */ \
+ __asm movdqa [eax+48], xmm0 /* write 61 57 46 40 30 21 15 06 */ \
+ /* 1, 2, 3, 4, 5 in use */\
+ __asm movdqa xmm0, [ecx] /* xmm0 = -- -- -- -- -- FF FF -- */ \
+ __asm pand xmm6, xmm3 /* xmm6 = -- -- -- -- -- -- 31 -- */ \
+ \
+ __asm movdqa xmm7, xmm3 /* xmm7 = 37 36 35 34 33 32 31 -- */ \
+ __asm pxor xmm3, xmm6 /* xmm3 = 37 36 35 34 33 32 -- -- */ \
+ \
+ __asm pslldq xmm3, 2 /* xmm3 = 36 35 34 33 32 -- -- -- */ \
+ __asm pand xmm0, xmm1 /* xmm0 = -- -- -- -- -- 62 -- -- */ \
+ \
+ __asm psrldq xmm7, 14 /* xmm7 = -- -- -- -- -- -- -- 37 */ \
+ __asm pxor xmm1, xmm0 /* xmm1 = 67 66 65 64 63 -- -- -- */ \
+ \
+ __asm por xmm6, xmm7 /* xmm6 = -- -- -- -- -- -- 31 37 */ \
+ __asm movdqa xmm7, [ecx + 64] /* xmm7 = -- -- -- FF FF -- -- -- */ \
+ \
+ __asm pshuflw xmm6, xmm6, 01Eh /* xmm6 = -- -- -- -- 37 31 -- -- */ \
+ __asm pslldq xmm7, 6 /* xmm7 = FF FF -- -- -- -- -- -- */ \
+ \
+ __asm por xmm4, xmm6 /* xmm4 = -- -- -- 47 37 31 20 16 */ \
+ __asm pand xmm7, xmm5 /* xmm7 = -- 56 -- -- -- -- -- -- */ \
+ \
+ __asm pslldq xmm0, 8 /* xmm0 = -- 62 -- -- -- -- -- -- */ \
+ __asm pxor xmm5, xmm7 /* xmm5 = -- -- 55 54 53 52 51 50 */ \
+ \
+ __asm psrldq xmm7, 2 /* xmm7 = -- -- 56 -- -- -- -- -- */ \
+ \
+ __asm pshufhw xmm3, xmm3, 087h /* xmm3 = 35 33 34 36 32 -- -- -- */ \
+ __asm por xmm0, xmm7 /* xmm0 = -- 62 56 -- -- -- -- -- */ \
+ \
+ __asm movdqa xmm7, [eax + 112] /* xmm7 = 77 76 75 74 73 72 71 70 */ \
+ __asm pmullw xmm7, [ebx + 112] \
+ \
+ __asm movdqa xmm6, [ecx + 64] /* xmm6 = -- -- -- FF FF -- -- -- */ \
+ __asm por xmm4, xmm0 /* xmm4 = -- 62 56 47 37 31 20 16 */ \
+ \
+ __asm pshuflw xmm7, xmm7, 0E1h /* xmm7 = 77 76 75 74 73 72 70 71 */ \
+ __asm psrldq xmm6, 8 /* xmm6 = -- -- -- -- -- -- -- FF */ \
+ \
+ __asm movdqa xmm0, [ecx + 64] /* xmm0 = -- -- -- FF FF -- -- -- */ \
+ __asm pand xmm6, xmm7 /* xmm6 = -- -- -- -- -- -- -- 71 */ \
+ \
+ __asm pand xmm0, xmm3 /* xmm0 = -- -- -- 36 32 -- -- -- */ \
+ __asm pxor xmm7, xmm6 /* xmm7 = 77 76 75 74 73 72 70 -- */ \
+ \
+ __asm pxor xmm3, xmm0 /* xmm3 = 35 33 34 -- -- -- -- -- */ \
+ __asm pslldq xmm6, 14 /* xmm6 = 71 -- -- -- -- -- -- -- */ \
+ \
+ __asm psrldq xmm0, 4 /* xmm0 = -- -- -- -- -- 36 32 -- */ \
+ __asm por xmm4, xmm6 /* xmm4 = 71 62 56 47 37 31 20 16 */ \
+ \
+ __asm por xmm2, xmm0 /* xmm2 = -- -- -- -- -- 36 32 17 */ \
+ __asm movdqa [eax + 64], xmm4 /* write 71 62 56 47 37 31 20 16 */ \
+ /* 1, 2, 3, 5, 7 in use */ \
+ __asm movdqa xmm6, [ecx + 80] /* xmm6 = -- -- FF -- -- -- -- FF */ \
+ __asm pshufhw xmm7, xmm7, 0D2h /* xmm7 = 77 75 74 76 73 72 70 __ */ \
+ \
+ __asm movdqa xmm4, [ecx] /* xmm4 = -- -- -- -- -- FF FF -- */ \
+ __asm movdqa xmm0, [ecx+48] /* xmm0 = -- -- -- -- FF -- -- -- */ \
+ \
+ __asm pand xmm6, xmm5 /* xmm6 = -- -- 55 -- -- -- -- 50 */ \
+ __asm pand xmm4, xmm7 /* xmm4 = -- -- -- -- -- 72 70 -- */ \
+ \
+ __asm pand xmm0, xmm1 /* xmm0 = -- -- -- -- 63 -- -- -- */ \
+ __asm pxor xmm5, xmm6 /* xmm5 = -- -- -- 54 53 52 51 -- */ \
+ \
+ __asm pxor xmm7, xmm4 /* xmm7 = 77 75 74 76 73 -- -- -- */ \
+ __asm pxor xmm1, xmm0 /* xmm1 = 67 66 65 64 -- -- -- -- */ \
+ \
+ __asm pshuflw xmm6, xmm6, 02Bh /* xmm6 = -- -- 55 -- 50 -- -- -- */ \
+ __asm pslldq xmm4, 10 /* xmm4 = 72 20 -- -- -- -- -- -- */ \
+ \
+ __asm pshufhw xmm6, xmm6, 0B1h /* xmm6 = -- -- -- 55 50 -- -- -- */ \
+ __asm pslldq xmm0, 4 /* xmm0 = -- -- 63 -- -- -- -- -- */ \
+ \
+ __asm por xmm6, xmm4 /* xmm6 = 72 70 -- 55 50 -- -- -- */ \
+ __asm por xmm2, xmm0 /* xmm2 = -- -- 63 -- -- 36 32 17 */ \
+ \
+ __asm por xmm2, xmm6 /* xmm2 = 72 70 64 55 50 36 32 17 */ \
+ __asm pshufhw xmm1, xmm1, 0C9h /* xmm1 = 67 64 66 65 -- -- -- -- */ \
+ \
+ __asm movdqa xmm6, xmm3 /* xmm6 = 35 33 34 -- -- -- -- -- */ \
+ __asm movdqa [eax+80], xmm2 /* write 72 70 64 55 50 36 32 17 */ \
+ \
+ __asm psrldq xmm6, 12 /* xmm6 = -- -- -- -- -- -- 35 33 */ \
+ __asm pslldq xmm3, 4 /* xmm3 = 34 -- -- -- -- -- -- -- */ \
+ \
+ __asm pshuflw xmm5, xmm5, 04Eh /* xmm5 = -- -- -- 54 51 -- 53 52 */ \
+ __asm movdqa xmm4, xmm7 /* xmm4 = 77 75 74 76 73 -- -- -- */ \
+ \
+ __asm movdqa xmm2, xmm5 /* xmm2 = -- -- -- 54 51 -- 53 52 */ \
+ __asm psrldq xmm7, 10 /* xmm7 = -- -- -- -- -- 77 75 74 */ \
+ \
+ __asm pslldq xmm4, 6 /* xmm4 = 76 73 -- -- -- -- -- -- */ \
+ __asm pslldq xmm2, 12 /* xmm2 = 53 52 -- -- -- -- -- -- */ \
+ \
+ __asm movdqa xmm0, xmm1 /* xmm0 = 67 64 66 65 -- -- -- -- */ \
+ __asm psrldq xmm1, 12 /* xmm1 = -- -- -- -- -- -- 67 64 */ \
+ \
+ __asm psrldq xmm5, 6 /* xmm5 = -- -- -- -- -- -- 54 51 */ \
+ __asm psrldq xmm3, 14 /* xmm3 = -- -- -- -- -- -- -- 34 */ \
+ \
+ __asm pslldq xmm7, 10 /* xmm7 = 77 75 74 -- -- -- -- -- */ \
+ __asm por xmm4, xmm6 /* xmm4 = 76 73 -- -- -- -- 35 33 */ \
+ \
+ __asm psrldq xmm2, 10 /* xmm2 = -- -- -- -- -- 53 52 -- */ \
+ __asm pslldq xmm0, 4 /* xmm0 = 66 65 -- -- -- -- -- -- */ \
+ \
+ __asm pslldq xmm1, 8 /* xmm1 = -- -- 67 64 -- -- -- -- */ \
+ __asm por xmm3, xmm7 /* xmm3 = 77 75 74 -- -- -- -- 34 */ \
+ \
+ __asm psrldq xmm0, 6 /* xmm0 = -- -- -- 66 65 -- -- -- */ \
+ __asm pslldq xmm5, 4 /* xmm5 = -- -- -- -- 54 51 -- -- */ \
+ \
+ __asm por xmm4, xmm1 /* xmm4 = 76 73 67 64 -- -- 35 33 */ \
+ __asm por xmm3, xmm2 /* xmm3 = 77 75 74 -- -- 53 52 34 */ \
+ \
+ __asm por xmm4, xmm5 /* xmm4 = 76 73 67 64 54 51 35 33 */ \
+ __asm por xmm3, xmm0 /* xmm3 = 77 75 74 66 65 53 52 34 */ \
+ \
+ __asm movdqa [eax+96], xmm4 /* write 76 73 67 64 54 51 35 33 */ \
+ __asm movdqa [eax+112], xmm3 /* write 77 75 74 66 65 53 52 34 */ \
+ \
+ }/* end of Wmt_Dequant Macro */
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Dequant_Dx
+ *
+ * Description: Dequantizes a full 8x8 block of 16-bit coefficients. Each of the
+ * eight 16-byte rows read from [eax] is multiplied word by word
+ * (pmullw, low 16 bits of each product kept) by the matching row
+ * of [ebx] and stored at the same offset from [edx]. No reordering
+ * is done: row n of the input becomes row n of the output.
+ *
+ * Input: [eax], quantized input,
+ * [ebx], quantization table,
+ * [edx], destination buffer
+ *
+ * Output: [edx], dequantized coefficients (8 rows of 16 bytes)
+ *
+ * Return: None
+ *
+ * Special Note: movdqa and the pmullw memory operands require 16-byte aligned
+ * addresses. xmm0 - xmm7 are overwritten.
+ * In the register comments "rc" denotes the coefficient in
+ * row r, column c; the lowest word is listed last.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant_Dx __asm { \
+ __asm movdqa xmm0, [eax] /* xmm0 = quantized row 0 */ \
+ __asm movdqa xmm1, [eax + 16] /* xmm1 = quantized row 1 */ \
+ \
+ __asm pmullw xmm0, [ebx] /* xmm0 = 07 06 05 04 03 02 01 00 */ \
+ __asm pmullw xmm1, [ebx + 16] /* xmm1 = 17 16 15 14 13 12 11 10 */ \
+ \
+ __asm movdqa xmm2, [eax + 32] /* xmm2 = quantized row 2 */ \
+ __asm movdqa xmm3, [eax + 48] /* xmm3 = quantized row 3 */ \
+ \
+ __asm pmullw xmm2, [ebx + 32] /* xmm2 = 27 26 25 24 23 22 21 20 */ \
+ __asm pmullw xmm3, [ebx + 48] /* xmm3 = 37 36 35 34 33 32 31 30 */ \
+ \
+ __asm movdqa [edx], xmm0 /* write row 0 */ \
+ __asm movdqa [edx + 16], xmm1 /* write row 1 */ \
+ \
+ __asm movdqa xmm4, [eax + 64] /* xmm4 = quantized row 4 */ \
+ __asm movdqa xmm5, [eax + 80] /* xmm5 = quantized row 5 */ \
+ \
+ __asm pmullw xmm4, [ebx + 64] /* xmm4 = 47 46 45 44 43 42 41 40 */ \
+ __asm pmullw xmm5, [ebx + 80] /* xmm5 = 57 56 55 54 53 52 51 50 */ \
+ \
+ __asm movdqa [edx+32], xmm2 /* write row 2 */ \
+ __asm movdqa [edx+48], xmm3 /* write row 3 */ \
+ \
+ __asm movdqa xmm6, [eax + 96] /* xmm6 = quantized row 6 */ \
+ __asm movdqa xmm7, [eax + 112] /* xmm7 = quantized row 7 */ \
+ \
+ __asm pmullw xmm6, [ebx + 96] /* xmm6 = 67 66 65 64 63 62 61 60 */ \
+ __asm pmullw xmm7, [ebx + 112] /* xmm7 = 77 76 75 74 73 72 71 70 */ \
+ \
+ __asm movdqa [edx+64], xmm4 /* write row 4 */ \
+ __asm movdqa [edx+80], xmm5 /* write row 5 */ \
+ \
+ __asm movdqa [edx+96], xmm6 /* write row 6 */ \
+ __asm movdqa [edx+112], xmm7 /* write row 7 */ \
+ \
+ }/* end of Wmt_Dequant_Dx Macro */
+
+
+
+
+/**************************************************************************************
+ *
+ * Routine: Wmt_IDct_Dx
+ *
+ * Description: Perform dequantization and IDCT on a 8x8 block. The block is
+ * dequantized from InputData into OutputData and the row pass,
+ * transpose and column pass then all operate in place on
+ * OutputData.
+ *
+ * Input: InputData - quantized coefficients
+ * QuantizationTable - per-coefficient multipliers
+ * OutputData - destination buffer, also used as work area
+ *
+ * Output: OutputData is overwritten
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in raster order.
+ * The buffers are accessed with movdqa, which requires 16-byte
+ * aligned addresses.
+ * Wmt_Row_IDCT, Transpose, Wmt_Column_IDCT and WmtIdctConst are
+ * defined elsewhere in this file.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+void Wmt_IDct_Dx(short *InputData, short *QuantizationTable, short *OutputData)
+{
+
+
+ __asm
+ {
+ /* ebx is saved here and restored before leaving the asm block */
+ push ebx
+
+ mov eax, InputData
+ mov ebx, QuantizationTable
+ mov edx, OutputData
+ lea ecx, WmtIdctConst
+ /* dequantize [eax] * [ebx] -> [edx] */
+ Wmt_Dequant_Dx
+ /* I(i) and O(i) both address 16-byte row i of OutputData; C(i) addresses entry i-1 of WmtIdctConst */
+#undef I
+#undef O
+#undef C
+#define I(i) [edx + 16 * i ]
+#define O(i) [edx + 16 * i ]
+#define C(i) [ecx + 16 * (i-1) ]
+
+
+ /* NOTE(review): original remark was "Transpose - absorbed by the Wmt_dequant"; Wmt_Dequant_Dx itself does no reordering */
+
+ Wmt_Row_IDCT
+
+ Transpose
+
+ Wmt_Column_IDCT
+
+ pop ebx
+ }
+
+}
+
+/**************************************************************************************
+ ************** Wmt_IDCT10_Dx ******************************************************
+ **************************************************************************************
+
+
+ In IDCT10, we are dealing with at most ten non-zero coefficients in the 8x8 block.
+ In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to
+ do 1-D row IDCTs on the first four rows; the remaining four rows are zero anyway.
+ After the row IDCTs, since every column could have nonzero coefficients, we need to
+ do eight 1-D column IDCTs. However, for each column, there are at most four nonzero
+ coefficients, coefficient 0 to coefficient 3. The same holds for the coefficients of
+ the four 1-D row IDCTs. For this reason, the process of a 1-D IDCT is simplified
+
+ from a full version:
+
+ A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
+ C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * (I0 + I4) F = C4 * (I0 - I4)
+ G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+
+ To:
+
+ A = (C1 * I1) B = (C7 * I1)
+ C = (C3 * I3) D = - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * I0 F = E
+ G = (C2 * I2) H = (C6 * I2)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+
+******************************************************************************************/
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Column_IDCT10
+ *
+ * Description: The Macro does the simplified 1-D IDCT on 8 columns at once
+ * (one column per 16-bit lane). Only inputs 0 - 3 are read;
+ * inputs 4 - 7 are treated as zero. Each result has the
+ * constant Eight added and is shifted right arithmetically by 4
+ * before it is stored.
+ *
+ * Input: I(0) - I(3), the input rows
+ * C(1) - C(7), the cosine constants
+ * Eight, the rounding constant (defined outside this macro)
+ *
+ * Output: O(0) - O(7)
+ *
+ * Return: None
+ *
+ * Special Note: I(), O() and C() must be defined where the macro is expanded.
+ * I(1) and I(2) are overwritten with the intermediate values
+ * C. and D. xmm0 - xmm7 are overwritten.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/*
+ The major difference between Willamette processor and other IA32 processors is that
+ all of the simd integer instructions now support the 128 bit xmm registers instead
+ of 64 bit mmx registers. By using these instructions, we can do 8 1-D column idcts
+ that take shorts as input and output shorts at once
+
+*/
+
+#define Wmt_Column_IDCT10 __asm { \
+ \
+ __asm movdqa xmm2, I(3) /* xmm2 = i3 */ \
+ __asm movdqa xmm6, C(3) /* xmm6 = c3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* xmm4 = i3 */ \
+ __asm pmulhw xmm4, xmm6 /* xmm4 = c3 * i3 - i3 */ \
+ \
+ __asm movdqa xmm1, C(5) /* xmm1 = c5 */ \
+ __asm movdqa xmm5, xmm1 /* xmm5 = c5 (never read; xmm5 is overwritten below) */ \
+ \
+ __asm pmulhw xmm1, xmm2 /* xmm1 = c5 * i3 - i3 */ \
+ __asm movdqa xmm3, I(1) /* xmm3 = i1 */ \
+ \
+ __asm movdqa xmm0, C(1) /* xmm0 = c1 */ \
+ __asm paddw xmm4, xmm2 /* xmm4 = c3 * i3 = C */ \
+ \
+ __asm movdqa xmm7, C(7) /* xmm7 = c7 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = c5 * i3 */ \
+ __asm movdqa xmm5, xmm0 /* xmm5 = c1 (never read; xmm5 is overwritten below) */ \
+ \
+ __asm pmulhw xmm0, xmm3 /* xmm0 = c1 * i1 - i1 */ \
+ __asm pxor xmm6, xmm6 /* clear xmm6 */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = - c5 * i3 = D */ \
+ __asm paddw xmm0, xmm3 /* xmm0 = c1 * i1 = A */ \
+ \
+ __asm pmulhw xmm3, xmm7 /* xmm3 = c7 * i1 = B */ \
+ __asm movdqa xmm2, I(2) /* xmm2 = i2 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* xmm1 = i2 */ \
+ __asm pmulhw xmm2, C(2) /* xmm2 = i2 * c2 - i2 */ \
+ \
+ __asm psubsw xmm0, xmm4 /* xmm0 = A - C */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = i2 * c2 = G */ \
+ __asm pmulhw xmm1, C(6) /* xmm1 = c6 * i2 = H */ \
+ \
+ __asm paddsw xmm4, xmm4 /* xmm4 = C + C */ \
+ __asm paddsw xmm4, xmm0 /* xmm4 = A + C = C. */ \
+ \
+ __asm psubsw xmm3, xmm6 /* xmm3 = B - D */ \
+ __asm paddsw xmm6, xmm6 /* xmm6 = D + D */ \
+ \
+ __asm paddsw xmm6, xmm3 /* xmm6 = B + D = D. */ \
+ __asm movdqa I(1), xmm4 /* Save C. at I(1); i1 was already loaded */ \
+ \
+ __asm movdqa xmm4, C(4) /* xmm4 = c4 */ \
+ __asm movdqa xmm5, xmm3 /* xmm5 = B - D */ \
+ \
+ __asm pmulhw xmm3, xmm4 /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+ \
+ __asm movdqa xmm7, xmm2 /* xmm7 = c2 * i2 = G (i6 is not read here) */ \
+ __asm movdqa I(2), xmm6 /* Save D. at I(2); i2 was already loaded */ \
+ \
+ __asm movdqa xmm2, xmm0 /* xmm2 = A - C */ \
+ __asm movdqa xmm6, I(0) /* xmm6 = i0 */ \
+ \
+ __asm pmulhw xmm0, xmm4 /* xmm0 = ( c4 - 1 ) * ( A - C ) */\
+ __asm paddw xmm5, xmm3 /* xmm5 = c4 * ( B - D ) = B. */ \
+ \
+ __asm psubsw xmm5, xmm1 /* xmm5 = B. - H = B.. */ \
+ __asm paddw xmm2, xmm0 /* xmm2 = c4 * ( A - C) = A. */ \
+ \
+ __asm movdqa xmm0, xmm6 /* xmm0 = i0 */ \
+ __asm pmulhw xmm6, xmm4 /* xmm6 = (c4 - 1) * i0 */ \
+ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H + H */ \
+ __asm paddsw xmm1, xmm5 /* xmm1 = B. + H = H. */ \
+ \
+ __asm paddw xmm6, xmm0 /* xmm6 = c4 * i0 = E = F */ \
+ __asm movdqa xmm4, xmm6 /* xmm4 = c4 * i0 = E */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = F - A. = F. */ \
+ __asm paddsw xmm2, xmm2 /* xmm2 = A. + A. */ \
+ \
+ __asm movdqa xmm0, I(1) /* Load C. from I(1) */ \
+ __asm paddsw xmm2, xmm6 /* xmm2 = F + A. = A.. */ \
+ \
+ __asm psubsw xmm2, xmm1 /* xmm2 = A.. - H. = R2 */ \
+ \
+ __asm paddsw xmm2, Eight /* Add Eight to R2; R1 is built from this sum, so it gets it too */ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H. + H. */ \
+ \
+ __asm paddsw xmm1, xmm2 /* xmm1 = A.. + H. = R1 */ \
+ __asm psraw xmm2, 4 /* xmm2 = op2 */ \
+ \
+ __asm psubsw xmm4, xmm7 /* xmm4 = E - G = E. */ \
+ __asm psraw xmm1, 4 /* xmm1 = op1 */ \
+ \
+ __asm movdqa xmm3, I(2) /* Load D. from I(2) */ \
+ __asm paddsw xmm7, xmm7 /* xmm7 = G + G */ \
+ \
+ __asm movdqa O(2), xmm2 /* Write out op2 */ \
+ __asm paddsw xmm7, xmm4 /* xmm7 = E + G = G. */ \
+ \
+ __asm movdqa O(1), xmm1 /* Write out op1 */ \
+ __asm psubsw xmm4, xmm3 /* xmm4 = E. - D. = R4 */ \
+ \
+ __asm paddsw xmm4, Eight /* Add Eight to R4; R3 is built from this sum, so it gets it too */ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = D. + D. */ \
+ \
+ __asm paddsw xmm3, xmm4 /* xmm3 = E. + D. = R3 */ \
+ __asm psraw xmm4, 4 /* xmm4 = op4 */ \
+ \
+ __asm psubsw xmm6, xmm5 /* xmm6 = F. - B..= R6 */ \
+ __asm psraw xmm3, 4 /* xmm3 = op3 */ \
+ \
+ __asm paddsw xmm6, Eight /* Add Eight to R6; R5 is built from this sum, so it gets it too */ \
+ __asm paddsw xmm5, xmm5 /* xmm5 = B.. + B.. */ \
+ \
+ __asm paddsw xmm5, xmm6 /* xmm5 = F. + B.. = R5 */ \
+ __asm psraw xmm6, 4 /* xmm6 = op6 */ \
+ \
+ __asm movdqa O(4), xmm4 /* Write out op4 */ \
+ __asm psraw xmm5, 4 /* xmm5 = op5 */ \
+ \
+ __asm movdqa O(3), xmm3 /* Write out op3 */ \
+ __asm psubsw xmm7, xmm0 /* xmm7 = G. - C. = R7 */ \
+ \
+ __asm paddsw xmm7, Eight /* Add Eight to R7; R0 is built from this sum, so it gets it too */ \
+ __asm paddsw xmm0, xmm0 /* xmm0 = C. + C. */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = G. + C. = R0 */ \
+ __asm psraw xmm7, 4 /* xmm7 = op7 */ \
+ \
+ __asm movdqa O(6), xmm6 /* Write out op6 */ \
+ __asm psraw xmm0, 4 /* xmm0 = op0 */ \
+ \
+ __asm movdqa O(5), xmm5 /* Write out op5 */ \
+ __asm movdqa O(7), xmm7 /* Write out op7 */ \
+ \
+ __asm movdqa O(0), xmm0 /* Write out op0 */ \
+ \
+ } /* End of Wmt_Column_IDCT10 macro */
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Row_IDCT10
+ *
+ * Description: The Macro does the first-pass simplified 1-D IDCT on 8 lanes
+ * at once. Only inputs 0 - 3 are read; inputs 4 - 7 are treated
+ * as zero. Unlike the column macro, no rounding constant is
+ * added and no shift is applied; the eight results are written
+ * back in place.
+ *
+ * Input: I(0) - I(3), the input rows
+ * C(1) - C(7), the cosine constants
+ *
+ * Output: I(0) - I(7)
+ *
+ * Return: None
+ *
+ * Special Note: I() and C() must be defined where the macro is expanded.
+ * I(1) and I(2) temporarily hold the intermediate values C.
+ * and D. before the final results overwrite them.
+ * xmm0 - xmm7 are overwritten.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/*
+ The major difference between Willamette processor and other IA32 processors is that
+ all of the simd integer instructions now support the 128 bit xmm registers instead
+ of 64 bit mmx registers. By using these instructions, we can do 8 1-D idcts
+ that take shorts as input and output shorts at once
+
+*/
+
+#define Wmt_Row_IDCT10 __asm { \
+ \
+ __asm movdqa xmm2, I(3) /* xmm2 = i3 */ \
+ __asm movdqa xmm6, C(3) /* xmm6 = c3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* xmm4 = i3 */ \
+ __asm pmulhw xmm4, xmm6 /* xmm4 = c3 * i3 - i3 */ \
+ \
+ __asm movdqa xmm1, C(5) /* xmm1 = c5 */ \
+ __asm movdqa xmm5, xmm1 /* xmm5 = c5 (never read; xmm5 is overwritten below) */ \
+ \
+ __asm pmulhw xmm1, xmm2 /* xmm1 = c5 * i3 - i3 */ \
+ __asm movdqa xmm3, I(1) /* xmm3 = i1 */ \
+ \
+ __asm movdqa xmm0, C(1) /* xmm0 = c1 */ \
+ __asm paddw xmm4, xmm2 /* xmm4 = c3 * i3 = C */ \
+ \
+ __asm movdqa xmm7, C(7) /* xmm7 = c7 */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = c5 * i3 */ \
+ __asm movdqa xmm5, xmm0 /* xmm5 = c1 (never read; xmm5 is overwritten below) */ \
+ \
+ __asm pmulhw xmm0, xmm3 /* xmm0 = c1 * i1 - i1 */ \
+ __asm pxor xmm6, xmm6 /* clear xmm6 */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = - c5 * i3 = D */ \
+ __asm paddw xmm0, xmm3 /* xmm0 = c1 * i1 = A */ \
+ \
+ __asm pmulhw xmm3, xmm7 /* xmm3 = c7 * i1 = B */ \
+ __asm movdqa xmm2, I(2) /* xmm2 = i2 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* xmm1 = i2 */ \
+ __asm pmulhw xmm2, C(2) /* xmm2 = i2 * c2 - i2 */ \
+ \
+ __asm psubsw xmm0, xmm4 /* xmm0 = A - C */ \
+ \
+ __asm paddw xmm2, xmm1 /* xmm2 = i2 * c2 = G */ \
+ __asm pmulhw xmm1, C(6) /* xmm1 = c6 * i2 = H */ \
+ \
+ __asm paddsw xmm4, xmm4 /* xmm4 = C + C */ \
+ __asm paddsw xmm4, xmm0 /* xmm4 = A + C = C. */ \
+ \
+ __asm psubsw xmm3, xmm6 /* xmm3 = B - D */ \
+ __asm paddsw xmm6, xmm6 /* xmm6 = D + D */ \
+ \
+ __asm paddsw xmm6, xmm3 /* xmm6 = B + D = D. */ \
+ __asm movdqa I(1), xmm4 /* Save C. at I(1); i1 was already loaded */ \
+ \
+ __asm movdqa xmm4, C(4) /* xmm4 = c4 */ \
+ \
+ __asm movdqa xmm5, xmm3 /* xmm5 = B - D */ \
+ __asm pmulhw xmm3, xmm4 /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+ \
+ __asm movdqa xmm7, xmm2 /* xmm7 = c2 * i2 = G */ \
+ __asm movdqa I(2), xmm6 /* Save D. at I(2); i2 was already loaded */ \
+ \
+ __asm movdqa xmm2, xmm0 /* xmm2 = A - C */ \
+ __asm movdqa xmm6, I(0) /* xmm6 = i0 */ \
+ \
+ __asm pmulhw xmm0, xmm4 /* xmm0 = ( c4 - 1 ) * ( A - C ) */ \
+ __asm paddw xmm5, xmm3 /* xmm5 = c4 * ( B - D ) = B. */ \
+ \
+ __asm psubsw xmm5, xmm1 /* xmm5 = B. - H = B.. */ \
+ __asm paddw xmm2, xmm0 /* xmm2 = c4 * ( A - C) = A. */ \
+ \
+ __asm movdqa xmm0, xmm6 /* xmm0 = i0 */ \
+ __asm pmulhw xmm6, xmm4 /* xmm6 = ( c4 - 1 ) * i0 */ \
+ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H + H */ \
+ __asm paddsw xmm1, xmm5 /* xmm1 = B. + H = H. */ \
+ \
+ __asm paddw xmm6, xmm0 /* xmm6 = c4 * i0 = E = F */ \
+ __asm movdqa xmm4, xmm6 /* xmm4 = c4 * i0 = E */ \
+ \
+ __asm psubsw xmm6, xmm2 /* xmm6 = F - A. = F. */ \
+ __asm paddsw xmm2, xmm2 /* xmm2 = A. + A. */ \
+ \
+ __asm movdqa xmm0, I(1) /* Load C. from I(1) */ \
+ __asm paddsw xmm2, xmm6 /* xmm2 = F + A. = A.. */ \
+ \
+ __asm psubsw xmm2, xmm1 /* xmm2 = A.. - H. = R2 */ \
+ \
+ __asm paddsw xmm1, xmm1 /* xmm1 = H. + H. */ \
+ __asm paddsw xmm1, xmm2 /* xmm1 = A.. + H. = R1 */ \
+ \
+ __asm psubsw xmm4, xmm7 /* xmm4 = E - G = E. */ \
+ \
+ __asm movdqa xmm3, I(2) /* Load D. from I(2) */ \
+ __asm paddsw xmm7, xmm7 /* xmm7 = G + G */ \
+ \
+ __asm movdqa I(2), xmm2 /* Write out R2 (unshifted) */ \
+ __asm paddsw xmm7, xmm4 /* xmm7 = E + G = G. */ \
+ \
+ __asm movdqa I(1), xmm1 /* Write out R1 (unshifted) */ \
+ __asm psubsw xmm4, xmm3 /* xmm4 = E. - D. = R4 */ \
+ \
+ __asm paddsw xmm3, xmm3 /* xmm3 = D. + D. */ \
+ \
+ __asm paddsw xmm3, xmm4 /* xmm3 = E. + D. = R3 */ \
+ \
+ __asm psubsw xmm6, xmm5 /* xmm6 = F. - B..= R6 */ \
+ \
+ __asm paddsw xmm5, xmm5 /* xmm5 = B.. + B.. */ \
+ \
+ __asm paddsw xmm5, xmm6 /* xmm5 = F. + B.. = R5 */ \
+ \
+ __asm movdqa I(4), xmm4 /* Write out R4 (unshifted) */ \
+ \
+ __asm movdqa I(3), xmm3 /* Write out R3 (unshifted) */ \
+ __asm psubsw xmm7, xmm0 /* xmm7 = G. - C. = R7 */ \
+ \
+ __asm paddsw xmm0, xmm0 /* xmm0 = C. + C. */ \
+ \
+ __asm paddsw xmm0, xmm7 /* xmm0 = G. + C. = R0 */ \
+ \
+ __asm movdqa I(6), xmm6 /* Write out R6 (unshifted) */ \
+ \
+ __asm movdqa I(5), xmm5 /* Write out R5 (unshifted) */ \
+ __asm movdqa I(7), xmm7 /* Write out R7 (unshifted) */ \
+ \
+ __asm movdqa I(0), xmm0 /* Write out R0 (unshifted) */ \
+ \
+ } /* End of Wmt_Row_IDCT10 macro */
+
+/**************************************************************************************
+ *
+ * Macro: Transpose10
+ *
+ * Description: The Macro does an in-place 8x8 transpose of 16-bit words.
+ * With the rows before the macro named a - h (I(0) - I(7)),
+ * I(n) afterwards holds word n of every row, with a's word in
+ * the lowest position.
+ *
+ * Input: I(0) - I(7)
+ *
+ * Output: I(0) - I(7)
+ *
+ * Return: None
+ *
+ * Special Note: I() must be defined where the macro is expanded.
+ * I(6) is used as a spill slot part-way through, after its
+ * original contents have been loaded.
+ * xmm0 - xmm7 are overwritten.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+
+#define Transpose10 __asm { \
+ \
+ __asm movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */ \
+ __asm movdqa xmm0, I(5) /* xmm0=f7f6f5f4f3f2f1f0 */ \
+ \
+ __asm movdqa xmm5, xmm4 /* make a copy */ \
+ __asm punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */ \
+ \
+ __asm punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */ \
+ __asm movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */ \
+ \
+ __asm movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */ \
+ __asm movdqa xmm7, xmm6 /* make a copy */ \
+ \
+ __asm punpcklwd xmm6, xmm0 /* xmm6=h3g3h2g2h1g1h0g0 */ \
+ __asm punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */ \
+ \
+ __asm movdqa xmm3, xmm4 /* make a copy */ \
+ __asm punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */ \
+ \
+ __asm punpckhdq xmm3, xmm6 /* xmm3=h3g3f3e3h2g2f2e2 */ \
+ __asm movdqa I(6), xmm3 /* spill h3g3f3e3h2g2f2e2 to I(6) */ \
+ /* Free xmm6 */ \
+ __asm movdqa xmm6, xmm5 /* make a copy */ \
+ __asm punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */ \
+ \
+ __asm punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */ \
+ __asm movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */ \
+ /* Free xmm7 */ \
+ __asm movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */ \
+ __asm movdqa xmm7, xmm0 /* make a copy */ \
+ \
+ __asm punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */ \
+ __asm punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */ \
+ /* Free xmm1 */ \
+ __asm movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */ \
+ __asm movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */ \
+ \
+ __asm movdqa xmm1, xmm2 /* make a copy */ \
+ __asm punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */ \
+ \
+ __asm punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */ \
+ __asm movdqa xmm3, xmm0 /* make a copy */ \
+ \
+ __asm punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */ \
+ __asm punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */ \
+ /* Free xmm2 */ \
+ __asm movdqa xmm2, xmm7 /* make a copy */ \
+ __asm punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */ \
+ \
+ __asm punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */ \
+ __asm movdqa xmm1, xmm0 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */ \
+ __asm punpckhqdq xmm1, xmm4 /* xmm1=h1g1f1e1d1c1b1a1 */ \
+ \
+ __asm movdqa I(0), xmm0 /* save I(0) */ \
+ __asm movdqa I(1), xmm1 /* save I(1) */ \
+ \
+ __asm movdqa xmm0, I(6) /* reload h3g3f3e3h2g2f2e2 */ \
+ __asm movdqa xmm1, xmm3 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */ \
+ __asm punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */ \
+ \
+ __asm movdqa xmm4, xmm2 /* make a copy */ \
+ __asm punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */ \
+ \
+ __asm punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */ \
+ __asm movdqa I(2), xmm1 /* save I(2) */ \
+ \
+ __asm movdqa I(3), xmm3 /* save I(3) */ \
+ __asm movdqa I(4), xmm4 /* save I(4) */ \
+ \
+ __asm movdqa I(5), xmm2 /* save I(5) */ \
+ __asm movdqa xmm5, xmm7 /* make a copy */ \
+ \
+ __asm punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */ \
+ __asm punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */ \
+ \
+ __asm movdqa I(6), xmm5 /* save I(6) */ \
+ __asm movdqa I(7), xmm7 /* save I(7) */ \
+ \
+ }/* End of Transpose10 Macro */
+
+
+/**************************************************************************************
+ *
+ * Macro: Wmt_Dequant10_Dx
+ *
+ * Description: Dequantizes the first four 16-byte rows of an 8x8 block of
+ * 16-bit coefficients. Each row read from [eax] is multiplied
+ * word by word (pmullw, low 16 bits of each product kept) by
+ * the matching row of [ebx] and stored at the same offset from
+ * [edx]. Rows 4 - 7 are neither read nor written, and no
+ * reordering is done.
+ *
+ * Input: [eax], quantized input,
+ * [ebx], quantization table,
+ * [edx], destination buffer
+ *
+ * Output: [edx], first four rows (64 bytes) of dequantized coefficients
+ *
+ * Return: None
+ *
+ * Special Note: movdqa and the pmullw memory operands require 16-byte aligned
+ * addresses. xmm0 - xmm3 are overwritten.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant10_Dx __asm { \
+ __asm movdqa xmm0, [eax] /* xmm0 = quantized row 0 */ \
+ __asm movdqa xmm1, [eax + 16] /* xmm1 = quantized row 1 */ \
+ \
+ __asm pmullw xmm0, [ebx] /* xmm0 = 07 06 05 04 03 02 01 00 */ \
+ __asm pmullw xmm1, [ebx + 16] /* xmm1 = 17 16 15 14 13 12 11 10 */ \
+ \
+ __asm movdqa xmm2, [eax + 32] /* xmm2 = quantized row 2 */ \
+ __asm movdqa xmm3, [eax + 48] /* xmm3 = quantized row 3 */ \
+ \
+ __asm pmullw xmm2, [ebx + 32] /* xmm2 = 27 26 25 24 23 22 21 20 */ \
+ __asm pmullw xmm3, [ebx + 48] /* xmm3 = 37 36 35 34 33 32 31 30 */ \
+ \
+ __asm movdqa [edx], xmm0 /* write row 0 */ \
+ __asm movdqa [edx + 16], xmm1 /* write row 1 */ \
+ \
+ __asm movdqa [edx+32], xmm2 /* write row 2 */ \
+ __asm movdqa [edx+48], xmm3 /* write row 3 */ \
+ \
+ }/* end of Wmt_Dequant10_Dx Macro */
+
+
+
+
+/**************************************************************************************
+ *
+ * Routine: Wmt_IDct10_Dx
+ *
+ * Description: Perform dequantization and IDCT on a 8x8 block where only the
+ * first 10 coeffs are non-zero coefficients. The first four rows
+ * are dequantized from InputData into OutputData; the row pass,
+ * transpose and column pass then operate in place on OutputData.
+ *
+ * Input: InputData - quantized coefficients
+ * QuantizationTable - per-coefficient multipliers
+ * OutputData - destination buffer, also used as work area
+ *
+ * Output: OutputData is overwritten (all eight rows)
+ *
+ * Return: None
+ *
+ * Special Note: The input coefficients are in raster order.
+ * Only the first four rows of InputData and QuantizationTable
+ * are read.
+ * The buffers are accessed with movdqa, which requires 16-byte
+ * aligned addresses.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+void Wmt_IDct10_Dx(short *InputData, short *QuantizationTable, short *OutputData)
+{
+
+
+ __asm
+ {
+ push ebx
+ /* ebx is saved above and restored before leaving the asm block */
+ mov eax, InputData
+ mov ebx, QuantizationTable
+ mov edx, OutputData
+ lea ecx, WmtIdctConst
+ /* dequantize rows 0 - 3: [eax] * [ebx] -> [edx] */
+ Wmt_Dequant10_Dx
+ /* I(i) and O(i) both address 16-byte row i of OutputData; C(i) addresses entry i-1 of WmtIdctConst */
+#define I(i) [edx + 16 * i ]
+#define O(i) [edx + 16 * i ]
+#define C(i) [ecx + 16 * (i-1) ]
+
+
+ /* NOTE(review): original remark was "Transpose - absorbed by the Wmt_dequant"; Wmt_Dequant10_Dx itself does no reordering */
+ /* first pass: reads I(0) - I(3), writes I(0) - I(7) */
+ Wmt_Row_IDCT10
+ /* in-place 8x8 transpose of I(0) - I(7) */
+ Transpose10
+ /* second pass: reads I(0) - I(3), adds Eight, shifts right by 4, writes O(0) - O(7) */
+ Wmt_Column_IDCT10
+
+ pop ebx
+ }
+
+}
+/**************************************************************************************
+ *
+ * Routine: Wmt_idct1
+ *
+ * Description: Perform IDCT on a 8x8 block where only the first coefficient
+ * is used. The value
+ * ( input[0] * qtbl[0] + 15 ) >> 5
+ * is computed with 16-bit wrap-around arithmetic and an
+ * arithmetic shift, and is then written to all 64 entries of
+ * the output block.
+ *
+ * Input: input - quantized coefficients (only word 0 affects the result)
+ * qtbl - quantization table (only word 0 affects the result)
+ * output - destination block
+ *
+ * Output: output[0..63] all set to the same value
+ *
+ * Return: None
+ *
+ * Special Note: We only have one coefficient.
+ * movq loads 8 bytes from both input and qtbl.
+ * output is written with movdqa, which requires a 16-byte
+ * aligned address.
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+void Wmt_idct1 (short * input, short * qtbl, short * output)
+{
+ __asm
+ {
+ mov eax, [input]
+ mov edx, 0xf
+ /* xmm2 = 15 in word 0, zero in every other word */
+ movd xmm2, edx
+
+ mov ecx, [qtbl]
+ mov edx, [output]
+ /* load the first four coefficients and the first four table entries */
+ movq xmm0, QWORD ptr [eax]
+ movq xmm1, QWORD ptr [ecx]
+ /* word 0 = input[0] * qtbl[0] + 15 */
+ pmullw xmm0, xmm1;
+ paddw xmm0, xmm2
+ /* shift, then start replicating word 0 */
+ psraw xmm0, 5;
+ punpcklwd xmm0, xmm0;
+ /* after these two unpacks all eight words equal word 0 */
+ punpckldq xmm0, xmm0;
+ punpcklqdq xmm0, xmm0;
+ /* second copy of the same row, stored alternately with xmm0 */
+ movdqa xmm1, xmm0
+ /* fill the eight 16-byte rows of the output block */
+ movdqa [edx], xmm0;
+ movdqa [edx+16], xmm1;
+
+ movdqa [edx+32], xmm0;
+ movdqa [edx+48], xmm1;
+
+ movdqa [edx+64], xmm0;
+ movdqa [edx+80], xmm1;
+
+ movdqa [edx+96], xmm0;
+ movdqa [edx+112], xmm1;
+
+ }
+}
+/**************************************************************************************
+ ************** Wmt_IDCT3 ******************************************************
+ **************************************************************************************
+ */
+
+/**************************************************************************************
+ *
+ * Routine: Wmt_IDCT3
+ *
+ * Description: Perform IDCT on a 8x8 block with at most 3 nonzero coefficients
+ *
+ * Input: Pointer to input and output buffer
+ *
+ * Output: None
+ *
+ * Return: None
+ *
+ * Special Note: Intel Compiler, Please
+ *
+ * Error: None
+ *
+ ***************************************************************************************
+ */
+
+/***************************************************************************************
+ In IDCT 3, we are dealing with only three non-zero coefficients in the 8x8 block.
+ In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to
+ do 1-D row IDCTs on the first two rows; the remaining six rows are zero anyway.
+ After the row IDCTs, since every column could have nonzero coefficients, we need to
+ do eight 1-D column IDCTs. However, for each column, there are at most two nonzero
+ coefficients, coefficient 0 and coefficient 1. The same holds for the coefficients of
+ the two 1-D row IDCTs. For this reason, the process of a 1-D IDCT is simplified
+
+ from a full version:
+
+ A = (C1 * I1) + (C7 * I7) B = (C7 * I1) - (C1 * I7)
+ C = (C3 * I3) + (C5 * I5) D = (C3 * I5) - (C5 * I3)
+ A. = C4 * (A - C) B. = C4 * (B - D)
+ C. = A + C D. = B + D
+
+ E = C4 * (I0 + I4) F = C4 * (I0 - I4)
+ G = (C2 * I2) + (C6 * I6) H = (C6 * I2) - (C2 * I6)
+ E. = E - G
+ G. = E + G
+
+ A.. = F + A. B.. = B. - H
+ F. = F - A. H. = B. + H
+
+ R0 = G. + C. R1 = A.. + H. R3 = E. + D. R5 = F. + B..
+ R7 = G. - C. R2 = A.. - H. R4 = E. - D. R6 = F. - B..
+
+ To:
+
+
+ A = (C1 * I1) B = (C7 * I1)
+ C = 0 D = 0
+ A. = C4 * A B. = C4 * B
+ C. = A D. = B
+
+ E = C4 * I0 F = E
+ G = 0 H = 0
+ E. = E
+ G. = E
+
+ A.. = E + A. B.. = B.
+ F. = E - A. H. = B.
+
+ R0 = E + A R1 = E + A. + B. R3 = E + B R5 = E - A. + B.
+ R7 = E - A R2 = E + A. - B. R4 = E - B R6 = F - A. - B.
+
+******************************************************************************************/
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
new file mode 100644
index 00000000..60436225
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
@@ -0,0 +1,281 @@
+ /****************************************************************************
+ *
+ * Module Title : WmtOptFunctions.c
+ *
+ * Description : willamette processor specific
+ * optimised versions of functions
+ *
+ * AUTHOR : Yaowu Xu
+ *
+ * Special Note:
+ *
+ *****************************************************************************
+ * Revision History
+ *
+ *
+ * 1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * Added push and pop ebx in WmtReconIntra
+ * 1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ * 1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ * 1.00 YWX 14/06/00 Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+
+/*
+ Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ * Header Files
+ *****************************************************************************
+ */
+
+#define STRICT // Strict type checking.
+
+#include "reconstruct.h"
+
+/****************************************************************************
+ * Module constants.
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Imports.
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ * Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Exported Functions
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * Module Statics
+ *****************************************************************************
+ */
+
+
+
+_declspec(align(16)) static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
+
+/****************************************************************************
+* Forward References
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtReconIntra
+ *
+ * INPUTS : INT16 * TmpDataBuffer
+ * Not used by this implementation (explicitly cast to void).
+ *
+ * UINT16 * idct
+ * Pointer to the output from the idct for this block.
+ * Declared UINT16, but the 16-bit values are packed with signed
+ * saturation (packsswb), i.e. they are treated as signed.
+ * 128 bytes are consumed: eight rows of eight 16-bit values.
+ * Each row is loaded with movdqa, so the buffer must be
+ * 16-byte aligned.
+ *
+ * UINT32 stride
+ * Line Length in pixels in recon and reference images.
+ * Added to the dest pointer after every row.
+ *
+ * OUTPUTS : UINT8 * dest
+ * The reconstruction buffer. Eight rows of eight bytes are
+ * written, one row every stride bytes.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs an intra block - wmt version.
+ * Each row of eight 16-bit inputs is packed to signed bytes with
+ * saturation and then XORed with 128 (the Eight128s constant),
+ * which maps the signed range onto 0..255.
+ *
+ * SPECIAL NOTES : Written as Microsoft-style inline assembly using SSE2
+ * (xmm) instructions. The loop branch (jc) tests the flags
+ * produced by the earlier cmp; the movq store and lea placed
+ * between them do not modify the flags.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
+{
+ (void)TmpDataBuffer; /* parameter is intentionally unused */
+ __asm
+ {
+
+ push ebx
+
+ mov eax,[idct] ; Signed 16 bit inputs
+ mov edx,[dest] ; Unsigned 8 bit outputs
+
+ movq xmm0,QWORD PTR [Eight128s] ; Set xmm0 to 0x000000000000008080808080808080
+ pxor xmm3, xmm3 ; set xmm3 to 0
+ ;
+ mov ebx,[stride] ; Line stride in output buffer
+ lea ecx,[eax+128] ; Endpoint in input buffer
+
+loop_label:
+
+ movdqa xmm2,XMMWORD PTR [eax] ; Read the eight inputs
+ packsswb xmm2,xmm3 ;
+
+ pxor xmm2,xmm0 ; Convert result to unsigned (same as add 128)
+ lea eax,[eax + 16] ; Step source buffer
+
+ cmp eax,ecx ; are we done
+ movq QWORD PTR [edx],xmm2 ; store results
+
+ lea edx,[edx+ebx] ; Step output buffer
+ jc loop_label ; Loop back if we are not done
+
+ pop ebx
+ }
+
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : WmtReconInter
+ *
+ * INPUTS : INT16 * TmpDataBuffer
+ * Not used by this implementation (explicitly cast to void).
+ *
+ * UINT8 * RefPtr
+ * The last frame reference. Eight bytes are read per row with
+ * movq, so no particular alignment is required.
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data. 128 bytes are consumed: eight
+ * rows of eight 16-bit values. Each row is loaded with movdqa,
+ * so the buffer must be 16-byte aligned.
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images. The same step
+ * is added to both RefPtr and ReconPtr after every row.
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction. Eight rows of eight bytes are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from last data and change.
+ * Per row: the eight reference bytes are zero-extended to 16 bits,
+ * the eight changes are added with signed saturation (paddsw),
+ * and the sums are packed back to bytes with unsigned saturation
+ * (packuswb), i.e. clamped to 0..255.
+ *
+ * SPECIAL NOTES : Written as Microsoft-style inline assembly using SSE2
+ * (xmm) instructions. The inline remark "add in first 4
+ * changes" is historical: the paddsw adds all eight changes
+ * of the row. The loop branch (jc) tests the flags produced
+ * by the earlier cmp; the movq store and lea placed between
+ * them do not modify the flags.
+ * NOTE(review): ebx is overwritten without an explicit
+ * push/pop (only edi is saved here); presumably this relies
+ * on the compiler preserving ebx around __asm blocks -
+ * confirm against the compiler documentation.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+ (void) TmpDataBuffer; /* parameter is intentionally unused */
+
+ _asm {
+ push edi
+
+ mov ebx, [RefPtr]
+ mov ecx, [ChangePtr]
+
+ mov eax, [ReconPtr]
+ mov edx, [LineStep]
+
+ pxor xmm0, xmm0
+ lea edi, [ecx + 128]
+ L:
+ movq xmm2, QWORD ptr [ebx] ; (+3 misaligned) 8 reference pixels
+ movdqa xmm4, XMMWORD ptr [ecx] ; 8 changes
+
+ punpcklbw xmm2, xmm0 ;
+
+ add ebx, edx ; next row of reference pixels
+ paddsw xmm2, xmm4 ; add in first 4 changes
+
+ lea ecx, [ecx + 16] ; next row of changes
+ packuswb xmm2, xmm0 ; pack result to unsigned 8-bit values
+
+ cmp ecx, edi ; are we done?
+ movq QWORD PTR [eax], xmm2 ; store result
+
+ lea eax, [eax+edx] ; next row of output
+ jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+ pop edi
+ }
+
+}
+/****************************************************************************
+ *
+ * ROUTINE : WmtReconInterHalfPixel2
+ *
+ * INPUTS : INT16 * TmpDataBuffer
+ * Not used by this implementation (explicitly cast to void).
+ *
+ * UINT8 * RefPtr1, RefPtr2
+ * The last frame reference. Eight bytes are read per row from
+ * each pointer with movq, so no particular alignment is required.
+ *
+ * INT16 * ChangePtr
+ * Pointer to the change data. 128 bytes are consumed: eight
+ * rows of eight 16-bit values. Each row is loaded with movdqa,
+ * so the buffer must be 16-byte aligned.
+ *
+ * UINT32 LineStep
+ * Line Length in pixels in recon and ref images. The same step
+ * is added to RefPtr1, RefPtr2 and ReconPtr after every row.
+ *
+ * OUTPUTS : UINT8 * ReconPtr
+ * The reconstruction. Eight rows of eight bytes are written.
+ *
+ * RETURNS : None
+ *
+ * FUNCTION : Reconstructs data from half pixel reference data and change.
+ * Half pixel data interpolated from 2 references.
+ * Per row: both reference rows are zero-extended to 16 bits,
+ * summed and shifted right by one (a truncating average, no
+ * rounding term is added), the eight changes are added, and the
+ * result is packed to bytes with unsigned saturation (packuswb).
+ *
+ * SPECIAL NOTES : Written as Microsoft-style inline assembly using SSE2
+ * (xmm) instructions. The inline remarks that name mm2/mm4/mm6
+ * and "first 4 changes" are historical: the code operates on
+ * xmm2/xmm4/xmm6 and processes all eight values of a row.
+ * The loop branch (jc) tests the flags produced by the
+ * earlier cmp; the movq store and lea placed between them do
+ * not modify the flags.
+ * NOTE(review): the changes are added with wrapping paddw,
+ * whereas WmtReconInter uses saturating paddsw; the results
+ * differ only if average + change overflows 16 bits - confirm
+ * that the change data range rules this out.
+ * NOTE(review): ebx is overwritten without an explicit
+ * push/pop (only esi and edi are saved here); presumably this
+ * relies on the compiler preserving ebx around __asm blocks -
+ * confirm against the compiler documentation.
+ *
+ * ERRORS : None.
+ *
+ ****************************************************************************/
+
+void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
+ UINT8 * RefPtr1, UINT8 * RefPtr2,
+ INT16 * ChangePtr, UINT32 LineStep )
+{
+ (void)TmpDataBuffer; /* parameter is intentionally unused */
+
+ _asm {
+ push esi
+ push edi
+
+ mov ecx, [ChangePtr]
+ mov esi, [RefPtr1]
+
+ mov edi, [RefPtr2]
+ mov ebx, [ReconPtr]
+
+ mov edx, [LineStep]
+ lea eax, [ecx+128]
+
+ pxor xmm0, xmm0
+
+ L:
+
+ movq xmm2, QWORD PTR [esi] ; (+3 misaligned) mm2 = row from ref1
+ movq xmm4, QWORD PTR [edi] ; (+3 misaligned) mm4 = row from ref2
+
+ punpcklbw xmm2, xmm0 ;
+ punpcklbw xmm4, xmm0 ;
+
+ movdqa xmm6, [ecx] ; mm6 = first 4 changes
+ paddw xmm2, xmm4 ; mm2 = start (ref1 + ref2)
+
+
+ psrlw xmm2, 1 ; mm2 = start (ref1 + ref2)/2
+ paddw xmm2, xmm6 ; add changes to start
+
+ lea ecx, [ecx+16] ; next row idct
+ packuswb xmm2, xmm0 ; pack start|end to unsigned 8-bit
+
+ add esi, edx ; next row ref1
+ add edi, edx ; next row ref2
+
+ cmp ecx, eax
+ movq QWORD PTR [ebx], xmm2 ; store result
+ ;
+ lea ebx, [ebx+edx]
+ jc L
+
+ pop edi
+ pop esi
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst b/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst
new file mode 100644
index 00000000..248b173e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst
@@ -0,0 +1,495 @@
+Microsoft (R) Macro Assembler Version 9.00.30729.01 06/23/11 13:09:02
+c:\Winamp\libvp6\corelibs\cdxv\vpxblit\wx86\bcy00.asm Page 1 - 1
+
+
+ ;//==========================================================================
+ ;//
+ ;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ ;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ ;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+ ;// PURPOSE.
+ ;//
+ ;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+ ;//
+ ;//--------------------------------------------------------------------------
+
+
+ ;/***********************************************\
+ ;??? bcy00.asm
+ ; yv12 to yuy2 same blitter
+ ;\***********************************************/
+
+ .586
+ .387
+ .MODEL flat, SYSCALL, os_dos
+ .MMX
+
+ 00000000 .CODE
+
+ NAME x86bcy00
+
+ PUBLIC bcy00_MMX_
+ PUBLIC _bcy00_MMX
+
+
+ INCLUDE wilk.ash
+ C ;//==========================================================================
+ C ;//
+ C ;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ C ;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ C ;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+ C ;// PURPOSE.
+ C ;//
+ C ;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+ C ;//
+ C ;//--------------------------------------------------------------------------
+ C
+ C
+ C ;/***********************************************\
+ C ;
+ C ;\***********************************************/
+ C
+ C ;;
+ C ;; YUV buffer configuration structure
+ C ;;
+ C ;------------------------------------------------
+ 00000030 C YUV_BUFFER_CONFIG STRUC
+ 00000000 00000000 C YWidth dd ?
+ 00000004 00000000 C YHeight dd ?
+ 00000008 00000000 C YStride dd ?
+ C
+ 0000000C 00000000 C UVWidth dd ?
+ 00000010 00000000 C UVHeight dd ?
+ 00000014 00000000 C UVStride dd ?
+ C
+ 00000018 00000000 C YBuffer dd ?
+ 0000001C 00000000 C UBuffer dd ?
+ 00000020 00000000 C VBuffer dd ?
+ C
+ 00000024 00000000 C uvStart dd ?
+ 00000028 00000000 C uvDstArea dd ?
+ 0000002C 00000000 C uvUsedArea dd ?
+ C YUV_BUFFER_CONFIG ENDS
+ C ;------------------------------------------------
+ C
+ INCLUDE wblit.ash
+ C ;//==========================================================================
+ C ;//
+ C ;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ C ;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ C ;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+ C ;// PURPOSE.
+ C ;//
+ C ;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+ C ;//
+ C ;//--------------------------------------------------------------------------
+ C
+ C
+ C ;/***********************************************\
+ C ;??? wblit.ash
+ C ;
+ C ;\***********************************************/
+ C
+ C ;------------------------------------------------
+ 00000028 C x86_Params STRUC
+ 00000000 00000006 [ C dd 6 dup (?) ;6 pushed regs
+ 00000000
+ ]
+ 0DD0000C 00000000 C dd ? ;return address
+ 0DD00010 00000000 C dst dd ?
+ 0DD00014 00000000 C scrnPitch dd ?
+ 0DD00018 00000000 C buffConfig dd ?
+ C x86_Params ENDS
+ C ;------------------------------------------------
+ C
+ C EXTERNDEF _WK_YforY:DWORD
+ C EXTERNDEF _WK_UforBG:DWORD
+ C EXTERNDEF _WK_VforRG:DWORD
+ C
+ C EXTERNDEF _WK_YforY_MMX:DWORD
+ C EXTERNDEF _WK_UforBG_MMX:DWORD
+ C EXTERNDEF _WK_VforRG_MMX:DWORD
+ C
+ C EXTERNDEF _WK_ClampTableR:DWORD
+ C EXTERNDEF _WK_ClampTableG:DWORD
+ C EXTERNDEF _WK_ClampTableB:DWORD
+ C
+ C EXTERNDEF _WK_ClampTableR555:DWORD
+ C EXTERNDEF _WK_ClampTableG555:DWORD
+ C EXTERNDEF _WK_ClampTableB555:DWORD
+ C
+ C EXTERNDEF _WK_ClampTableR565:DWORD
+ C EXTERNDEF _WK_ClampTableG565:DWORD
+ C EXTERNDEF _WK_ClampTableB565:DWORD
+ C
+ = 00000600 C CLAMPCENTER EQU 256*4+128*4
+ C
+ C
+ C EXTERNDEF WK_johnsTable_MMX:DWORD
+ C EXTERNDEF WK_johnsTable:DWORD
+ C
+ C EXTERNDEF WK_RGB_MULFACTOR_555:QWORD
+ C EXTERNDEF WK_RB_MASK_555:QWORD
+ C EXTERNDEF WK_G_MASK_555:QWORD
+ C
+ C EXTERNDEF WK_RGB_MULFACTOR_565:QWORD
+ C EXTERNDEF WK_RB_MASK_565:QWORD
+ C EXTERNDEF WK_G_MASK_565:QWORD
+ C
+ C EXTERNDEF WK_MASK_YY_MMX:DWORD
+ C EXTERNDEF WK_MASK_BYTE0:DWORD
+ C
+ C
+
+ ;------------------------------------------------
+ ; local vars
+ 00000000 L_3s QWORD 0003000300030003h ; 4 3's
+ 0003000300030003
+ 00000008 L_2s QWORD 0002000200020002h ; 4 2's
+ 0002000200020002
+
+ = 00000000 L_blkWidth EQU 0
+ = 00000004 L_YStride EQU L_blkWidth+4
+ = 00000008 L_Height EQU L_YStride+4
+ = 0000000C L_extraWidth EQU L_Height+4
+ = 00000010 L_tempspaceL EQU L_extraWidth+4
+ = 00000018 L_tempspaceH EQU L_tempspaceL+8
+ = 00000020 LOCAL_SPACE EQU L_tempspaceL+16
+
+ ;------------------------------------------------
+ ;void bcy00_MMX(unsigned long *dst, int scrnPitch, YUV_BUFFER_CONFIG *buffConfig);
+ ;
+ 00000010 bcy00_MMX_:
+ 00000010 _bcy00_MMX:
+ 00000010 56 push esi
+ 00000011 57 push edi
+
+ 00000012 55 push ebp
+ 00000013 53 push ebx
+
+ 00000014 51 push ecx
+ 00000015 52 push edx
+
+ 00000016 8B 7C 24 1C mov edi,[esp].dst ; edi = dst
+ 0000001A 8B 6C 24 24 mov ebp,[esp].buffConfig ; ebp = buffConfig
+
+ 0000001E 90 nop
+ 0000001F 83 EC 20 sub esp,LOCAL_SPACE
+
+ 00000022 8B 45 08 mov eax,[ebp].YStride ; eax = YStride
+ 00000025 89 44 24 04 mov L_YStride[esp],eax ; save to local
+
+ 00000029 8B 45 04 mov eax,[ebp].YHeight ; eax = Height
+ 0000002C 48 dec eax ; 1 less than full height
+
+ 0000002D 89 44 24 08 mov L_Height[esp], eax ; save to local
+ 00000031 8B 4D 00 mov ecx,[ebp].YWidth ; ecx = YWidth
+
+ ; mov eax, [esp+LOCAL_SPACE].scrnPitch
+
+ 00000034 8B C1 mov eax, ecx ; eax = YWidth
+ 00000036 C1 E9 03 shr ecx,3 ;blocks of 8 pixels
+
+ 00000039 8B 75 18 mov esi,[ebp].YBuffer ; esi = YBuffer
+ 0000003C 33 DB xor ebx,ebx ; ebx = 0
+
+ 0000003E 89 0C 24 mov L_blkWidth[esp],ecx ; Save YWidth/8 to local
+ 00000041 83 E0 07 and eax, 7 ; extraWidth
+
+ 00000044 89 44 24 0C mov L_extraWidth[esp], eax ; save extraWidth;
+ 00000048 8B C3 mov eax,ebx ; eax = 0;
+
+ 0000004A 8B 55 20 mov edx,[ebp].VBuffer ; edx = VBuffer
+ 0000004D 8B 6D 1C mov ebp,[ebp].UBuffer ; ebp = UBuffer
+
+ 00000050 0F EF E4 pxor mm4,mm4
+ ;
+ ; eax = 0
+ ; ebx = 0;
+ ; ecx = YWidth/8
+ ; edx = VBuffer
+ ; ebp = UBuffer
+ ; esi = YBuffer
+ ; edi = dst
+ ;
+
+ 00000053 hloop:
+ 00000053 wloop:
+ 00000053 0F 6F 3C 13 movq mm7,[edx+ebx] ; get 8 v's
+ 00000057 0F 60 FC punpcklbw mm7,mm4 ; unpack v's with 0's
+
+ 0000005A 0F D5 3D pmullw mm7,L_3s ; v's * 3
+ 00000000 R
+ 00000061 0F 6F 2C 10 movq mm5,[edx+eax] ; get 8 v's pointed to by eax
+
+ 00000065 0F 7F EE movq mm6,mm5 ; copy to mm6
+ 00000068 0F 60 F4 punpcklbw mm6,mm4 ; unpack v's with 0's
+
+ 0000006B 0F FD FE paddw mm7,mm6 ; mm7 = 3 * [ebx] + 1 * [eax]
+ 0000006E 0F FD 3D paddw mm7,L_2s ; mm7 = 3 * [ebx] + 1 * [eax] + 2
+ 00000008 R
+
+ 00000075 0F 71 D7 02 psrlw mm7,2 ; mm7 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 00000079 0F 6F 1C 2B movq mm3,[ebp+ebx] ; get 8 u's into mm3
+ 0000007D 0F 60 DC punpcklbw mm3,mm4 ; unpack u's with 0's
+
+ 00000080 0F D5 1D pmullw mm3,L_3s ; u's * 3
+ 00000000 R
+ 00000087 0F 6F 2C 28 movq mm5,[ebp+eax] ; get 8 u's pointed to by eax
+
+ 0000008B 0F 7F EE movq mm6,mm5 ; copy to mm6
+ 0000008E 0F 60 F4 punpcklbw mm6,mm4 ; unpack u's with 0's
+
+ 00000091 0F FD DE paddw mm3,mm6 ; mm3 = 3 * [ebx] + 1 * [eax]
+ 00000094 0F FD 1D paddw mm3,L_2s ; mm3 = 3 * [ebx] + 1 * [eax] + 2
+ 00000008 R
+
+ 0000009B 0F 71 D3 02 psrlw mm3,2 ; mm3 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 0000009F 0F 71 F7 08 psllw mm7,8 ; v3 0 v2 0 v1 0 v0 0
+ 000000A3 0F EB DF por mm3,mm7 ; v3 u3 v2 u2 v1 u1 v0 u0
+
+ 000000A6 0F 6F 04 5E movq mm0,[esi+ebx*2] ; get the y's
+ 000000AA 0F 7F C1 movq mm1,mm0 ; save upper y's
+
+ 000000AD 0F 60 C3 punpcklbw mm0,mm3 ; v1 y3 u1 y2 v0 y1 u0 y0
+ 000000B0 0F 68 CB punpckhbw mm1,mm3 ; v3 y7 u3 y6 v2 y5 u2 y4
+
+ 000000B3 49 dec ecx
+ 000000B4 0F 7F 04 9F movq [edi+ebx*4],mm0 ;write first 4 pixels
+ ;-
+
+ 000000B8 0F 7F 4C 9F 08 movq 8[edi+ebx*4],mm1 ;write next 4 pixels
+ ;-
+
+ 000000BD 8D 40 04 lea eax,[eax+4] ;increment *1 pointer to next pixel
+ 000000C0 8D 5B 04 lea ebx,[ebx+4] ;increment *3 pointer to next pixel
+
+ 000000C3 7F 8E jg wloop
+ ;------------------------------------------------------------
+ ; need to handle the line end condition when YWidth%8 !=0
+ ;------------------------------------------------------------
+ 000000C5 8B 4C 24 0C mov ecx, L_extraWidth[esp] ; extraWidth
+
+ 000000C9 ewloop:
+ 000000C9 49 dec ecx
+ 000000CA 78 79 js phloop
+
+ 000000CC 0F 6F 3C 13 movq mm7,[edx+ebx] ; get 8 v's
+ 000000D0 0F 60 FC punpcklbw mm7,mm4 ; unpack v's with 0's
+
+ 000000D3 0F D5 3D pmullw mm7,L_3s ; v's * 3
+ 00000000 R
+ 000000DA 0F 6F 2C 10 movq mm5,[edx+eax] ; get 8 v's pointed to by eax
+
+ 000000DE 0F 7F EE movq mm6,mm5 ; copy to mm6
+ 000000E1 0F 60 F4 punpcklbw mm6,mm4 ; unpack v's with 0's
+
+ 000000E4 0F FD FE paddw mm7,mm6 ; mm7 = 3 * [ebx] + 1 * [eax]
+ 000000E7 0F FD 3D paddw mm7,L_2s ; mm7 = 3 * [ebx] + 1 * [eax] + 2
+ 00000008 R
+
+ 000000EE 0F 71 D7 02 psrlw mm7,2 ; mm7 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 000000F2 0F 6F 1C 2B movq mm3,[ebp+ebx] ; get 8 u's into mm3
+ 000000F6 0F 60 DC punpcklbw mm3,mm4 ; unpack u's with 0's
+
+ 000000F9 0F D5 1D pmullw mm3,L_3s ; u's * 3
+ 00000000 R
+ 00000100 0F 6F 2C 28 movq mm5,[ebp+eax] ; get 8 u's pointed to by eax
+
+ 00000104 0F 7F EE movq mm6,mm5 ; copy to mm6
+ 00000107 0F 60 F4 punpcklbw mm6,mm4 ; unpack u's with 0's
+
+ 0000010A 0F FD DE paddw mm3,mm6 ; mm3 = 3 * [ebx] + 1 * [eax]
+ 0000010D 0F FD 1D paddw mm3,L_2s ; mm3 = 3 * [ebx] + 1 * [eax] + 2
+ 00000008 R
+
+ 00000114 0F 71 D3 02 psrlw mm3,2 ; mm3 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 00000118 0F 71 F7 08 psllw mm7,8 ; v3 0 v2 0 v1 0 v0 0
+ 0000011C 0F EB DF por mm3,mm7 ; v3 u3 v2 u2 v1 u1 v0 u0
+
+ 0000011F 0F 6F 04 5E movq mm0,[esi+ebx*2] ; get the y's
+ 00000123 0F 7F C1 movq mm1,mm0 ; save upper y's
+
+ 00000126 0F 60 C3 punpcklbw mm0,mm3 ; v1 y3 u1 y2 v0 y1 u0 y0
+ 00000129 0F 68 CB punpckhbw mm1,mm3 ; v3 y7 u3 y6 v2 y5 u2 y4
+
+ 0000012C 0F 7F 44 24 10 movq L_tempspaceL[esp],mm0 ;write first 4 pixels
+ ;-
+
+ 00000131 0F 7F 4C 24 18 movq L_tempspaceH[esp],mm1 ;write next 4 pixels
+ ;-
+ ;------------------------------------------------------------
+ ; uncomment the next two line of code will make the image
+ ; output have a white last vertical line
+ ;------------------------------------------------------------
+ ; mov eax, 0ff80ff80h
+ ; mov L_tempspaceH[esp], eax; ; read two bytes
+ ;------------------------------------------------------------
+
+ 00000136 8D 1C 9F lea ebx, [edi + ebx*4];get the destination pointer
+ 00000139 cploop:
+ 00000139 66| 8B 44 4C mov ax, L_tempspaceL[esp + ecx * 2]; ; read two bytes
+ 10
+ 0000013E 66| 89 04 4B mov [ebx+ecx*2], ax
+
+ 00000142 49 dec ecx
+ 00000143 7D F4 jge cploop
+
+ 00000145 phloop:
+ ;------------------------------------------------------------
+ ; prepare for the next line
+ ;------------------------------------------------------------
+ 00000145 8B 4C 24 08 mov ecx,DWORD PTR L_Height[esp] ;get current line number
+ 00000149 8B 5C 24 40 mov ebx,[esp+LOCAL_SPACE].scrnPitch
+
+ 0000014D C1 E1 1F shl ecx,31 ; save low bit
+ 00000150 03 FB add edi,ebx
+
+ 00000152 C1 F9 1F sar ecx,31 ; even lines ecx = 00000000 odd lines it equals FFFFFFFF
+
+ 00000155 8B 5C 24 04 mov ebx,L_YStride[esp]
+ 00000159 D1 FB sar ebx,1 ; ebx is uv stride
+
+ 0000015B 8B C3 mov eax,ebx ; eax is uv stride
+ 0000015D 23 C1 and eax,ecx ; odd lines eax equals uvpitch even lines eax = 0
+
+ 0000015F F7 D1 not ecx ; even lines ecx = ffffffff odd lines it equals 00000000
+ 00000161 23 D9 and ebx,ecx ; ebx = uv pitch on even lines and 0 on odd lines
+
+ 00000163 2B EB sub ebp,ebx ; increment u pointer if we're on an odd line
+ 00000165 2B D3 sub edx,ebx ; increment v pointer if we're on an odd line
+
+ 00000167 F7 D8 neg eax ; eax = -uvpitch on odd lines and 0 on even lines
+ 00000169 03 C3 add eax,ebx ; eax = -uvpitch on odd lines and +uv pitch on even lines
+
+ 0000016B 33 DB xor ebx,ebx ; ebx is used as column pointer so set it to 0
+ 0000016D 8B 0C 24 mov ecx,L_blkWidth[esp]
+
+ 00000170 2B 74 24 04 sub esi,DWORD PTR L_YStride[esp]
+
+ 00000174 FF 4C 24 08 dec DWORD PTR L_Height[esp]
+ 00000178 0F 8F FFFFFED5 jg hloop
+
+ 0000017E 8B C3 mov eax,ebx ; last line ebx and eax should point to the same line
+ 00000180 0F 84 FFFFFECD jz hloop
+ ;------------------------------------------------
+
+ 00000186 theExit:
+ 00000186 83 C4 20 add esp,LOCAL_SPACE
+ 00000189 90 nop
+
+ 0000018A 0F 77 emms
+
+ 0000018C 5A pop edx
+ 0000018D 59 pop ecx
+ 0000018E 5B pop ebx
+ 0000018F 5D pop ebp
+ 00000190 5F pop edi
+ 00000191 5E pop esi
+
+ 00000192 C3 ret
+
+ ;************************************************
+ END
+ Microsoft (R) Macro Assembler Version 9.00.30729.01 06/23/11 13:09:02
+c:\Winamp\libvp6\corelibs\cdxv\vpxblit\wx86\bcy00.asm Symbols 2 - 1
+
+
+
+
+Structures and Unions:
+
+ N a m e Size
+ Offset Type
+
+YUV_BUFFER_CONFIG . . . . . . . 00000030
+ YWidth . . . . . . . . . . . . 00000000 DWord
+ YHeight . . . . . . . . . . . 00000004 DWord
+ YStride . . . . . . . . . . . 00000008 DWord
+ UVWidth . . . . . . . . . . . 0000000C DWord
+ UVHeight . . . . . . . . . . . 00000010 DWord
+ UVStride . . . . . . . . . . . 00000014 DWord
+ YBuffer . . . . . . . . . . . 00000018 DWord
+ UBuffer . . . . . . . . . . . 0000001C DWord
+ VBuffer . . . . . . . . . . . 00000020 DWord
+ uvStart . . . . . . . . . . . 00000024 DWord
+ uvDstArea . . . . . . . . . . 00000028 DWord
+ uvUsedArea . . . . . . . . . . 0000002C DWord
+x86_Params . . . . . . . . . . . 00000028
+ dst . . . . . . . . . . . . . 0000001C DWord
+ scrnPitch . . . . . . . . . . 00000020 DWord
+ buffConfig . . . . . . . . . . 00000024 DWord
+
+
+Segments and Groups:
+
+ N a m e Size Length Align Combine Class
+
+FLAT . . . . . . . . . . . . . . GROUP
+_DATA . . . . . . . . . . . . . 32 Bit 00000000 Para Public 'DATA'
+_TEXT . . . . . . . . . . . . . 32 Bit 00000193 Para Public 'CODE'
+
+
+Symbols:
+
+ N a m e Type Value Attr
+
+@CodeSize . . . . . . . . . . . Number 00000000h
+@DataSize . . . . . . . . . . . Number 00000000h
+@Interface . . . . . . . . . . . Number 00000002h
+@Model . . . . . . . . . . . . . Number 00000007h
+@code . . . . . . . . . . . . . Text _TEXT
+@data . . . . . . . . . . . . . Text FLAT
+@fardata? . . . . . . . . . . . Text FLAT
+@fardata . . . . . . . . . . . . Text FLAT
+@stack . . . . . . . . . . . . . Text FLAT
+CLAMPCENTER . . . . . . . . . . Number 00000600h
+LOCAL_SPACE . . . . . . . . . . Number 00000020h
+L_2s . . . . . . . . . . . . . . QWord 00000008 _TEXT
+L_3s . . . . . . . . . . . . . . QWord 00000000 _TEXT
+L_Height . . . . . . . . . . . . Number 00000008h
+L_YStride . . . . . . . . . . . Number 00000004h
+L_blkWidth . . . . . . . . . . . Number 00000000h
+L_extraWidth . . . . . . . . . . Number 0000000Ch
+L_tempspaceH . . . . . . . . . . Number 00000018h
+L_tempspaceL . . . . . . . . . . Number 00000010h
+WK_G_MASK_555 . . . . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_G_MASK_565 . . . . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_MASK_BYTE0 . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+WK_MASK_YY_MMX . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+WK_RB_MASK_555 . . . . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_RB_MASK_565 . . . . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_RGB_MULFACTOR_555 . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_RGB_MULFACTOR_565 . . . . . . QWord 00000000 FLAT External SYSCALL
+WK_johnsTable_MMX . . . . . . . DWord 00000000 FLAT External SYSCALL
+WK_johnsTable . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableB555 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableB565 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableB . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableG555 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableG565 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableG . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableR555 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableR565 . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_ClampTableR . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_UforBG_MMX . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_UforBG . . . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_VforRG_MMX . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_VforRG . . . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_YforY_MMX . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_WK_YforY . . . . . . . . . . . DWord 00000000 FLAT External SYSCALL
+_bcy00_MMX . . . . . . . . . . . L Near 00000010 _TEXT Public SYSCALL
+bcy00_MMX_ . . . . . . . . . . . L Near 00000010 _TEXT Public SYSCALL
+cploop . . . . . . . . . . . . . L Near 00000139 _TEXT
+ewloop . . . . . . . . . . . . . L Near 000000C9 _TEXT
+hloop . . . . . . . . . . . . . L Near 00000053 _TEXT
+phloop . . . . . . . . . . . . . L Near 00000145 _TEXT
+theExit . . . . . . . . . . . . L Near 00000186 _TEXT
+wloop . . . . . . . . . . . . . L Near 00000053 _TEXT
+
+ 0 Warnings
+ 0 Errors
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg
new file mode 100644
index 00000000..83ce8fbe
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg
@@ -0,0 +1,1786 @@
+<html>
+<body>
+<pre>
+<h1>Build Log</h1>
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB01.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB01.bat"
+Creating temporary file "C:\tmp\RSPB02.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB02.bat"
+Creating temporary file "C:\tmp\RSPB03.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB03.bat"
+Creating temporary file "C:\tmp\RSPB04.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB04.bat"
+Creating temporary file "C:\tmp\RSPB05.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\release\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB05.bat"
+Creating temporary file "C:\tmp\RSPB06.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\ObjectCode\cpuID\release/" /Fd"..\..\..\ObjectCode\cpuID\release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB06.tmp"
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB07.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\Release\s_cpuid.lib"
+\NEWZIP\ObjectCode\cpuID\release\cid.obj
+\NEWZIP\ObjectCode\cpuID\release\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\release\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\release\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\release\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\release\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\release\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB07.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Generating Code...
+Compiling...
+Wmt_CpuID.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB08.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB08.bat"
+Creating temporary file "C:\tmp\RSPB09.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB09.bat"
+Creating temporary file "C:\tmp\RSPB0A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB0A.bat"
+Creating temporary file "C:\tmp\RSPB0B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB0B.bat"
+Creating temporary file "C:\tmp\RSPB0C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB0C.bat"
+Creating temporary file "C:\tmp\RSPB0D.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\cpuID\debug/CPUIdLib.pch" /YX /Fo"..\..\..\ObjectCode\cpuID\debug/" /Fd"..\..\..\ObjectCode\cpuID\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB0D.tmp"
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB0E.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cpuid.lib"
+\NEWZIP\ObjectCode\cpuID\debug\cid.obj
+\NEWZIP\ObjectCode\cpuID\debug\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\debug\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\debug\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\debug\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\debug\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\debug\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB0E.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Wmt_CpuID.cpp
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB0F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB0F.bat"
+Creating temporary file "C:\tmp\RSPB10.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB10.bat"
+Creating temporary file "C:\tmp\RSPB11.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB11.bat"
+Creating temporary file "C:\tmp\RSPB12.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB12.bat"
+Creating temporary file "C:\tmp\RSPB13.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\..\include" /I "..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Release/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Release/" /Fd"..\..\..\ObjectCode\ColorSpaces\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB13.tmp"
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB14.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\release\s_cconv.lib"
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB14.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB15.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB15.bat"
+Creating temporary file "C:\tmp\RSPB16.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB16.bat"
+Creating temporary file "C:\tmp\RSPB17.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB17.bat"
+Creating temporary file "C:\tmp\RSPB18.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB18.bat"
+Creating temporary file "C:\tmp\RSPB19.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Debug/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Debug/" /Fd"..\..\..\ObjectCode\ColorSpaces\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB19.tmp"
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB1A.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cconv.lib"
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB1A.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1B.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\Release/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\Release/" /Fd"..\..\..\..\ObjectCode\dxv\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1B.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_dxv.lib" \NEWZIP\ObjectCode\dxv\Release\vscreen.obj \NEWZIP\ObjectCode\dxv\Release\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1C.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\include" /I "..\..\include\win32" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\debug/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\debug/" /Fd"..\..\..\..\ObjectCode\dxv\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1C.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_dxv.lib" \NEWZIP\ObjectCode\dxv\debug\vscreen.obj \NEWZIP\ObjectCode\dxv\debug\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1D.tmp" with contents
+[
+/nologo /MT /W3 /GX /O2 /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\release/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\release/" /Fd"..\..\..\..\obj\on2_mem\win32\release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1D.tmp"
+Creating temporary file "C:\tmp\RSPB1E.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\release\on2_mem.lib"
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB1E.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1F.tmp" with contents
+[
+/nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\debug/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\debug/" /Fd"..\..\..\..\obj\on2_mem\win32\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1F.tmp"
+Creating temporary file "C:\tmp\RSPB20.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\debug\on2_mem.lib"
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB20.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB21.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\vp60\include" /I "..\include" /I "..\..\include" /I ".\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /Fp"Release/preproc.pch" /YX /Fo"Release/" /Fd"Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB21.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_preproc.lib" .\Release\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB22.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\vp60\include" /I "..\..\include" /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /Fo"Debug/" /Fd"Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB22.tmp"
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_preproc.lib" .\Debug\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB23.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Ox /Ot /Oa /Ow /Og /Oi /Ob2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Release/" /Fd"..\..\..\..\ObjectCode\vp6d\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB23.tmp"
+Creating temporary file "C:\tmp\RSPB24.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60d.lib"
+\NEWZIP\VP6\ObjectCode\vp6d\Release\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB24.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB25.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /I "..\..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Debug/" /Fd"..\..\..\..\ObjectCode\vp6d\Debug/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB25.tmp"
+Creating temporary file "C:\tmp\RSPB26.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60d.lib"
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB26.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB27.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPB27.bat"
+Creating temporary file "C:\tmp\RSPB28.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPB28.bat"
+Creating temporary file "C:\tmp\RSPB29.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPB29.bat"
+Creating temporary file "C:\tmp\RSPB2A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPB2A.bat"
+Creating temporary file "C:\tmp\RSPB2B.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6E_EXPORTS" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\Release/" /Fd"..\..\..\..\ObjectCode\vp6e\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB2B.tmp"
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPB2C.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60e.lib"
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB2C.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB2D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPB2D.bat"
+Creating temporary file "C:\tmp\RSPB2E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPB2E.bat"
+Creating temporary file "C:\tmp\RSPB2F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPB2F.bat"
+Creating temporary file "C:\tmp\RSPB30.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPB30.bat"
+Creating temporary file "C:\tmp\RSPB31.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /I "..\..\..\..\include\vp60" /D "vp6E_EXPORTS" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\debug/" /Fd"..\..\..\..\ObjectCode\vp6e\debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB31.tmp"
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPB32.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60e.lib"
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB32.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB33.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /Ob2 /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\ObjectCode\vpppd6\Release/" /Fd"..\..\..\..\ObjectCode\vpppd6\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB33.tmp"
+Creating temporary file "C:\tmp\RSPB34.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\release\s_vpppd.lib"
+\NEWZIP\ObjectCode\vpppd6\Release\borders.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblock.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlace.obj
+\NEWZIP\ObjectCode\vpppd6\Release\dering.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopfilter.obj
+\NEWZIP\ObjectCode\vpppd6\Release\postproc.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scale.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblocker.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopf_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scaleopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB34.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+Generating Code...
+Compiling...
+simpledeblock_asm.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB35.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\ObjectCode\vppp\Debug/vppp.pch" /YX /Fo"..\..\..\..\ObjectCode\vppp\Debug/" /Fd"..\..\..\..\ObjectCode\vppp\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB35.tmp"
+Creating temporary file "C:\tmp\RSPB36.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpppd.lib"
+\NEWZIP\ObjectCode\vppp\Debug\borders.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblock.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlace.obj
+\NEWZIP\ObjectCode\vppp\Debug\dering.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopfilter.obj
+\NEWZIP\ObjectCode\vppp\Debug\postproc.obj
+\NEWZIP\ObjectCode\vppp\Debug\scale.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblocker.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopf_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\scaleopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB36.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+simpledeblock_asm.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB37.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\..\ObjectCode\vputil\Release/" /Fd"..\..\..\..\ObjectCode\vputil\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB37.tmp"
+Creating temporary file "C:\tmp\RSPB38.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vputil.lib"
+\NEWZIP\ObjectCode\vputil\Release\fdct.obj
+\NEWZIP\ObjectCode\vputil\Release\idctpart.obj
+\NEWZIP\ObjectCode\vputil\Release\reconstruct.obj
+\NEWZIP\ObjectCode\vputil\Release\vputil.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\filtmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\filtwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxidct.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxrecon.obj
+\NEWZIP\ObjectCode\vputil\Release\uoptsystemdependant.obj
+\NEWZIP\ObjectCode\vputil\Release\vputilasm.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtidct.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB38.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB39.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\..\ObjectCode\vputil\Debug/vputil.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vputil\Debug/" /Fd"..\..\..\..\..\ObjectCode\vputil\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB39.tmp"
+Creating temporary file "C:\tmp\RSPB3A.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vputil.lib"
+\ObjectCode\vputil\Debug\fdct.obj
+\ObjectCode\vputil\Debug\idctpart.obj
+\ObjectCode\vputil\Debug\reconstruct.obj
+\ObjectCode\vputil\Debug\vputil.obj
+\ObjectCode\vputil\Debug\fdctmmx.obj
+\ObjectCode\vputil\Debug\fdctwmt.obj
+\ObjectCode\vputil\Debug\filtmmx.obj
+\ObjectCode\vputil\Debug\filtwmt.obj
+\ObjectCode\vputil\Debug\mmxidct.obj
+\ObjectCode\vputil\Debug\mmxrecon.obj
+\ObjectCode\vputil\Debug\uoptsystemdependant.obj
+\ObjectCode\vputil\Debug\vputilasm.obj
+\ObjectCode\vputil\Debug\wmtidct.obj
+\ObjectCode\vputil\Debug\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB3A.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB3B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPB3B.bat"
+Creating temporary file "C:\tmp\RSPB3C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPB3C.bat"
+Creating temporary file "C:\tmp\RSPB3D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPB3D.bat"
+Creating temporary file "C:\tmp\RSPB3E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPB3E.bat"
+Creating temporary file "C:\tmp\RSPB3F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPB3F.bat"
+Creating temporary file "C:\tmp\RSPB40.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPB40.bat"
+Creating temporary file "C:\tmp\RSPB41.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPB41.bat"
+Creating temporary file "C:\tmp\RSPB42.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPB42.bat"
+Creating temporary file "C:\tmp\RSPB43.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPB43.bat"
+Creating temporary file "C:\tmp\RSPB44.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPB44.bat"
+Creating temporary file "C:\tmp\RSPB45.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPB45.bat"
+Creating temporary file "C:\tmp\RSPB46.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPB46.bat"
+Creating temporary file "C:\tmp\RSPB47.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB47.tmp"
+Creating temporary file "C:\tmp\RSPB48.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB48.tmp"
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPB49.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vpxblit.lib"
+\ObjectCode\vpxblit\Release\ctables.obj
+\ObjectCode\vpxblit\Release\wksetblt.obj
+\ObjectCode\vpxblit\Release\bcf00_c.obj
+\ObjectCode\vpxblit\Release\bcf10_c.obj
+\ObjectCode\vpxblit\Release\bcs00_c.obj
+\ObjectCode\vpxblit\Release\bcs10_c.obj
+\ObjectCode\vpxblit\Release\bct00_c.obj
+\ObjectCode\vpxblit\Release\bct10_c.obj
+\ObjectCode\vpxblit\Release\bcu00_c.obj
+\ObjectCode\vpxblit\Release\bcy00_c.obj
+\ObjectCode\vpxblit\Release\vpx_reg.obj
+\ObjectCode\vpxblit\Release\bct00_targa_c.obj
+\ObjectCode\vpxblit\Release\bcc00.obj
+\ObjectCode\vpxblit\Release\bcc10.obj
+\ObjectCode\vpxblit\Release\bcd00.obj
+\ObjectCode\vpxblit\Release\bcf00.obj
+\ObjectCode\vpxblit\Release\bcf10.obj
+\ObjectCode\vpxblit\Release\bcs00.obj
+\ObjectCode\vpxblit\Release\bcs10.obj
+\ObjectCode\vpxblit\Release\bct00.obj
+\ObjectCode\vpxblit\Release\bct10.obj
+\ObjectCode\vpxblit\Release\bcu00.obj
+\ObjectCode\vpxblit\Release\bcy00.obj
+\ObjectCode\vpxblit\Release\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB49.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Generating Code...
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB4A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPB4A.bat"
+Creating temporary file "C:\tmp\RSPB4B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPB4B.bat"
+Creating temporary file "C:\tmp\RSPB4C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPB4C.bat"
+Creating temporary file "C:\tmp\RSPB4D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPB4D.bat"
+Creating temporary file "C:\tmp\RSPB4E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPB4E.bat"
+Creating temporary file "C:\tmp\RSPB4F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPB4F.bat"
+Creating temporary file "C:\tmp\RSPB50.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPB50.bat"
+Creating temporary file "C:\tmp\RSPB51.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPB51.bat"
+Creating temporary file "C:\tmp\RSPB52.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPB52.bat"
+Creating temporary file "C:\tmp\RSPB53.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPB53.bat"
+Creating temporary file "C:\tmp\RSPB54.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPB54.bat"
+Creating temporary file "C:\tmp\RSPB55.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPB55.bat"
+Creating temporary file "C:\tmp\RSPB56.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB56.tmp"
+Creating temporary file "C:\tmp\RSPB57.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB57.tmp"
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000. All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPB58.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpxblit.lib"
+\ObjectCode\vpxblit\Debug\ctables.obj
+\ObjectCode\vpxblit\Debug\wksetblt.obj
+\ObjectCode\vpxblit\Debug\bcf00_c.obj
+\ObjectCode\vpxblit\Debug\bcf10_c.obj
+\ObjectCode\vpxblit\Debug\bcs00_c.obj
+\ObjectCode\vpxblit\Debug\bcs10_c.obj
+\ObjectCode\vpxblit\Debug\bct00_c.obj
+\ObjectCode\vpxblit\Debug\bct10_c.obj
+\ObjectCode\vpxblit\Debug\bcu00_c.obj
+\ObjectCode\vpxblit\Debug\bcy00_c.obj
+\ObjectCode\vpxblit\Debug\vpx_reg.obj
+\ObjectCode\vpxblit\Debug\bct00_targa_c.obj
+\ObjectCode\vpxblit\Debug\bcc00.obj
+\ObjectCode\vpxblit\Debug\bcc10.obj
+\ObjectCode\vpxblit\Debug\bcd00.obj
+\ObjectCode\vpxblit\Debug\bcf00.obj
+\ObjectCode\vpxblit\Debug\bcf10.obj
+\ObjectCode\vpxblit\Debug\bcs00.obj
+\ObjectCode\vpxblit\Debug\bcs10.obj
+\ObjectCode\vpxblit\Debug\bct00.obj
+\ObjectCode\vpxblit\Debug\bct10.obj
+\ObjectCode\vpxblit\Debug\bcu00.obj
+\ObjectCode\vpxblit\Debug\bcy00.obj
+\ObjectCode\vpxblit\Debug\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB58.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+</pre>
+</body>
+</html>
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj
new file mode 100644
index 00000000..1eb11535
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <VCProjectVersion>17.0</VCProjectVersion>
+ <ProjectGuid>{C3547FC9-A6AC-4706-BED7-D696A8EF9EED}</ProjectGuid>
+ <RootNamespace>on2_mem</RootNamespace>
+ <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <PlatformToolset>v142</PlatformToolset>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <IncludePath>$(IncludePath)</IncludePath>
+ <LibraryPath>$(LibraryPath)</LibraryPath>
+ <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+ <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg">
+ <VcpkgEnableManifest>false</VcpkgEnableManifest>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ <VcpkgConfiguration>Debug</VcpkgConfiguration>
+ </PropertyGroup>
+ <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <VcpkgInstalledDir>
+ </VcpkgInstalledDir>
+ <VcpkgUseStatic>false</VcpkgUseStatic>
+ <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeader />
+ <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <Optimization>MaxSpeed</Optimization>
+ <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>None</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader />
+ <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+ <WarningLevel>Level3</WarningLevel>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <CompileAs>Default</CompileAs>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <Culture>0x0409</Culture>
+ </ResourceCompile>
+ <Lib>
+ <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+ <SuppressStartupBanner>true</SuppressStartupBanner>
+ <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+ </Lib>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\memory_manager\hmm_alloc.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_base.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_dflt_abort.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_grow.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_largest.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_resize.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_shrink.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_true.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\on2_mem.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ <ClCompile Include="..\..\on2_mem_tracker.c">
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+ <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+ <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\include\on2_mem.h" />
+ <ClInclude Include="..\..\include\on2_mem_tracker.h" />
+ <ClInclude Include="..\..\memory_manager\include\cavl_if.h" />
+ <ClInclude Include="..\..\memory_manager\include\cavl_impl.h" />
+ <ClInclude Include="..\..\memory_manager\include\heapmm.h" />
+ <ClInclude Include="..\..\memory_manager\include\hmm_cnfg.h" />
+ <ClInclude Include="..\..\memory_manager\include\hmm_intrnl.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters
new file mode 100644
index 00000000..517d62e8
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{9e1216e0-9418-47c8-8b65-d829ad4b69a5}</UniqueIdentifier>
+ <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+ </Filter>
+ <Filter Include="Source Files\memory_manager">
+ <UniqueIdentifier>{4c33a825-25eb-47cb-b841-87f800085fba}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{70389f5d-efd2-456e-a0e2-09499eb80192}</UniqueIdentifier>
+ <Extensions>h;hpp;hxx;hm;inl</Extensions>
+ </Filter>
+ <Filter Include="Header Files\memory_manager_hdrs">
+ <UniqueIdentifier>{7deb3717-4ad2-4cdc-8773-cc326f108428}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\on2_mem.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\on2_mem_tracker.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_alloc.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_base.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_dflt_abort.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_grow.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_largest.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_resize.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_shrink.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\memory_manager\hmm_true.c">
+ <Filter>Source Files\memory_manager</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\include\on2_mem.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\include\on2_mem_tracker.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\memory_manager\include\cavl_if.h">
+ <Filter>Header Files\memory_manager_hdrs</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\memory_manager\include\cavl_impl.h">
+ <Filter>Header Files\memory_manager_hdrs</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\memory_manager\include\heapmm.h">
+ <Filter>Header Files\memory_manager_hdrs</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\memory_manager\include\hmm_cnfg.h">
+ <Filter>Header Files\memory_manager_hdrs</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\memory_manager\include\hmm_intrnl.h">
+ <Filter>Header Files\memory_manager_hdrs</Filter>
+ </ClInclude>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h
new file mode 100644
index 00000000..6a63475c
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h
@@ -0,0 +1,108 @@
+#ifndef __ON2_MEM_H__
+#define __ON2_MEM_H__
+
+/* on2_mem version info */
+#define on2_mem_version "2.0.1.1"
+
+#define ON2_MEM_VERSION_CHIEF 2
+#define ON2_MEM_VERSION_MAJOR 0
+#define ON2_MEM_VERSION_MINOR 1
+#define ON2_MEM_VERSION_PATCH 1
+/* end - on2_mem version info */
+
+#define ON2_TRACK_MEM_USAGE 0 //enable memory tracking/integrity checks
+#define ON2_CHECK_MEM_FUNCTIONS 0 //enable basic safety checks in _memcpy,
+ //_memset, and _memmove
+#define REPLACE_BUILTIN_FUNCTIONS 0 //replace builtin functions with their
+ //on2_ equivalents
+
+#include <stddef.h>
+#include "on2_mem_tracker.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ on2_mem_get_version()
+ provided for runtime version checking. Returns an unsigned int of the form
+ CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high
+ order byte.
+*/
+unsigned int on2_mem_get_version();
+
+/*
+ on2_mem_set_heap_size(size_t size)
+ size - size in bytes for the memory manager to allocate for its heap
+ Sets the memory manager's initial heap size
+ Return:
+ 0: on success
+ -1: if memory manager calls have not been included in the on2_mem lib
+ -2: if the memory manager has been compiled to use static memory
+ -3: if the memory manager has already allocated its heap
+*/
+int on2_mem_set_heap_size(size_t size);
+
+void* on2_memalign(size_t align, size_t size);
+void* on2_malloc(size_t size);
+void* on2_calloc(size_t num, size_t size);
+void* on2_realloc(void* memblk, size_t size);
+void on2_free(void* memblk);
+
+void* on2_memcpy(void* dest, const void* src, size_t length);
+void* on2_memset(void* dest, int val, size_t length);
+void* on2_memmove(void* dest, const void* src, size_t count);
+
+/* some defines for backward compatibility */
+#define DMEM_GENERAL 0
+
+#define duck_memalign(X,Y,Z) on2_memalign(X,Y)
+#define duck_malloc(X,Y) on2_malloc(X)
+#define duck_calloc(X,Y,Z) on2_calloc(X,Y)
+#define duck_realloc on2_realloc
+#define duck_free on2_free
+#define duck_memcpy on2_memcpy
+#define duck_memmove on2_memmove
+#define duck_memset on2_memset
+
+#if REPLACE_BUILTIN_FUNCTIONS
+#define memalign on2_memalign
+#define malloc on2_malloc
+#define calloc on2_calloc
+#define realloc on2_realloc
+#define free on2_free
+#define memcpy on2_memcpy
+#define memmove on2_memmove
+#define memset on2_memset
+#endif
+
+#if ON2_TRACK_MEM_USAGE
+# ifndef __ON2_MEM_C__
+# define on2_memalign(align, size) xon2_memalign((align), (size), __FILE__, __LINE__)
+# define on2_malloc(size) xon2_malloc((size), __FILE__, __LINE__)
+# define on2_calloc(num, size) xon2_calloc(num, size, __FILE__, __LINE__)
+# define on2_realloc(addr, size) xon2_realloc(addr, size, __FILE__, __LINE__)
+# define on2_free(addr) xon2_free(addr, __FILE__, __LINE__)
+# endif
+
+ void* xon2_memalign(size_t align, size_t size, char* file, int line);
+ void* xon2_malloc(size_t size, char* file, int line);
+ void* xon2_calloc(size_t num, size_t size, char* file, int line);
+ void* xon2_realloc(void* memblk, size_t size, char* file, int line);
+ void xon2_free(void* memblk, char* file, int line);
+#endif
+
+#if !ON2_CHECK_MEM_FUNCTIONS
+# ifndef __ON2_MEM_C__
+# include <string.h>
+# define on2_memcpy memcpy
+# define on2_memset memset
+# define on2_memmove memmove
+# endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __ON2_MEM_H__ */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h
new file mode 100644
index 00000000..4c5e6774
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h
@@ -0,0 +1,126 @@
+#ifndef __ON2_MEM_TRACKER_H__
+#define __ON2_MEM_TRACKER_H__
+
+/* on2_mem_tracker version info */
+#define on2_mem_tracker_version "2.3.1.2"
+
+#define ON2_MEM_TRACKER_VERSION_CHIEF 2
+#define ON2_MEM_TRACKER_VERSION_MAJOR 3
+#define ON2_MEM_TRACKER_VERSION_MINOR 1
+#define ON2_MEM_TRACKER_VERSION_PATCH 2
+/* END - on2_mem_tracker version info */
+
+struct MemBlock
+{
+ size_t addr;
+ unsigned int size,
+ line;
+ char* file;
+ struct MemBlock* prev,
+ * next;
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ on2_MemoryTrackerInit(int padding_size, int pad_value)
+ padding_size - the size of the padding before and after each mem addr.
+ Values > 0 indicate that integrity checks can be performed
+ by inspecting these areas.
+ pad_value - the initial value within the padding area before and after
+ each mem addr.
+
+ Initializes the memory tracker interface. Should be called before any
+ other calls to the memory tracker.
+*/
+int on2_MemoryTrackerInit(int padding_size, int pad_value);
+
+/*
+ on2_MemoryTrackerDestroy()
+ Deinitializes the memory tracker interface
+*/
+void on2_MemoryTrackerDestroy();
+
+/*
+ on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+ char * file, unsigned int line)
+ addr - memory address to be added to list
+ size - size of addr
+ file - the file addr was referenced from
+ line - the line in file addr was referenced from
+ Adds memory address addr, its size, file and line it came from
+ to the memory tracker allocation table
+*/
+void on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+ char * file, unsigned int line);
+
+/*
+ on2_MemoryTrackerRemove(size_t addr)
+ addr - memory address to be removed
+ Removes the specified address from the memory tracker's allocation
+ table
+ Return:
+ 0: on success
+ -1: if memory allocation table's mutex could not be locked
+ -2: if the addr was not found in the list
+*/
+int on2_MemoryTrackerRemove(size_t addr);
+
+/*
+ on2_MemoryTrackerFind(size_t addr)
+ addr - address to be found in the memory tracker's
+ allocation table
+ Return:
+ If found, pointer to the memory block that matches addr
+ NULL otherwise
+*/
+struct MemBlock* on2_MemoryTrackerFind(size_t addr);
+
+/*
+ on2_MemoryTrackerDump()
+ Dumps the current contents of the memory
+ tracker allocation table
+*/
+void on2_MemoryTrackerDump();
+
+/*
+ on2_MemoryTrackerCheckIntegrity()
+ If a padding_size was provided to on2_MemoryTrackerInit()
+ This function will verify that the region before and after each
+ memory address contains the specified pad_value. Should the check
+ fail, the filename and line of the check will be printed out.
+*/
+void on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line);
+
+/*
+ on2_MemoryTrackerSetLogType
+ type - value representing the logging type to use
+ option - type specific option. This will be interpreted differently
+ based on the type.
+ Sets the logging type for the memory tracker.
+ Values currently supported:
+ 0: if option is NULL, log to stderr, otherwise interpret option as a
+ filename and attempt to open it.
+ -1: Use OutputDebugString (WIN32 only), option ignored
+ Return:
+ 0: on success
+ -1: if the logging type could not be set, because the value was invalid
+ or because a file could not be opened
+*/
+int on2_MemoryTrackerSetLogType(int type, char* option);
+
+#if !defined(__ON2_MEM_TRACKER_C__) && !defined(__ON2_MEM_C__)
+#if ON2_TRACK_MEM_USAGE
+#define on2_MemoryTrackerCheckIntegrity() on2_MemoryTrackerCheckIntegrity(__FILE__, __LINE__)
+#else
+#define on2_MemoryTrackerCheckIntegrity()
+#endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif //__ON2_MEM_TRACKER_H__
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c
new file mode 100644
index 00000000..502f4bd9
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c
@@ -0,0 +1,48 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void * U(alloc)(U(descriptor) *desc, U(size_aau) n)
+ {
+ #ifdef HMM_AUDIT_FAIL
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ #endif
+
+ if (desc->last_freed)
+ {
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(desc->last_freed)
+ #endif
+
+ U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+
+ desc->last_freed = 0; /* alloc_from_bin requires this to be null */
+ }
+
+ /* Add space for block header. */
+ n += HEAD_AAUS;
+
+ /* Convert n from number of address alignment units to block alignment
+ ** units. */
+ n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+
+ if (n < MIN_BLOCK_BAUS)
+ n = MIN_BLOCK_BAUS;
+
+ {
+ /* Search for the first node of the bin containing the smallest
+ ** block big enough to satisfy request. */
+ ptr_record *ptr_rec_ptr =
+ U(avl_search)(
+ (U(avl_avl) *) &(desc->avl_tree_root), (U(size_bau)) n,
+ AVL_GREATER_EQUAL);
+
+ /* If an appropriate bin is found, satisfy the allocation request,
+ ** otherwise return null pointer. */
+ return(ptr_rec_ptr ?
+ U(alloc_from_bin)(desc, ptr_rec_ptr, (U(size_bau)) n) : 0);
+ }
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c
new file mode 100644
index 00000000..ea2a5463
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c
@@ -0,0 +1,418 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void U(init)(U(descriptor) *desc)
+ {
+ desc->avl_tree_root = 0;
+ desc->last_freed = 0;
+ }
+
+/* Remove a free block from a bin's doubly-linked list when it is not
+** the first block in the bin.
+*/
+void U(dll_remove)(
+ /* Pointer to pointer record in the block to be removed. */
+ ptr_record *to_remove)
+ {
+ to_remove->prev->next = to_remove->next;
+ if (to_remove->next)
+ to_remove->next->prev = to_remove->prev;
+ }
+
+/* Put a block into the free collection of a heap.
+*/
+void U(into_free_collection)(
+ /* Pointer to heap descriptor. */
+ U(descriptor) *desc,
+ /* Pointer to _head record of block. */
+ head_record *head_ptr)
+ {
+ ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+
+ ptr_record *bin_front_ptr =
+ U(avl_insert)((U(avl_avl) *) &(desc->avl_tree_root), ptr_rec_ptr);
+
+ if (bin_front_ptr != ptr_rec_ptr)
+ {
+ /* The block was not inserted into the AVL tree because there is
+ ** already a bin for the size of the block. */
+
+ MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(head_ptr)
+ ptr_rec_ptr->self = ptr_rec_ptr;
+
+ /* Make the block the new second block in the bin's doubly-linked
+ ** list. */
+ ptr_rec_ptr->prev = bin_front_ptr;
+ ptr_rec_ptr->next = bin_front_ptr->next;
+ bin_front_ptr->next = ptr_rec_ptr;
+ if (ptr_rec_ptr->next)
+ ptr_rec_ptr->next->prev = ptr_rec_ptr;
+ }
+ else
+ /* Block is first block in new bin. */
+ ptr_rec_ptr->next = 0;
+ }
+
+/* Allocate a block from a given bin. Returns a pointer to the payload
+** of the removed block. The "last freed" pointer must be null prior
+** to calling this function.
+*/
+void * U(alloc_from_bin)(
+ /* Pointer to heap descriptor. */
+ U(descriptor) *desc,
+ /* Pointer to pointer record of first block in bin. */
+ ptr_record *bin_front_ptr,
+ /* Number of BAUs needed in the allocated block. If the block taken
+ ** from the bin is significantly larger than the number of BAUs needed,
+ ** the "extra" BAUs are split off to form a new free block. */
+ U(size_bau) n_baus)
+ {
+ head_record *head_ptr;
+ U(size_bau) rem_baus;
+
+ if (bin_front_ptr->next)
+ {
+ /* There are multiple blocks in this bin. Use the 2nd block in
+ ** the bin to avoid needless change to the AVL tree.
+ */
+
+ ptr_record *ptr_rec_ptr = bin_front_ptr->next;
+ head_ptr = PTR_REC_TO_HEAD(ptr_rec_ptr);
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(head_ptr)
+ #endif
+
+ U(dll_remove)(ptr_rec_ptr);
+ }
+ else
+ {
+ /* There is only one block in the bin, so it has to be removed
+ ** from the AVL tree.
+ */
+
+ head_ptr = PTR_REC_TO_HEAD(bin_front_ptr);
+
+ U(avl_remove)(
+ (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+ }
+
+ MARK_BLOCK_ALLOCATED(head_ptr)
+
+ rem_baus = BLOCK_BAUS(head_ptr) - n_baus;
+
+ if (rem_baus >= MIN_BLOCK_BAUS)
+ {
+ /* Since there are enough "extra" BAUs, split them off to form
+ ** a new free block.
+ */
+
+ head_record *rem_head_ptr =
+ (head_record *) BAUS_FORWARD(head_ptr, n_baus);
+
+ /* Change the next block's header to reflect the fact that the
+ ** block preceding it is now smaller.
+ */
+ SET_PREV_BLOCK_BAUS(
+ BAUS_FORWARD(head_ptr, head_ptr->block_size), rem_baus)
+
+ head_ptr->block_size = n_baus;
+
+ rem_head_ptr->previous_block_size = n_baus;
+ rem_head_ptr->block_size = rem_baus;
+
+ desc->last_freed = rem_head_ptr;
+ }
+
+ return(HEAD_TO_PTR_REC(head_ptr));
+ }
+
+/* Take a block out of the free collection.
+*/
+void U(out_of_free_collection)(
+ /* Descriptor of heap that block is in. */
+ U(descriptor) *desc,
+ /* Pointer to _head of block to take out of free collection. */
+ head_record *head_ptr)
+ {
+ ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+
+ if (ptr_rec_ptr->self == ptr_rec_ptr)
+ /* Block is not the front block in its bin, so all we have to
+ ** do is take it out of the bin's doubly-linked list. */
+ U(dll_remove)(ptr_rec_ptr);
+ else
+ {
+ ptr_record *next = ptr_rec_ptr->next;
+
+ if (next)
+ /* Block is the front block in its bin, and there is at least
+ ** one other block in the bin. Substitute the next block for
+ ** the front block. */
+ U(avl_subst)((U(avl_avl) *) &(desc->avl_tree_root), next);
+ else
+ /* Block is the front block in its bin, but there is no other
+ ** block in the bin. Eliminate the bin. */
+ U(avl_remove)(
+ (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+ }
+ }
+
+void U(free)(U(descriptor) *desc, void *payload_ptr)
+ {
+ /* Flags if coalesce with adjacent block. */
+ int coalesce;
+
+ head_record *fwd_head_ptr;
+ head_record *free_head_ptr = PTR_REC_TO_HEAD(payload_ptr);
+
+ desc->num_baus_can_shrink = 0;
+
+ #ifdef HMM_AUDIT_FAIL
+
+ AUDIT_BLOCK(free_head_ptr)
+
+ /* Make sure not freeing an already free block. */
+ if (!IS_BLOCK_ALLOCATED(free_head_ptr))
+ HMM_AUDIT_FAIL
+
+ if (desc->avl_tree_root)
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+
+ #endif
+
+ fwd_head_ptr =
+ (head_record *) BAUS_FORWARD(free_head_ptr, free_head_ptr->block_size);
+
+ if (free_head_ptr->previous_block_size)
+ {
+ /* Coalesce with backward block if possible. */
+
+ head_record *bkwd_head_ptr =
+ (head_record *) BAUS_BACKWARD(
+ free_head_ptr, free_head_ptr->previous_block_size);
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(bkwd_head_ptr)
+ #endif
+
+ if (bkwd_head_ptr == (head_record *) (desc->last_freed))
+ {
+ desc->last_freed = 0;
+ coalesce = 1;
+ }
+ else if (IS_BLOCK_ALLOCATED(bkwd_head_ptr))
+ coalesce = 0;
+ else
+ {
+ U(out_of_free_collection)(desc, bkwd_head_ptr);
+ coalesce = 1;
+ }
+
+ if (coalesce)
+ {
+ bkwd_head_ptr->block_size += free_head_ptr->block_size;
+ SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(bkwd_head_ptr))
+ free_head_ptr = bkwd_head_ptr;
+ }
+ }
+
+ if (fwd_head_ptr->block_size == 0)
+ {
+ /* Block to be freed is last block before dummy end-of-chunk block. */
+ desc->end_of_shrinkable_chunk =
+ BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+ desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
+ if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
+ /* Free block is the entire chunk, so shrinking can eliminate
+ ** entire chunk including dummy end block. */
+ desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+ }
+ else
+ {
+ /* Coalesce with forward block if possible. */
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(fwd_head_ptr)
+ #endif
+
+ if (fwd_head_ptr == (head_record *) (desc->last_freed))
+ {
+ desc->last_freed = 0;
+ coalesce = 1;
+ }
+ else if (IS_BLOCK_ALLOCATED(fwd_head_ptr))
+ coalesce = 0;
+ else
+ {
+ U(out_of_free_collection)(desc, fwd_head_ptr);
+ coalesce = 1;
+ }
+
+ if (coalesce)
+ {
+ free_head_ptr->block_size += fwd_head_ptr->block_size;
+
+ fwd_head_ptr =
+ (head_record *) BAUS_FORWARD(
+ fwd_head_ptr, BLOCK_BAUS(fwd_head_ptr));
+
+ SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(free_head_ptr))
+
+ if (fwd_head_ptr->block_size == 0)
+ {
+ /* Coalesced block to be freed is last block before dummy
+ ** end-of-chunk block. */
+ desc->end_of_shrinkable_chunk =
+ BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+ desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
+ if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
+ /* Free block is the entire chunk, so shrinking can
+ ** eliminate entire chunk including dummy end block. */
+ desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+ }
+ }
+ }
+
+ if (desc->last_freed)
+ {
+ /* There is a last freed block, but it is not adjacent to the
+ ** block being freed by this call to free, so put the last
+ ** freed block into the free collection.
+ */
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(desc->last_freed)
+ #endif
+
+ U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+ }
+
+ desc->last_freed = free_head_ptr;
+ }
+
+void U(new_chunk)(U(descriptor) *desc, void *start, U(size_bau) n_baus)
+ {
+ #ifdef HMM_AUDIT_FAIL
+ if (desc->avl_tree_root)
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ #endif
+
+ #undef HEAD_PTR
+ #define HEAD_PTR ((head_record *) start)
+
+ /* Make the chunk one big free block followed by a dummy end block.
+ */
+
+ n_baus -= DUMMY_END_BLOCK_BAUS;
+
+ HEAD_PTR->previous_block_size = 0;
+ HEAD_PTR->block_size = n_baus;
+
+ U(into_free_collection)(desc, HEAD_PTR);
+
+ /* Set up the dummy end block. */
+ start = BAUS_FORWARD(start, n_baus);
+ HEAD_PTR->previous_block_size = n_baus;
+ HEAD_PTR->block_size = 0;
+
+ #undef HEAD_PTR
+ }
+
+#ifdef HMM_AUDIT_FAIL
+
+/* Function that does audit fail actions defined by preprocessor symbol,
+** and returns a dummy integer value.
+*/
+int U(audit_block_fail_dummy_return)(void)
+ {
+ HMM_AUDIT_FAIL
+
+ /* Dummy return. */
+ return(0);
+ }
+
+#endif
+
+/* AVL Tree instantiation. */
+
+#ifdef HMM_AUDIT_FAIL
+
+/* The AVL tree generic package passes an ACCESS of 1 when it "touches"
+** a child node for the first time during a particular operation. I use
+** this feature to audit only one time (per operation) the free blocks
+** that are tree nodes. Since the root node is not a child node, it has
+** to be audited directly.
+*/
+
+/* The pain you feel while reading these macros will not be in vain. It
+** will remove all doubt from your mind that C++ inline functions are
+** a very good thing.
+*/
+
+#define AVL_GET_LESS(H, ACCESS) \
+ (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->self)
+#define AVL_GET_GREATER(H, ACCESS) \
+ (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->prev)
+
+#else
+
+#define AVL_GET_LESS(H, ACCESS) ((H)->self)
+#define AVL_GET_GREATER(H, ACCESS) ((H)->prev)
+
+#endif
+
+#define AVL_SET_LESS(H, LH) (H)->self = (LH);
+#define AVL_SET_GREATER(H, GH) (H)->prev = (GH);
+
+/* high bit of high bit of
+** block_size previous_block_size balance factor
+** ----------- ------------------- --------------
+** 0 0 n/a (block allocated)
+** 0 1 1
+** 1 0 -1
+** 1 1 0
+*/
+
+#define AVL_GET_BALANCE_FACTOR(H) \
+ ((((head_record *) (PTR_REC_TO_HEAD(H)))->block_size & \
+ HIGH_BIT_BAU_SIZE) ? \
+ (((head_record *) (PTR_REC_TO_HEAD(H)))->previous_block_size & \
+ HIGH_BIT_BAU_SIZE ? 0 : -1) : 1)
+
+#define AVL_SET_BALANCE_FACTOR(H, BF) \
+ { \
+ register head_record *p = \
+ (head_record *) PTR_REC_TO_HEAD(H); \
+ register int bal_f = (BF); \
+ \
+ if (bal_f <= 0) \
+ p->block_size |= HIGH_BIT_BAU_SIZE; \
+ else \
+ p->block_size &= ~HIGH_BIT_BAU_SIZE; \
+ if (bal_f >= 0) \
+ p->previous_block_size |= HIGH_BIT_BAU_SIZE; \
+ else \
+ p->previous_block_size &= ~HIGH_BIT_BAU_SIZE; \
+ }
+
+#define COMPARE_KEY_KEY(K1, K2) ((K1) == (K2) ? 0 : ((K1) > (K2) ? 1 : -1))
+
+#define AVL_COMPARE_KEY_NODE(K, H) \
+ COMPARE_KEY_KEY(K, BLOCK_BAUS(PTR_REC_TO_HEAD(H)))
+
+#define AVL_COMPARE_NODE_NODE(H1, H2) \
+ COMPARE_KEY_KEY(BLOCK_BAUS(PTR_REC_TO_HEAD(H1)), \
+ BLOCK_BAUS(PTR_REC_TO_HEAD(H2)))
+
+#define AVL_NULL ((ptr_record *) 0)
+
+#define AVL_IMPL_MASK \
+ ( AVL_IMPL_INSERT | AVL_IMPL_SEARCH | AVL_IMPL_REMOVE | AVL_IMPL_SUBST )
+
+#include "cavl_impl.h"
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c
new file mode 100644
index 00000000..0bbee582
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c
@@ -0,0 +1,43 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+/* The function in this file performs default actions if self-auditing
+** finds heap corruption. Don't rely on my feeble attempt to handle the
+** case where HMM is being used to implement the malloc and free standard
+** library functions. Rewrite the function if necessary to avoid using
+** I/O and execution termination functions that call malloc or free.
+** In Unix, for example, you would replace the fputs calls with calls
+** to the write system call using file handle number 2.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static int entered = 0;
+
+/* Print abort message, file and line to stderr, then spin forever in
+** while(1); this function never returns. */
+void HMM_dflt_abort(const char *file, const char *line)
+ {
+ /* Avoid use of printf(), which is more likely to use heap. */
+
+ if (entered)
+ /* The standard I/O functions called a heap function and caused
+ ** an indirect recursive call to this function. So we'll have
+ ** to just hang here without printing a message. */
+ while(1);
+
+ entered = 1;
+
+ fputs("\nABORT - Heap corruption\n" "File: ", stderr);
+ fputs(file, stderr);
+ fputs(" Line: ", stderr);
+ fputs(line, stderr);
+ fputs("\n\n", stderr);
+ fputs( "HMM_dflt_abort: while(1)!!!\n", stderr );
+ fflush(stderr);
+
+ while(1);
+ }
+
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c
new file mode 100644
index 00000000..b90212dc
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c
@@ -0,0 +1,39 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void U(grow_chunk)(U(descriptor) *desc, void *end, U(size_bau) n_baus)
+ {
+ #undef HEAD_PTR
+ #define HEAD_PTR ((head_record *) end)
+
+ end = BAUS_BACKWARD(end, DUMMY_END_BLOCK_BAUS); /* step back onto the old dummy end block */
+
+ #ifdef HMM_AUDIT_FAIL
+
+ if (HEAD_PTR->block_size != 0)
+ /* Chunk does not have valid dummy end block. */
+ HMM_AUDIT_FAIL
+
+ #endif
+
+ /* Create a new block that absorbs the old dummy end block. */
+ HEAD_PTR->block_size = n_baus;
+
+ /* Set up the new dummy end block. */
+ {
+ head_record *dummy = (head_record *) BAUS_FORWARD(end, n_baus);
+ dummy->previous_block_size = n_baus;
+ dummy->block_size = 0;
+ }
+
+ /* Simply free the new block, allowing it to coalesce with any
+ ** free block that was the last block in the chunk prior to
+ ** growth.
+ */
+ U(free)(desc, HEAD_TO_PTR_REC(end));
+
+ #undef HEAD_PTR
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c
new file mode 100644
index 00000000..60b0cf7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c
@@ -0,0 +1,49 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+U(size_aau) U(largest_available)(U(descriptor) *desc)
+ {
+ U(size_bau) largest;
+
+ if (!(desc->avl_tree_root))
+ largest = 0;
+ else
+ {
+ #ifdef HMM_AUDIT_FAIL
+ /* Audit root block in AVL tree. */
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ #endif
+
+ largest = /* size of the bin found by an AVL_LESS search on the all-ones key */
+ BLOCK_BAUS(
+ PTR_REC_TO_HEAD(
+ U(avl_search)(
+ (U(avl_avl) *) &(desc->avl_tree_root),
+ (U(size_bau)) ~ (U(size_bau)) 0, AVL_LESS)));
+ }
+
+ if (desc->last_freed)
+ {
+ /* Size of last freed block. */
+ register U(size_bau) lf_size;
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(desc->last_freed)
+ #endif
+
+ lf_size = BLOCK_BAUS(desc->last_freed);
+
+ if (lf_size > largest)
+ largest = lf_size;
+ }
+
+ /* Convert largest size to AAUs and subtract _head size leaving payload
+ ** size.
+ */
+ return(largest ?
+ ((largest * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) - HEAD_AAUS) :
+ 0);
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c
new file mode 100644
index 00000000..c17d15a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c
@@ -0,0 +1,107 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+int U(resize)(U(descriptor) *desc, void *mem, U(size_aau) n)
+ {
+ U(size_aau) i;
+ head_record *next_head_ptr;
+ head_record *head_ptr = PTR_REC_TO_HEAD(mem);
+
+ /* Flag: nonzero when the block following mem is free (absorbable). */
+ int next_block_free;
+
+ /* Convert n from desired payload size in AAUs to block size in BAUs. */
+ n += HEAD_AAUS;
+ n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+ if (n < MIN_BLOCK_BAUS)
+ n = MIN_BLOCK_BAUS;
+
+ #ifdef HMM_AUDIT_FAIL
+
+ AUDIT_BLOCK(head_ptr)
+
+ if (!IS_BLOCK_ALLOCATED(head_ptr))
+ HMM_AUDIT_FAIL
+
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+
+ #endif
+
+ i = head_ptr->block_size;
+
+ next_head_ptr =
+ (head_record *) BAUS_FORWARD(head_ptr, head_ptr->block_size);
+
+ next_block_free =
+ (next_head_ptr == desc->last_freed) ||
+ !IS_BLOCK_ALLOCATED(next_head_ptr);
+
+ if (next_block_free)
+ /* Block can expand into next free block. */
+ i += BLOCK_BAUS(next_head_ptr);
+
+ if (n > i)
+ /* Not enough room for block to expand. */
+ return(-1);
+
+ if (next_block_free)
+ {
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(next_head_ptr)
+ #endif
+
+ if (next_head_ptr == desc->last_freed)
+ desc->last_freed = 0;
+ else
+ U(out_of_free_collection)(desc, next_head_ptr);
+
+ next_head_ptr =
+ (head_record *) BAUS_FORWARD(head_ptr, (U(size_bau)) i);
+ }
+
+ /* Set i to number of "extra" BAUs. */
+ i -= n;
+
+ if (i < MIN_BLOCK_BAUS)
+ /* Not enough extra BAUs to be a block on their own, so just keep them
+ ** in the block being resized.
+ */
+ {
+ n += i;
+ i = n;
+ }
+ else
+ {
+ /* There are enough "leftover" BAUs in the next block to
+ ** form a remainder block. */
+
+ head_record *rem_head_ptr;
+
+ rem_head_ptr = (head_record *) BAUS_FORWARD(head_ptr, n);
+
+ rem_head_ptr->previous_block_size = (U(size_bau)) n;
+ rem_head_ptr->block_size = (U(size_bau)) i;
+
+ if (desc->last_freed)
+ {
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(desc->last_freed)
+ #endif
+
+ U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+
+ desc->last_freed = 0; /* NOTE(review): redundant, overwritten just below */
+ }
+
+ desc->last_freed = rem_head_ptr;
+ }
+
+ head_ptr->block_size = (U(size_bau)) n;
+ next_head_ptr->previous_block_size = (U(size_bau)) i;
+
+ return(0);
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c
new file mode 100644
index 00000000..651bac66
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c
@@ -0,0 +1,96 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void U(shrink_chunk)(U(descriptor) *desc, U(size_bau) n_baus_to_shrink)
+ {
+ head_record *dummy_end_block = (head_record *)
+ BAUS_BACKWARD(desc->end_of_shrinkable_chunk, DUMMY_END_BLOCK_BAUS);
+
+ #ifdef HMM_AUDIT_FAIL
+
+ if (dummy_end_block->block_size != 0)
+ /* Chunk does not have valid dummy end block. */
+ HMM_AUDIT_FAIL
+
+ #endif
+
+ if (n_baus_to_shrink)
+ {
+ head_record *last_block = (head_record *)
+ BAUS_BACKWARD(
+ dummy_end_block, dummy_end_block->previous_block_size);
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(last_block)
+ #endif
+
+ if (last_block == desc->last_freed)
+ {
+ U(size_bau) bs = BLOCK_BAUS(last_block);
+
+ /* Chunk will not be shrunk out of existence if
+ ** 1. There is at least one allocated block in the chunk
+ ** and the amount to shrink is exactly the size of the
+ ** last block, OR
+ ** 2. After the last block is shrunk, there will be enough
+ ** BAUs left in it to form a minimal size block. */
+ int chunk_will_survive =
+ (PREV_BLOCK_BAUS(last_block) && (n_baus_to_shrink == bs)) ||
+ (n_baus_to_shrink <= (U(size_bau)) (bs - MIN_BLOCK_BAUS));
+
+ if (chunk_will_survive ||
+ (!PREV_BLOCK_BAUS(last_block) &&
+ (n_baus_to_shrink ==
+ (U(size_bau)) (bs + DUMMY_END_BLOCK_BAUS))))
+ {
+ desc->last_freed = 0;
+
+ if (chunk_will_survive)
+ {
+ bs -= n_baus_to_shrink;
+ if (bs)
+ {
+ /* The last (non-dummy) block was not completely
+ ** eliminated by the shrink. */
+
+ last_block->block_size = bs;
+
+ /* Create new dummy end record.
+ */
+ dummy_end_block =
+ (head_record *) BAUS_FORWARD(last_block, bs);
+ dummy_end_block->previous_block_size = bs;
+ dummy_end_block->block_size = 0;
+
+ #ifdef HMM_AUDIT_FAIL
+ if (desc->avl_tree_root)
+ AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+ #endif
+
+ U(into_free_collection)(desc, last_block);
+ }
+ else
+ {
+ /* The last (non-dummy) block was completely
+ ** eliminated by the shrink. Make its _head
+ ** the new dummy end block.
+ */
+ last_block->block_size = 0;
+ last_block->previous_block_size &= ~HIGH_BIT_BAU_SIZE;
+ }
+ }
+ }
+ #ifdef HMM_AUDIT_FAIL
+ else
+ HMM_AUDIT_FAIL
+ #endif
+ }
+ #ifdef HMM_AUDIT_FAIL
+ else
+ HMM_AUDIT_FAIL
+ #endif
+ }
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c
new file mode 100644
index 00000000..7d057a49
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c
@@ -0,0 +1,21 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+U(size_aau) U(true_size)(void *payload_ptr)
+ {
+ /* Head record of the block whose payload was passed in. */
+ head_record *block_head = PTR_REC_TO_HEAD(payload_ptr);
+
+ #ifdef HMM_AUDIT_FAIL
+ AUDIT_BLOCK(block_head)
+ #endif
+
+ /* The block size is held in BAUs. Scale it to AAUs, then take off the
+ ** AAUs used by the head so that only the payload is counted.
+ */
+ return(
+ (BLOCK_BAUS(block_head) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) -
+ HEAD_AAUS);
+ }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h
new file mode 100644
index 00000000..a2df0830
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h
@@ -0,0 +1,216 @@
+/* Abstract AVL Tree Generic C Package.
+** Interface generation header file.
+**
+** This code is in the public domain. See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5 Author: Walt Karas
+*/
+
+/* This header contains the definition of CHAR_BIT (number of bits in a
+** char). */
+#include <limits.h>
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+
+#ifndef AVL_SEARCH_TYPE_DEFINED_
+#define AVL_SEARCH_TYPE_DEFINED_
+
+typedef enum /* Bit flags telling a keyed search (or iterator start) which
+** node it may settle on, relative to the key supplied by the caller. */
+ {
+ AVL_EQUAL = 1, /* Accept a node whose key compares equal. */
+ AVL_LESS = 2, /* Accept the closest node with a lesser key. */
+ AVL_GREATER = 4, /* Accept the closest node with a greater key. */
+ AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS, /* Equal if present, else closest lesser. */
+ AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER /* Equal if present, else closest greater. */
+ }
+avl_search_type;
+
+#endif
+
+#ifdef AVL_UNIQUE /* L__(X) decorates every generated name X. */
+
+#define L__ AVL_UNIQUE /* Use the decorating macro given by the instantiation. */
+
+#else
+
+#define L__(X) X /* No decoration: names are used exactly as written. */
+
+#endif
+
+/* Determine storage class for function prototypes. */
+#ifdef AVL_PRIVATE
+
+#define L__SC static /* Functions are private to the including file. */
+
+#else
+
+#define L__SC extern /* Functions are defined in some other file. */
+
+#endif
+
+#ifdef AVL_SIZE /* L__SIZE is the type of the node count given to build(). */
+
+#define L__SIZE AVL_SIZE
+
+#else
+
+#define L__SIZE unsigned long /* Default when AVL_SIZE is not supplied. */
+
+#endif
+
+typedef struct /* AVL tree descriptor. */
+ {
+ #ifdef AVL_INSIDE_STRUCT
+
+ AVL_INSIDE_STRUCT /* Extra members supplied by the instantiation. */
+
+ #endif
+
+ AVL_HANDLE root; /* Handle of the root node. The implementation stores
+ ** AVL_NULL here while the tree is empty. */
+ }
+L__(avl);
+
+/* Function prototypes. Functions returning AVL_HANDLE return AVL_NULL
+** when a read error is detected (only possible when the instantiation
+** defines AVL_READ_ERRORS_HAPPEN). */
+
+L__SC void L__(init)(L__(avl) *tree); /* Makes the tree empty. */
+
+L__SC int L__(is_empty)(L__(avl) *tree); /* Nonzero if the tree has no nodes. */
+
+L__SC AVL_HANDLE L__(insert)(L__(avl) *tree, AVL_HANDLE h); /* Inserts node h.
+** Returns h, or the handle of the node already holding an equal key (in
+** which case h is not inserted). */
+
+L__SC AVL_HANDLE L__(search)(L__(avl) *tree, AVL_KEY k, avl_search_type st); /*
+** Returns the node selected by key k and search type st, or AVL_NULL if
+** no node qualifies. */
+
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *tree); /* Node reached by always
+** taking the "less" link from the root; AVL_NULL for an empty tree. */
+
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *tree); /* Node reached by always
+** taking the "greater" link from the root; AVL_NULL for an empty tree. */
+
+L__SC AVL_HANDLE L__(remove)(L__(avl) *tree, AVL_KEY k); /* Unlinks the node
+** with key k and returns its handle; AVL_NULL if there is no such node. */
+
+L__SC AVL_HANDLE L__(subst)(L__(avl) *tree, AVL_HANDLE new_node); /* Puts
+** new_node in the place of the node with an equal key and returns the
+** handle of the node replaced; AVL_NULL if no node has that key. */
+
+#ifdef AVL_BUILD_ITER_TYPE
+
+L__SC int L__(build)(
+ L__(avl) *tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes); /* Discards the
+** current tree contents and builds a tree from num_nodes nodes read in
+** sequence through p (presumably in ascending key order -- confirm against
+** cavl_tree.html). Returns 1 on success, 0 on a read error. */
+
+#endif
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits. Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+**
+** The estimate is worked out from LONG_MAX because sizeof cannot be
+** evaluated inside a #if expression. Each test below asks whether
+** LONG_MAX still has a nonzero bit 8 positions further up.
+*/
+
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+
+/* Number of bits in a long. Not usable in #if because of the sizeof. */
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+/* The macro L__BIT_ARR_DEFN defines a bit array whose index is a (0-based)
+** node depth. The definition depends on whether the maximum depth is more
+** or less than the number of bits in a single long. The expansion ends
+** with a semicolon, so the macro is used without one.
+*/
+
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* Maximum depth may be more than number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) \
+ unsigned long NAME[((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT];
+
+#else
+
+/* Maximum depth is definitely less than number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+
+#endif
+
+/* Iterator structure. An iterator records the path from the root of a
+** tree down to its current node. */
+typedef struct
+ {
+ /* Tree being iterated over. */
+ L__(avl) *tree_;
+
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L__BIT_ARR_DEFN(branch)
+
+ /* Zero-based depth of path into tree. The implementation stores ~0
+ ** here to mark the iterator as invalid (not at any node). */
+ unsigned depth;
+
+ /* Handles of the nodes on the path below the root. The root itself is
+ ** not stored (it is read from the tree), so path_h[n - 1] holds the
+ ** node at depth n and the current node is path_h[depth - 1] whenever
+ ** depth is nonzero. */
+ AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
+ }
+L__(iter);
+
+/* Iterator function prototypes. An iterator that is not positioned at
+** any node is called invalid below. */
+
+L__SC void L__(start_iter)(
+ L__(avl) *tree, L__(iter) *iter, AVL_KEY k, avl_search_type st); /* Positions
+** iter at the node that a search for k with search type st would select;
+** iter is left invalid if there is no such node. */
+
+L__SC void L__(start_iter_least)(L__(avl) *tree, L__(iter) *iter); /* Positions
+** iter at the least node; invalid if the tree is empty. */
+
+L__SC void L__(start_iter_greatest)(L__(avl) *tree, L__(iter) *iter); /*
+** Positions iter at the greatest node; invalid if the tree is empty. */
+
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter); /* Handle of the current
+** node, or AVL_NULL if iter is invalid. */
+
+L__SC void L__(incr_iter)(L__(iter) *iter); /* Moves to the next node in
+** ascending order; iter becomes invalid after the last node. */
+
+L__SC void L__(decr_iter)(L__(iter) *iter); /* Moves to the next node in
+** descending order; iter becomes invalid after the first node. */
+
+L__SC void L__(init_iter)(L__(iter) *iter); /* Marks iter as invalid. */
+
+#define AVL_IMPL_INIT 1 /* One bit per function. cavl_impl.h only emits a
+** function when its bit is set in AVL_IMPL_MASK. */
+#define AVL_IMPL_IS_EMPTY (1 << 1)
+#define AVL_IMPL_INSERT (1 << 2)
+#define AVL_IMPL_SEARCH (1 << 3)
+#define AVL_IMPL_SEARCH_LEAST (1 << 4)
+#define AVL_IMPL_SEARCH_GREATEST (1 << 5)
+#define AVL_IMPL_REMOVE (1 << 6)
+#define AVL_IMPL_BUILD (1 << 7)
+#define AVL_IMPL_START_ITER (1 << 8)
+#define AVL_IMPL_START_ITER_LEAST (1 << 9)
+#define AVL_IMPL_START_ITER_GREATEST (1 << 10)
+#define AVL_IMPL_GET_ITER (1 << 11)
+#define AVL_IMPL_INCR_ITER (1 << 12)
+#define AVL_IMPL_DECR_ITER (1 << 13)
+#define AVL_IMPL_INIT_ITER (1 << 14)
+#define AVL_IMPL_SUBST (1 << 15)
+
+#define AVL_IMPL_ALL (~0) /* Every bit set: emit all functions. */
+
+#undef L__ /* Drop this header's helper macros again. The AVL_IMPL_ masks
+** above are deliberately left defined. */
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h
new file mode 100644
index 00000000..21242da9
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h
@@ -0,0 +1,1181 @@
+/* Abstract AVL Tree Generic C Package.
+** Implementation generation header file.
+**
+** This code is in the public domain. See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5 Author: Walt Karas
+*/
+
+/* Start from a clean slate: discard any helper macro that might still be
+** defined from an earlier use of this header or of cavl_if.h. The list
+** names every L__ macro that this file defines.
+*/
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__tree
+#undef L__MASK_HIGH_BIT
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+#undef L__BIT_ARR_LONGS
+#undef L__IMPL_MASK
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__SC
+#undef L__BALANCE_PARAM_CALL_PREFIX
+#undef L__BALANCE_PARAM_DECL_PREFIX
+
+#ifdef AVL_UNIQUE /* L__(X) decorates every generated name X. */
+
+#define L__ AVL_UNIQUE /* Use the decorating macro given by the instantiation. */
+
+#else
+
+#define L__(X) X /* No decoration: names are used exactly as written. */
+
+#endif
+
+/* Determine correct storage class for functions */
+#ifdef AVL_PRIVATE
+
+#define L__SC static /* Functions are private to the including file. */
+
+#else
+
+#define L__SC /* Empty: the definitions get external linkage. */
+
+#endif
+
+#ifdef AVL_SIZE /* L__SIZE is the type of the node count given to build(). */
+
+#define L__SIZE AVL_SIZE
+
+#else
+
+#define L__SIZE unsigned long /* Default when AVL_SIZE is not supplied. */
+
+#endif
+
+#define L__MASK_HIGH_BIT ((int) ~ ((~ (unsigned) 0) >> 1)) /* Only the top bit set. Used to test whether two comparison results agree in sign. */
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits. Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+**
+** The estimate is worked out from LONG_MAX because sizeof cannot be
+** evaluated inside a #if expression.
+*/
+
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+
+/* Exact number of bits in a long (not usable in #if). */
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+/* Bit array helpers. A bit array is indexed by 0-based node depth:
+**
+** L__BIT_ARR_DEFN(NAME) declares a bit array called NAME.
+** L__BIT_ARR_VAL(ARR, N) is nonzero if bit N is set.
+** L__BIT_ARR_0(ARR, N) clears bit N.
+** L__BIT_ARR_1(ARR, N) sets bit N.
+** L__BIT_ARR_ALL(ARR, V) sets every bit to V (V must be 0 or 1).
+**
+** All but L__BIT_ARR_VAL expand to complete statements and are used
+** without a trailing semicolon.
+**
+** The single-bit masks are built from 1UL rather than 1L. The storage is
+** unsigned long, and left-shifting a signed 1 into the top bit of a long
+** is undefined behaviour; with 1UL every bit position is well defined.
+*/
+
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* The maximum depth may be greater than the number of bits in a long,
+** so multiple longs are needed to hold a bit array indexed by node
+** depth. */
+
+#define L__BIT_ARR_LONGS (((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT)
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME[L__BIT_ARR_LONGS];
+
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) \
+ ((BIT_ARR)[(BIT_NUM) / L__LONG_BIT] & (1UL << ((BIT_NUM) % L__LONG_BIT)))
+
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) \
+ (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] &= ~(1UL << ((BIT_NUM) % L__LONG_BIT));
+
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) \
+ (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] |= 1UL << ((BIT_NUM) % L__LONG_BIT);
+
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) \
+ { int i = L__BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0UL - (BIT_VAL); while(i); }
+
+#else /* The bit array can definitely fit in one long */
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) ((BIT_ARR) & (1UL << (BIT_NUM)))
+
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) (BIT_ARR) &= ~(1UL << (BIT_NUM));
+
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) (BIT_ARR) |= 1UL << (BIT_NUM);
+
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) (BIT_ARR) = 0UL - (BIT_VAL);
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN /* Node reads can fail in this instantiation. */
+
+#define L__CHECK_READ_ERROR(ERROR_RETURN) \
+{ if (AVL_READ_ERROR) return(ERROR_RETURN); }
+
+#else
+
+#define L__CHECK_READ_ERROR(ERROR_RETURN) /* Expands to nothing. */
+
+#endif
+
+/* The presumed reason that an instantiation places additional fields
+** inside the AVL tree structure is that the SET_ and GET_ macros
+** need these fields. The "balance" function does not explicitly use
+** any fields in the AVL tree structure, so only pass an AVL tree
+** structure pointer to "balance" if it has instantiation-specific
+** fields that are (presumably) needed by the SET_/GET_ calls within
+** "balance".
+**
+** The CALL prefix is placed in front of the argument at each call of
+** "balance", the DECL prefix in front of its parameter declaration.
+*/
+#ifdef AVL_INSIDE_STRUCT
+
+#define L__BALANCE_PARAM_CALL_PREFIX L__tree,
+#define L__BALANCE_PARAM_DECL_PREFIX L__(avl) *L__tree,
+
+#else
+
+#define L__BALANCE_PARAM_CALL_PREFIX
+#define L__BALANCE_PARAM_DECL_PREFIX
+
+#endif
+
+#ifdef AVL_IMPL_MASK /* The instantiation selects which functions to emit. */
+
+#define L__IMPL_MASK (AVL_IMPL_MASK)
+
+#else
+
+/* Define all functions. */
+#define L__IMPL_MASK AVL_IMPL_ALL
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT)
+
+L__SC void L__(init)(L__(avl) *L__tree)
+ {
+ /* A tree with no nodes is represented by a null root handle. */
+ L__tree->root = AVL_NULL;
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_IS_EMPTY)
+
+L__SC int L__(is_empty)(L__(avl) *L__tree)
+ {
+ /* The tree holds no nodes exactly when its root is the null handle. */
+ if (L__tree->root == AVL_NULL)
+ return(1);
+
+ return(0);
+ }
+
+#endif
+
+/* Put the private balance function in the same compilation module as
+** the insert function. */
+#if (L__IMPL_MASK & AVL_IMPL_INSERT)
+
+/* Balances subtree, returns handle of root node of subtree after balancing.
+**
+** bal_h is the root of a subtree in which one child subtree is two levels
+** deeper than the other. Only the sign of the balance factor stored in
+** bal_h is looked at: positive means the "greater" side is the deep one,
+** anything else means the "less" side is. A single or a double rotation
+** is carried out and the balance factors of the nodes involved are
+** rewritten. When AVL_READ_ERRORS_HAPPEN is defined, AVL_NULL is returned
+** if a read error is detected.
+*/
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h)
+ {
+ AVL_HANDLE deep_h; /* Root of the deeper child subtree of bal_h. */
+
+ /* Either the "greater than" or the "less than" subtree of
+ ** this node has to be 2 levels deeper (or else it wouldn't
+ ** need balancing).
+ */
+ if (AVL_GET_BALANCE_FACTOR(bal_h) > 0)
+ {
+ /* "Greater than" subtree is deeper. */
+
+ deep_h = AVL_GET_GREATER(bal_h, 1);
+
+ L__CHECK_READ_ERROR(AVL_NULL)
+
+ if (AVL_GET_BALANCE_FACTOR(deep_h) < 0)
+ { /* deep_h leans the opposite way: double rotation. The "less"
+ ** child of deep_h becomes the new root of the subtree. */
+ int bf;
+
+ AVL_HANDLE old_h = bal_h;
+ bal_h = AVL_GET_LESS(deep_h, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1))
+ AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1))
+ AVL_SET_LESS(bal_h, old_h)
+ AVL_SET_GREATER(bal_h, deep_h)
+
+ bf = AVL_GET_BALANCE_FACTOR(bal_h); /* Lean of the new root before
+ ** the rotation; it decides which of the two old nodes ends up
+ ** one level short on one side. */
+ if (bf != 0)
+ {
+ if (bf > 0)
+ {
+ AVL_SET_BALANCE_FACTOR(old_h, -1)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ else
+ {
+ AVL_SET_BALANCE_FACTOR(deep_h, 1)
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ }
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+ else
+ { /* New root was already balanced (its factor stays 0). */
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ }
+ else
+ { /* Single rotation: deep_h becomes the root of the subtree and
+ ** bal_h becomes its "less" child. */
+ AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
+ AVL_SET_LESS(deep_h, bal_h)
+ if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+ { /* deep_h was balanced, so both nodes stay off balance. */
+ AVL_SET_BALANCE_FACTOR(deep_h, -1)
+ AVL_SET_BALANCE_FACTOR(bal_h, 1)
+ }
+ else
+ {
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+ bal_h = deep_h;
+ }
+ }
+ else
+ {
+ /* "Less than" subtree is deeper. */
+
+ deep_h = AVL_GET_LESS(bal_h, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+
+ if (AVL_GET_BALANCE_FACTOR(deep_h) > 0)
+ { /* Mirror image of the double rotation above: the "greater"
+ ** child of deep_h becomes the new root of the subtree. */
+ int bf;
+ AVL_HANDLE old_h = bal_h;
+ bal_h = AVL_GET_GREATER(deep_h, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
+ AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
+ AVL_SET_GREATER(bal_h, old_h)
+ AVL_SET_LESS(bal_h, deep_h)
+
+ bf = AVL_GET_BALANCE_FACTOR(bal_h);
+ if (bf != 0)
+ {
+ if (bf < 0)
+ {
+ AVL_SET_BALANCE_FACTOR(old_h, 1)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ else
+ {
+ AVL_SET_BALANCE_FACTOR(deep_h, -1)
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ }
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+ else
+ {
+ AVL_SET_BALANCE_FACTOR(old_h, 0)
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ }
+ }
+ else
+ { /* Mirror image of the single rotation above. */
+ AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
+ AVL_SET_GREATER(deep_h, bal_h)
+ if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+ {
+ AVL_SET_BALANCE_FACTOR(deep_h, 1)
+ AVL_SET_BALANCE_FACTOR(bal_h, -1)
+ }
+ else
+ {
+ AVL_SET_BALANCE_FACTOR(deep_h, 0)
+ AVL_SET_BALANCE_FACTOR(bal_h, 0)
+ }
+ bal_h = deep_h;
+ }
+ }
+
+ return(bal_h);
+ }
+
+L__SC AVL_HANDLE L__(insert)(L__(avl) *L__tree, AVL_HANDLE h) /* Inserts node
+** h into the tree and returns h. If a node with an equal key is already
+** present, that node's handle is returned instead and h is not linked in
+** (its link and balance fields have been reset by then). Returns AVL_NULL
+** if a read error is detected (AVL_READ_ERRORS_HAPPEN only). */
+ {
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_GREATER(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 0)
+
+ if (L__tree->root == AVL_NULL)
+ L__tree->root = h;
+ else
+ {
+ /* Last unbalanced node encountered in search for insertion point. */
+ AVL_HANDLE unbal = AVL_NULL;
+ /* Parent of last unbalanced node. */
+ AVL_HANDLE parent_unbal = AVL_NULL;
+ /* Balance factor of last unbalanced node. */
+ int unbal_bf;
+
+ /* Zero-based depth in tree. */
+ unsigned depth = 0, unbal_depth = 0;
+
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L__BIT_ARR_DEFN(branch)
+
+ AVL_HANDLE hh = L__tree->root;
+ AVL_HANDLE parent = AVL_NULL;
+ int cmp;
+
+ do /* Search for the insertion point, recording the path taken. */
+ {
+ if (AVL_GET_BALANCE_FACTOR(hh) != 0)
+ {
+ unbal = hh;
+ parent_unbal = parent;
+ unbal_depth = depth;
+ }
+ cmp = AVL_COMPARE_NODE_NODE(h, hh);
+ if (cmp == 0)
+ /* Duplicate key. */
+ return(hh);
+ parent = hh;
+ if (cmp > 0)
+ {
+ hh = AVL_GET_GREATER(hh, 1);
+ L__BIT_ARR_1(branch, depth)
+ }
+ else
+ {
+ hh = AVL_GET_LESS(hh, 1);
+ L__BIT_ARR_0(branch, depth)
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ }
+ while (hh != AVL_NULL);
+
+ /* Add node to insert as leaf of tree. */
+ if (cmp < 0)
+ AVL_SET_LESS(parent, h)
+ else
+ AVL_SET_GREATER(parent, h)
+
+ depth = unbal_depth; /* Replay the recorded path from "unbal" (or from
+ ** the root, depth 0, if no unbalanced node was passed). */
+
+ if (unbal == AVL_NULL)
+ hh = L__tree->root;
+ else
+ {
+ cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ depth++;
+ unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
+ if (cmp < 0)
+ unbal_bf--;
+ else /* cmp > 0 */
+ unbal_bf++;
+ hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ if ((unbal_bf != -2) && (unbal_bf != 2))
+ {
+ /* No rebalancing of tree is necessary. */
+ AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
+ unbal = AVL_NULL;
+ }
+ }
+
+ if (hh != AVL_NULL)
+ while (h != hh) /* Every node between "unbal" and the new leaf had a
+ ** balance factor of 0; it now leans toward the new leaf. */
+ {
+ cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ depth++;
+ if (cmp < 0)
+ {
+ AVL_SET_BALANCE_FACTOR(hh, -1)
+ hh = AVL_GET_LESS(hh, 1);
+ }
+ else /* cmp > 0 */
+ {
+ AVL_SET_BALANCE_FACTOR(hh, 1)
+ hh = AVL_GET_GREATER(hh, 1);
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+
+ if (unbal != AVL_NULL) /* "unbal" is now two levels out: rotate, then
+ ** hook the new subtree root into the place "unbal" occupied. */
+ {
+ unbal = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX unbal);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ if (parent_unbal == AVL_NULL)
+ L__tree->root = unbal;
+ else
+ {
+ depth = unbal_depth - 1;
+ cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ if (cmp < 0)
+ AVL_SET_LESS(parent_unbal, unbal)
+ else /* cmp > 0 */
+ AVL_SET_GREATER(parent_unbal, unbal)
+ }
+ }
+
+ }
+
+ return(h);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH)
+
+L__SC AVL_HANDLE L__(search)(L__(avl) *L__tree, AVL_KEY k, avl_search_type st)
+ {
+ /* Best inexact candidate met so far on the way down. */
+ AVL_HANDLE best = AVL_NULL;
+ AVL_HANDLE cur = L__tree->root;
+ int order;
+ /* Sign a comparison result must have for the node to count as an
+ ** inexact candidate; zero when only an exact match will do. */
+ int wanted = (st & AVL_LESS) ? 1 : ((st & AVL_GREATER) ? -1 : 0);
+
+ while (cur != AVL_NULL)
+ {
+ order = AVL_COMPARE_KEY_NODE(k, cur);
+ if (order == 0)
+ {
+ if (st & AVL_EQUAL)
+ /* An exact match was asked for and has been found. */
+ return(cur);
+
+ /* An exact match is not acceptable: carry on toward the side
+ ** where acceptable nodes are. */
+ order = -wanted;
+ }
+ else if ((wanted != 0) && !((order ^ wanted) & L__MASK_HIGH_BIT))
+ /* Same sign, so this node is on the acceptable side of the key. */
+ best = cur;
+
+ if (order < 0)
+ cur = AVL_GET_LESS(cur, 1);
+ else
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+
+ return(best);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_LEAST)
+
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *L__tree)
+ {
+ AVL_HANDLE least = AVL_NULL;
+ AVL_HANDLE next = L__tree->root;
+
+ /* An empty tree has no least node. */
+ if (next == AVL_NULL)
+ return(AVL_NULL);
+
+ /* Follow "less" links until there are none left. */
+ do
+ {
+ least = next;
+ next = AVL_GET_LESS(least, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+ while (next != AVL_NULL);
+
+ return(least);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_GREATEST)
+
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *L__tree)
+ {
+ AVL_HANDLE greatest = AVL_NULL;
+ AVL_HANDLE next = L__tree->root;
+
+ /* An empty tree has no greatest node. */
+ if (next == AVL_NULL)
+ return(AVL_NULL);
+
+ /* Follow "greater" links until there are none left. */
+ do
+ {
+ greatest = next;
+ next = AVL_GET_GREATER(greatest, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+ while (next != AVL_NULL);
+
+ return(greatest);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_REMOVE)
+
+/* Prototype of balance function (called by remove) in case not in
+** same compilation unit.
+*/
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h);
+
+L__SC AVL_HANDLE L__(remove)(L__(avl) *L__tree, AVL_KEY k) /* Unlinks the node
+** with key k from the tree and returns its handle, or returns AVL_NULL if
+** no node has that key. AVL_NULL is also returned if a read error is
+** detected (AVL_READ_ERRORS_HAPPEN only). NOTE(review): the read-error
+** checks inside the rebalancing phase return while parent/child links are
+** still reversed, so the tree looks unusable after such an error --
+** confirm against cavl_tree.html. */
+ {
+ /* Zero-based depth in tree. */
+ unsigned depth = 0, rm_depth;
+
+ /* Records a path into the tree. If bit n is true, indicates
+ ** take greater branch from the nth node in the path, otherwise
+ ** take the less branch. bit 0 gives branch from root, and
+ ** so on. */
+ L__BIT_ARR_DEFN(branch)
+
+ AVL_HANDLE h = L__tree->root;
+ AVL_HANDLE parent = AVL_NULL;
+ AVL_HANDLE child;
+ AVL_HANDLE path;
+ int cmp, cmp_shortened_sub_with_path; /* The second one says on which
+ ** side of "path" (negative: less, otherwise greater) the subtree
+ ** that lost a level hangs. */
+ int reduced_depth;
+ int bf;
+ AVL_HANDLE rm;
+ AVL_HANDLE parent_rm;
+
+ for ( ; ; ) /* Find the node to remove, recording the path taken. */
+ {
+ if (h == AVL_NULL)
+ /* No node in tree with given key. */
+ return(AVL_NULL);
+ cmp = AVL_COMPARE_KEY_NODE(k, h);
+ if (cmp == 0)
+ /* Found node to remove. */
+ break;
+ parent = h;
+ if (cmp > 0)
+ {
+ h = AVL_GET_GREATER(h, 1);
+ L__BIT_ARR_1(branch, depth)
+ }
+ else
+ {
+ h = AVL_GET_LESS(h, 1);
+ L__BIT_ARR_0(branch, depth)
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ cmp_shortened_sub_with_path = cmp;
+ }
+ rm = h;
+ parent_rm = parent;
+ rm_depth = depth;
+
+ /* If the node to remove is not a leaf node, we need to get a
+ ** leaf node, or a node with a single leaf as its child, to put
+ ** in the place of the node to remove. We will get the greatest
+ ** node in the less subtree (of the node to remove), or the least
+ ** node in the greater subtree. We take the leaf node from the
+ ** deeper subtree, if there is one. */
+
+ if (AVL_GET_BALANCE_FACTOR(h) < 0)
+ {
+ child = AVL_GET_LESS(h, 1);
+ L__BIT_ARR_0(branch, depth)
+ cmp = -1;
+ }
+ else
+ {
+ child = AVL_GET_GREATER(h, 1);
+ L__BIT_ARR_1(branch, depth)
+ cmp = 1;
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+
+ if (child != AVL_NULL)
+ {
+ cmp = -cmp; /* Having stepped into one subtree, keep going the
+ ** opposite way to reach the key nearest to the removed one. */
+ do
+ {
+ parent = h;
+ h = child;
+ if (cmp < 0)
+ {
+ child = AVL_GET_LESS(h, 1);
+ L__BIT_ARR_0(branch, depth)
+ }
+ else
+ {
+ child = AVL_GET_GREATER(h, 1);
+ L__BIT_ARR_1(branch, depth)
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ }
+ while (child != AVL_NULL);
+
+ if (parent == rm)
+ /* Only went through do loop once. Deleted node will be replaced
+ ** in the tree structure by one of its immediate children. */
+ cmp_shortened_sub_with_path = -cmp;
+ else
+ cmp_shortened_sub_with_path = cmp;
+
+ /* Get the handle of the opposite child, which may not be null. */
+ child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
+ }
+
+ if (parent == AVL_NULL)
+ /* There were only 1 or 2 nodes in this tree. */
+ L__tree->root = child;
+ else if (cmp_shortened_sub_with_path < 0)
+ AVL_SET_LESS(parent, child)
+ else
+ AVL_SET_GREATER(parent, child)
+
+ /* "path" is the parent of the subtree being eliminated or reduced
+ ** from a depth of 2 to 1. If "path" is the node to be removed, we
+ ** set path to the node we're about to poke into the position of the
+ ** node to be removed. */
+ path = parent == rm ? h : parent;
+
+ if (h != rm)
+ {
+ /* Poke in the replacement for the node to be removed. */
+ AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
+ AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
+ AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
+ if (parent_rm == AVL_NULL)
+ L__tree->root = h;
+ else
+ {
+ depth = rm_depth - 1;
+ if (L__BIT_ARR_VAL(branch, depth))
+ AVL_SET_GREATER(parent_rm, h)
+ else
+ AVL_SET_LESS(parent_rm, h)
+ }
+ }
+
+ if (path != AVL_NULL)
+ {
+ /* Create a temporary linked list from the parent of the path node
+ ** to the root node. */
+ h = L__tree->root;
+ parent = AVL_NULL;
+ depth = 0;
+ while (h != path)
+ {
+ if (L__BIT_ARR_VAL(branch, depth))
+ {
+ child = AVL_GET_GREATER(h, 1);
+ AVL_SET_GREATER(h, parent)
+ }
+ else
+ {
+ child = AVL_GET_LESS(h, 1);
+ AVL_SET_LESS(h, parent)
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ depth++;
+ parent = h;
+ h = child;
+ }
+
+ /* Climb from the path node to the root node using the linked
+ ** list, restoring the tree structure and rebalancing as necessary.
+ */
+ reduced_depth = 1;
+ cmp = cmp_shortened_sub_with_path;
+ for ( ; ; )
+ {
+ if (reduced_depth) /* The subtree just left lost a level, so the
+ ** balance factor of h has to be adjusted. */
+ {
+ bf = AVL_GET_BALANCE_FACTOR(h);
+ if (cmp < 0)
+ bf++;
+ else /* cmp > 0 */
+ bf--;
+ if ((bf == -2) || (bf == 2))
+ {
+ h = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX h);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ bf = AVL_GET_BALANCE_FACTOR(h);
+ }
+ else
+ AVL_SET_BALANCE_FACTOR(h, bf)
+ reduced_depth = (bf == 0); /* Only a subtree that ends up
+ ** balanced has itself become one level shorter. */
+ }
+ if (parent == AVL_NULL)
+ break;
+ child = h;
+ h = parent;
+ depth--;
+ cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+ if (cmp < 0)
+ {
+ parent = AVL_GET_LESS(h, 1);
+ AVL_SET_LESS(h, child)
+ }
+ else
+ {
+ parent = AVL_GET_GREATER(h, 1);
+ AVL_SET_GREATER(h, child)
+ }
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+ L__tree->root = h;
+ }
+
+ return(rm);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SUBST)
+
+L__SC AVL_HANDLE L__(subst)(L__(avl) *L__tree, AVL_HANDLE new_node)
+ {
+ /* Node currently under inspection; ends up as the node being replaced. */
+ AVL_HANDLE old_node = L__tree->root;
+ /* Node from which old_node was reached (null while at the root). */
+ AVL_HANDLE above = AVL_NULL;
+ int order;
+ /* Comparison result that led from "above" to old_node. Only looked at
+ ** when "above" is not null. */
+ int order_above = 0;
+
+ /* Walk down until the node with the same key as new_node is found. */
+ while (old_node != AVL_NULL)
+ {
+ order = AVL_COMPARE_NODE_NODE(new_node, old_node);
+ if (order == 0)
+ break;
+
+ order_above = order;
+ above = old_node;
+ if (order < 0)
+ old_node = AVL_GET_LESS(old_node, 1);
+ else
+ old_node = AVL_GET_GREATER(old_node, 1);
+ L__CHECK_READ_ERROR(AVL_NULL)
+ }
+
+ if (old_node == AVL_NULL)
+ /* The tree has no node with the key of new_node. */
+ return(AVL_NULL);
+
+ /* The new node takes over the links and the balance factor of the node
+ ** it replaces. */
+ AVL_SET_LESS(new_node, AVL_GET_LESS(old_node, 0))
+ AVL_SET_GREATER(new_node, AVL_GET_GREATER(old_node, 0))
+ AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(old_node))
+
+ /* Redirect whatever referred to the old node. */
+ if (above == AVL_NULL)
+ L__tree->root = new_node;
+ else if (order_above < 0)
+ AVL_SET_LESS(above, new_node)
+ else
+ AVL_SET_GREATER(above, new_node)
+
+ return(old_node);
+ }
+
+#endif
+
+#ifdef AVL_BUILD_ITER_TYPE
+
+#if (L__IMPL_MASK & AVL_IMPL_BUILD)
+
+L__SC int L__(build)(
+ L__(avl) *L__tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes) /* Replaces the
+** contents of the tree with a tree built from num_nodes nodes, read one
+** after another through p with AVL_BUILD_ITER_VAL / AVL_BUILD_ITER_INCR.
+** Each completed subtree becomes the "less" subtree of the next node read,
+** so the nodes are presumably expected in ascending key order -- confirm
+** against cavl_tree.html. Returns 1 on success (including num_nodes == 0,
+** which leaves the tree empty) and 0 if a read error is detected. */
+ {
+ /* Gives path to subtree being built. If bit n is false, branch
+ ** less from the node at depth n, if true branch greater. */
+ L__BIT_ARR_DEFN(branch)
+
+ /* If bit n is true, then for the current subtree at depth n, its
+ ** greater subtree has one more node than its less subtree. */
+ L__BIT_ARR_DEFN(rem)
+
+ /* Depth of root node of current subtree. */
+ unsigned depth = 0;
+
+ /* Number of nodes in current subtree. */
+ L__SIZE num_sub = num_nodes;
+
+ /* The algorithm relies on a stack of nodes whose less subtree has
+ ** been built, but whose greater subtree has not yet been built.
+ ** The stack is implemented as linked list. The nodes are linked
+ ** together by having the "greater" handle of a node set to the
+ ** next node in the list. "less_parent" is the handle of the first
+ ** node in the list. */
+ AVL_HANDLE less_parent = AVL_NULL;
+
+ /* h is root of current subtree, child is one of its children. */
+ AVL_HANDLE h;
+ AVL_HANDLE child;
+
+ if (num_nodes == 0)
+ {
+ L__tree->root = AVL_NULL;
+ return(1);
+ }
+
+ for ( ; ; )
+ {
+ while (num_sub > 2) /* Descend along "less" branches, halving the
+ ** node count, until a subtree of 1 or 2 nodes is reached. */
+ {
+ /* Subtract one for root of subtree. */
+ num_sub--;
+ if (num_sub & 1)
+ L__BIT_ARR_1(rem, depth)
+ else
+ L__BIT_ARR_0(rem, depth)
+ L__BIT_ARR_0(branch, depth)
+ depth++;
+ num_sub >>= 1;
+ }
+
+ if (num_sub == 2)
+ {
+ /* Build a subtree with two nodes, slanting to greater.
+ ** I arbitrarily chose to always have the extra node in the
+ ** greater subtree when there is an odd number of nodes to
+ ** split between the two subtrees. */
+
+ h = AVL_BUILD_ITER_VAL(p);
+ L__CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ child = AVL_BUILD_ITER_VAL(p);
+ L__CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(child, AVL_NULL)
+ AVL_SET_GREATER(child, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(child, 0)
+ AVL_SET_GREATER(h, child)
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 1)
+ }
+ else /* num_sub == 1 */
+ {
+ /* Build a subtree with one node. */
+
+ h = AVL_BUILD_ITER_VAL(p);
+ L__CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(h, AVL_NULL)
+ AVL_SET_GREATER(h, AVL_NULL)
+ AVL_SET_BALANCE_FACTOR(h, 0)
+ }
+
+ while (depth) /* Climb for as long as the subtree just completed
+ ** is the "greater" subtree of its parent. */
+ {
+ depth--;
+ if (!L__BIT_ARR_VAL(branch, depth))
+ /* We've completed a less subtree. */
+ break;
+
+ /* We've completed a greater subtree, so attach it to
+ ** its parent (that is less than it). We pop the parent
+ ** off the stack of less parents. */
+ child = h;
+ h = less_parent;
+ less_parent = AVL_GET_GREATER(h, 1);
+ L__CHECK_READ_ERROR(0)
+ AVL_SET_GREATER(h, child)
+ /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
+ num_sub <<= 1;
+ num_sub += L__BIT_ARR_VAL(rem, depth) ? 0 : 1;
+ if (num_sub & (num_sub - 1))
+ /* num_sub is not a power of 2. */
+ AVL_SET_BALANCE_FACTOR(h, 0)
+ else
+ /* num_sub is a power of 2. */
+ AVL_SET_BALANCE_FACTOR(h, 1)
+ }
+
+ if (num_sub == num_nodes)
+ /* We've completed the full tree. */
+ break;
+
+ /* The subtree we've completed is the less subtree of the
+ ** next node in the sequence. */
+
+ child = h;
+ h = AVL_BUILD_ITER_VAL(p);
+ L__CHECK_READ_ERROR(0)
+ AVL_BUILD_ITER_INCR(p)
+ AVL_SET_LESS(h, child)
+
+ /* Put h into stack of less parents. */
+ AVL_SET_GREATER(h, less_parent)
+ less_parent = h;
+
+ /* Proceed to creating greater than subtree of h. */
+ L__BIT_ARR_1(branch, depth)
+ num_sub += L__BIT_ARR_VAL(rem, depth) ? 1 : 0;
+ depth++;
+
+ } /* end for ( ; ; ) */
+
+ L__tree->root = h;
+
+ return(1);
+ }
+
+#endif
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT_ITER)
+
+/* Marks an iterator as invalid. Depth is zero-based, so the all-ones value
+** is used to mean "not positioned at any node". Iterators need not be
+** initialized with this function before they are handed to one of the
+** "start" functions.
+*/
+L__SC void L__(init_iter)(L__(iter) *iter)
+ {
+ iter->depth = ~0;
+ }
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN /* Read-error check for the void iterator functions: invalidate the iterator, then return. */
+
+#define L__CHECK_READ_ERROR_INV_DEPTH \
+{ if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
+
+#else
+
+#define L__CHECK_READ_ERROR_INV_DEPTH /* Expands to nothing. */
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER)
+
+L__SC void L__(start_iter)(
+ L__(avl) *L__tree, L__(iter) *iter, AVL_KEY k, avl_search_type st)
+ {
+ AVL_HANDLE cur = L__tree->root;
+ /* Depth of the node held in "cur". */
+ unsigned level = 0;
+ int order;
+ /* Sign a comparison result must have for the node to count as an
+ ** inexact starting point; zero when only an exact match will do. */
+ int wanted;
+
+ /* Remember which tree the iterator walks, and treat the iterator as
+ ** invalid until a starting node has been found. */
+ iter->tree_ = L__tree;
+ iter->depth = ~0;
+
+ /* Nothing to start at in an empty tree. */
+ if (cur == AVL_NULL)
+ return;
+
+ wanted = (st & AVL_LESS) ? 1 : ((st & AVL_GREATER) ? -1 : 0);
+
+ do
+ {
+ order = AVL_COMPARE_KEY_NODE(k, cur);
+ if (order == 0)
+ {
+ if (st & AVL_EQUAL)
+ {
+ /* An exact match was asked for and has been found. */
+ iter->depth = level;
+ return;
+ }
+
+ /* An exact match is not acceptable: carry on toward the side
+ ** where acceptable nodes are. */
+ order = -wanted;
+ }
+ else if ((wanted != 0) && !((order ^ wanted) & L__MASK_HIGH_BIT))
+ /* Same sign: this node is the best starting point so far. */
+ iter->depth = level;
+
+ if (order < 0)
+ cur = AVL_GET_LESS(cur, 1);
+ else
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+
+ if (cur != AVL_NULL)
+ {
+ /* Extend the recorded path by the branch just taken. */
+ if (order > 0)
+ L__BIT_ARR_1(iter->branch, level)
+ else
+ L__BIT_ARR_0(iter->branch, level)
+ iter->path_h[level] = cur;
+ level++;
+ }
+ }
+ while (cur != AVL_NULL);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_LEAST)
+
+L__SC void L__(start_iter_least)(L__(avl) *L__tree, L__(iter) *iter)
+ {
+ AVL_HANDLE cur = L__tree->root;
+
+ iter->tree_ = L__tree;
+
+ /* The iterator is invalid until the root has been visited. */
+ iter->depth = ~0;
+
+ /* Every step of the path to the least node takes the "less" branch. */
+ L__BIT_ARR_ALL(iter->branch, 0)
+
+ if (cur == AVL_NULL)
+ /* Empty tree: leave the iterator invalid. */
+ return;
+
+ /* The root sits at depth 0 and is not stored in path_h. */
+ iter->depth = 0;
+ cur = AVL_GET_LESS(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+
+ /* Each further node goes into path_h as the path gets deeper. */
+ while (cur != AVL_NULL)
+ {
+ iter->path_h[iter->depth++] = cur;
+ cur = AVL_GET_LESS(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ }
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_GREATEST)
+
+L__SC void L__(start_iter_greatest)(L__(avl) *L__tree, L__(iter) *iter)
+ {
+ AVL_HANDLE cur = L__tree->root;
+
+ iter->tree_ = L__tree;
+
+ /* The iterator is invalid until the root has been visited. */
+ iter->depth = ~0;
+
+ /* Every step of the path to the greatest node takes the "greater"
+ ** branch. */
+ L__BIT_ARR_ALL(iter->branch, 1)
+
+ if (cur == AVL_NULL)
+ /* Empty tree: leave the iterator invalid. */
+ return;
+
+ /* The root sits at depth 0 and is not stored in path_h. */
+ iter->depth = 0;
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+
+ /* Each further node goes into path_h as the path gets deeper. */
+ while (cur != AVL_NULL)
+ {
+ iter->path_h[iter->depth++] = cur;
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ }
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_GET_ITER)
+
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter)
+ {
+ /* An invalid iterator is not positioned at any node. */
+ if (iter->depth == ~0)
+ return(AVL_NULL);
+
+ /* The root is not kept in path_h, so it is read from the tree. */
+ if (iter->depth == 0)
+ return(iter->tree_->root);
+
+ /* path_h[n - 1] holds the node at depth n. */
+ return(iter->path_h[iter->depth - 1]);
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INCR_ITER)
+
+L__SC void L__(incr_iter)(L__(iter) *iter)
+ {
+ /* There is no tree parameter here; presumably the node access macros may
+ ** refer to L__tree, so it is mapped onto the tree kept in the iterator. */
+ #define L__tree (iter->tree_)
+
+ AVL_HANDLE cur;
+
+ /* An invalid iterator stays as it is. */
+ if (iter->depth != ~0)
+ {
+ /* Start from the "greater" child of the current node. */
+ cur = (iter->depth == 0) ?
+ iter->tree_->root : iter->path_h[iter->depth - 1];
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+
+ if (cur != AVL_NULL)
+ {
+ /* Step into the greater subtree, then go as far "less" as
+ ** possible, extending the recorded path on the way. */
+ L__BIT_ARR_1(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = cur;
+
+ cur = AVL_GET_LESS(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ while (cur != AVL_NULL)
+ {
+ L__BIT_ARR_0(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = cur;
+ cur = AVL_GET_LESS(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ }
+ }
+ else
+ {
+ /* No greater subtree: back up the path until a node is reached
+ ** that was left through its "less" branch. Running out of path
+ ** at the root makes the iterator invalid. */
+ for ( ; ; )
+ {
+ if (iter->depth == 0)
+ {
+ iter->depth = ~0;
+ break;
+ }
+ iter->depth--;
+ if (!L__BIT_ARR_VAL(iter->branch, iter->depth))
+ break;
+ }
+ }
+ }
+
+ #undef L__tree
+ }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_DECR_ITER)
+
+L__SC void L__(decr_iter)(L__(iter) *iter)
+ {
+ /* There is no tree parameter here; presumably the node access macros may
+ ** refer to L__tree, so it is mapped onto the tree kept in the iterator. */
+ #define L__tree (iter->tree_)
+
+ AVL_HANDLE cur;
+
+ /* An invalid iterator stays as it is. */
+ if (iter->depth != ~0)
+ {
+ /* Start from the "less" child of the current node. */
+ cur = (iter->depth == 0) ?
+ iter->tree_->root : iter->path_h[iter->depth - 1];
+ cur = AVL_GET_LESS(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+
+ if (cur != AVL_NULL)
+ {
+ /* Step into the less subtree, then go as far "greater" as
+ ** possible, extending the recorded path on the way. */
+ L__BIT_ARR_0(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = cur;
+
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ while (cur != AVL_NULL)
+ {
+ L__BIT_ARR_1(iter->branch, iter->depth)
+ iter->path_h[iter->depth++] = cur;
+ cur = AVL_GET_GREATER(cur, 1);
+ L__CHECK_READ_ERROR_INV_DEPTH
+ }
+ }
+ else
+ {
+ /* No less subtree: back up the path until a node is reached that
+ ** was left through its "greater" branch. Running out of path at
+ ** the root makes the iterator invalid. */
+ for ( ; ; )
+ {
+ if (iter->depth == 0)
+ {
+ iter->depth = ~0;
+ break;
+ }
+ iter->depth--;
+ if (L__BIT_ARR_VAL(iter->branch, iter->depth))
+ break;
+ }
+ }
+ }
+
+ #undef L__tree
+ }
+
+#endif
+
+/* Remove every helper macro defined by this header, so that nothing leaks
+** into the code that follows the #include. */
+
+/* Naming, linkage and size configuration. */
+#undef L__
+#undef L__SC
+#undef L__SIZE
+#undef L__IMPL_MASK
+
+/* Width estimates and bit array helpers. */
+#undef L__EST_LONG_BIT
+#undef L__LONG_BIT
+#undef L__MASK_HIGH_BIT
+#undef L__BIT_ARR_LONGS
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+
+/* Read-error checks and "balance" parameter prefixes. */
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__BALANCE_PARAM_CALL_PREFIX
+#undef L__BALANCE_PARAM_DECL_PREFIX
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h
new file mode 100644
index 00000000..797e4d07
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h
@@ -0,0 +1,142 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+/* External header file for Heap Memory Manager. See documentation in
+** heapmm.html.
+*/
+
+#undef HMM_PROCESS
+
+/* Include once per configuration in a particular translation unit. */
+
+#ifndef HMM_CNFG_NUM
+
+/* Default configuration. */
+
+#ifndef HMM_INC_CNFG_DFLT
+#define HMM_INC_CNFG_DFLT
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 0
+
+/* Test configuration. */
+
+#ifndef HMM_INC_CNFG_0
+#define HMM_INC_CNFG_0
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 1
+
+#ifndef HMM_INC_CNFG_1
+#define HMM_INC_CNFG_1
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 2
+
+#ifndef HMM_INC_CNFG_2
+#define HMM_INC_CNFG_2
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 3
+
+#ifndef HMM_INC_CNFG_3
+#define HMM_INC_CNFG_3
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 4
+
+#ifndef HMM_INC_CNFG_4
+#define HMM_INC_CNFG_4
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 5
+
+#ifndef HMM_INC_CNFG_5
+#define HMM_INC_CNFG_5
+#define HMM_PROCESS
+#endif
+
+#endif
+
+#ifdef HMM_PROCESS
+
+#include "hmm_cnfg.h"
+
+/* Heap descriptor.  One instance describes one heap; a pointer to it is the
+** first argument of most of the externally-callable functions declared
+** below. */
+typedef struct HMM_UNIQUE(structure)
+ {
+ /* private: */
+
+ /* Pointer to (payload of) root node in AVL tree. This field should
+ ** really be the AVL tree descriptor (type avl_avl). But (in the
+ ** instantiation of the AVL tree generic package used in this package)
+ ** the AVL tree descriptor simply contains a pointer to the root. So,
+ ** whenever a pointer to the AVL tree descriptor is needed, I use the
+ ** cast:
+ **
+ ** (avl_avl *) &(heap_desc->avl_tree_root)
+ **
+ ** (where heap_desc is a pointer to a heap descriptor). This trick
+ ** allows me to avoid including cavl_if.h in this external header. */
+ void *avl_tree_root;
+
+ /* Pointer to first byte of last block freed, after any coalescing. */
+ void *last_freed;
+
+ /* public: */
+
+ /* NOTE(review): the two fields below are undocumented here.  Judging by
+ ** their names they presumably describe how many BAUs could be given
+ ** back by shrink_chunk and where that chunk ends -- confirm against
+ ** heapmm.html. */
+ HMM_UNIQUE(size_bau) num_baus_can_shrink;
+ void *end_of_shrinkable_chunk;
+ }
+HMM_UNIQUE(descriptor);
+
+/* Prototypes for externally-callable functions. */
+
+void HMM_UNIQUE(init)(HMM_UNIQUE(descriptor) *desc);
+
+void * HMM_UNIQUE(alloc)(
+ HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) num_addr_align_units);
+
+/* NOT YET IMPLEMENTED */
+void * HMM_UNIQUE(greedy_alloc)(
+ HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) needed_addr_align_units,
+ HMM_UNIQUE(size_aau) coveted_addr_align_units);
+
+int HMM_UNIQUE(resize)(
+ HMM_UNIQUE(descriptor) *desc, void *mem,
+ HMM_UNIQUE(size_aau) num_addr_align_units);
+
+/* NOT YET IMPLEMENTED */
+int HMM_UNIQUE(greedy_resize)(
+ HMM_UNIQUE(descriptor) *desc, void *mem,
+ HMM_UNIQUE(size_aau) needed_addr_align_units,
+ HMM_UNIQUE(size_aau) coveted_addr_align_units);
+
+void HMM_UNIQUE(free)(HMM_UNIQUE(descriptor) *desc, void *mem);
+
+HMM_UNIQUE(size_aau) HMM_UNIQUE(true_size)(void *mem);
+
+HMM_UNIQUE(size_aau) HMM_UNIQUE(largest_available)(
+ HMM_UNIQUE(descriptor) *desc);
+
+void HMM_UNIQUE(new_chunk)(
+ HMM_UNIQUE(descriptor) *desc, void *start_of_chunk,
+ HMM_UNIQUE(size_bau) num_block_align_units);
+
+void HMM_UNIQUE(grow_chunk)(
+ HMM_UNIQUE(descriptor) *desc, void *end_of_chunk,
+ HMM_UNIQUE(size_bau) num_block_align_units);
+
+/* NOT YET IMPLEMENTED */
+void HMM_UNIQUE(shrink_chunk)(
+ HMM_UNIQUE(descriptor) *desc,
+ HMM_UNIQUE(size_bau) num_block_align_units);
+
+#endif /* defined HMM_PROCESS */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h
new file mode 100644
index 00000000..3a453ef4
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h
@@ -0,0 +1,105 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+/* Configure Heap Memory Manager for processor architecture, compiler,
+** and desired performance characteristics. This file is included
+** by heapmm.h, so these definitions can be used by code external to
+** HMM. You can change the default configuration, and/or create alternate
+** configuration(s).
+*/
+
+/* To allow for multiple configurations of HMM to be used in the same
+** compilation unit, undefine all preprocessor symbols that will be
+** defined below.
+*/
+#undef HMM_ADDR_ALIGN_UNIT
+#undef HMM_BLOCK_ALIGN_UNIT
+#undef HMM_UNIQUE
+#undef HMM_DESC_PARAM
+#undef HMM__SYM_TO_STRING
+#undef HMM_SYM_TO_STRING
+#undef HMM_AUDIT_FAIL
+
+/* Turn X into a string after one macro expansion pass of X. This trick
+** works with both GCC and Visual C++. */
+#define HMM_SYM_TO_STRING(X) HMM__SYM_TO_STRING(X)
+#define HMM__SYM_TO_STRING(X) #X
+
+#ifndef HMM_CNFG_NUM
+
+/* Default configuration. */
+
+/* Use hmm_ prefix to avoid identifier conflicts. */
+#define HMM_UNIQUE(BASE) hmm_ ## BASE
+
+/* Number of bytes in an Address Alignment Unit (AAU). */
+//fwghack
+//#define HMM_ADDR_ALIGN_UNIT sizeof(int)
+#define HMM_ADDR_ALIGN_UNIT 32
+
+/* Number of AAUs in a Block Alignment Unit (BAU). */
+#define HMM_BLOCK_ALIGN_UNIT 1
+
+/* Type of unsigned integer big enough to hold the size of a Block in AAUs. */
+typedef unsigned long HMM_UNIQUE(size_aau);
+
+/* Type of unsigned integer big enough to hold the size of a Block/Chunk
+** in BAUs. The high bit will be robbed. */
+typedef unsigned long HMM_UNIQUE(size_bau);
+
+void HMM_dflt_abort(const char *, const char *);
+
+/* Actions upon a self-audit failure. Must expand to a single complete
+** statement. If you remove the definition of this macro, no self-auditing
+** will be performed. */
+#define HMM_AUDIT_FAIL \
+ HMM_dflt_abort(__FILE__, HMM_SYM_TO_STRING(__LINE__));
+
+#elif HMM_CNFG_NUM == 0
+
+/* Definitions for testing. */
+
+#define HMM_UNIQUE(BASE) thmm_ ## BASE
+
+#define HMM_ADDR_ALIGN_UNIT sizeof(int)
+
+#define HMM_BLOCK_ALIGN_UNIT 3
+
+typedef unsigned HMM_UNIQUE(size_aau);
+
+typedef unsigned short HMM_UNIQUE(size_bau);
+
+/* Under this test setup, a long jump is done if there is a self-audit
+** failure.
+*/
+
+extern jmp_buf HMM_UNIQUE(jmp_buf);
+extern const char * HMM_UNIQUE(fail_file);
+extern unsigned HMM_UNIQUE(fail_line);
+
+#define HMM_AUDIT_FAIL \
+ { HMM_UNIQUE(fail_file) = __FILE__; HMM_UNIQUE(fail_line) = __LINE__; \
+ longjmp(HMM_UNIQUE(jmp_buf), 1); }
+
+#elif HMM_CNFG_NUM == 1
+
+/* Put configuration 1 definitions here (if there is a configuration 1). */
+
+#elif HMM_CNFG_NUM == 2
+
+/* Put configuration 2 definitions here. */
+
+#elif HMM_CNFG_NUM == 3
+
+/* Put configuration 3 definitions here. */
+
+#elif HMM_CNFG_NUM == 4
+
+/* Put configuration 4 definitions here. */
+
+#elif HMM_CNFG_NUM == 5
+
+/* Put configuration 5 definitions here. */
+
+#endif
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h
new file mode 100644
index 00000000..bc6500d5
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h
@@ -0,0 +1,149 @@
+/* This code is in the public domain.
+** Version: 1.1 Author: Walt Karas
+*/
+
+#ifndef HMM_INTRNL_H_
+#define HMM_INTRNL_H_
+
+#include "heapmm.h"
+
+#define U(BASE) HMM_UNIQUE(BASE)
+
+/* Mask of high bit of variable of size_bau type. */
+#define HIGH_BIT_BAU_SIZE \
+ ((U(size_bau)) ~ (((U(size_bau)) ~ (U(size_bau)) 0) >> 1))
+
+/* Add a given number of AAUs to pointer. */
+#define AAUS_FORWARD(PTR, AAU_OFFSET) \
+ (((char *) (PTR)) + ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+
+/* Subtract a given number of AAUs from pointer. */
+#define AAUS_BACKWARD(PTR, AAU_OFFSET) \
+ (((char *) (PTR)) - ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+
+/* Add a given number of BAUs to a pointer. */
+#define BAUS_FORWARD(PTR, BAU_OFFSET) \
+ AAUS_FORWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+
+/* Subtract a given number of BAUs from a pointer. */
+#define BAUS_BACKWARD(PTR, BAU_OFFSET) \
+ AAUS_BACKWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+
+/* Header stored at the start of every block.
+**
+** Both fields hold a size in BAUs in their low bits; the high bit of each
+** (HIGH_BIT_BAU_SIZE) is reserved for block status and must be masked off
+** with BLOCK_BAUS() / PREV_BLOCK_BAUS() before the value is used as a size.
+** A block counts as allocated only when the high bit is clear in both
+** fields (see IS_BLOCK_ALLOCATED). */
+typedef struct head_struct
+ {
+ /* Sizes in Block Alignment Units. */
+ HMM_UNIQUE(size_bau) previous_block_size, block_size;
+ }
+head_record;
+
+typedef struct ptr_struct
+ {
+ struct ptr_struct *self, *prev, *next;
+ }
+ptr_record;
+
+/* Divide and round up any fraction to the next whole number. */
+#define DIV_ROUND_UP(NUMER, DENOM) (((NUMER) + (DENOM) - 1) / (DENOM))
+
+/* Number of AAUs in a block _head. */
+#define HEAD_AAUS DIV_ROUND_UP(sizeof(head_record), HMM_ADDR_ALIGN_UNIT)
+
+/* Number of AAUs in a block pointer record. */
+#define PTR_RECORD_AAUS DIV_ROUND_UP(sizeof(ptr_record), HMM_ADDR_ALIGN_UNIT)
+
+/* Number of BAUs in a dummy end record (at end of chunk). */
+#define DUMMY_END_BLOCK_BAUS DIV_ROUND_UP(HEAD_AAUS, HMM_BLOCK_ALIGN_UNIT)
+
+/* Minimum number of BAUs in a block (allowing room for the pointer record). */
+#define MIN_BLOCK_BAUS \
+ DIV_ROUND_UP(HEAD_AAUS + PTR_RECORD_AAUS, HMM_BLOCK_ALIGN_UNIT)
+
+/* Return number of BAUs in block (masking off high bit containing block
+** status). */
+#define BLOCK_BAUS(HEAD_PTR) \
+ (((head_record *) (HEAD_PTR))->block_size & ~HIGH_BIT_BAU_SIZE)
+
+/* Return number of BAUs in previous block (masking off high bit containing
+** block status). */
+#define PREV_BLOCK_BAUS(HEAD_PTR) \
+ (((head_record *) (HEAD_PTR))->previous_block_size & ~HIGH_BIT_BAU_SIZE)
+
+/* Set number of BAUs in previous block, preserving high bit containing
+** block status. */
+#define SET_PREV_BLOCK_BAUS(HEAD_PTR, N_BAUS) \
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->previous_block_size &= HIGH_BIT_BAU_SIZE; \
+ h_ptr->previous_block_size |= (N_BAUS); }
+
+/* Convert pointer to pointer record of block to pointer to block's _head
+** record. */
+#define PTR_REC_TO_HEAD(PTR_REC_PTR) \
+ ((head_record *) AAUS_BACKWARD(PTR_REC_PTR, HEAD_AAUS))
+
+/* Convert pointer to block _head to pointer to block's pointer record. */
+#define HEAD_TO_PTR_REC(HEAD_PTR) \
+ ((ptr_record *) AAUS_FORWARD(HEAD_PTR, HEAD_AAUS))
+
+/* Returns non-zero if block is allocated. */
+#define IS_BLOCK_ALLOCATED(HEAD_PTR) \
+ (((((head_record *) (HEAD_PTR))->block_size | \
+ ((head_record *) (HEAD_PTR))->previous_block_size) & \
+ HIGH_BIT_BAU_SIZE) == 0)
+
+#define MARK_BLOCK_ALLOCATED(HEAD_PTR) \
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->block_size &= ~HIGH_BIT_BAU_SIZE; \
+ h_ptr->previous_block_size &= ~HIGH_BIT_BAU_SIZE; }
+
+/* Mark a block as free when it is not the first block in a bin (and
+** therefore not a node in the AVL tree). */
+#define MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(HEAD_PTR) \
+ { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+ h_ptr->block_size |= HIGH_BIT_BAU_SIZE; }
+
+/* Prototypes for internal functions implemented in one file and called in
+** another.
+*/
+
+void U(into_free_collection)(U(descriptor) *desc, head_record *head_ptr);
+
+void U(out_of_free_collection)(U(descriptor) *desc, head_record *head_ptr);
+
+void * U(alloc_from_bin)(
+ U(descriptor) *desc, ptr_record *bin_front_ptr, U(size_bau) n_baus);
+
+#ifdef HMM_AUDIT_FAIL
+
+/* Simply contains a reference to the HMM_AUDIT_FAIL macro and a
+** dummy return. */
+int U(audit_block_fail_dummy_return)(void);
+
+/* More sickness needed because C has no inline function (yes, it's the
+** "use the comma operator like a semicolon" thing.)
+*/
+
+/* Auditing a block consists of checking that the size in its _head
+** matches the previous block size in the _head of the next block. */
+#define AUDIT_BLOCK_AS_EXPR(HEAD_PTR) \
+ ((BLOCK_BAUS(HEAD_PTR) == \
+ PREV_BLOCK_BAUS(BAUS_FORWARD(HEAD_PTR, BLOCK_BAUS(HEAD_PTR)))) ? \
+ 0 : U(audit_block_fail_dummy_return)())
+
+#define AUDIT_BLOCK(HEAD_PTR) \
+ { void *h_ptr = (HEAD_PTR); AUDIT_BLOCK_AS_EXPR(h_ptr); }
+
+#endif
+
+/* Interface to AVL tree generic package instantiation. */
+
+#define AVL_UNIQUE(BASE) U(avl_ ## BASE)
+
+#define AVL_HANDLE ptr_record *
+
+#define AVL_KEY U(size_bau)
+
+#define AVL_MAX_DEPTH 64
+
+#include "cavl_if.h"
+
+#endif /* Include once. */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c
new file mode 100644
index 00000000..3bbebac6
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c
@@ -0,0 +1,561 @@
+#define __ON2_MEM_C__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "on2_mem.h"
+
+#define INCLUDE_MEMORY_MANAGER 0 //include heap manager functionality
+#define INCLUDE_MEM_TRACKER 0 //include xon2_* calls in the lib
+#define INCLUDE_MEM_CHECKS 1 //include some basic safety checks in
+ //on2_memcpy, _memset, and _memmove
+#if INCLUDE_MEM_TRACKER
+# include "on2_mem_tracker.h"
+# if ON2_MEM_TRACKER_VERSION_CHIEF != 2
+# error "on2_mem requires memory tracker version 2 to track memory usage"
+# endif
+#endif
+
+#define ADDRESS_STORAGE_SIZE sizeof(size_t)
+
+#if defined(VXWORKS)
+# define DEFAULT_ALIGNMENT 32 //default addr alignment to use in
+ //calls to on2_* functions other
+ //than on2_memalign
+#else
+# define DEFAULT_ALIGNMENT 1
+#endif
+
+#if INCLUDE_MEM_TRACKER
+# define TRY_BOUNDS_CHECK 1 //when set to 1 pads each allocation,
+ //integrity can be checked using
+ //on2_MemoryTrackerCheckIntegrity
+ //or on free by defining
+ //TRY_BOUNDS_CHECK_ON_FREE
+#else
+# define TRY_BOUNDS_CHECK 0
+#endif
+
+#if TRY_BOUNDS_CHECK
+# define TRY_BOUNDS_CHECK_ON_FREE 0 //checks mem integrity on every
+ //free, very expensive
+# define BOUNDS_CHECK_VALUE 0xdeadbeef //value stored before/after ea.
+ //mem addr for bounds checking
+# define BOUNDS_CHECK_PAD_SIZE 32 //size of the padding before and
+ //after ea allocation to be filled
+ //with BOUNDS_CHECK_VALUE.
+ //this should be a multiple of 4
+#else
+# define BOUNDS_CHECK_VALUE 0
+# define BOUNDS_CHECK_PAD_SIZE 0
+#endif
+
+unsigned long g_AllocCount = 0;
+
+#if INCLUDE_MEMORY_MANAGER
+# include "heapmm.h"
+# include "hmm_intrnl.h"
+
+# define SHIFT_HMM_ADDR_ALIGN_UNIT 5
+# define TOTAL_MEMORY_TO_ALLOCATE 20971520 // 20 * 1024 * 1024
+//# define TOTAL_MEMORY_TO_ALLOCATE 10485100 // 10 * 1024 * 1024
+//# define TOTAL_MEMORY_TO_ALLOCATE 16777216 // 16 * 1024 * 1024
+
+# define MM_DYNAMIC_MEMORY 1
+# if MM_DYNAMIC_MEMORY
+ unsigned char* g_p_mng_memory_raw = NULL;
+ unsigned char* g_p_mng_memory = NULL;
+# else
+ unsigned char g_p_mng_memory[TOTAL_MEMORY_TO_ALLOCATE];
+# endif
+
+ size_t g_mm_memory_size = TOTAL_MEMORY_TO_ALLOCATE;
+
+ hmm_descriptor hmm_d;
+ int g_mngMemoryAllocated = 0;
+
+ static int On2_MM_CreateHeapMemory();
+ static void* On2_MM_realloc(void* memblk, size_t size);
+#endif //INCLUDE_MEMORY_MANAGER
+
+/* Return the library version packed into one word, one byte per component:
+** chief in bits 31-24, major in 23-16, minor in 15-8, patch in 7-0.
+** Each component is truncated to an unsigned char before packing. */
+unsigned int on2_mem_get_version()
+{
+    const unsigned int chief = (unsigned char)ON2_MEM_VERSION_CHIEF;
+    const unsigned int major = (unsigned char)ON2_MEM_VERSION_MAJOR;
+    const unsigned int minor = (unsigned char)ON2_MEM_VERSION_MINOR;
+    const unsigned int patch = (unsigned char)ON2_MEM_VERSION_PATCH;
+
+    return (chief << 24) | (major << 16) | (minor << 8) | patch;
+}
+
+/* Set the size of the managed heap before it is first created.
+**
+** Returns  0 when the new size was recorded,
+**         -1 when the library was built without the memory manager,
+**         -2 when the manager uses a fixed static buffer,
+**         -3 when size is 0 or the heap has already been created. */
+int on2_mem_set_heap_size(size_t size)
+{
+#if !INCLUDE_MEMORY_MANAGER
+    (void)size;
+    return -1;
+#elif !MM_DYNAMIC_MEMORY
+    return -2;
+#else
+    if (g_mngMemoryAllocated || !size)
+        return -3;
+
+    g_mm_memory_size = size;
+    return 0;
+#endif
+}
+
+/* Allocate size bytes at an address that is a multiple of align.
+**
+** align is expected to be a power of two; 0 is treated as 1.  The raw
+** allocation is over-sized by align + ADDRESS_STORAGE_SIZE bytes so that the
+** aligned pointer can be placed inside it with the raw address stored in
+** the size_t slot immediately below the returned pointer (on2_free and
+** on2_realloc read it back from there).
+**
+** Returns NULL when the padded size would overflow size_t, when the managed
+** heap cannot be created, or when the underlying allocator fails. */
+void* on2_memalign(size_t align, size_t size)
+{
+    void  *addr;
+    void  *x = NULL;
+    size_t total;
+
+    /* align == 0 would make the mask below 0, yielding a NULL "aligned"
+    ** pointer that the header store would then write through. */
+    if (align == 0)
+        align = 1;
+
+    /* Refuse requests whose padded size cannot be represented. */
+    if (align > (size_t)-1 - ADDRESS_STORAGE_SIZE ||
+        size > (size_t)-1 - ADDRESS_STORAGE_SIZE - align)
+        return NULL;
+
+    total = size + align + ADDRESS_STORAGE_SIZE;
+
+#if INCLUDE_MEMORY_MANAGER
+    if (On2_MM_CreateHeapMemory() < 0)
+    {
+        printf("[on2][mm] ERROR on2_memalign() Couldn't create memory for Heap.\n");
+        /* The heap descriptor was never initialised; do not allocate from it. */
+        return NULL;
+    }
+
+    addr = hmm_alloc(&hmm_d,
+                     (hmm_size_aau)(total >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1);
+#else
+    addr = malloc(total);
+#endif //INCLUDE_MEMORY_MANAGER
+
+    if (addr)
+    {
+        /* Round (addr + header) up to the next multiple of align. */
+        x = (void*)(((size_t)((unsigned char*)addr + ADDRESS_STORAGE_SIZE) +
+                     (align - 1)) & ((size_t)0 - align));
+        /* save the actual malloc address */
+        ((size_t*)x)[-1] = (size_t)addr;
+    }
+
+    return x;
+}
+
+/* Allocate size bytes using the library's default alignment.
+** Thin wrapper over on2_memalign; release with on2_free. */
+void* on2_malloc(size_t size)
+{
+    void *block = on2_memalign(DEFAULT_ALIGNMENT, size);
+
+    return block;
+}
+
+/* Allocate a zero-filled array of num elements of size bytes each.
+**
+** Returns NULL if num * size does not fit in size_t (the unchecked product
+** used to wrap and return a block smaller than the caller expected) or if
+** the allocation itself fails. */
+void* on2_calloc(size_t num, size_t size)
+{
+    size_t total;
+    void  *x;
+
+    /* Overflow check done by division so it cannot itself overflow. */
+    if (size != 0 && num > (size_t)-1 / size)
+        return NULL;
+
+    total = num * size;
+    x = on2_memalign(DEFAULT_ALIGNMENT, total);
+
+    if (x)
+        memset(x, 0, total);
+
+    return x;
+}
+
+/* Resize a block obtained from on2_malloc/on2_calloc/on2_memalign.
+**
+** Follows realloc() conventions: a NULL memblk behaves like on2_malloc(size);
+** size == 0 with a non-NULL memblk frees the block and returns NULL;
+** otherwise the contents are preserved up to the lesser of the old and new
+** sizes.  On failure NULL is returned and the original block stays valid.
+** The returned pointer is aligned to DEFAULT_ALIGNMENT.
+**
+** The user data lives at some offset from the raw allocation (header plus
+** alignment padding).  That offset depends on the raw address, so after the
+** underlying realloc moves the block the data may sit at a different offset
+** than the newly aligned pointer; it is moved into place when that happens.
+** Enough slack is requested that the old offset is never cut off. */
+void* on2_realloc(void* memblk, size_t size)
+{
+    const size_t   align = DEFAULT_ALIGNMENT;
+    void          *old_base;
+    void          *new_base;
+    unsigned char *aligned;
+    size_t         old_off, new_off, slack;
+
+    if (!memblk)
+        return on2_malloc(size);
+
+    if (!size)
+    {
+        on2_free(memblk);
+        return NULL;
+    }
+
+    /* Raw allocation address saved just below the user pointer. */
+    old_base = (void*)(((size_t*)memblk)[-1]);
+    old_off  = (size_t)((unsigned char*)memblk - (unsigned char*)old_base);
+
+    /* Room for the header and alignment padding, and never less than the
+    ** offset the data currently has (it may exceed the default when the
+    ** block came from on2_memalign with a larger alignment). */
+    slack = align + ADDRESS_STORAGE_SIZE;
+    if (slack < old_off)
+        slack = old_off;
+
+    if (size > (size_t)-1 - slack)
+        return NULL;
+
+    #if INCLUDE_MEMORY_MANAGER
+    new_base = On2_MM_realloc(old_base, size + slack);
+    #else
+    new_base = realloc(old_base, size + slack);
+    #endif
+
+    if (!new_base)
+        return NULL;
+
+    aligned = (unsigned char*)
+        (((size_t)((unsigned char*)new_base + ADDRESS_STORAGE_SIZE) +
+          (align - 1)) & ((size_t)0 - align));
+    new_off = (size_t)(aligned - (unsigned char*)new_base);
+
+    /* Both old_off + size and new_off + size are within size + slack. */
+    if (new_off != old_off)
+        memmove(aligned, (unsigned char*)new_base + old_off, size);
+
+    /* save the actual malloc address */
+    ((size_t*)aligned)[-1] = (size_t)new_base;
+
+    return aligned;
+}
+
+/* Release a block returned by the on2_* allocators.  NULL is ignored.
+** The raw allocation address is read back from the size_t slot stored
+** immediately below memblk and handed to the underlying allocator. */
+void on2_free(void* memblk)
+{
+    void *base;
+
+    if (!memblk)
+        return;
+
+    base = (void*)(((size_t*)memblk)[-1]);
+
+#if INCLUDE_MEMORY_MANAGER
+    hmm_free(&hmm_d, base);
+#else
+    free(base);
+#endif
+}
+
+#if INCLUDE_MEM_TRACKER
+
+/* Tracked variant of on2_memalign: allocates size bytes aligned to align and
+** records the allocation (address, size, file, line) with the memory
+** tracker.  When TRY_BOUNDS_CHECK is enabled the block is surrounded by
+** BOUNDS_CHECK_PAD_SIZE bytes of BOUNDS_CHECK_VALUE on each side and the
+** returned pointer is the first byte after the leading pad.
+**
+** Returns NULL on allocation failure; a failed allocation is neither counted
+** in g_AllocCount nor added to the tracker. */
+void* xon2_memalign(size_t align, size_t size, char* file, int line)
+{
+    void *x;
+
+    if (g_AllocCount == 0)
+    {
+        /* NOTE(review): xon2_realloc tests this return value with '!'
+        ** instead of '< 0'; one of the two is wrong -- check
+        ** on2_MemoryTrackerInit's contract. */
+        int iRv = on2_MemoryTrackerInit(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE);
+        if (iRv < 0)
+        {
+            printf("ERROR xon2_malloc MEM_TRACK_USAGE error on2_MemoryTrackerInit().\n");
+        }
+    }
+
+#if TRY_BOUNDS_CHECK
+    {
+        const size_t   pad    = BOUNDS_CHECK_PAD_SIZE;
+        unsigned int   tempme = BOUNDS_CHECK_VALUE;
+        unsigned char *xBounds;
+        size_t         i;
+
+        /* size plus both pads must be representable. */
+        if (size > (size_t)-1 - 2 * pad)
+            return NULL;
+
+        xBounds = on2_memalign(align, size + 2 * pad);
+
+        /* Previously a NULL result was written through by the fill loop. */
+        if (!xBounds)
+            return NULL;
+
+        for (i = 0; i < pad; i += sizeof(unsigned int))
+        {
+            memcpy(xBounds + i, &tempme, sizeof(unsigned int));
+            memcpy(xBounds + size + pad + i, &tempme, sizeof(unsigned int));
+        }
+        x = (void*)(xBounds + pad);
+    }
+#else
+    x = on2_memalign(align, size);
+    if (!x)
+        return NULL;
+#endif //TRY_BOUNDS_CHECK
+
+    g_AllocCount++;
+
+    on2_MemoryTrackerAdd((size_t)x, size, file, line);
+
+    return x;
+}
+
+/* Tracked variant of on2_malloc: allocates size bytes with the default
+** alignment and records file/line with the memory tracker. */
+void* xon2_malloc(size_t size, char *file, int line)
+{
+    void *block = xon2_memalign(DEFAULT_ALIGNMENT, size, file, line);
+
+    return block;
+}
+
+/* Tracked variant of on2_calloc: allocates and zero-fills num * size bytes,
+** recording file/line with the memory tracker.
+**
+** Returns NULL if num * size overflows size_t or the allocation fails. */
+void* xon2_calloc(size_t num, size_t size, char *file, int line)
+{
+    size_t total;
+    void  *x;
+
+    /* Overflow check done by division so it cannot itself overflow. */
+    if (size != 0 && num > (size_t)-1 / size)
+        return NULL;
+
+    total = num * size;
+    x = xon2_memalign(DEFAULT_ALIGNMENT, total, file, line);
+
+    if (x)
+        memset(x, 0, total);
+
+    return x;
+}
+
+/* Tracked variant of on2_realloc.
+**
+** The old tracker entry (if any) is removed before the resize, because a
+** successful resize may move the block and rewrite its guard bytes.  On
+** success the new address is recorded with the caller's file/line.  On
+** failure the original entry is restored, but only when the block was
+** actually tracked and still exists.
+**
+** A non-NULL memblk that the tracker does not know is assumed to have been
+** allocated through the untracked on2_* calls and therefore to carry no
+** guard padding (the same assumption xon2_free makes).
+**
+** g_AllocCount is kept in step: +1 when the call acts as an allocation
+** (memblk == NULL), -1 when it acts as a free. */
+void* xon2_realloc(void* memblk, size_t size, char *file, int line)
+{
+    struct MemBlock* p;
+    int   orig_size = 0,
+          orig_line = 0;
+    char* orig_file = NULL;
+    int   was_tracked;
+    int   released = 0;   /* non-zero when the call behaved like free() */
+    void *x = NULL;
+
+    if (g_AllocCount == 0)
+    {
+        /* NOTE(review): xon2_memalign tests this return value with '< 0'
+        ** instead of '!'; one of the two is wrong -- check
+        ** on2_MemoryTrackerInit's contract. */
+        if (!on2_MemoryTrackerInit(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE))
+        {
+            printf("ERROR xon2_malloc MEM_TRACK_USAGE error on2_MemoryTrackerInit().\n");
+        }
+    }
+
+    p = on2_MemoryTrackerFind((size_t)memblk);
+    was_tracked = (p != NULL);
+
+    if (was_tracked)
+    {
+        orig_size = p->size;
+        orig_file = p->file;
+        orig_line = p->line;
+    }
+
+#if TRY_BOUNDS_CHECK_ON_FREE
+    on2_MemoryTrackerCheckIntegrity(file, line);
+#endif
+
+    //have to do this regardless of success, because
+    //the memory that does get realloc'd may change
+    //the bounds values of this block
+    if (was_tracked)
+        on2_MemoryTrackerRemove((size_t)memblk);
+
+#if TRY_BOUNDS_CHECK
+    {
+        const size_t   pad     = BOUNDS_CHECK_PAD_SIZE;
+        unsigned char *xBounds = (unsigned char*)memblk;
+
+        /* Only tracked blocks have a leading pad to step back over. */
+        if (xBounds && was_tracked)
+            xBounds -= pad;
+
+        /* size plus both pads must be representable. */
+        if (size <= (size_t)-1 - 2 * pad)
+        {
+            xBounds = on2_realloc(xBounds, size + 2 * pad);
+
+            if (xBounds)
+            {
+                unsigned int tempme = BOUNDS_CHECK_VALUE;
+                size_t       i;
+
+                /* An unpadded block's data starts at offset 0; shift it
+                ** up to make room for the leading pad. */
+                if (memblk && !was_tracked)
+                    memmove(xBounds + pad, xBounds, size);
+
+                for (i = 0; i < pad; i += sizeof(unsigned int))
+                {
+                    memcpy(xBounds + i, &tempme, sizeof(unsigned int));
+                    memcpy(xBounds + size + pad + i, &tempme, sizeof(unsigned int));
+                }
+
+                x = (void*)(xBounds + pad);
+            }
+        }
+    }
+#else
+    x = on2_realloc(memblk, size);
+    /* on2_realloc frees the block and returns NULL when size is 0. */
+    released = (memblk != NULL && size == 0);
+#endif //TRY_BOUNDS_CHECK
+
+    if (x)
+    {
+        if (!memblk)
+            g_AllocCount++;
+
+        on2_MemoryTrackerAdd((size_t)x, size, file, line);
+    }
+    else if (released)
+    {
+        /* Block is gone; mirror xon2_free's bookkeeping. */
+        g_AllocCount--;
+    }
+    else if (was_tracked)
+    {
+        /* Resize failed: the original block is still live, restore it. */
+        on2_MemoryTrackerAdd((size_t)memblk, orig_size, orig_file, orig_line);
+    }
+
+    return x;
+}
+
+/* Tracked variant of on2_free.  NULL is ignored.
+**
+** The address is removed from the memory tracker.  If the tracker did not
+** know it (on2_MemoryTrackerRemove returns -2) the block is assumed to come
+** from the untracked on2_* calls and to have no guard padding, so it is
+** freed as-is; otherwise, when TRY_BOUNDS_CHECK is enabled, the leading pad
+** is stepped over first to recover the pointer on2_memalign returned.
+**
+** file and line are only used when TRY_BOUNDS_CHECK_ON_FREE is enabled. */
+void xon2_free(void *pAddress, char *file, int line)
+{
+#if !TRY_BOUNDS_CHECK_ON_FREE
+    (void)file; (void)line;
+#endif
+
+    if (pAddress)
+    {
+        void *pBase = pAddress;
+        int   tracked;
+
+        g_AllocCount--;
+
+#if TRY_BOUNDS_CHECK_ON_FREE
+        on2_MemoryTrackerCheckIntegrity(file, line);
+#endif
+
+        //if the addr isn't found in the list, assume it was allocated via
+        //on2_ calls not xon2_, therefore it does not contain any padding
+        tracked = (on2_MemoryTrackerRemove((size_t)pAddress) != -2);
+
+#if TRY_BOUNDS_CHECK
+        /* Done only for a non-NULL, tracked pointer: the old code did this
+        ** arithmetic before the NULL test and referenced the padded pointer
+        ** outside its #if, which failed to compile without bounds checks. */
+        if (tracked)
+            pBase = (unsigned char*)pAddress - BOUNDS_CHECK_PAD_SIZE;
+#else
+        (void)tracked;
+#endif
+
+        on2_free(pBase);
+    }
+}
+
+#endif /*INCLUDE_MEM_TRACKER*/
+
+#if INCLUDE_MEM_CHECKS
+#if defined(VXWORKS)
+/* This function is only used to get a stack trace of the player
+object so we can see where we are having a problem.  It is spawned via sp()
+from the on2_mem* sanity checks below with the id of the offending task,
+passes that id to tt(), and always returns 0. */
+int getMyTT(int task)
+{
+ tt(task);
+
+ return 0;
+}
+#endif
+#endif
+
+/* memcpy wrapper with an optional sanity check.
+**
+** When INCLUDE_MEM_CHECKS is enabled, a warning is printed if either pointer
+** lies below 0x4000 (a NULL or near-NULL pointer); on VxWorks a stack trace
+** of the calling task is also spawned.  The copy is performed regardless
+** and memcpy's result (dest) is returned.
+**
+** The address test is done on the unsigned value so that addresses with the
+** top bit set are not mistaken for small ones, and the length is printed
+** with a specifier that matches its promoted type. */
+void * on2_memcpy(void *dest, const void *source, size_t length)
+{
+#if INCLUDE_MEM_CHECKS
+    if (((size_t)dest < 0x4000) || ((size_t)source < 0x4000))
+    {
+        printf("WARNING: on2_memcpy dest:0x%p source:0x%p len:%lu\n",
+               dest, source, (unsigned long)length);
+
+#if defined(VXWORKS)
+        sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+
+        on2Timer_Sleep(10000);
+#endif
+    }
+#endif
+
+    return memcpy(dest, source, length);
+}
+
+
+/* memset wrapper with an optional sanity check.
+**
+** When INCLUDE_MEM_CHECKS is enabled, a warning is printed if dest lies
+** below 0x4000 (a NULL or near-NULL pointer); on VxWorks a stack trace of
+** the calling task is also spawned.  The fill is performed regardless and
+** memset's result (dest) is returned.
+**
+** The address test is done on the unsigned value so that addresses with the
+** top bit set are not mistaken for small ones, and the length is printed
+** with a specifier that matches its promoted type. */
+void * on2_memset(void *dest, int val, size_t length)
+{
+#if INCLUDE_MEM_CHECKS
+    if ((size_t)dest < 0x4000)
+    {
+        printf("WARNING: on2_memset dest:0x%p val:%d len:%lu\n",
+               dest, val, (unsigned long)length);
+
+#if defined(VXWORKS)
+        sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+
+        on2Timer_Sleep(10000);
+#endif
+    }
+#endif
+
+    return memset(dest, val, length);
+}
+
+
+/* memmove wrapper with an optional sanity check.
+**
+** When INCLUDE_MEM_CHECKS is enabled, a warning is printed if either pointer
+** lies below 0x4000 (a NULL or near-NULL pointer); on VxWorks a stack trace
+** of the calling task is also spawned.  The move is performed regardless
+** and memmove's result (dest) is returned.
+**
+** The address test is done on the unsigned value so that addresses with the
+** top bit set are not mistaken for small ones, and the count is printed
+** with a specifier that matches its promoted type. */
+void * on2_memmove(void *dest, const void *src, size_t count)
+{
+#if INCLUDE_MEM_CHECKS
+    if (((size_t)dest < 0x4000) || ((size_t)src < 0x4000))
+    {
+        printf("WARNING: on2_memmove dest:0x%p src:0x%p count:%lu\n",
+               dest, src, (unsigned long)count);
+
+#if defined(VXWORKS)
+        sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+
+        on2Timer_Sleep(10000);
+#endif
+    }
+#endif
+
+    return memmove(dest, src, count);
+}
+
+#if INCLUDE_MEMORY_MANAGER
+
+/* Create the managed heap on first use.
+**
+** With MM_DYNAMIC_MEMORY the backing store is malloc'ed (with
+** HMM_ADDR_ALIGN_UNIT bytes of slack) and rounded up to an
+** HMM_ADDR_ALIGN_UNIT boundary; otherwise the static g_p_mng_memory array is
+** used.  The heap descriptor hmm_d is then initialised and given the whole
+** region, less the dummy end block, as a single chunk.
+**
+** Returns 0 if the heap already exists or was created, -1 if the backing
+** store could not be allocated.
+**
+** The alignment is computed in size_t: going through unsigned int, as
+** before, discards the upper half of the address on 64-bit targets. */
+static int On2_MM_CreateHeapMemory()
+{
+    int iRv = 0;
+
+    if (!g_mngMemoryAllocated)
+    {
+#if MM_DYNAMIC_MEMORY
+        g_p_mng_memory_raw =
+            (unsigned char*)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT);
+
+        if (g_p_mng_memory_raw)
+        {
+            g_p_mng_memory = (unsigned char*)
+                ((((size_t)g_p_mng_memory_raw) + HMM_ADDR_ALIGN_UNIT - 1) &
+                 ((size_t)0 - (size_t)HMM_ADDR_ALIGN_UNIT));
+
+            printf("[on2][mm] total memory size:%lu g_p_mng_memory_raw:0x%p g_p_mng_memory:0x%p\n"
+                   , (unsigned long)(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT)
+                   , (void*)g_p_mng_memory_raw
+                   , (void*)g_p_mng_memory);
+        }
+        /* Allocation failure is reported once, by the else branch below. */
+
+        if (g_p_mng_memory)
+#endif
+        {
+            hmm_size_bau chunkSize;
+
+            g_mngMemoryAllocated = 1;
+
+            hmm_init(&hmm_d);
+
+            /* Region size in AAUs, minus room for the dummy end block. */
+            chunkSize = (hmm_size_bau)(g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT);
+            chunkSize -= DUMMY_END_BLOCK_BAUS;
+
+            printf("[on2][mm] memory size:%lu for on2 memory manager. g_p_mng_memory:0x%p chunkSize:%lu\n"
+                   , (unsigned long)g_mm_memory_size
+                   , (void*)g_p_mng_memory
+                   , (unsigned long)chunkSize);
+
+            hmm_new_chunk(&hmm_d, (void*)g_p_mng_memory, chunkSize);
+        }
+#if MM_DYNAMIC_MEMORY
+        else
+        {
+            printf("[on2][mm] Couldn't allocate memory:%lu for on2 memory manager.\n"
+                   , (unsigned long)g_mm_memory_size);
+
+            iRv = -1;
+        }
+#endif
+    }
+
+    return iRv;
+}
+
+/* realloc() for blocks that live in the managed heap.
+**
+** memblk must be a raw address previously returned by hmm_alloc; size is
+** the new size in bytes.  The block is resized in place when possible;
+** otherwise a new block is allocated, the old contents are copied over and
+** the old block is freed.  Returns the (possibly moved) block, or NULL if
+** the heap could not be created or no memory is available, in which case
+** the original block is left untouched.
+**
+** When the block has to move, only min(old size, new size) bytes are
+** copied; copying the full new size, as before, reads past the end of a
+** block that is being grown.
+** NOTE(review): this assumes hmm_true_size() reports the usable size of the
+** block in AAUs -- confirm against heapmm.html. */
+static void* On2_MM_realloc(void* memblk, size_t size)
+{
+    void* pRet = NULL;
+
+    if (On2_MM_CreateHeapMemory() < 0)
+    {
+        printf("[on2][mm] ERROR On2_MM_realloc() Couldn't create memory for Heap.\n");
+    }
+    else
+    {
+        const hmm_size_aau old_num_aaus = hmm_true_size(memblk);
+        const hmm_size_aau new_num_aaus =
+            (hmm_size_aau)(size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+
+        if (old_num_aaus == new_num_aaus ||
+            hmm_resize(&hmm_d, memblk, new_num_aaus) == 0)
+        {
+            /* Already the right size, or resized in place. */
+            pRet = memblk;
+        }
+        else
+        {
+            /* Error. Try to malloc and then copy data. */
+            void* pFromMalloc = hmm_alloc(&hmm_d, new_num_aaus);
+
+            if (pFromMalloc)
+            {
+                const size_t old_bytes =
+                    (size_t)old_num_aaus << SHIFT_HMM_ADDR_ALIGN_UNIT;
+
+                on2_memcpy(pFromMalloc, memblk,
+                           size < old_bytes ? size : old_bytes);
+                hmm_free(&hmm_d, memblk);
+
+                pRet = pFromMalloc;
+            }
+        }
+    }
+
+    return pRet;
+}
+
+#endif //INCLUDE_MEMORY_MANAGER
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..8b6d6d89
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj
@@ -0,0 +1,197 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 42;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 0CC4DD1E0BB7930400837D4E /* on2_mem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CC4DD1D0BB7930400837D4E /* on2_mem.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 0CC4DD1D0BB7930400837D4E /* on2_mem.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; path = on2_mem.c; sourceTree = "<group>"; };
+ D2AAC046055464E500DB518D /* libon2_mem.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libon2_mem.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ D289987405E68DCB004EDB86 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 08FB7794FE84155DC02AAC07 /* on2_mem */ = {
+ isa = PBXGroup;
+ children = (
+ 08FB7795FE84155DC02AAC07 /* Source */,
+ C6A0FF2B0290797F04C91782 /* Documentation */,
+ 1AB674ADFE9D54B511CA2CBB /* Products */,
+ );
+ name = on2_mem;
+ sourceTree = "<group>";
+ };
+ 08FB7795FE84155DC02AAC07 /* Source */ = {
+ isa = PBXGroup;
+ children = (
+ 0CC4DD1D0BB7930400837D4E /* on2_mem.c */,
+ );
+ name = Source;
+ sourceTree = "<group>";
+ };
+ 1AB674ADFE9D54B511CA2CBB /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ D2AAC046055464E500DB518D /* libon2_mem.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ C6A0FF2B0290797F04C91782 /* Documentation */ = {
+ isa = PBXGroup;
+ children = (
+ );
+ name = Documentation;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ D2AAC043055464E500DB518D /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ D2AAC045055464E500DB518D /* on2_mem */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "on2_mem" */;
+ buildPhases = (
+ D2AAC043055464E500DB518D /* Headers */,
+ D2AAC044055464E500DB518D /* Sources */,
+ D289987405E68DCB004EDB86 /* Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = on2_mem;
+ productName = on2_mem;
+ productReference = D2AAC046055464E500DB518D /* libon2_mem.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 08FB7793FE84155DC02AAC07 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "on2_mem" */;
+ hasScannedForEncodings = 1;
+ mainGroup = 08FB7794FE84155DC02AAC07 /* on2_mem */;
+ projectDirPath = "";
+ targets = (
+ D2AAC045055464E500DB518D /* on2_mem */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ D2AAC044055464E500DB518D /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 0CC4DD1E0BB7930400837D4E /* on2_mem.c in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1DEB91EC08733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_MODEL_TUNING = G5;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = on2_mem;
+ ZERO_LINK = YES;
+ };
+ name = Debug;
+ };
+ 1DEB91ED08733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ARCHS = (
+ ppc,
+ i386,
+ );
+ GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+ GCC_MODEL_TUNING = G5;
+ INSTALL_PATH = /usr/local/lib;
+ PRODUCT_NAME = on2_mem;
+ };
+ name = Release;
+ };
+ 1DEB91F008733DB70010E9CD /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = include;
+ };
+ name = Debug;
+ };
+ 1DEB91F108733DB70010E9CD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ OBJROOT = build;
+ PREBINDING = NO;
+ SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+ SYMROOT = ../../../../lib/osx;
+ USER_HEADER_SEARCH_PATHS = include;
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "on2_mem" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91EC08733DB70010E9CD /* Debug */,
+ 1DEB91ED08733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "on2_mem" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1DEB91F008733DB70010E9CD /* Debug */,
+ 1DEB91F108733DB70010E9CD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c
new file mode 100644
index 00000000..cf3d0e24
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c
@@ -0,0 +1,596 @@
+#define __ON2_MEM_TRACKER_C__
+/*
+ on2_mem_tracker.c
+
+ jwz 2003-09-30:
+ Stores a list of addresses, their sizes, and the file and line they came from.
+ All exposed lib functions are prefaced by on2_ and allow the global list
+ to be thread safe.
+ Current supported platforms are:
+ Linux, Win32, WinCE and VxWorks
+ Further support can be added by defining the platform specific mutex
+ in the MemoryTracker struct as well as calls to create/destroy/lock/unlock
+ the mutex in on2_MemoryTrackerInit/Destroy and MemoryTrackerLockMutex/UnlockMutex
+*/
+
+#if defined(LINUX)
+#include <pthread.h>
+#elif defined(WIN32) || defined(_WIN32_WCE)
+#include <windows.h>
+#include <winbase.h>
+#elif defined(VXWORKS)
+#include <semLib.h>
+#endif
+
+#include "on2_mem_tracker.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> //VXWORKS doesn't have a malloc/memory.h file,
+ //this should pull in malloc,free,etc.
+#include <stdarg.h>
+
+#undef on2_malloc //undefine any on2_mem macros that may affect calls to
+#undef on2_free //memory functions in this file
+#undef on2_memcpy
+#undef on2_memset
+
+struct MemoryTracker
+{
+ struct MemBlock * head,
+ * tail;
+ int len,
+ totalsize;
+ unsigned int current_allocated,
+ max_allocated;
+
+ #if defined(LINUX)
+ pthread_mutex_t mutex;
+ #elif defined(WIN32) || defined(_WIN32_WCE)
+ HANDLE mutex;
+ #elif defined(VXWORKS)
+ SEM_ID mutex;
+ #else
+ #error "No mutex type defined for this platform!"
+ #endif
+
+ int padding_size,
+ pad_value;
+
+};
+
+
+/* Prototypes for the file-local helpers defined further down in this file. */
+static void memtrack_log(const char* fmt, ...); /* printf-style logger; destination chosen by g_logtype */
+static void MemoryTrackerDump();
+static void MemoryTrackerCheckIntegrity(char* file, unsigned int line);
+static void MemoryTrackerAdd(size_t addr, unsigned int size,
+ char* file, unsigned int line);
+static int MemoryTrackerRemove(size_t addr);
+static struct MemBlock* MemoryTrackerFind(size_t addr); /* does no locking itself; callers lock first */
+
+static int MemoryTrackerLockMutex(); /* 0 on success, nonzero otherwise */
+static int MemoryTrackerUnlockMutex(); /* 0 on success, nonzero otherwise */
+
+static struct MemoryTracker memtrack; //the single global allocation list and its mutex
+static int g_bMemTrackerInited = 0; //set to 1 once on2_MemoryTrackerInit succeeds,
+ //reset to 0 by on2_MemoryTrackerDestroy
+static FILE* g_logfile = NULL; //stream used when g_logtype == 0; while NULL nothing is written
+static int g_logtype = 0; //0: write to g_logfile, 1: OutputDebugString (WIN32 builds without _WIN32_WCE)
+
+/*
+ *
+ * Exposed library functions
+ *
+*/
+
+/*
+    on2_MemoryTrackerInit(int padding_size, int pad_value)
+      padding_size - number of guard bytes expected before and after each
+                     tracked address. A nonzero value enables
+                     MemoryTrackerCheckIntegrity.
+      pad_value    - value every guard word is compared against during
+                     the integrity check.
+
+    Prepares the global tracker: allocates the zeroed sentinel node that
+    heads the list, clears the byte counters, stores the padding settings
+    and creates the platform mutex. Calling it again after a successful
+    initialization changes nothing.
+    Return:
+      1: the tracker is initialized (now or by an earlier call)
+      0: the sentinel could not be allocated or the mutex not created
+*/
+int on2_MemoryTrackerInit(int padding_size, int pad_value)
+{
+    int mutex_failed;
+
+    /* A previous call already succeeded: leave everything untouched. */
+    if (g_bMemTrackerInited)
+        return g_bMemTrackerInited;
+
+    memtrack.head = (struct MemBlock*)malloc(sizeof(struct MemBlock));
+
+    if (!memtrack.head)
+        return g_bMemTrackerInited;
+
+    /* The head never describes an allocation; real entries follow it. */
+    memset(memtrack.head, 0, sizeof(struct MemBlock));
+    memtrack.tail = memtrack.head;
+
+    memtrack.current_allocated = 0;
+    memtrack.max_allocated     = 0;
+    memtrack.padding_size      = padding_size;
+    memtrack.pad_value         = pad_value;
+
+#if defined(LINUX)
+    /* NULL selects the default mutex attributes */
+    mutex_failed = pthread_mutex_init(&memtrack.mutex, NULL);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    /* default security, not initially owned, unnamed */
+    memtrack.mutex = CreateMutex(NULL, FALSE, NULL);
+    mutex_failed = !memtrack.mutex;
+#elif defined(VXWORKS)
+    /* FIFO (non-priority) queueing, initially unlocked */
+    memtrack.mutex = semBCreate(SEM_Q_FIFO, SEM_FULL);
+    mutex_failed = !memtrack.mutex;
+#endif
+
+    if (mutex_failed)
+    {
+        memtrack_log("on2_MemoryTrackerInit: Error creating mutex!\n");
+
+        free(memtrack.head);
+        memtrack.head = NULL;
+
+        return g_bMemTrackerInited;
+    }
+
+    memtrack_log("Memory Tracker init'd, v."on2_mem_tracker_version"\n");
+    g_bMemTrackerInited = 1;
+
+    return g_bMemTrackerInited;
+}
+
+/*
+ on2_MemoryTrackerDestroy()
+ If our global struct was initialized zeros out all its members,
+ frees memory and destroys it's mutex
+*/
+void on2_MemoryTrackerDestroy()
+{
+ if (!MemoryTrackerLockMutex())
+ {
+ struct MemBlock* p = memtrack.head,
+ * p2 = memtrack.head;
+
+ MemoryTrackerDump();
+
+ while(p)
+ {
+ p2 = p;
+ p = p->next;
+
+ free(p2);
+ }
+
+ memtrack.head = NULL;
+ memtrack.tail = NULL;
+ memtrack.len = 0;
+ memtrack.current_allocated = 0;
+ memtrack.max_allocated = 0;
+
+ if(!g_logtype && g_logfile && g_logfile != stderr) {
+ fclose(g_logfile);
+ g_logfile = NULL;
+ }
+
+ MemoryTrackerUnlockMutex();
+
+ g_bMemTrackerInited = 0;
+ }
+}
+
+/*
+    on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                         char * file, unsigned int line)
+      addr - memory address to record
+      size - number of bytes allocated at addr
+      file - source file the allocation was made from
+      line - line within file the allocation was made from
+    Public entry point for recording an allocation. All the work,
+    including taking the tracker mutex, is done by the internal
+    MemoryTrackerAdd.
+*/
+void on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                          char * file, unsigned int line)
+{
+    /* straight pass-through to the internal implementation */
+    MemoryTrackerAdd(addr,
+                     size,
+                     file,
+                     line);
+}
+
+/*
+    on2_MemoryTrackerRemove(size_t addr)
+      addr - memory address to drop from the list
+    Public entry point for forgetting an allocation. The internal
+    MemoryTrackerRemove takes the tracker mutex and does the work.
+    Return:
+      whatever MemoryTrackerRemove returns:
+       0: addr was found and removed
+      -1: the mutex could not be locked
+      -2: addr is not in the list
+*/
+int on2_MemoryTrackerRemove(size_t addr)
+{
+    int status = MemoryTrackerRemove(addr);
+
+    return status;
+}
+
+/*
+    on2_MemoryTrackerFind(size_t addr)
+      addr - address to look up in the list
+    Takes the tracker mutex, searches the list and releases the mutex
+    again before returning.
+    Return:
+      pointer to the entry whose address equals addr, or NULL when
+      there is no such entry or the mutex could not be locked.
+    NOTE(review): the mutex is no longer held once this returns, so the
+    entry may be removed by another thread while the caller uses it.
+*/
+struct MemBlock* on2_MemoryTrackerFind(size_t addr)
+{
+    struct MemBlock* match;
+
+    if (MemoryTrackerLockMutex())
+        return NULL;
+
+    match = MemoryTrackerFind(addr);
+    MemoryTrackerUnlockMutex();
+
+    return match;
+}
+
+/*
+    on2_MemoryTrackerDump()
+    Writes the current contents of the global allocation list to the
+    log while holding the tracker mutex. Nothing is written when the
+    mutex cannot be locked.
+*/
+void on2_MemoryTrackerDump()
+{
+    if (MemoryTrackerLockMutex())
+        return;
+
+    MemoryTrackerDump();
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+      file - name of the source file the check was requested from
+      line - line within file the check was requested from
+    Runs the internal integrity check over every tracked address while
+    holding the tracker mutex. Does nothing when the mutex cannot be
+    locked.
+*/
+void on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+{
+    if (MemoryTrackerLockMutex())
+        return;
+
+    MemoryTrackerCheckIntegrity(file, line);
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    on2_MemoryTrackerSetLogType(int type, char* option)
+      type   - 0: log to a stdio stream
+               1: log through OutputDebugString (only available when
+                  WIN32 is defined and _WIN32_WCE is not)
+      option - for type 0, the path of the log file to create or
+               truncate; NULL selects stderr. Not used for type 1.
+    Switches the destination of the tracker's log output. A log file
+    opened by an earlier call is closed once the new destination is in
+    place, so repeated calls do not leak FILE handles. When the request
+    fails the previous destination is left untouched.
+    On success the version banner is written to the new destination.
+    Return:
+       0: on success
+      -1: if the logging type could not be set, because the value was
+          invalid or because the file could not be opened
+*/
+int on2_MemoryTrackerSetLogType(int type, char* option)
+{
+    int ret = -1;
+
+    switch(type) {
+    case 0:
+        {
+            /* open the new destination first so a failure changes nothing */
+            FILE* dest = option ? fopen(option, "w") : stderr;
+
+            if (dest) {
+                if (g_logfile && g_logfile != stderr)
+                    fclose(g_logfile);
+
+                g_logfile = dest;
+                g_logtype = 0;
+                ret = 0;
+            }
+        }
+        break;
+#if defined(WIN32) && !defined(_WIN32_WCE)
+    case 1:
+        /* a file opened for type 0 is no longer needed; stderr stays open */
+        if (g_logfile && g_logfile != stderr)
+            fclose(g_logfile);
+
+        g_logfile = NULL;
+        g_logtype = type;
+        ret = 0;
+        break;
+#endif
+    default:
+        break;
+    }
+
+    //output the version to the new logging destination
+    if(!ret)
+        memtrack_log("Memory Tracker init'd, v."on2_mem_tracker_version"\n");
+
+    return ret;
+}
+
+/*
+ *
+ * END - Exposed library functions
+ *
+*/
+
+
+/*
+ *
+ * Internal library functions
+ *
+*/
+
+/*
+    memtrack_log(const char* fmt, ...)
+      fmt - printf-style format string, followed by its arguments
+    Writes one formatted message to the destination selected by
+    g_logtype:
+      0: g_logfile, flushed after every message; nothing is written
+         while g_logfile is NULL
+      1: OutputDebugString (WIN32 builds without _WIN32_WCE only);
+         messages longer than the local buffer are truncated
+    Any other log type discards the message.
+*/
+static void memtrack_log(const char* fmt, ...)
+{
+    va_list list;
+
+    va_start(list, fmt);
+    switch(g_logtype) {
+    case 0:
+        if (g_logfile) {
+            vfprintf(g_logfile, fmt, list);
+            fflush(g_logfile);
+        }
+        break;
+#if defined(WIN32) && !defined(_WIN32_WCE)
+    case 1:
+        {
+            char temp[1024];
+
+            /* _vsnprintf does not write a terminator when the output is
+               truncated, so the last byte is reserved and set here. */
+            _vsnprintf(temp, sizeof(temp)/sizeof(char)-1, fmt, list);
+            temp[sizeof(temp)/sizeof(char)-1] = '\0';
+            OutputDebugString(temp);
+        }
+        break;
+#endif
+    default:
+        break;
+    }
+    va_end(list);
+}
+
+/*
+    MemoryTrackerDump()
+    Logs the current and peak number of tracked bytes, followed by one
+    line per tracked entry (address, size, file and line). The sentinel
+    head is skipped. Does no locking itself; callers hold the mutex.
+    The conversion specifiers match the argument types: the counters
+    are unsigned, and the address is widened to unsigned long so that
+    it is not passed where an unsigned int is expected.
+*/
+static void MemoryTrackerDump()
+{
+    int i = 0;
+    struct MemBlock* p = (memtrack.head ? memtrack.head->next : NULL);
+
+    memtrack_log("Currently Allocated= %u; Max allocated= %u\n",
+                 memtrack.current_allocated, memtrack.max_allocated);
+
+    while(p)
+    {
+        /* size and line were stored from unsigned int arguments in
+           MemoryTrackerAdd, so the casts cannot lose information */
+        memtrack_log("memblocks[%d].addr= 0x%.8lx, memblocks[%d].size= %u, file: %s, line: %u\n", i,
+                     (unsigned long)p->addr, i, (unsigned int)p->size,
+                     p->file, (unsigned int)p->line);
+
+        p = p->next;
+        ++i;
+    }
+}
+
+/*
+    MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+      file - the file name where the check was placed
+      line - the line in file where the check was placed
+    If a nonzero padding_size was supplied to on2_MemoryTrackerInit()
+    this function walks every address in the list and compares the
+    padding_size bytes in front of addr and the padding_size bytes that
+    follow addr+size, one unsigned int at a time, with pad_value.
+    Every word that differs produces one log message naming the
+    requesting file/line and the entry's own file/line.
+    Does no locking itself; callers hold the mutex.
+    NOTE(review): words are read in sizeof(unsigned int) steps, so
+    padding_size is presumably a multiple of that size - confirm.
+*/
+static void MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+{
+    /* nothing to verify without guard areas or without a list */
+    if (memtrack.padding_size && memtrack.head)
+    {
+        int i,
+            index = 0;
+        unsigned int expected = memtrack.pad_value,
+                     front,
+                     back;
+        unsigned char *guard;
+        struct MemBlock* p = memtrack.head->next;
+
+        while (p)
+        {
+            /* start of the guard area that precedes the tracked address */
+            guard = (unsigned char*)p->addr - memtrack.padding_size;
+
+            for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int))
+            {
+                /* memcpy rather than a pointer cast: the guard words are
+                   read without assuming unsigned int alignment */
+                memcpy(&front, guard + i, sizeof(unsigned int));
+                memcpy(&back, guard + p->size + memtrack.padding_size + i,
+                       sizeof(unsigned int));
+
+                if ((front != expected) || (back != expected))
+                {
+                    memtrack_log("\n[on2_mem integrity check failed]:\n"
+                                 "    index[%d] {%s:%u} addr=0x%lx, size= %u,"
+                                 " file: %s, line: %u c0:0x%x c1:0x%x\n",
+                                 index, file, line, (unsigned long)p->addr,
+                                 (unsigned int)p->size, p->file,
+                                 (unsigned int)p->line, front, back);
+                }
+            }
+
+            ++index;
+            p = p->next;
+        }
+    }
+}
+
+/*
+    MemoryTrackerAdd(size_t addr, unsigned int size,
+                     char * file, unsigned int line)
+    Appends a new entry holding addr, its size, and the file and line it
+    came from to the end of the list while holding the tracker mutex.
+    The current byte count grows by size and the peak byte count is
+    raised when the current count exceeds it.
+    If the entry cannot be allocated the failure is logged and the whole
+    tracker is destroyed. Nothing happens when the mutex cannot be
+    locked.
+*/
+void MemoryTrackerAdd(size_t addr, unsigned int size,
+                      char * file, unsigned int line)
+{
+    struct MemBlock* node;
+
+    if (MemoryTrackerLockMutex())
+        return;
+
+    node = malloc(sizeof(struct MemBlock));
+
+    if (!node)
+    {
+        memtrack_log("MemoryTrackerAdd: error allocating memory!\n");
+        /* release the lock first: Destroy takes it again itself */
+        MemoryTrackerUnlockMutex();
+        on2_MemoryTrackerDestroy();
+        return;
+    }
+
+    /* link the new entry behind the current tail */
+    node->prev = memtrack.tail;
+    node->prev->next = node;
+
+    node->addr = addr;
+    node->size = size;
+    node->line = line;
+    node->file = file;
+    node->next = NULL;
+
+    memtrack.tail = node;
+
+    memtrack.current_allocated += size;
+
+    if (memtrack.max_allocated < memtrack.current_allocated)
+        memtrack.max_allocated = memtrack.current_allocated;
+
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    MemoryTrackerRemove(size_t addr)
+    Looks addr up in the list while holding the tracker mutex. When it
+    is found, the entry is unlinked and freed and its size is
+    subtracted from the current byte count; otherwise the miss is
+    logged.
+    Return:
+      0: on success
+      -1: if the mutex could not be locked
+      -2: if the addr was not found in the list
+*/
+int MemoryTrackerRemove(size_t addr)
+{
+    int ret = -1;
+
+    if (!MemoryTrackerLockMutex())
+    {
+        struct MemBlock* p = MemoryTrackerFind(addr);
+
+        if (p)
+        {
+            memtrack.current_allocated -= p->size;
+
+            /* p is never the sentinel head, so p->prev always exists */
+            p->prev->next = p->next;
+            if (p->next)
+                p->next->prev = p->prev;
+            else
+                memtrack.tail = p->prev;
+
+            ret = 0;
+            free(p);
+        }
+        else
+        {
+            /* addr is a size_t: widen it explicitly instead of passing it
+               where printf expects an unsigned int */
+            memtrack_log("MemoryTrackerRemove(): addr not found in list, 0x%.8lx\n",
+                         (unsigned long)addr);
+            ret = -2;
+        }
+
+        MemoryTrackerUnlockMutex();
+    }
+
+    return ret;
+}
+
+/*
+    MemoryTrackerFind(size_t addr)
+    Searches the list, starting after the sentinel head, for the first
+    entry whose address equals addr.
+    NOTE: this function does no locking. The mutex MUST already be held
+          by the calling internal function, which spares a second
+          lock/unlock pair in callers such as Remove.
+    Returns: pointer to the mem block if found, NULL otherwise
+             (including when the list has no head)
+*/
+static struct MemBlock* MemoryTrackerFind(size_t addr)
+{
+    struct MemBlock* node;
+
+    if (!memtrack.head)
+        return NULL;
+
+    for (node = memtrack.head->next; node; node = node->next)
+    {
+        if (node->addr == addr)
+            break;
+    }
+
+    /* NULL here means the walk ran off the end without a match */
+    return node;
+}
+
+/*
+    MemoryTrackerLockMutex()
+    Acquires the tracker mutex through the platform specific call and
+    waits without a timeout. A failed lock attempt is logged.
+    Returns:
+      0:       the mutex is now held
+      nonzero: failure; -1 when the tracker is not initialized,
+               otherwise the result of the platform lock call
+    NOTE(review): on Win32 every nonzero WaitForSingleObject result is
+    reported as a failure - confirm that this is intended for all of
+    them.
+*/
+static int MemoryTrackerLockMutex()
+{
+    int status = -1;
+
+    if (!g_bMemTrackerInited)
+        return status;
+
+#if defined(LINUX)
+    status = pthread_mutex_lock(&memtrack.mutex);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    status = WaitForSingleObject(memtrack.mutex, INFINITE);
+#elif defined(VXWORKS)
+    status = semTake(memtrack.mutex, WAIT_FOREVER);
+#endif
+
+    if (status)
+        memtrack_log("MemoryTrackerLockMutex: mutex lock failed\n");
+
+    return status;
+}
+
+/*
+    MemoryTrackerUnlockMutex()
+    Releases the tracker mutex through the platform specific call.
+    A failed unlock attempt is logged.
+    Returns:
+      0:       the mutex was released
+      nonzero: failure; -1 when the tracker is not initialized,
+               otherwise derived from the platform unlock call
+*/
+static int MemoryTrackerUnlockMutex()
+{
+    int status = -1;
+
+    if (!g_bMemTrackerInited)
+        return status;
+
+#if defined(LINUX)
+    status = pthread_mutex_unlock(&memtrack.mutex);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    /* ReleaseMutex reports success with a nonzero value, hence the negation */
+    status = !ReleaseMutex(memtrack.mutex);
+#elif defined(VXWORKS)
+    status = semGive(memtrack.mutex);
+#endif
+
+    if (status)
+        memtrack_log("MemoryTrackerUnlockMutex: mutex unlock failed\n");
+
+    return status;
+}