Initial community commit

author: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
committer: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
commit: 20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree: 12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/libvpShared/corelibs
parent: 537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download: winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
256 files changed, 110635 insertions, 0 deletions
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg b/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg
new file mode 100644
index 00000000..746a4e1b
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.plg
@@ -0,0 +1,1786 @@
+<html>
+<body>
+<pre>
+<h1>Build Log</h1>
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB85.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB85.bat"
+Creating temporary file "C:\tmp\RSPB86.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB86.bat"
+Creating temporary file "C:\tmp\RSPB87.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB87.bat"
+Creating temporary file "C:\tmp\RSPB88.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB88.bat"
+Creating temporary file "C:\tmp\RSPB89.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\release\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB89.bat"
+Creating temporary file "C:\tmp\RSPB8A.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\ObjectCode\cpuID\release/" /Fd"..\..\..\ObjectCode\cpuID\release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB8A.tmp" 
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB8B.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\Release\s_cpuid.lib" 
+\NEWZIP\ObjectCode\cpuID\release\cid.obj
+\NEWZIP\ObjectCode\cpuID\release\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\release\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\release\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\release\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\release\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\release\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB8B.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Generating Code...
+Compiling...
+Wmt_CpuID.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB8C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB8C.bat"
+Creating temporary file "C:\tmp\RSPB8D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB8D.bat"
+Creating temporary file "C:\tmp\RSPB8E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB8E.bat"
+Creating temporary file "C:\tmp\RSPB8F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB8F.bat"
+Creating temporary file "C:\tmp\RSPB90.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB90.bat"
+Creating temporary file "C:\tmp\RSPB91.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\cpuID\debug/CPUIdLib.pch" /YX /Fo"..\..\..\ObjectCode\cpuID\debug/" /Fd"..\..\..\ObjectCode\cpuID\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB91.tmp" 
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB92.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cpuid.lib" 
+\NEWZIP\ObjectCode\cpuID\debug\cid.obj
+\NEWZIP\ObjectCode\cpuID\debug\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\debug\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\debug\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\debug\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\debug\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\debug\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB92.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Wmt_CpuID.cpp
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB93.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB93.bat"
+Creating temporary file "C:\tmp\RSPB94.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB94.bat"
+Creating temporary file "C:\tmp\RSPB95.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB95.bat"
+Creating temporary file "C:\tmp\RSPB96.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB96.bat"
+Creating temporary file "C:\tmp\RSPB97.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\..\include" /I "..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Release/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Release/" /Fd"..\..\..\ObjectCode\ColorSpaces\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB97.tmp" 
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB98.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\release\s_cconv.lib" 
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB98.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB99.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB99.bat"
+Creating temporary file "C:\tmp\RSPB9A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB9A.bat"
+Creating temporary file "C:\tmp\RSPB9B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB9B.bat"
+Creating temporary file "C:\tmp\RSPB9C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB9C.bat"
+Creating temporary file "C:\tmp\RSPB9D.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Debug/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Debug/" /Fd"..\..\..\ObjectCode\ColorSpaces\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB9D.tmp" 
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB9E.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cconv.lib" 
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB9E.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB9F.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\Release/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\Release/" /Fd"..\..\..\..\ObjectCode\dxv\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB9F.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_dxv.lib"  \NEWZIP\ObjectCode\dxv\Release\vscreen.obj \NEWZIP\ObjectCode\dxv\Release\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA0.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\include" /I "..\..\include\win32" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\debug/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\debug/" /Fd"..\..\..\..\ObjectCode\dxv\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA0.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_dxv.lib"  \NEWZIP\ObjectCode\dxv\debug\vscreen.obj \NEWZIP\ObjectCode\dxv\debug\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA1.tmp" with contents
+[
+/nologo /MT /W3 /GX /O2 /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\release/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\release/" /Fd"..\..\..\..\obj\on2_mem\win32\release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA1.tmp" 
+Creating temporary file "C:\tmp\RSPBA2.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\release\on2_mem.lib" 
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA2.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA3.tmp" with contents
+[
+/nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\debug/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\debug/" /Fd"..\..\..\..\obj\on2_mem\win32\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA3.tmp" 
+Creating temporary file "C:\tmp\RSPBA4.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\debug\on2_mem.lib" 
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA4.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA5.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\vp60\include" /I "..\include" /I "..\..\include" /I ".\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /Fp"Release/preproc.pch" /YX /Fo"Release/" /Fd"Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA5.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_preproc.lib"  .\Release\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA6.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\vp60\include" /I "..\..\include" /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /Fo"Debug/" /Fd"Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA6.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_preproc.lib"  .\Debug\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA7.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Ox /Ot /Oa /Ow /Og /Oi /Ob2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Release/" /Fd"..\..\..\..\ObjectCode\vp6d\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA7.tmp" 
+Creating temporary file "C:\tmp\RSPBA8.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60d.lib" 
+\NEWZIP\VP6\ObjectCode\vp6d\Release\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBA8.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBA9.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /I "..\..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Debug/" /Fd"..\..\..\..\ObjectCode\vp6d\Debug/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPBA9.tmp" 
+Creating temporary file "C:\tmp\RSPBAA.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60d.lib" 
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBAA.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBAB.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPBAB.bat"
+Creating temporary file "C:\tmp\RSPBAC.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPBAC.bat"
+Creating temporary file "C:\tmp\RSPBAD.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPBAD.bat"
+Creating temporary file "C:\tmp\RSPBAE.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPBAE.bat"
+Creating temporary file "C:\tmp\RSPBAF.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6E_EXPORTS" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\Release/" /Fd"..\..\..\..\ObjectCode\vp6e\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBAF.tmp" 
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPBB0.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60e.lib" 
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB0.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPBB1.bat"
+Creating temporary file "C:\tmp\RSPBB2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPBB2.bat"
+Creating temporary file "C:\tmp\RSPBB3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPBB3.bat"
+Creating temporary file "C:\tmp\RSPBB4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPBB4.bat"
+Creating temporary file "C:\tmp\RSPBB5.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /I "..\..\..\..\include\vp60" /D "vp6E_EXPORTS" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\debug/" /Fd"..\..\..\..\ObjectCode\vp6e\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB5.tmp" 
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPBB6.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60e.lib" 
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB6.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB7.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /Ob2 /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\ObjectCode\vpppd6\Release/" /Fd"..\..\..\..\ObjectCode\vpppd6\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB7.tmp" 
+Creating temporary file "C:\tmp\RSPBB8.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\release\s_vpppd.lib" 
+\NEWZIP\ObjectCode\vpppd6\Release\borders.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblock.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlace.obj
+\NEWZIP\ObjectCode\vpppd6\Release\dering.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopfilter.obj
+\NEWZIP\ObjectCode\vpppd6\Release\postproc.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scale.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblocker.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopf_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scaleopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBB8.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+Generating Code...
+Compiling...
+simpledeblock_asm.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBB9.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\ObjectCode\vppp\Debug/vppp.pch" /YX /Fo"..\..\..\..\ObjectCode\vppp\Debug/" /Fd"..\..\..\..\ObjectCode\vppp\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBB9.tmp" 
+Creating temporary file "C:\tmp\RSPBBA.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpppd.lib" 
+\NEWZIP\ObjectCode\vppp\Debug\borders.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblock.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlace.obj
+\NEWZIP\ObjectCode\vppp\Debug\dering.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopfilter.obj
+\NEWZIP\ObjectCode\vppp\Debug\postproc.obj
+\NEWZIP\ObjectCode\vppp\Debug\scale.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblocker.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopf_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\scaleopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBA.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+simpledeblock_asm.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBB.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\..\ObjectCode\vputil\Release/" /Fd"..\..\..\..\ObjectCode\vputil\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBBB.tmp" 
+Creating temporary file "C:\tmp\RSPBBC.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vputil.lib" 
+\NEWZIP\ObjectCode\vputil\Release\fdct.obj
+\NEWZIP\ObjectCode\vputil\Release\idctpart.obj
+\NEWZIP\ObjectCode\vputil\Release\reconstruct.obj
+\NEWZIP\ObjectCode\vputil\Release\vputil.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\filtmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\filtwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxidct.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxrecon.obj
+\NEWZIP\ObjectCode\vputil\Release\uoptsystemdependant.obj
+\NEWZIP\ObjectCode\vputil\Release\vputilasm.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtidct.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBC.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBD.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\..\ObjectCode\vputil\Debug/vputil.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vputil\Debug/" /Fd"..\..\..\..\..\ObjectCode\vputil\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBBD.tmp" 
+Creating temporary file "C:\tmp\RSPBBE.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vputil.lib" 
+\ObjectCode\vputil\Debug\fdct.obj
+\ObjectCode\vputil\Debug\idctpart.obj
+\ObjectCode\vputil\Debug\reconstruct.obj
+\ObjectCode\vputil\Debug\vputil.obj
+\ObjectCode\vputil\Debug\fdctmmx.obj
+\ObjectCode\vputil\Debug\fdctwmt.obj
+\ObjectCode\vputil\Debug\filtmmx.obj
+\ObjectCode\vputil\Debug\filtwmt.obj
+\ObjectCode\vputil\Debug\mmxidct.obj
+\ObjectCode\vputil\Debug\mmxrecon.obj
+\ObjectCode\vputil\Debug\uoptsystemdependant.obj
+\ObjectCode\vputil\Debug\vputilasm.obj
+\ObjectCode\vputil\Debug\wmtidct.obj
+\ObjectCode\vputil\Debug\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBBE.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBBF.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPBBF.bat"
+Creating temporary file "C:\tmp\RSPBC0.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPBC0.bat"
+Creating temporary file "C:\tmp\RSPBC1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPBC1.bat"
+Creating temporary file "C:\tmp\RSPBC2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPBC2.bat"
+Creating temporary file "C:\tmp\RSPBC3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPBC3.bat"
+Creating temporary file "C:\tmp\RSPBC4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPBC4.bat"
+Creating temporary file "C:\tmp\RSPBC5.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPBC5.bat"
+Creating temporary file "C:\tmp\RSPBC6.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPBC6.bat"
+Creating temporary file "C:\tmp\RSPBC7.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPBC7.bat"
+Creating temporary file "C:\tmp\RSPBC8.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPBC8.bat"
+Creating temporary file "C:\tmp\RSPBC9.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPBC9.bat"
+Creating temporary file "C:\tmp\RSPBCA.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPBCA.bat"
+Creating temporary file "C:\tmp\RSPBCB.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBCB.tmp" 
+Creating temporary file "C:\tmp\RSPBCC.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBCC.tmp" 
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPBCD.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vpxblit.lib" 
+\ObjectCode\vpxblit\Release\ctables.obj
+\ObjectCode\vpxblit\Release\wksetblt.obj
+\ObjectCode\vpxblit\Release\bcf00_c.obj
+\ObjectCode\vpxblit\Release\bcf10_c.obj
+\ObjectCode\vpxblit\Release\bcs00_c.obj
+\ObjectCode\vpxblit\Release\bcs10_c.obj
+\ObjectCode\vpxblit\Release\bct00_c.obj
+\ObjectCode\vpxblit\Release\bct10_c.obj
+\ObjectCode\vpxblit\Release\bcu00_c.obj
+\ObjectCode\vpxblit\Release\bcy00_c.obj
+\ObjectCode\vpxblit\Release\vpx_reg.obj
+\ObjectCode\vpxblit\Release\bct00_targa_c.obj
+\ObjectCode\vpxblit\Release\bcc00.obj
+\ObjectCode\vpxblit\Release\bcc10.obj
+\ObjectCode\vpxblit\Release\bcd00.obj
+\ObjectCode\vpxblit\Release\bcf00.obj
+\ObjectCode\vpxblit\Release\bcf10.obj
+\ObjectCode\vpxblit\Release\bcs00.obj
+\ObjectCode\vpxblit\Release\bcs10.obj
+\ObjectCode\vpxblit\Release\bct00.obj
+\ObjectCode\vpxblit\Release\bct10.obj
+\ObjectCode\vpxblit\Release\bcu00.obj
+\ObjectCode\vpxblit\Release\bcy00.obj
+\ObjectCode\vpxblit\Release\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBCD.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Generating Code...
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPBCE.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPBCE.bat"
+Creating temporary file "C:\tmp\RSPBCF.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPBCF.bat"
+Creating temporary file "C:\tmp\RSPBD0.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPBD0.bat"
+Creating temporary file "C:\tmp\RSPBD1.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPBD1.bat"
+Creating temporary file "C:\tmp\RSPBD2.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPBD2.bat"
+Creating temporary file "C:\tmp\RSPBD3.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPBD3.bat"
+Creating temporary file "C:\tmp\RSPBD4.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPBD4.bat"
+Creating temporary file "C:\tmp\RSPBD5.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPBD5.bat"
+Creating temporary file "C:\tmp\RSPBD6.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPBD6.bat"
+Creating temporary file "C:\tmp\RSPBD7.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPBD7.bat"
+Creating temporary file "C:\tmp\RSPBD8.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPBD8.bat"
+Creating temporary file "C:\tmp\RSPBD9.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPBD9.bat"
+Creating temporary file "C:\tmp\RSPBDA.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBDA.tmp" 
+Creating temporary file "C:\tmp\RSPBDB.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPBDB.tmp" 
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPBDC.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpxblit.lib" 
+\ObjectCode\vpxblit\Debug\ctables.obj
+\ObjectCode\vpxblit\Debug\wksetblt.obj
+\ObjectCode\vpxblit\Debug\bcf00_c.obj
+\ObjectCode\vpxblit\Debug\bcf10_c.obj
+\ObjectCode\vpxblit\Debug\bcs00_c.obj
+\ObjectCode\vpxblit\Debug\bcs10_c.obj
+\ObjectCode\vpxblit\Debug\bct00_c.obj
+\ObjectCode\vpxblit\Debug\bct10_c.obj
+\ObjectCode\vpxblit\Debug\bcu00_c.obj
+\ObjectCode\vpxblit\Debug\bcy00_c.obj
+\ObjectCode\vpxblit\Debug\vpx_reg.obj
+\ObjectCode\vpxblit\Debug\bct00_targa_c.obj
+\ObjectCode\vpxblit\Debug\bcc00.obj
+\ObjectCode\vpxblit\Debug\bcc10.obj
+\ObjectCode\vpxblit\Debug\bcd00.obj
+\ObjectCode\vpxblit\Debug\bcf00.obj
+\ObjectCode\vpxblit\Debug\bcf10.obj
+\ObjectCode\vpxblit\Debug\bcs00.obj
+\ObjectCode\vpxblit\Debug\bcs10.obj
+\ObjectCode\vpxblit\Debug\bct00.obj
+\ObjectCode\vpxblit\Debug\bct10.obj
+\ObjectCode\vpxblit\Debug\bcu00.obj
+\ObjectCode\vpxblit\Debug\bcy00.obj
+\ObjectCode\vpxblit\Debug\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPBDC.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+</pre>
+</body>
+</html>
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj
new file mode 100644
index 00000000..c95896c5
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{77A73D85-7602-42F3-BAC4-8D7F7BFF8659}</ProjectGuid>
+    <RootNamespace>CPUIdLib</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\obj\CPUIdLib\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\..\..\libvp6\corelibs\include;..\..\..\libvp6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="Win32\cid.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="Win32\Wmt_CpuID.cpp">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="Win32\cpuid.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="Win32\D9xOSSupXMM.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="Win32\InitXMMReg.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="Win32\TrashXMMreg.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="Win32\VerifyXMMReg.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ml /Zi /Zm /Cx /c /coff /Fl"$(IntDir)%(Filename)".lst /Fo "$(IntDir)%(Filename)".obj "%(FullPath)"
+</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\cdxv\dxv2\dxv2.vcxproj">
+      <Project>{adac45fd-b93f-40a3-85b2-dbeca1283614}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters
new file mode 100644
index 00000000..dc027c5f
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/CPUIdLib.vcxproj.filters
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{8ff77253-bd53-4d72-a4d0-4620071c05d4}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+    </Filter>
+    <Filter Include="Source Files\Win32">
+      <UniqueIdentifier>{71967989-d210-421e-9b32-ca6c33a448ee}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="Win32\cid.c">
+      <Filter>Source Files\Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="Win32\Wmt_CpuID.cpp">
+      <Filter>Source Files\Win32</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="Win32\cpuid.asm">
+      <Filter>Source Files\Win32</Filter>
+    </CustomBuild>
+    <CustomBuild Include="Win32\D9xOSSupXMM.asm">
+      <Filter>Source Files\Win32</Filter>
+    </CustomBuild>
+    <CustomBuild Include="Win32\InitXMMReg.asm">
+      <Filter>Source Files\Win32</Filter>
+    </CustomBuild>
+    <CustomBuild Include="Win32\TrashXMMreg.asm">
+      <Filter>Source Files\Win32</Filter>
+    </CustomBuild>
+    <CustomBuild Include="Win32\VerifyXMMReg.asm">
+      <Filter>Source Files\Win32</Filter>
+    </CustomBuild>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm b/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm
new file mode 100644
index 00000000..35594caf
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/D9xOSSupXMM.asm
@@ -0,0 +1,99 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+;
+; **-Does9xOSSupportXMM
+;
+; This function will verify if the operating system supports the XMM
+; instructions.  According to Intel documentation 
+;
+;       Intel Architecture
+;       Software Developer
+;       Manual
+;       Volume 1:
+;       Basic Architecture
+;
+; The following needs to be true for the OS to support the XMM instructions
+;
+;   CR0.EM(bit 2) = 0 (emulation disabled)
+;   CR4.OSFXSR(bit 9) = 1 (OS supports saving SIMD floating-point state during context
+;                          switches)
+;
+;  * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * *
+; 
+; This function will NOT run on windows NT systems.  The function reads control registers
+; which are protected under Windows NT.  If you attempt to run this function under Windows NT a
+; protected mode access violation will be generated.
+;
+;  * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * * * * * N O T E * * *
+;
+; Assumptions:
+;  Access to system control registers CR0 and CR4 are not protected
+;
+; Input:
+;   None
+;
+; Output:
+;   1 Returned if OS supports XMM instructions
+;   0 Returned if OS does not support XMM instructions
+;
+;
+
+
+        .586
+        .MODEL  flat, SYSCALL, os_dos
+        .DATA 
+
+NAME x86cpuid
+
+PUBLIC Does9xOSSupportXMM_
+PUBLIC _Does9xOSSupportXMM
+
+        .CODE
+
+; int Does9xOSSupportXMM( void )  -- EAX = 1 if the OS supports XMM, 0 if not
+Does9xOSSupportXMM_:
+_Does9xOSSupportXMM:
+    push    esi                             ; save everything except EAX (the result)
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+; check to see if OS supports SIMD instructions
+    mov     edx,cr0
+    bt      edx,2                           ; CF = CR0.EM, must be 0 (no emulation)
+    jnae    NoXMMSupport                    ; jnae jumps when CF is set
+
+    mov     edx,cr4
+    bt      edx,9                           ; CF = CR4.OSFXSR, must be 1
+    jnc     NoXMMSupport
+
+; we support XMM instructions
+    mov     eax,1
+    jmp     Exit
+
+NoXMMSupport:
+    mov     eax,0                           ; must be set explicitly: EAX is not written anywhere above
+
+Exit:
+    pop     edx                             ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm b/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm
new file mode 100644
index 00000000..4b827162
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/InitXMMReg.asm
@@ -0,0 +1,100 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-InitXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 1 in a set of three.  The other
+; functions are...
+;
+;    TrashXMMReg
+;    VerifyXMMReg
+;
+; Assumptions:
+;   None
+;
+; Input:
+;   None
+;
+; Output:
+;  No return value.  But XMM registers 
+;  0, 1, 2 initialized to a predetermined
+;  value
+;
+;
+        .686P
+		.XMM
+        .MODEL  flat, SYSCALL, os_dos
+        .DATA 
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+        ALIGN 32
+
+PUBLIC XMM0Init
+PUBLIC XMM1Init
+PUBLIC XMM2Init
+
+
+    XMM0Init    REAL4   1.1
+                REAL4   2.2
+                REAL4   3.3
+                REAL4   4.4
+                        
+    XMM1Init    REAL4   5.5
+                REAL4   6.6
+                REAL4   7.7
+                REAL4   8.8
+                        
+    XMM2Init    REAL4   9.9
+                REAL4   10.10
+                REAL4   11.11
+                REAL4   12.12
+
+
+NAME InitXMMReg
+
+PUBLIC InitXMMReg_
+PUBLIC _InitXMMReg
+
+        .CODE
+
+; void InitXMMReg( void )  -- loads XMM0, XMM1, XMM2 from XMM0Init, XMM1Init, XMM2Init
+InitXMMReg_:
+_InitXMMReg:
+    push    esi ; defensive save; no general-purpose register is modified below
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+    movaps  xmm0,XMM0Init                   ; xmm0 <- 1.1, 2.2, 3.3, 4.4
+    movaps  xmm1,XMM1Init                   ; xmm1 <- 5.5, 6.6, 7.7, 8.8
+    movaps  xmm2,XMM2Init                   ; xmm2 <- 9.9, 10.10, 11.11, 12.12
+
+Exit:
+    pop     edx ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm b/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm
new file mode 100644
index 00000000..cf783158
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/TrashXMMreg.asm
@@ -0,0 +1,87 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-TrashXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 2 in a set of three.  The other
+; functions are...
+;
+;    InitXMMReg
+;    VerifyXMMReg
+;
+; Assumptions:
+;   Not necessary for this function to work properly, but
+;   InitXMMReg should have been called to initialize the
+;   XMM registers to a predetermined value
+;
+; Input:
+;   None
+;
+; Output:
+;  No return value.  But XMM registers 
+;  0, 1, 2 written to 0's
+;
+;
+
+        .686P
+		.XMM
+        .MODEL  flat, SYSCALL, os_dos
+        .DATA 
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+         ALIGN 32
+
+    Zeros       REAL4   0.0
+                REAL4   0.0
+                REAL4   0.0
+                REAL4   0.0
+
+
+NAME TrashXMMReg
+
+PUBLIC TrashXMMReg_
+PUBLIC _TrashXMMReg
+
+        .CODE
+
+; void TrashXMMReg( void )  -- overwrites XMM0, XMM1, XMM2 with the four 0.0 values at Zeros
+TrashXMMReg_:
+_TrashXMMReg:
+    push    esi ; defensive save; no general-purpose register is modified below
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+    movaps  xmm0,Zeros                      ; xmm0 <- 0.0, 0.0, 0.0, 0.0
+    movaps  xmm1,Zeros                      ; xmm1 <- same
+    movaps  xmm2,Zeros                      ; xmm2 <- same
+
+Exit:
+    pop     edx ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm b/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm
new file mode 100644
index 00000000..b8837705
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/VerifyXMMReg.asm
@@ -0,0 +1,99 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+
+;
+; **-VerifyXMMReg
+;
+; This function is meant to be run on a Windows NT system to
+; try and determine if the OS supports the XMM registers or
+; not.
+;
+; This function is number 3 in a set of three.  The other
+; functions are...
+;
+;    InitXMMReg
+;    TrashXMMReg
+;
+; Assumptions:
+;   Assumes that InitXMMReg was called to initialize the XMM registers.
+;   Assumes that TrashXMMReg was called from a different thread to clear
+;   the values in the XMM registers.
+;
+; Input:
+;   None
+;
+; Output:
+;   Return 1 (True) if the XMM registers are at the correct values.
+;   (os supports XMM registers)
+;
+;   Return 0 (False) if the XMM registers are not at the correct values.
+;   (os does not support the XMM registers)
+;
+
+        .686P
+		.XMM
+        .MODEL  flat, SYSCALL, os_dos
+        .DATA 
+
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+         ALIGN 32
+
+
+NAME VerifyXMMReg
+
+PUBLIC VerifyXMMReg_
+PUBLIC _VerifyXMMReg
+
+
+EXTERN XMM0Init:REAL4
+EXTERN XMM1Init:REAL4
+EXTERN XMM2Init:REAL4
+
+
+        .CODE
+
+; int VerifyXMMReg( void )  -- EAX = 1 if XMM0..XMM2 still compare equal to the init data, else 0
+VerifyXMMReg_:
+_VerifyXMMReg:
+    push    esi ; defensive save; only EAX is modified below
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+    mov     eax,0                       ; assume will fail
+; NOTE(review): comiss is a scalar compare, so only the low element of each register is checked
+    comiss  xmm0,XMM0Init               ; check XMM0
+    jne     Exit                        ; ZF clear: mismatch, return the 0 already in EAX
+
+    comiss  xmm1,XMM1Init               ; check XMM1
+    jne     Exit
+
+    comiss  xmm2,XMM2Init               ; check XMM2
+    jne     Exit
+
+    mov     eax,1                       ; OS supports XMM registers
+
+Exit:
+    pop     edx ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp b/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp
new file mode 100644
index 00000000..0936b7c9
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/Wmt_CpuID.cpp
@@ -0,0 +1,149 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/****************************************************************************
+ *
+ *   Module Title :     Wmt_CpuID.cpp
+ *
+ *   Description  :     willamette processor detection functions
+ *
+ *
+ *****************************************************************************
+ */
+ 
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+
+#include <excpt.h>
+#include <string.h>
+
+
+extern "C" {
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WillametteNewInstructionHWSupport()
+ *
+ *  INPUTS        :     
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :     return true if the processor supports willamette new
+ *						instructions, return false otherwise
+ *						
+ *
+ *  FUNCTION      :     detect willamette processor
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+int WillametteNewInstructionHWSupport()
+{
+	// Returns 1 only when the CPUID vendor string is "GenuineIntel" and
+	// CPUID leaf 1 reports bit 26 of EDX (mask 04000000h) as set -- the
+	// SSE2 feature flag according to Intel's CPUID documentation.
+	//
+	// Returns 0 in every other case, including when executing CPUID
+	// raises a structured exception.
+
+	const unsigned int featureMask = 0x04000000u;
+	unsigned int leaf1Edx = 0;
+	char vendor[12];	// EBX, EDX, ECX of leaf 0; 12 bytes, no NUL terminator
+
+	// Step 1: CPUID leaf 0 yields the vendor string.
+	__try
+	{
+		__asm
+		{
+			lea		esi,		vendor
+			mov		eax,		0
+			cpuid
+			mov		[esi],		ebx
+			mov		[esi+4],	edx
+			mov		[esi+8],	ecx
+		}
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER)
+	{
+		// CPUID could not be executed: report "not supported".
+		return 0;
+	}
+
+	// Step 2: anything other than an Intel part is rejected.  Exactly the
+	// 12 bytes of vendor[] are compared, so the missing terminator does
+	// not matter.
+	if(memcmp(vendor, "GenuineIntel", sizeof(vendor)) != 0)
+	{
+		return 0;
+	}
+
+	// Step 3: CPUID leaf 1 yields the feature word in EDX; keep a copy so
+	// the bit test can be done in C.
+	__asm
+	{
+		mov		eax,		1
+		cpuid
+		mov		leaf1Edx,	edx
+	}
+
+	if((leaf1Edx & featureMask) != 0)
+	{
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WillametteNewInstructionOSSupport()
+ *
+ *  INPUTS        :     
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :     return true if the OS supports willamette new
+ *						instructions, return false otherwise
+ *						
+ *
+ *  FUNCTION      :     detect willamette processor
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+int	WillametteNewInstructionOSSupport()
+{
+	// Probe by executing a single XORPD: if it completes the answer is 1,
+	// if it raises any structured exception the answer is 0.
+	int osSupport = 1;
+
+	__try
+	{
+		__asm xorpd		xmm0, xmm0
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER)
+	{
+		osSupport = 0;
+	}
+	return osSupport;
+}
+
+}
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/cid.c b/Src/libvpShared/corelibs/CpuID/Win32/cid.c
new file mode 100644
index 00000000..988d625c
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/cid.c
@@ -0,0 +1,152 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include <windows.h>
+#include <stdarg.h>
+#include "cpuidlib.h"
+#include "cidasm.h"
+#include <process.h>
+#include <stdio.h>
+
+extern int	WillametteNewInstructionOSSupport();
+extern int	WillametteNewInstructionHWSupport();
+
+
+/*
+ * **-DoesOSSupportXMM
+ *
+ * This function will check to see if the operating system supports the XMM (Pentium III)
+ * instructions.  The XMM functionality adds 8 128-bit registers to the Pentium II register
+ * set.  With the addition of the new registers the OS needs to preserve and restore the
+ * registers on task switches.
+ *
+ * Decision table:
+ *  GetVersionEx fails                     -> FALSE (nothing is known, so be safe)
+ *  not an NT platform (Windows 9x)        -> TRUE  (Does9xOSSupportXMM is not consulted)
+ *  NT platform, major version >= 5        -> TRUE
+ *  NT platform, major version 4, SP >= 4  -> TRUE
+ *  anything else                          -> FALSE
+ *
+ * A service pack string without a readable number counts as "no service pack"
+ * (service pack 0); it never leaves the number undefined.
+ *
+ * Inputs:
+ *  None
+ *
+ * Outputs:
+ *  True returned if the OS supports the XMM instructions.
+ *  False returned if the OS does not support the XMM instructions.
+ */
+int DoesOSSupportXMM( void )
+{
+   OSVERSIONINFO OSInformation;           // Data structure where OS version will be filled in
+   int           ServicePackNumber = 0;   // 0 means "no service pack number found"
+
+   // need to initialize size of OS info structure before calling GetVersionEx
+   OSInformation.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+
+   if( !GetVersionEx( &OSInformation ) )  // Get OS information
+   {
+      /*
+       * having trouble getting OS information, and the structure has not
+       * been filled in, so none of its fields may be read.
+       * to be safe will return that we do not support XMM
+       * instructions
+       */
+      return( FALSE );
+   }
+
+   if( OSInformation.dwPlatformId != VER_PLATFORM_WIN32_NT )
+   {
+      /*
+       * we are on a Windows 9x system.  Support is assumed; the
+       * Does9xOSSupportXMM() control register probe is not called
+       * from here.
+       */
+      return( TRUE );
+   }
+
+   /*
+    * If we are on a windows NT system we cannot directly
+    * read the control registers to see if the OS supports
+    * the XMM instructions.  We will just check the OS version
+    * and, for version 4, whether service pack 4 is installed.
+    *
+    * szCSDVersion is empty when no service pack is installed and is
+    * otherwise expected to look like "Service Pack 4".  The two leading
+    * words are skipped without being stored; if the number cannot be
+    * read (sscanf does not return 1) the service pack counts as 0.
+    */
+   if( sscanf( OSInformation.szCSDVersion, "%*s %*s %d", &ServicePackNumber ) != 1 )
+   {
+      ServicePackNumber = 0;
+   }
+
+#if 0
+   // some handy debugging info if you are desperate
+   printf("OS Major Revision %d\n", OSInformation.dwMajorVersion );
+   printf("OS Minor REvision %d\n", OSInformation.dwMinorVersion  );
+   printf("Service Pack Number %d\n", ServicePackNumber );
+#endif
+
+   if( OSInformation.dwMajorVersion >= 5 )
+   {
+      return( TRUE );                     // NT 5 and later, no service pack requirement
+   }
+
+   if( ( OSInformation.dwMajorVersion == 4 ) && ( ServicePackNumber >= 4 ) )
+   {
+      return( TRUE );                     // NT 4 with service pack 4 or greater
+   }
+
+   // NT 4 below service pack 4, or an NT version older than 4
+   return( FALSE );
+}
+
+/*
+ * **-findCPUId
+ *
+ * See cpuidlib.h for a detailed description of this function
+ */
+PROCTYPE findCPUId( void )
+{
+   /*
+    * The result is whatever getCPUType() reports, except that XMM is
+    * promoted to WMT when both Willamette checks return non-zero (the
+    * hardware check runs first; the OS check only runs if it passes).
+    *
+    * DoesOSSupportXMM() is not called here (that call is disabled), so an
+    * XMM result is never dropped back to PII.  The original note on this
+    * function says that probing with an illegal instruction crashed the
+    * OS under Windows 98 when running in mplayer.
+    */
+   PROCTYPE Detected = getCPUType();
+
+   if( Detected != XMM )
+   {
+      return( Detected );
+   }
+
+   if( !WillametteNewInstructionHWSupport() )
+   {
+      return( Detected );
+   }
+
+   if( WillametteNewInstructionOSSupport() )
+   {
+      Detected = WMT;
+   }
+
+   return( Detected );
+}
+
+
diff --git a/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm b/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm
new file mode 100644
index 00000000..705d9d9b
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/Win32/cpuid.asm
@@ -0,0 +1,280 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+
+;
+; **-getCPUType
+;
+; This function will return a code indicating the type of the processor
+; that is in the system.  If the processor type is unknown the generic
+; x86 (Intel 486) type is returned
+;
+; parts taken from intel's AP-485 
+;
+;put checks for cmov and mmx support ????
+;
+; Assumptions:
+;  None
+;
+; Input:
+;  None
+;
+; Output:
+;  Code for CPU type returned.  See cpuidlib.h for the supported
+;  types.
+;
+
+
+
+        .586
+        .MODEL  flat, SYSCALL, os_dos
+        .DATA 
+
+NAME x86cpuid
+
+PUBLIC getCPUType_
+PUBLIC _getCPUType
+
+CPU_ID MACRO 
+    db 0fh                      ; Hardcoded CPUID instruction 
+    db 0a2h 
+ENDM
+
+;see cpuidlib.h
+X86         EQU 0                   ; /* 486, Pentium plain, or any other x86 compatible */
+PMMX        EQU 1                   ; /* Pentium with MMX */
+PPRO        EQU 2                   ; /* Pentium Pro */
+PII         EQU 3                   ; /* Pentium II */
+C6X86       EQU 4					
+C6X86MX     EQU 5
+AMDK63D     EQU 6
+AMDK6       EQU 7
+AMDK5       EQU 8
+XMM         EQU 11
+WMT			EQU 12					;/* Willamette */
+
+
+_486        EQU 4h
+PENT        EQU 50h
+PENTMMX     EQU 54h
+PENTPRO     EQU 61h
+PENTII      EQU 63h
+SIMD        EQU 25
+
+AMD_K63D    EQU 58h
+AMD_K6      EQU 56h
+AMD_K5      EQU 50h             ; K5 has models 0 - 6
+
+_6X86       EQU 52h
+_6X86MX     EQU 60h
+
+
+_vendor_id      db "------------" 
+intel_id        db "GenuineIntel" 
+amd_id          db "AuthenticAMD" 
+cyrix_id        db "CyrixInstead" 
+
+        .CODE
+
+getCPUType_:
+_getCPUType:
+    push    esi ; save all registers except EAX, which carries the result
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;------------------------------------------------
+; Intel486 processor check 
+; Checking for ability to set/clear ID flag (Bit 21) in EFLAGS 
+; which indicates the presence of a processor with the CPUID 
+; instruction.
+;------------------------------------------------
+check_80486: 
+    pushfd                                  ; push original EFLAGS
+    pop     eax                             ; get original EFLAGS 
+    mov     ebp,X86                         ; EBP holds the return value; default is X86
+    mov     ecx, eax                        ; save original EFLAGS 
+    xor     eax, 200000h                    ; flip ID bit in EFLAGS 
+    push    eax                             ; save new EFLAGS value on stack 
+    popfd                                   ; replace current EFLAGS value 
+    pushfd                                  ; get new EFLAGS 
+    pop     eax                             ; store new EFLAGS in EAX 
+    xor     eax, ecx                        ; zero result: the ID bit could not be toggled,
+    je      end_cpu_type486                 ; so there is no CPUID; return X86
+
+;------------------------------------------------
+; Execute CPUID instruction to determine vendor, family, 
+; model, stepping and features. For the purpose of this 
+; code, only the initial set of CPUID information is saved.
+;------------------------------------------------
+;    push    ebx                             ; save registers 
+;    push    esi 
+;    push    edi 
+;    push    edx
+;    push    ecx
+
+;    mov     ebp,X86                         ; rv
+
+    mov     eax, 0                          ; set up for CPUID instruction 
+    CPU_ID                                  ; get and save vendor ID
+
+    mov     DWORD PTR _vendor_id, ebx 
+    mov     DWORD PTR _vendor_id[+4], edx 
+    mov     DWORD PTR _vendor_id[+8], ecx
+
+    cmp     DWORD PTR intel_id, ebx 
+    jne     IsProc_AMD                      ; first dword differs: try the AMD string next
+    cmp     DWORD PTR intel_id[+4], edx 
+    jne     end_cpuid_type 
+    cmp     DWORD PTR intel_id[+8], ecx 
+    jne     end_cpuid_type                  ; if not equal, not an Intel processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpuid_type                  ; if not, jump to end 
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    mov     ebp,XMM                         ; assume PIII
+
+    bt      edx,SIMD                        ; CF = EDX bit 25 (SIMD support)
+    jnae    end_cpuid_type                  ; CF set: return XMM
+
+SIMDContinue:
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,PII                         ; assume PII
+
+    and     eax,0ffh                        ;mask out type and reserved
+    nop
+
+    cmp     eax,PENTII                      ; family/model 63h or above: PII
+    jge     end_cpuid_type
+
+    mov     ebp,PPRO
+    
+    cmp     eax,PENTPRO                     ; exactly 61h: PPRO
+    je      end_cpuid_type
+
+    mov     ebp,PMMX
+    
+    cmp     eax,PENTMMX                     ; exactly 54h: PMMX
+    je      end_cpuid_type
+
+    mov     ebp,X86
+    
+    cmp     eax,PENT                        ; everything else returns X86 either way
+    jge     end_cpuid_type
+
+;    mov     ebp,X86
+
+end_cpuid_type: 
+    mov     eax,ebp                         ; move the chosen code into the return register
+
+;remove these pops ???
+
+;    pop     edi                             ; restore registers 
+;    pop     esi 
+;    pop     ebx 
+;    pop     edx
+;    pop     ecx
+   
+end_cpu_type:
+    pop     edx ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+end_cpu_type486:
+    mov     eax,ebp                         ; EBP is still X86 on this path
+    pop     edx ; restore in reverse order of the pushes
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;------------------------------------------------
+IsProc_AMD:
+    cmp     DWORD PTR amd_id, ebx 
+    jne     IsProc_CYRIX                    ; first dword differs: try the Cyrix string next
+
+    cmp     DWORD PTR amd_id[+4], edx 
+    jne     end_cpuid_type 
+
+    cmp     DWORD PTR amd_id[+8], ecx 
+    jne     end_cpuid_type                  ; if not equal, not an AMD processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpuid_type                  ; if not, jump to end 
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,AMDK63D    
+
+    and     eax,0ffh                        ;mask out type and reserved
+    nop
+
+    cmp     eax,AMD_K63D                    ; family/model 58h or above: AMDK63D
+    jge     end_cpuid_type
+
+    mov     ebp,AMDK6    
+    nop
+
+    cmp     eax,AMD_K6                      ; 56h or 57h: AMDK6
+    jge     end_cpuid_type
+
+    mov     ebp,X86                         ; NOTE(review): AMDK5 is defined but never returned; K5 reports X86 -- confirm intent
+    nop
+
+    cmp     eax,AMD_K5
+    jge     end_cpuid_type
+
+    mov     ebp,X86
+    jmp     end_cpuid_type
+
+;------------------------------------------------
+IsProc_CYRIX:
+    cmp     DWORD PTR cyrix_id, ebx 
+    jne     end_cpuid_type                  ; unknown vendor: EBP is still X86
+
+    cmp     DWORD PTR cyrix_id[+4], edx 
+    jne     end_cpuid_type 
+
+    cmp     DWORD PTR cyrix_id[+8], ecx 
+    jne     end_cpuid_type                  ; if not equal, not a CYRIX processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpuid_type                  ; if not, jump to end 
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,C6X86MX
+
+    and     eax,0ffh                        ;mask out type and reserved
+    nop
+
+    cmp     eax,_6X86MX                     ; exactly 60h: C6X86MX
+    je      end_cpuid_type
+
+    mov     ebp,X86                         ; NOTE(review): C6X86 is defined but never returned -- confirm intent
+    jmp     end_cpuid_type
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/CpuID/readme.txt b/Src/libvpShared/corelibs/CpuID/readme.txt
new file mode 100644
index 00000000..26d6e842
--- /dev/null
+++ b/Src/libvpShared/corelibs/CpuID/readme.txt
@@ -0,0 +1,22 @@
+This library contains functions
+that will determine the type of CPU that is in your system.  See cpuidlib.h for
+a more detailed description of the functions that are available.
+
+If you want to use the library all you need to do is to fetch
+
+ - cpuidlib.h
+ - cpuidlib.lib
+
+
+October 14 1999
+Jong Chen
+    
+    This is the initial revision of the library.
+    
+    At the moment the code is not fully tested.  The code that tests for OS support
+    of Pentium III instructions has only been tested on systems with OS that
+    support the Pentium III instructions.  It has not been tested in a
+    configuration where we will detect that the OS will not support the Pentium III
+    instructions.
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h
new file mode 100644
index 00000000..d598c5e2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/Huffman.h
@@ -0,0 +1,71 @@
+/****************************************************************************
+*
+*   Module Title :     Huffman.h
+*
+*   Description  :     Huffman Coding header file.
+*
+****************************************************************************/
+#ifndef __INC_HUFFMAN_H
+#define __INC_HUFFMAN_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+#define HUFF_LUT_LEVELS 6
+
+/****************************************************************************
+*  Types
+****************************************************************************/  
+typedef struct _tokenorptr
+{
+    unsigned int selector : 1;   // 1 bit selector 0->ptr, 1->token
+    unsigned int value : 7;      // 7-bit payload; per the selector comment, a ptr or a token
+} tokenorptr;
+
+typedef struct _huffnode
+{
+	union
+	{
+		char l;                  // left entry viewed as a raw char
+		tokenorptr left;         // same storage viewed as selector/value bit-fields
+    } leftunion; 
+	union
+	{
+		char r;                  // right entry viewed as a raw char
+		tokenorptr right;        // same storage viewed as selector/value bit-fields
+    } rightunion; 
+	unsigned char freq;          // NOTE(review): presumably the weight used when building the tree -- confirm
+
+} HUFF_NODE;
+
+/****************************************************************************
+*   Data structures
+****************************************************************************/
+typedef struct _HUFF_TALBE_NODE      // sic: the tag is spelled "TALBE"; the typedef name is HUFF_TABLE_NODE
+{                                    // field widths below total 16 bits (1 + 5 + 6 + 4)
+    unsigned short flag     :1;      // bit 0: 1-Token, 0-Index
+    unsigned short value    :5;      // value: the value of the Token or the Index to the huffman tree
+    unsigned short unused   :6;      // not used for now
+    unsigned short length   :4;      // Huffman code length of the token
+} HUFF_TABLE_NODE;
+
+/****************************************************************************
+*  Functions
+****************************************************************************/
+extern void VP6_BuildHuffLookupTable ( HUFF_NODE * HuffTreeRoot, UINT16 * HuffTable );
+extern void VP6_BuildHuffTree ( HUFF_NODE *hn, unsigned int *counts, int values );
+extern void VP6_CreateCodeArray( HUFF_NODE *hn,
+                      int node,
+                      unsigned int *codearray,
+                      unsigned char *lengtharray,
+					  int codevalue, 
+                      int codelength );
+extern void VP6_EncodeValue ( BOOL_CODER *bc, HUFF_NODE *hn, int value, int length );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h
new file mode 100644
index 00000000..cabfcf13
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/RawBuffer.h
@@ -0,0 +1,34 @@
+/****************************************************************************
+*
+*   Module Title :     RawBuffer.h
+*
+*   Description  :     Raw bit manipulation routines header file.
+*
+****************************************************************************/
+#ifndef __INC_RAWBUFFER_H
+#define __INC_RAWBUFFER_H
+
+/****************************************************************************
+*   Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+*   Typedefs
+****************************************************************************/
+typedef struct RAW_BUFFER        // State passed to InitAddRawBitsToBuffer / AddRawBitsToBuffer / EndAddRawBitsToBuffer
+{
+    UINT32 pos;                  // Offset of "current" UINT32 in buffer
+    INT32  byte_bit_offset;      // Offset of next free bit in current UINT8
+    UINT32 DataBlock;            // NOTE(review): presumably bits accumulated but not yet stored to Buffer -- confirm in AddRawBitsToBuffer
+    UINT8 *Buffer;               // Byte buffer; presumably the pointer handed to InitAddRawBitsToBuffer -- confirm
+} RAW_BUFFER;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern void InitAddRawBitsToBuffer ( RAW_BUFFER *buf, UINT8 *Buffer );
+extern void AddRawBitsToBuffer( RAW_BUFFER *buf, UINT32 data, UINT32 bits );
+extern void EndAddRawBitsToBuffer( RAW_BUFFER *buf );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h
new file mode 100644
index 00000000..2788b40b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/SystemDependant.h
@@ -0,0 +1,21 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.h
+*
+*   Description  :     Miscellaneous system-dependent functions header
+*
+****************************************************************************/
+#ifndef __INC_SYSTEMDEPENDANT_H
+#define __INC_SYSTEMDEPENDANT_H
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern void VP6_IssueWarning ( char * WarningMessage );
+extern void PauseProcess ( unsigned int SleepMs );   // SleepMs: presumably a duration in milliseconds, going by the name -- confirm
+
+// System dynamic memory allocation
+char *SytemGlobalAlloc ( unsigned int Size );   // NOTE(review): name is spelled "Sytem" (sic); renaming it would break existing callers
+void SystemGlobalFree ( char * MemPtr );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h
new file mode 100644
index 00000000..e23c410d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/TokenEntropy.h
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+*   Module Title :     TokenEntropy.h
+*
+*   Description  :     Entropy coding header file.
+*
+****************************************************************************/
+#ifndef __INC_TOKEN_ENTROPY_H
+#define __INC_TOKEN_ENTROPY_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "boolhuff.h"
+#include "codec_common.h"
+#include "huffman.h"
+
+/****************************************************************************
+*  Constants
+****************************************************************************/
+
+// VP6 Huffman table AC bands
+#define VP6_AC_BANDS			6
+
+// Tokens								Value		Extra Bits (range + sign)
+#define ZERO_TOKEN              0		//0			Extra Bits 0+0
+#define ONE_TOKEN               1		//1			Extra Bits 0+1       
+#define TWO_TOKEN               2		//2			Extra Bits 0+1 
+#define THREE_TOKEN             3		//3			Extra Bits 0+1
+#define FOUR_TOKEN              4		//4			Extra Bits 0+1
+#define DCT_VAL_CATEGORY1		5		//5-6		Extra Bits 1+1   (columns: value range covered, then extra bits as range + sign)
+#define DCT_VAL_CATEGORY2		6		//7-10		Extra Bits 2+1
+#define DCT_VAL_CATEGORY3		7		//11-26		Extra Bits 4+1
+#define DCT_VAL_CATEGORY4		8		//11-26		Extra Bits 5+1   NOTE(review): same range as CATEGORY3 and same bit count as CATEGORY5 -- these annotations look stale; verify against VP6_DctRangeMinVals / ExtraBitLengths_VP6
+#define DCT_VAL_CATEGORY5		9		//27-58		Extra Bits 5+1
+#define DCT_VAL_CATEGORY6		10		//59+		Extra Bits 11+1	
+#define DCT_EOB_TOKEN           11		//EOB		Extra Bits 0+0
+#define MAX_ENTROPY_TOKENS      (DCT_EOB_TOKEN + 1)  
+#define ILLEGAL_TOKEN			255
+
+#define DC_TOKEN_CONTEXTS		3 // 00, 0!0, !0!0
+#define CONTEXT_NODES			(MAX_ENTROPY_TOKENS-7)
+
+#define PREC_CASES				3
+#define ZERO_RUN_PROB_CASES     14 
+
+#define DC_PROBABILITY_UPDATE_THRESH	100
+
+#define ZERO_CONTEXT_NODE		0
+#define EOB_CONTEXT_NODE		1
+#define ONE_CONTEXT_NODE		2
+#define LOW_VAL_CONTEXT_NODE	3
+#define TWO_CONTEXT_NODE		4
+#define THREE_CONTEXT_NODE		5
+#define HIGH_LOW_CONTEXT_NODE	6
+#define CAT_ONE_CONTEXT_NODE	7
+#define CAT_THREEFOUR_CONTEXT_NODE	8
+#define CAT_THREE_CONTEXT_NODE	9
+#define CAT_FIVE_CONTEXT_NODE	10
+
+#define PROB_UPDATE_BASELINE_COST	7
+
+#define MAX_PROB				254
+#define DCT_MAX_VALUE			2048
+
+#define ZRL_BANDS				2
+#define ZRL_BAND2				6
+
+#define SCAN_ORDER_BANDS		16
+#define SCAN_BAND_UPDATE_BITS   4
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/        
+typedef struct LineEq   // Integer (INT32) pair M, C; LINE_EQ2 in compdll.h is the double-precision counterpart
+{
+    INT32	M;          // NOTE(review): presumably the gradient of a line y = M*x + C -- confirm at use sites
+    INT32	C;          // NOTE(review): presumably the constant (intercept) term of that line -- confirm at use sites
+} LINE_EQ;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/        
+extern const UINT32 VP6_ProbCost[256];
+extern const UINT8  ExtraBitLengths_VP6[MAX_ENTROPY_TOKENS];
+extern const UINT32 VP6_DctRangeMinVals[MAX_ENTROPY_TOKENS];
+
+extern const UINT8 VP6_DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 VP6_AcUpdateProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 VP6_PrevTokenIndex[MAX_ENTROPY_TOKENS];
+
+extern const UINT8 ScanBandUpdateProbs[BLOCK_SIZE];
+
+extern const UINT8 ZrlUpdateProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+extern const UINT8 ZeroRunProbDefaults[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+extern UINT8 PrecZeroRunLength[BLOCK_SIZE];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h
new file mode 100644
index 00000000..9818284e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/boolhuff.h
@@ -0,0 +1,67 @@
+/****************************************************************************
+*
+*   Module Title :     boolhuff.h
+*
+*   Description  :     Bool Coder header file.
+*
+****************************************************************************/
+#ifndef __INC_BOOLHUFF_H 
+#define __INC_BOOLHUFF_H
+
+#ifdef NOTNORMALIZED   // Alternative BOOL_CODER layout, compiled only when NOTNORMALIZED is defined
+
+typedef struct _boolcoder
+{
+    unsigned char *buffer;      // Byte buffer; presumably the pointer given to VP6_StartDecode / VP6_StartEncode -- confirm
+    unsigned int pos;           // NOTE(review): presumably the current offset into buffer -- confirm
+	union                       // Anonymous union (C11 / compiler extension): v[] overlays the storage of value
+	{
+		unsigned int value;
+		unsigned char v[4];     // byte-wise view of value; which byte is v[0] depends on platform byte order
+	};
+    unsigned int range;
+} BOOL_CODER;
+
+#else   // Default BOOL_CODER layout (NOTNORMALIZED not defined)
+
+typedef struct 
+{
+	unsigned int lowvalue;
+	unsigned int range;
+	unsigned int value;
+	         int count;         // the only signed counter in this struct
+	unsigned int pos;           // NOTE(review): presumably the current offset into buffer -- confirm
+    unsigned char *buffer;      // Byte buffer; presumably the pointer given to VP6_StartDecode / VP6_StartEncode -- confirm
+
+	// Variables used to track bit costs without outputting to the bitstream
+	unsigned int  MeasureCost;
+	unsigned long BitCounter;
+} BOOL_CODER;
+
+#endif 
+
+// Section cost measurement stats
+//#define MEASURE_SECTION_COSTS 1
+#if defined MEASURE_SECTION_COSTS
+
+extern unsigned int Sectionbits[10];
+extern unsigned int ActiveSection;
+
+#define HEADER_SECTION 0
+#define MODE_SECTION   1
+#define MV_SECTION     2
+#define CONTEXT_OVERHEADS_SECTION 3
+#define DC_SECTION     4
+#define AC_SECTION     5
+
+#endif
+extern void VP6_StartDecode ( BOOL_CODER *bc, unsigned char *buffer );
+extern int  VP6_DecodeBool ( BOOL_CODER *bc, int context );
+extern int  VP6_DecodeBool128 ( BOOL_CODER *bc );
+extern void VP6_StopDecode ( BOOL_CODER *bc );
+extern void VP6_StartEncode ( BOOL_CODER *bc, unsigned char *buffer );
+extern void VP6_EncodeBool ( BOOL_CODER *bc, int x, int context );
+extern void VP6_EncodeBool2 ( BOOL_CODER *bc, int x, int context );
+extern void VP6_StopEncode ( BOOL_CODER *bc );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h
new file mode 100644
index 00000000..bda11a8f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/compdll.h
@@ -0,0 +1,607 @@
+/****************************************************************************
+*
+*   Module Title :     COMPDLL.H
+*
+*   Description  :     Encoder definitions.
+*
+*****************************************************************************
+*/
+#ifndef __INC_COMPDLL_H
+#define __INC_COMPDLL_H
+
+#include "codec_common.h"
+#include "preprocif.h"
+#include "preproc.h"
+#include "pbdll.h"
+#include "vp60_comp_interface.h"
+#include "RawBuffer.h"
+#include <stdio.h>
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/
+// Debug/stats code
+//#define PSNR_ON
+//#define FILE_PSNR 
+#define MIN_BPB_FACTOR          0.1
+#define MAX_BPB_FACTOR          10.0
+#define KEY_FRAME_CONTEXT       5
+
+// GF update constants
+#define DEFAULT_GF_UPDATE_INTERVAL	8
+#define DEFAULT_2PASS_GF_UPDATE_INTERVAL 4
+#define MIN_GF_UPDATE_INTERVAL		4
+#define MAX_GF_UPDATE_INTERVAL		8
+#define GF_UPDATE_MOTION_INTERVAL	48
+#define MAX_GF_UPDATE_MOTION		16
+#define GF_DEFAULT_MOTION_CMPLX		12
+#define GF_MODE_DIST_THRESH1		50 
+#define GF_MODE_DIST_THRESH2		25
+#define GF_MAX_VAR_THRESH			36
+#define FIRSTPASS_Q                 32
+//#define FULLFRAMEFDCT
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/
+
+typedef struct CONFIG_TYPE2
+{
+    UINT32 TargetBandwidth;
+    UINT32 OutputFrameRate;
+
+    UINT32 FirstFrameQ;
+    UINT32 BaseQ;
+    UINT32 WorstQuality;		// Worst Quality allowed.
+    UINT32 ActiveWorstQuality;	// Reflects worst quality Currently allowed (specified as an index where 0 is worst quality)
+    UINT32 ActiveBestQuality;	// Reflects best quality currently allowed (specified as an index where 0 is worst quality)
+
+} CONFIG_TYPE2;
+
+typedef enum
+{
+    DCT_COEF_TOKEN,
+    MODE_TOKEN,
+    BLOCKMAP_TOKEN,
+    MV_TOKEN
+} TOKENTYPE;
+
+typedef struct _TOKENEXTRA
+{
+    INT32  Token;
+    UINT32 Extra;
+
+    INT32  LastTokenL;      // Last token in block LEFT
+    INT32  LastTokenA;      // Last token in block ABOVE
+
+} TOKENEXTRA;
+
+typedef struct LineEq2   // Double-precision pair M, C (same member names as LINE_EQ, which uses INT32)
+{
+    double  M;           // NOTE(review): presumably the gradient of a line y = M*x + C -- confirm at use sites
+    double  C;           // NOTE(review): presumably the constant (intercept) term of that line -- confirm at use sites
+
+} LINE_EQ2;
+
+typedef struct   // Three pointer/value pairs; CP_INSTANCE keeps these as MbDcContexts[MAX_MODES][6] (per mode, per block position)
+{
+	BLOCK_CONTEXT *  AbovePtr;   // NOTE(review): presumably where the Above context lives in the frame-level data -- confirm
+	BLOCK_CONTEXT    Above;      // by-value BLOCK_CONTEXT paired with AbovePtr
+	BLOCK_CONTEXT *  LeftPtr;    // NOTE(review): presumably where the Left context lives in the frame-level data -- confirm
+	BLOCK_CONTEXT    Left;       // by-value BLOCK_CONTEXT paired with LeftPtr
+	Q_LIST_ENTRY  *  LastDcPtr;  // NOTE(review): presumably where the last-DC value lives -- confirm
+	Q_LIST_ENTRY     LastDc;     // by-value Q_LIST_ENTRY paired with LastDcPtr
+
+} MB_DC_CONTEXT;
+
+typedef struct MOTION_STATS
+{
+	UINT32	NumMvs;
+	UINT32  SumAbsX;
+	UINT32  SumAbsY;
+	INT32   SumX;
+	INT32   SumY;
+	UINT32  SumXSq;
+	UINT32  SumYSq;
+
+} MOTION_STATS;
+
+typedef struct
+{
+    double           MotionSpeed;
+    double           VarianceX;
+    double           VarianceY;
+    double           PercentGolden;
+    double           PercentMotionY;
+    double           PercentMotion;
+    double           PercentNewMotion;
+    unsigned int     QValue;
+    double           MeanInterError;
+    double           MeanIntraError;
+    double           BitsPerMacroblock;
+    double           SqBitsPerMacroblock;
+    double           PSNR;
+    int              isGolden;
+    int              isKey;
+    int              count;
+    int              frame;
+} FIRSTPASS_STATS;
+
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern UINT32 (*FiltBlockBilGetSad)(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+extern UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+extern void   (*fdct_short) ( INT16 * InputData, INT16 * OutputData );
+extern void   (*idctc[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+extern UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32  );
+extern UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *,  UINT8 *, INT32 );
+extern UINT32 (*GetIntraError)( UINT8 *, INT32);
+extern void   (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+extern void   (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void   (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+
+#define HUGE_ERROR              (1<<28)  //  Out of range test value
+
+// Number of search sites for hierarchical search (8*steps)+1 
+// so for  (+- 32 pixels) = 5 step = 41 (previously 4 step = 33)
+#define MAX_SEARCH_SITES        41       
+
+typedef struct CP_INSTANCE * xCP_INST;
+
+typedef struct CP_INSTANCE
+{
+    PB_INSTANCE pb; // playback
+
+    CONFIG_TYPE2 Configuration;
+
+    YUV_BUFFER_CONFIG InputConfig;
+    YUV_BUFFER_CONFIG YuvInputData;
+    INT32  SizeStep;
+    INT32  LastSizeStep;
+
+    INT32  QuickCompress;
+    BOOL   GoldenFrameEnabled;
+    BOOL   InterPrediction;
+    BOOL   MotionCompensation;
+    BOOL   AutoKeyFrameEnabled;
+    INT32  ForceKeyFrameEvery;
+    INT32  AutoKeyFrameThreshold;
+    INT32  LastKeyFrame;
+    INT32  MinimumDistanceToKeyFrame;
+    INT32  KeyFrameDataTargetOrig;        // Data rate target for key frames
+    INT32  KeyFrameDataTarget;            // Data rate target for key frames
+    UINT32 KeyFrameFrequency;
+    BOOL   DropFramesAllowed;
+	BOOL   DropFrame;
+    INT32  DropCount;
+	INT32  MaxDropCount;
+	INT32  MaxConsecDroppedFrames;
+    UINT32 QualitySetting;
+    UINT32 PreProcFilterLevel;
+    BOOL   AllowSpatialResampling;
+	UINT8  RdOpt;		// 0 - off, 1 - basic rd on, 2 - all rd options on
+
+    // Compressor Statistics
+    double TotErrScore;
+    UINT32 InterError;
+
+    UINT32 LastInterError;
+    UINT32 LastIntraError;
+    UINT32 MVErrorPerBit; 
+    UINT32 ErrorPerBit; 
+    UINT32 IntraError;
+    INT64  KeyFrameCount;                          // Count of key frames.
+    INT64  TotKeyFrameBytes;
+    UINT32 LastKeyFrameSize;
+    UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];
+    UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
+    INT32  FrameQuality[6];
+    int    DecoderErrorCode;        // Decoder error flag.
+    INT32  ThreshMapThreshold;
+    INT32  TotalMotionScore;
+    INT64  TotalByteCount;
+    INT32  FixedQ;
+
+	// Used for prediction filter selection
+	UINT32 MotionInterErr;
+	UINT32 MotionIntraErr;
+	UINT8  BaselineAlpha;
+	UINT8  BaselineBicThresh;
+
+    // Frame Statistics
+    INT64  CurrentFrame;
+    UINT32 LastFrameSize;
+    UINT32 ThisFrameSize;
+    BOOL   ThisIsFirstFrame;
+    BOOL   ThisIsKeyFrame;
+	BOOL   GfRecoveryFrame;
+    UINT32 FrameError ;
+
+	// Stats for normal inter frames (excludes GFU frames and key frames)
+	UINT32 NiFrames;
+	UINT32 NiTotQi;					
+	UINT32 NiAvQi;
+
+    INT32  MotionScore;
+	UINT32  FirstSixthBoundary;		// Macro block index marking the first sixth of the image
+	UINT32  LastSixthBoundary;		// Macro block index marking the last sixth of the image
+
+    /* Rate Targeting variables */
+    double BpbCorrectionFactor;
+	double KeyFrameBpbCorrectionFactor;
+	double GfuBpbCorrectionFactor;
+
+    // Controlling Block Selection
+    UINT32 MVChangeFactor;
+    UINT32 FourMvChangeFactor;
+    UINT32 ExhaustiveSearchThresh;
+    UINT32 BlockExhaustiveSearchThresh;
+    UINT32 MinImprovementForFourMV;
+    UINT32 FourMVThreshold;
+    UINT32 IntraThresh;
+
+	UINT32 MinErrorForMacroBlockMVSearch;
+	UINT32 MinErrorForBlockMVSearch;
+	UINT32 MinErrorForGoldenMVSearch;
+
+    UINT16 *FrameZeroCountsAlloc;
+    UINT16 *FrameZeroCounts;
+	UINT32 FrameNzCount[BLOCK_SIZE][2];
+	UINT8  NewScanOrderBands[BLOCK_SIZE];
+
+    // Frames
+    YUV_BUFFER_ENTRY *yuv0ptr;			// Un-pre-processed raw input (but scaled if appropriate)
+    YUV_BUFFER_ENTRY *yuv1ptr;
+
+    // Token Buffers
+    TOKENEXTRA *CoeffTokens;
+    TOKENEXTRA *CoeffTokenPtr;
+
+    INT16  LastDC[3];
+
+    BOOL_CODER bc;
+    BOOL_CODER bc2;
+
+    UINT8  *DataOutputBuffer;
+    UINT8  MBCodingMode;        // Coding mode flags
+
+    INT32  MVPixelOffsetY[MAX_SEARCH_SITES];
+    UINT32 InterTripOutThresh;
+    INT32  MVSearchSteps;
+    INT32  MVOffsetX[MAX_SEARCH_SITES];
+    INT32  MVOffsetY[MAX_SEARCH_SITES];
+    INT8   SubPixelXOffset[9];       // Half pixel MV offsets for X
+    INT8   SubPixelYOffset[9];       // Half pixel MV offsets for Y
+
+    Q_LIST_ENTRY    *quantized_list;
+
+    MOTION_VECTOR   MVector;
+    INT16  *DCT_codes;          //Buffer that stores the result of Forward DCT
+    INT16  *DCTDataBuffer;      //Input data buffer for Forward DCT
+
+    // Motion compensation related variables
+    UINT32  MvMaxExtent;
+
+    INT32  byte_bit_offset;
+
+    UINT32 NearestError[4];
+    UINT32 NearError[4];
+    UINT32 ZeroError[4];
+    UINT32 BestError[4];
+	UINT32 ErrorBins[128];
+
+    xPP_INST pp;    // preprocessor
+
+#if defined PSNR_ON
+    double TotPsnr;
+    double MinPsnr;
+    double MaxPsnr;
+    double TotYPsnr;
+    double MinYPsnr;
+    double MaxYPsnr;
+    double TotUPsnr;
+    double MinUPsnr;
+    double MaxUPsnr;
+    double TotVPsnr;
+    double MinVPsnr;
+    double MaxVPsnr;
+    double TotalSqError;
+#endif
+#if defined FULLFRAMEFDCT
+    Q_LIST_ENTRY  (*FDCTCoeffs)[64];
+#endif
+
+    // Structures for entropy contexts
+    UINT32 FrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+    //UINT32 FrameAcTokenDist[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    UINT32 FrameAcTokenDist[PREC_CASES][2][8][16];
+    // Extra structures needed to decide if we choose huffman and DC / EOB runs
+    UINT32 FrameDcTokenDist2[2][MAX_ENTROPY_TOKENS];
+    //UINT32 FrameAcTokenDist2[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    UINT32 FrameAcTokenDist2[PREC_CASES][2][8][16];
+
+    // AWG Debug Accumulate token count for entire run
+    UINT32 CumulativeFrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+    UINT32 CumulativeFrameAcTokenDist[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+	// Storage for the first frame entropy probabilities.
+	// These are re-used for all subsequent key frames when we are operating in
+	// error (drop frame) resilient mode.
+	UINT8 FirstFrameDcProbs[2*(MAX_ENTROPY_TOKENS-1)];
+	UINT8 FirstFrameAcProbs[2*PREC_CASES*VP6_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];
+
+	UINT32 FrameZrlDist[ZRL_BANDS][64];
+	UINT32 FrameZeroCount[ZRL_BANDS];
+	UINT8  FrameZrlProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+	UINT32 FrameZrlBranchHits[ZRL_BANDS][ZERO_RUN_PROB_CASES][2];
+
+    // Last token coded this block.
+    UINT32 MBModeCount[4][MAX_MODES+1];
+    UINT32 BModeCount[MAX_MODES+1];
+	UINT32 CountModeSameAsLast[4][MAX_MODES+1];
+	UINT32 CountModeDiffFrLast[4][MAX_MODES+1];
+
+    UINT32 ModeCodeArray[4][MAX_MODES+1][MAX_MODES+1];
+    UINT8  ModeLengthArray[4][MAX_MODES+1][MAX_MODES+1];
+
+	UINT32 MBModeCostBoth[11];
+	UINT32 MBModeCostNoNear[11];
+	UINT32 MBModeCostNoNearest[11];
+	UINT32 BModeCost[11];
+	UINT32 MvBaselineDist[2][MV_ENTROPY_TOKENS];
+	UINT32 FrameMvCount;
+	UINT32 EstModeCost[2][MAX_MODES];
+	UINT32 EstMVCost[2][MV_ENTROPY_TOKENS];
+	UINT32 * EstMvCostPtrX;
+	UINT32 * EstMvCostPtrY;
+
+	// Data structure used in re-calculating MV probability nodes	
+	UINT8  NewMvSignProbs[2];
+	UINT8  NewIsMvShortProb[2];
+	UINT8  NewMvShortProbs[2][7];
+	UINT8  NewMvSizeProbs[2][LONG_MV_BITS];
+
+	UINT32 NewMvSignHits[2][2];
+	UINT32 NewIsMvShortHits[2][2];
+	UINT32 NewMvShortHits[2][7][2];
+	UINT32 NewMvSizeHits[2][LONG_MV_BITS][2];
+
+
+    UINT32 nExperimentals;
+    INT32  Experimental[C_SET_EXPERIMENTAL_MAX - C_SET_EXPERIMENTAL_MIN + 1];
+
+	// Bandwidth and buffer control variables
+	INT32  PerFrameBandwidth;				// Target for average bandwidth per frame.
+    INT32  InterFrameTarget;				// Average "inter" frame bit target corrected for key frame costs
+    INT32  ThisFrameTarget;					// Modified rate target for this frame
+
+	BOOL   BufferedMode;					// FALSE = Tight buffering (Video Conferencing mode); TRUE = normal buffered/streaming mode.
+	BOOL   ErrorResilliantMode;				// A mode used for VC etc. to make the codec more resilliant to dropped frames.
+	INT32  StartingBufferLevel;             // The initial encoder buffer level
+	INT32  BytesOffTarget;				    // How far off target are we in repect of target bytes for clip 
+	INT32  OptimalBufferLevel;				// The buffer level target we strive to reach / maintain.
+	INT32  BufferLevel;                     // Buffer level based upon the max sustainable rate used for rate targeting
+	INT32  MaxBufferLevel;			        // The maximum permited value for the buffer level.
+	INT32  DropFramesWaterMark;				// Buffer fullness watermark for forced drop frames.
+	INT32  ResampleDownWaterMark;			// Buffer fullness watermark for downwards spacial re-sampling
+	INT32  ResampleUpWaterMark;				// Buffer fullness watermark where returning to larger image size is consdered
+	INT32  LastKeyFrameBufferLevel;			// Used to monitor changes in buffer level when considering re-sampling.
+
+	INT32  Speed;
+	INT32  CPUUsed;
+
+	UINT32 ModeMvCostEstimate;				// Running total of cost estimates for modes and MVs in this frame.
+
+	// Variables used in regulating cost of new motion vectors based upon an estimate of new MV frequency.
+	UINT32 FrameNewMvCounter;
+	UINT32 FrameModeCounter;
+	UINT32 MvEpbCorrection;
+	UINT32 LastFrameNewMvUsage;				// 0 = Low 9 = High
+
+	UINT32 * MbBestErr;
+
+    UINT32 EstDcTokenCosts[2][MAX_ENTROPY_TOKENS];
+    UINT32 EstAcTokenCosts[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    UINT32 EstZrlCosts[ZRL_BANDS][64];
+
+	// Data structures used to save and restore MB and DC contexts during rate distortion
+	MACROBLOCK_INFO CopyMbi;
+	BLOCK_CONTEXT AboveCopyY[2];		
+	BLOCK_CONTEXT AboveCopyU;		
+	BLOCK_CONTEXT AboveCopyV;		
+	BLOCK_CONTEXT LeftYCopy[2];
+	BLOCK_CONTEXT LeftUCopy;
+	BLOCK_CONTEXT LeftVCopy;
+	Q_LIST_ENTRY  LastDcYCopy[3];
+	Q_LIST_ENTRY  LastDcUCopy[3];
+	Q_LIST_ENTRY  LastDcVCopy[3];
+
+	MB_DC_CONTEXT MbDcContexts[MAX_MODES][6];		// Per mode, per block position data structure for and MB
+
+	UINT32 avgPickModeTime;
+	UINT32 avgEncodeTime;
+	UINT32 avgPackVideoTime;
+
+	UINT32 ForceHScale;
+	UINT32 ForceHRatio;
+	UINT32 ForceVScale;
+	UINT32 ForceVRatio;
+	BOOL   ForceInternalSize;
+
+	PreProcInstance preproc;
+
+    // Buffers for output bitstream partitions
+    UINT8 *OutputBuffer2;
+    RAW_BUFFER RawBuffer;
+
+    // In Huffman mode runs of zeros at DC position & runs
+    // of EOB at first AC position are used
+    INT32 CurrentDcZeroRun[2];
+    TOKENEXTRA *DcZeroRunStartPtr[2];
+    INT32 CurrentAc1EobRun[2];
+    TOKENEXTRA *Ac1EobRunStartPtr[2];
+    
+    // DEBUG
+    UINT32 HuffCost;
+    UINT32 CostShannon;
+
+	BOOL   AllowScanOrderUpdates;
+    INT32  FrameRateInput;
+    INT32  FrameRateDropFrames;
+    INT32  FrameRateDropCount;
+
+	// Stats for monitoring frame mode and MV data
+	UINT32 ModeDist[MAX_MODES];
+
+	// Stats collected about the use of motion vectors in the current frame
+	MOTION_STATS FrameMvStats;			
+									
+	// Variables used in control of GF update
+	UINT32 FramesTillGfUpdateDue;
+	INT32 GfUpdateInterval;
+	UINT32 GfuMotionSpeed;
+	UINT32 GfuMotionComplexity;
+	UINT32 GfuBoost;
+	UINT32 GfUsage;					// GF usage metric 
+	UINT32 LastGfOrKFrameQ;
+    
+    // variables for 5 region diamond MV search
+    INT32  DSMVSearchSteps;
+    INT32  DSMVPixelOffsetY[MAX_SEARCH_SITES];
+    INT32  DSMVOffsetX[MAX_SEARCH_SITES];
+    INT32  DSMVOffsetY[MAX_SEARCH_SITES];
+
+    // 2 pass stats
+    INT32  pass;
+    FIRSTPASS_STATS fps;
+    FIRSTPASS_STATS fpmss;
+    FILE *fs;
+    FILE *ss;
+    INT32  GoldenFrameBoost;
+    INT32  MbsSinceGolden;
+    INT32  OneGoldenFrame;
+    INT32  KFBoost;
+    INT32  InterBoostFreq;
+    INT32  InterBoost;
+    INT32  GoldenMbsSinceGolden;
+    INT32  GoldenMbsThisFrame;
+    INT32  InterErrorb;
+    INT32  FramesToKey;
+    double FirstPassPSNR;
+    INT32  ActualTargetBitRate;
+    INT32  KFForced;
+    INT32  NextKFForced;
+    INT32  CalculatedWorstQ;
+    INT32  PassedInWorstQ;
+
+
+    // new parameters
+
+    BOOL   DisableGolden;                   // disable golden frame updates
+    BOOL   VBMode;                          // run in variable bandwidth 1 pass mode
+    BOOL   EndUsage;						// Local file playback mode / vs streamed
+	BOOL   AutoWorstQ;						// Auto adjust worst quality.... 1 pass vbr within buffering constraints
+    UINT32 BestAllowedQ;                    // best allowed quality ( save bits by disallowings frames that are too high quality ) 
+    INT32  UnderShootPct;                   // target a percentage of the actual frame to allow for sections that go over
+
+    INT32  MaxAllowedDatarate;              // maximum the datarate is allowed to go.
+    INT32  MaximumBufferSize;               // maximum buffer size.
+
+    BOOL   TwoPassVBREnabled;               // two pass variable bandwidth enabled
+    INT32  TwoPassVBRBias;                  // how variable do we want to target?
+    INT32  TwoPassVBRMaxSection;            // maximum 
+    INT32  TwoPassVBRMinSection;            // minimum 
+    INT32  Pass;                            // which pass of the compression are we running.
+    double TotalBitsLeftInClip;
+    double FramesYetToEncode;
+    double TotalBitsPerMB;
+
+	// Prediction mode parameters for VP6.2
+	UINT8  LastPredictionFilterMode;
+	UINT8  LastPredictionFilterMvSizeThresh;
+	UINT32 LastPredictionFilterVarThresh;
+	UINT8  LastPredictionFilterAlpha;
+
+
+    UINT32 (*FindMvViaSearch)
+        (xCP_INST cpi,
+        CODING_MODE	Mode,
+        UINT8 *SrcPtr,
+        UINT8 *RefPtr,
+        MOTION_VECTOR *MV,
+        UINT8 **BestBlockPtr,
+        UINT32 BlockSize);
+
+    void (*FindBestHalfPixelMv)
+        (xCP_INST cpi,
+        CODING_MODE	Mode,
+        UINT8 *SrcPtr,
+        UINT8 *RefPtr,
+        MOTION_VECTOR *MV,
+        UINT32 BlockSize,
+        UINT32 *MinError,
+        UINT8  BitShift);
+
+    void (*FindBestQuarterPixelMv)
+        (xCP_INST cpi,
+        CODING_MODE	Mode,
+        UINT8 *SrcPtr,
+        UINT8 *RefPtr,
+        MOTION_VECTOR *MV,
+        UINT32 BlockSize,
+        UINT32 *MinError,
+        UINT8  BitShift);
+
+} CP_INSTANCE;
+
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+UINT32 (*GetMBFrameVertVar)(CP_INSTANCE *cpi);            // NOTE(review): unlike the function pointers in the first Imports section of this header, these three
+UINT32 (*GetMBFieldVertVar)(CP_INSTANCE *cpi);            // have no `extern`, so each is a tentative definition in every file that includes this header.
+UINT32 (*GetBlockReconErr)(CP_INSTANCE *cpi, UINT32 bp);  // Toolchains that do not merge common symbols report duplicates; find the one real definition before adding `extern`.
+
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern void UpdateFrame(CP_INSTANCE *cpi);
+extern UINT32 EncodeData(CP_INSTANCE *cpi);
+
+// Loop optimizations. Declared with (void) so C treats this as a real prototype, as CreateCPInstance(void) and CMachineSpecificConfig(void) below already are.
+extern void InitMapArrays(void);
+
+// Codec
+extern void SUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+               INT32 SourceStride, INT32 ReconStride );
+extern void SUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+               INT32 SourceStride );
+extern void SUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+              INT32 SourceStride, INT32 ReconStride );
+
+extern CP_INSTANCE * CreateCPInstance(void);
+extern void DeleteCPInstance(CP_INSTANCE **cpi);
+extern void CMachineSpecificConfig(void);
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+
+extern BOOL EAllocateFragmentInfo(CP_INSTANCE *cpi);
+extern BOOL EAllocateFrameInfo(CP_INSTANCE *cpi);
+extern void EDeleteFragmentInfo(CP_INSTANCE *cpi);
+extern void EDeleteFrameInfo(CP_INSTANCE *cpi);
+extern UINT32 PickIntra( CP_INSTANCE *cpi );
+extern UINT32 PickModes( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError);
+
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex);
+extern void EncodeFrameMbs(CP_INSTANCE *cpi);
+extern void CCONV ChangeEncoderSize(CP_INSTANCE* cpi, UINT32 Width, UINT32 Height);
+extern void CopyOrResize(CP_INSTANCE* cpi, BOOL ResetPreproc );
+extern UINT32 TokenizeFrag(CP_INSTANCE* cpi, INT16*  RawData, UINT32 Plane, BLOCK_CONTEXT* Above, BLOCK_CONTEXT* Left);
+extern void PredictScanOrder( CP_INSTANCE *cpi );
+extern void BuildScanOrder( PB_INSTANCE *pbi, UINT8 * );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h
new file mode 100644
index 00000000..96580a24
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemode.h
@@ -0,0 +1,69 @@
+/****************************************************************************
+*        
+*   Module Title :     decodemode.h
+*
+*   Description  :     Functions for decoding modes and motion vectors 
+*
+****************************************************************************/
+#ifndef __INC_DECODEMODE_H
+#define __INC_DECODEMODE_H
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking */
+#endif
+
+/****************************************************************************
+*  Module statics
+****************************************************************************/        
+#define MODETYPES       3
+#define MODEVECTORS     16
+#define PROBVECTORXMIT  174
+#define PROBIDEALXMIT   254
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/        
+typedef struct _modeContext
+{
+	UINT8 left;
+	UINT8 above;
+	UINT8 last;
+} MODE_CONTEXT;
+
+typedef struct _htorp             // Tagged link used by HNODE; selector + value total 8 bits
+{
+    unsigned char selector : 1;   // 1 bit selector 0->ptr, 1->token (unsigned char bit-fields are implementation-defined in ISO C)
+    unsigned char value : 7;      // 7-bit payload, interpreted according to selector
+} torp;
+
+typedef struct _hnode   // Binary tree node made of two tagged links
+{
+	torp left;          // left link: token or pointer, per its selector bit
+	torp right;         // right link: token or pointer, per its selector bit
+} HNODE;
+
+typedef enum _MODETYPE   // Four mode types; no explicit values, so they number 0..3 in declaration order
+{
+	MACROBLOCK,              // 0
+	NONEAREST_MACROBLOCK,    // 1
+	NONEAR_MACROBLOCK,       // 2
+	BLOCK,                   // 3 (the trailing comma after the last enumerator needs C99 or a lenient compiler)
+} MODETYPE;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/        
+extern UINT8 Stats[9][4][4][4];
+extern const UINT8 VP6_ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2];
+extern const UINT8 VP6_BaselineXmittedProbs[4][2][MAX_MODES];
+
+extern void	VP6_BuildModeTree ( PB_INSTANCE *pbi );
+extern void VP6_decodeModeAndMotionVector ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol );
+
+/****************************************************************************
+*  Function Prototypes
+****************************************************************************/
+INLINE int mbClass(int i);
+void VP6_DecodeModeProbs(PB_INSTANCE *pbi);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h
new file mode 100644
index 00000000..ca4f56bc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/decodemv.h
@@ -0,0 +1,33 @@
+/****************************************************************************
+*        
+*   Module Title :     decodemv.h
+*
+*   Description  :     Functions for decoding modes and motionvectors 
+*
+****************************************************************************/
+#ifndef __INC_DECODEMV_H
+#define __INC_DECODEMV_H
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking */
+#endif
+
+/****************************************************************************
+*  Module statics
+****************************************************************************/        
+#define MV_NODES	17
+
+/****************************************************************************
+*  Exports (this header has no #include of its own: PB_INSTANCE, CODING_MODE and LONG_MV_BITS are defined in pbdll.h)
+****************************************************************************/        
+extern const UINT8 DefaultMvShortProbs[2][7];            // same shape as PB_INSTANCE.MvShortProbs
+extern const UINT8 VP6_MvUpdateProbs[2][MV_NODES];
+extern const UINT8 DefaultMvLongProbs[2][LONG_MV_BITS];  // same shape as PB_INSTANCE.MvSizeProbs
+extern const UINT8 DefaultIsShortProbs[2];               // same shape as PB_INSTANCE.IsMvShortProb
+extern const UINT8 DefaultSignProbs[2];                  // same shape as PB_INSTANCE.MvSignProbs
+
+extern void VP6_FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol, UINT8 Frame, int *type);
+extern void VP6_ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType );
+extern void VP6_decodeMotionVector(	PB_INSTANCE *pbi,	MOTION_VECTOR *mv,	CODING_MODE Mode );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h
new file mode 100644
index 00000000..d470bcb5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/misc_common.h
@@ -0,0 +1,27 @@
+/****************************************************************************
+*
+*   Module Title :     misc_common.h
+*
+*   Description  :     Miscellaneous common routines header file
+*
+*****************************************************************************
+*/
+#ifndef __MISC_COMMON_H
+#define __MISC_COMMON_H
+
+#include "type_aliases.h"
+#include "compdll.h"
+
+/****************************************************************************
+*  Function Prototypes
+****************************************************************************/
+extern double GetEstimatedBpb( CP_INSTANCE *cpi, UINT32 TargetQIndex );
+extern void UpdateBpbCorrectionFactor( CP_INSTANCE *cpi, UINT32 FrameSize );
+extern void UpRegulateMB( CP_INSTANCE *cpi, UINT32 RegulationQ, UINT32 SB, UINT32 MB, BOOL NoCheck );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex );
+extern void RegulateQ( CP_INSTANCE *cpi, INT32 TargetBits );
+extern void ConfigureQuality( CP_INSTANCE *cpi, UINT32 QualityValue );
+extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
+extern void VP6_PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, UINT32 bp);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h
new file mode 100644
index 00000000..530ba62f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/pbdll.h
@@ -0,0 +1,498 @@
+/****************************************************************************
+*
+*   Module Title :     pbdll.h
+*
+*   Description  :     Decoder definition header file.
+*
+****************************************************************************/
+#ifndef __INC_PBDLL_H
+#define __INC_PBDLL_H
+
+/****************************************************************************
+*  Module statics.
+****************************************************************************/
+#define VAL_RANGE   256     // Must come before header files--REMOVE THIS DEPENDENCY!!
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "huffman.h"
+#include "tokenentropy.h"
+#include "vfw_pb_interface.h"
+#include "postproc_if.h"
+#include "vputil_if.h"
+#include "quantize.h"
+#include "boolhuff.h"
+#include "rawbuffer.h"
+
+/****************************************************************************
+*  MACROS
+****************************************************************************/
+
+
+// Enumeration of how block is coded
+// VP6.2 version is >= 8
+#define CURRENT_ENCODE_VERSION  8
+#define CURRENT_DECODE_VERSION  8
+
+#define SIMPLE_PROFILE			0
+#define PROFILE_1				1
+#define PROFILE_2				2
+#define ADVANCED_PROFILE		3
+
+// Loop filter options
+#define NO_LOOP_FILTER			0
+#define LOOP_FILTER_BASIC		2
+#define LOOP_FILTER_DERING		3
+
+#define UMV_BORDER              48
+#define STRIDE_EXTRA            (UMV_BORDER * 2)
+#define BORDER_MBS				(UMV_BORDER>>4)
+
+#define MAX_MV_EXTENT           63      //  Max search distance in half pixel increments
+#define MV_ENTROPY_TOKENS       511     
+#define LONG_MV_BITS            8
+
+#define PPROC_QTHRESH           64
+
+#define MAX_MODES               10
+
+#define MAX_NEAREST_ADJ_INDEX	2 
+
+#define Y_MVSHIFT       0x2 
+#define UV_MVSHIFT      0x3
+#define Y_MVMODMASK     0x3
+#define UV_MVMODMASK    0x7
+
+//    INT32  MvShift;                 // motion vector shift value
+//    INT32  MvModMask; 
+
+// Prediction filter modes:
+// Note: when trying to use an enum here we ran into an odd compiler bug in
+// the WriteFrameHeader() code. Also an enum type is implicitly an int which 
+// is a bit big for something that can only have 3 values
+#define BILINEAR_ONLY_PM	    0
+#define BICUBIC_ONLY_PM		    1
+#define AUTO_SELECT_PM		    2
+
+#define DCProbOffset(A,B) /* row-major offset of [A][B] in a flat table with (MAX_ENTROPY_TOKENS-1) entries per A */ \
+	( (A) * (MAX_ENTROPY_TOKENS-1) \
+    + (B) )
+// Row-major offset of [A][B][C][D] in a flat table of shape [..][PREC_CASES][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1] (the shape PB_INSTANCE.AcProbs is sized for)
+#define ACProbOffset(A,B,C,D) \
+	( (A) * PREC_CASES * VP6_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+	+ (B) * VP6_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+	+ (C) * (MAX_ENTROPY_TOKENS-1) \
+	+ (D) ) 
+// Row-major offset of [A][B][C] in a flat table of shape [..][DC_TOKEN_CONTEXTS][CONTEXT_NODES] (the shape PB_INSTANCE.DcNodeContexts is sized for)
+#define DcNodeOffset(A,B,C) \
+	( (A) * DC_TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (B) * CONTEXT_NODES \
+	+ (C) ) 
+
+// Linear macroblock index for (row,col). Expands to pbi->MBCols, so a variable named 'pbi' must be in scope wherever the macro is used.
+#define MBOffset(row,col) ( (row) * pbi->MBCols + (col) )
+
+/****************************************************************************
+*  Types
+****************************************************************************/
+typedef enum   // coding modes; values 0x0..0x9 are the MAX_MODES (10) real modes, DO_NOT_CODE is an extra marker value
+{
+    CODE_INTER_NO_MV        = 0x0,      // INTER prediction, (0,0) motion vector implied.
+    CODE_INTRA              = 0x1,      // INTRA i.e. no prediction.
+    CODE_INTER_PLUS_MV      = 0x2,      // INTER prediction, non zero motion vector.
+    CODE_INTER_NEAREST_MV   = 0x3,      // Use Last Motion vector
+    CODE_INTER_NEAR_MV      = 0x4,      // Prior last motion vector
+    CODE_USING_GOLDEN       = 0x5,      // 'Golden frame' prediction (no MV).
+    CODE_GOLDEN_MV          = 0x6,      // 'Golden frame' prediction plus MV.
+    CODE_INTER_FOURMV       = 0x7,      // Inter prediction 4MV per macro block.
+    CODE_GOLD_NEAREST_MV    = 0x8,      // Use Last Motion vector
+    CODE_GOLD_NEAR_MV       = 0x9,      // Prior last motion vector
+    DO_NOT_CODE             = 0x10       // Fake Mode. NOTE(review): 0x10 (16) does not fit the 4-bit FRAG_INFO.FragCodingMode field - confirm it is never stored there
+} CODING_MODE;
+
+typedef struct   // per-fragment record: a 4-bit coding mode and two 8-bit signed motion vector components
+{
+    unsigned int FragCodingMode   :  4;   // 4 bits, i.e. values 0..15 (presumably holds a CODING_MODE - confirm)
+    signed int   MVectorX         :  8;   // explicitly signed: the signedness of a plain 'int' bit-field is implementation-defined
+    signed int   MVectorY         :  8;   // explicitly signed, same reason as MVectorX
+} FRAG_INFO;
+
+typedef struct _DCINFO
+{
+    Q_LIST_ENTRY dc;
+    short frame;
+} DCINFO;
+
+// defined so i don't have to remember which block goes where
+typedef enum
+{
+    TOP_LEFT_Y_BLOCK        = 0,
+    TOP_RIGHT_Y_BLOCK       = 1,
+    BOTTOM_LEFT_Y_BLOCK     = 2,
+    BOTTOM_RIGHT_Y_BLOCK    = 3,
+    U_BLOCK                 = 4,
+    V_BLOCK                 = 5
+} BLOCK_POSITION;
+
+// all the information gathered from a block to be used as context in the next block
+typedef struct
+{
+    UINT8        Token;
+    CODING_MODE  Mode;
+    UINT16       Frame;
+    Q_LIST_ENTRY Dc;
+    UINT8        unused[3];
+}  BLOCK_CONTEXT;
+
+// all the contexts maintained for a frame
+typedef struct
+{
+    BLOCK_CONTEXT    LeftY[2];   // 1 for each block row in a macroblock
+    BLOCK_CONTEXT    LeftU;
+    BLOCK_CONTEXT    LeftV;
+
+    BLOCK_CONTEXT   *AboveY;
+    BLOCK_CONTEXT   *AboveU;
+    BLOCK_CONTEXT   *AboveV;
+
+//    BLOCK_CONTEXT   *AboveYAlloc;
+//    BLOCK_CONTEXT   *AboveUAlloc;
+//    BLOCK_CONTEXT   *AboveVAlloc;
+
+    Q_LIST_ENTRY     LastDcY[4]; // 1 for each frame 
+    Q_LIST_ENTRY     LastDcU[4];
+    Q_LIST_ENTRY     LastDcV[4];
+
+} FRAME_CONTEXT;
+
+// Structure to hold last token values at each position in block
+typedef UINT8 TOKENBUFFER[256];
+
+
+
+typedef struct
+{
+    INT16 *dequantPtr;
+    INT16 *coeffsPtr;
+    INT8 *reconPtr;
+
+    INT32  MvShift;                 // motion vector shift value
+    INT32  MvModMask;               // motion vector mod mask
+
+    INT32  FrameReconStride;        // Stride of the frame
+    INT32  CurrentReconStride;      // pitch of reconstruction
+
+    INT32  CurrentSourceStride;     // pitch of source (compressor only)
+	INT32  FrameSourceStride;		// Stride of the frame (compressor only)
+    UINT32 Plane;                   // plane block is from (compressor only)
+
+    BLOCK_CONTEXT  *Above;          // above block context
+    BLOCK_CONTEXT  *Left;           // left block context
+    Q_LIST_ENTRY   *LastDc;         // last dc value seen
+
+    UINT32 thisRecon;               // index for recon
+    UINT32 Source;                  // index for source (compressor only)
+
+    UINT32 EobPos;
+
+	UINT8	*BaselineProbsPtr;
+	UINT8	*ContextProbsPtr;
+
+	UINT8	*AcProbsBasePtr; 
+	UINT8	*DcProbsBasePtr; 
+	UINT8	*DcNodeContextsBasePtr; 
+    UINT8	*ZeroRunProbsBasePtr;
+
+//    BOOL_CODER *br; 
+//    INT32	token;
+//    UINT8 *MergedScanOrder;
+//    UINT8 *MergedScanOrderPtr;
+
+}BLOCK_DX_INFO;
+
+
+typedef struct   // state for one macroblock: six per-block records plus macroblock-level mode and motion vector data
+{
+    BOOL_CODER *br;
+
+    BLOCK_DX_INFO blockDxInfo[6];
+
+    CODING_MODE   Mode;             // mode macroblock coded as
+
+//note: these should be moved into blockDxInfo
+    CODING_MODE   BlockMode[6];     // one mode per block (six entries, like blockDxInfo)
+    MOTION_VECTOR Mv[6];            // one motion vector per block; u and v calculated from rest
+
+
+    MOTION_VECTOR NearestInterMVect;// nearest mv in last frame
+    MOTION_VECTOR NearInterMVect;   // near mv in last frame
+	INT32         NearestMvIndex;   // Indicates how near nearest is.
+    MOTION_VECTOR NearestGoldMVect; // nearest mv in gold frame
+    MOTION_VECTOR NearGoldMVect;    // near mv in gold frame
+	INT32         NearestGMvIndex;  // Indicates how near nearest is (gold frame).
+
+	INT32  Interlaced;				// is the macroblock interlaced?
+
+//    Q_LIST_ENTRY  *CoeffsAlloc;     // coefficients 64 per frag 4 y in raster order, u then v
+} MACROBLOCK_INFO;
+
+// Frame Header type
+typedef struct FRAME_HEADER
+{
+    UINT8 *buffer;
+    UINT32 value;
+    INT32  bits_available;
+    UINT32 pos;
+} FRAME_HEADER;
+
+typedef struct _BITREADER
+{
+	int bitsinremainder;				// # of bits still used in remainder
+	UINT32 remainder;					// remaining bits from original long
+	const unsigned char * position;		// character pointer position within data
+} BITREADER;
+
+// Playback Instance Definition
+typedef struct PB_INSTANCE
+{
+	MACROBLOCK_INFO  mbi;		// all the information needed for one macroblock
+	FRAME_CONTEXT    fc;		// all of the context information needed for a frame
+	QUANTIZER	    *quantizer;
+
+    // Should be able to delete these entries when VP5 complete
+	INT32      CodedBlockIndex;		   
+	UINT8	  *DataOutputInPtr;		  
+    FRAG_INFO *FragInfo;
+//    FRAG_INFO *FragInfoAlloc;
+
+    /* Current access points for input and output buffers */
+    BOOL_CODER br;
+	BOOL_CODER br2;
+    BITREADER  br3;
+
+	// Decoder and Frame Type Information
+	UINT8   Vp3VersionNo;
+	UINT8	VpProfile;
+
+	UINT32  PostProcessingLevel;	   /* Perform post processing */
+	UINT32  ProcessorFrequency;	   /* CPU frequency	*/
+	UINT32  CPUFree;
+	UINT8   FrameType;       
+
+	CONFIG_TYPE Configuration;	// frame configuration
+	UINT32  CurrentFrameSize;
+
+	UINT32  YPlaneSize;  
+	UINT32  UVPlaneSize;  
+	UINT32  VFragments;
+	UINT32  HFragments;
+	UINT32  UnitFragments;
+	UINT32  YPlaneFragments;
+	UINT32  UVPlaneFragments;
+	
+	UINT32  ReconYPlaneSize;
+	UINT32  ReconUVPlaneSize;
+	
+	UINT32  YDataOffset;
+	UINT32  UDataOffset;
+	UINT32  VDataOffset;
+	UINT32  ReconYDataOffset;
+	UINT32  ReconUDataOffset;
+	UINT32  ReconVDataOffset;
+
+	UINT32  MacroBlocks;	// Number of Macro-Blocks in Y component
+	UINT32  MBRows;			// Number of rows of MacroBlocks in a Y frame
+	UINT32  MBCols;			// Number of cols of MacroBlocks in a Y frame
+    UINT32	ScaleWidth;
+    UINT32	ScaleHeight;
+    UINT32	OutputWidth;
+    UINT32	OutputHeight;
+	
+	// Frame Buffers 
+	YUV_BUFFER_ENTRY *ThisFrameRecon;
+//	YUV_BUFFER_ENTRY *ThisFrameReconAlloc;
+	YUV_BUFFER_ENTRY *GoldenFrame; 
+//	YUV_BUFFER_ENTRY *GoldenFrameAlloc; 
+	YUV_BUFFER_ENTRY *LastFrameRecon;
+//	YUV_BUFFER_ENTRY *LastFrameReconAlloc;
+	YUV_BUFFER_ENTRY *PostProcessBuffer;
+//	YUV_BUFFER_ENTRY *PostProcessBufferAlloc;
+	YUV_BUFFER_ENTRY *ScaleBuffer;     /* new buffer for testing new loop filtering scheme */
+//	YUV_BUFFER_ENTRY *ScaleBufferAlloc; 	
+
+    Q_LIST_ENTRY *quantized_list;  
+//    INT16		 *ReconDataBuffer;
+    INT16		 *ReconDataBuffer[6];
+//	INT16		 *ReconDataBufferAlloc;
+//	UINT8         FragCoefEOB;	   // Position of last non 0 coef within QFragData
+	INT16		 *TmpReconBuffer;
+//	INT16		 *TmpReconBufferAlloc;
+	INT16		 *TmpDataBuffer;
+//	INT16		 *TmpDataBufferAlloc;
+    
+//	UINT8		 *LoopFilteredBlockAlloc;
+	UINT8		 *LoopFilteredBlock;
+
+    void (**idct)(INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData );
+
+	POSTPROC_INST    postproc;
+
+	TOKENBUFFER LastToken;			// LTIndex of tokens at each position in block
+
+    CODING_MODE      LastMode;      // Last Mode decoded;
+
+	UINT8 DcProbs[2*(MAX_ENTROPY_TOKENS-1)];
+	UINT8 AcProbs[2*PREC_CASES*VP6_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];
+
+                               //3             MAX_ENTROPY_TOKENS-7                     
+//	UINT8 DcNodeContexts[2][DC_TOKEN_CONTEXTS][CONTEXT_NODES];								// Plane, Contexts, Node
+	UINT8 DcNodeContexts[2 * DC_TOKEN_CONTEXTS * CONTEXT_NODES];								// Plane, Contexts, Node
+	
+	UINT8 ZeroRunProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+    UINT8 MergedScanOrder[BLOCK_SIZE + 65];
+	UINT8 ModifiedScanOrder[BLOCK_SIZE];
+	UINT8 EobOffsetTable[BLOCK_SIZE];
+	UINT8 ScanBands[BLOCK_SIZE];
+
+    UINT8 MBModeProb[11];
+    UINT8 BModeProb[11];
+
+	UINT8  PredictionFilterMode;
+	UINT8  PredictionFilterMvSizeThresh;
+	UINT32 PredictionFilterVarThresh;
+	UINT8  PredictionFilterAlpha;
+	
+	BOOL   RefreshGoldenFrame;
+
+	UINT8 Inter00Prob;
+	UINT32 AvgFrameQIndex;
+
+	BOOL testMode;
+
+    UINT32 mvNearOffset[16];
+	
+	int probInterlaced;
+	char *MBInterlaced;
+	char *predictionMode;
+	MOTION_VECTOR *MBMotionVector;
+//	char *MBInterlacedAlloc;
+//	char *predictionModeAlloc;
+//	MOTION_VECTOR *MBMotionVectorAlloc;
+
+	UINT8  MvSignProbs[2];
+	UINT8  IsMvShortProb[2];
+	UINT8  MvShortProbs[2][7];
+	UINT8  MvQPelProbs[2];
+	UINT8  MvHalfPixelProbs[2];
+	UINT8  MvLowBitProbs[2];
+	UINT8  MvSizeProbs[2][LONG_MV_BITS];
+
+	UINT8 probXmitted[4][2][MAX_MODES];
+	UINT8 probModeSame[4][MAX_MODES];
+	UINT8 probMode[4][MAX_MODES][MAX_MODES-1]; // nearest+near,nearest only, nonearest+nonear, 10 preceding modes, 9 nodes
+
+	UINT32 maxTimePerFrame;
+	UINT32 thisDecodeTime;
+	UINT32 avgDecodeTime;
+	UINT32 avgPPTime[10];
+	UINT32 avgBlitTime;
+
+	// Does this frame use multiple data streams
+	// Multistream is implicit for SIMPLE_PROFILE
+	BOOL   MultiStream;
+
+    // Huffman code tables for DC, AC & Zero Run Length
+    UINT32 DcHuffCode[2][MAX_ENTROPY_TOKENS];
+    UINT8  DcHuffLength[2][MAX_ENTROPY_TOKENS];
+    UINT32 DcHuffProbs[2][MAX_ENTROPY_TOKENS];
+    HUFF_NODE DcHuffTree[2][MAX_ENTROPY_TOKENS];
+
+    UINT32 AcHuffCode[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    UINT8  AcHuffLength[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    UINT32 AcHuffProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+    HUFF_NODE AcHuffTree[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+    UINT32 ZeroHuffCode[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+    UINT8  ZeroHuffLength[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+    UINT32 ZeroHuffProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+    HUFF_NODE ZeroHuffTree[ZRL_BANDS][ZERO_RUN_PROB_CASES];
+
+    /* FAST look-up-table for huffman Trees */
+    UINT16 DcHuffLUT[2][1<<HUFF_LUT_LEVELS];
+    UINT16 AcHuffLUT[PREC_CASES][2][VP6_AC_BANDS][1<<HUFF_LUT_LEVELS];
+    UINT16 ZeroHuffLUT[ZRL_BANDS][1<<HUFF_LUT_LEVELS];
+
+    RAW_BUFFER  HuffBuffer;
+
+    // Second partition buffer details
+    FRAME_HEADER Header;
+    UINT32 Buff2Offset;
+
+	// Note: Use of huffman codes for DCT data is only allowed 
+	// when using multiple data streams / partitions
+	BOOL   UseHuffman;	
+
+    // Counters for runs of zeros at DC & EOB at first AC position in Huffman mode
+    INT32  CurrentDcRunLen[2];
+    INT32  CurrentAc1RunLen[2];
+
+    // Should we do loop filtering.
+	// In simple profile this is ignored and there is no loop filtering	
+	UINT8  UseLoopFilter;
+
+    // Control of dering loop/prediction filter
+	UINT32 DrCutOff;
+	UINT32 DrThresh[256];
+
+    UINT32 BlackClamp;
+    UINT32 WhiteClamp;
+
+    UINT32 DeInterlaceMode;
+	
+	UINT32 AddNoiseMode;
+
+} PB_INSTANCE;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern UINT8        LimitVal_VP31[VAL_RANGE * 3];
+extern BOOL         VP6_ModeUsesMC[MAX_MODES]; // table to indicate if the given mode uses motion estimation
+extern const int    VP6_Mode2Frame[DO_NOT_CODE];
+extern const INT32  VP6_CoeffToBand[65];
+extern const UINT8  DefaultNonInterlacedScanBands[BLOCK_SIZE]; 
+extern const UINT8  DefaultInterlacedScanBands[BLOCK_SIZE];
+
+extern PB_INSTANCE *VP6_CreatePBInstance ( void );
+extern void		    VP6_DeletePBInstance ( PB_INSTANCE** );
+extern BOOL	        VP6_LoadFrame ( PB_INSTANCE *pbi );
+extern void	        VP6_SetFrameType ( PB_INSTANCE *pbi, UINT8 FrType );
+extern UINT8        VP6_GetFrameType ( PB_INSTANCE *pbi );
+extern BOOL	        VP6_InitFrameDetails ( PB_INSTANCE *pbi );
+extern void	        VP6_ErrorTrap ( PB_INSTANCE *pbi, int ErrorCode );
+extern BOOL	        VP6_AllocateFragmentInfo ( PB_INSTANCE *pbi );
+extern BOOL	        VP6_AllocateFrameInfo ( PB_INSTANCE *pbi, unsigned int FrameSize );
+extern void	        VP6_DeleteFragmentInfo ( PB_INSTANCE *pbi );
+extern void	        VP6_DeleteFrameInfo ( PB_INSTANCE *pbi );
+extern void	        VP6_DMachineSpecificConfig ( void );
+extern UINT32	    VP6_bitread1 ( BOOL_CODER *br ) ;
+extern UINT32	    VP6_bitread ( BOOL_CODER *br, int bits );
+extern void         vp6_appendframe ( PB_INSTANCE *pbi );
+extern void		    VP6_readTSC ( unsigned long *tsc );
+extern void         VP6_ConfigureContexts ( PB_INSTANCE *pbi );
+extern void         VP6_ResetAboveContext ( PB_INSTANCE *pbi );
+extern void         VP6_ResetLeftContext ( PB_INSTANCE *pbi );
+extern void         VP6_UpdateContext ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp );
+extern void         VP6_UpdateContextA ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp );
+
+extern void         VP6_PredictDC ( PB_INSTANCE *pbi, BLOCK_POSITION bp );
+extern void         VP6_PredictDC_MB ( PB_INSTANCE *pbi );
+
+extern void         VP6_ReconstructBlock ( PB_INSTANCE *pbi, BLOCK_POSITION bp );
+//extern void         VP6_ReconstructMacroBlock ( PB_INSTANCE *pbi);
+extern void			VP6_PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, UINT32 bp);                    
+#endif              
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h
new file mode 100644
index 00000000..747cc160
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/quantize.h
@@ -0,0 +1,65 @@
+/****************************************************************************
+*
+*   Module Title :     quantize.h
+*
+*   Description  :     Quantizer header file.
+*
+****************************************************************************/
+#ifndef __INC_QUANTIZE_H
+#define __INC_QUANTIZE_H
+
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+
+
+
+/****************************************************************************
+*  Structures
+****************************************************************************/
+typedef struct  // quantizer / dequantizer state; every two-entry dimension below is [2]
+{
+	UINT32 FrameQIndex;					   // Quality specified as a table index 
+	UINT32 LastFrameQIndex;	
+	short round[8];
+	short mult[8];
+	short zbin[8];
+	UINT32 QThreshTable[Q_TABLE_SIZE];	   // ac quantizer scale values
+
+    UINT32 *transIndex;					   // array to reorder zig zag to idct's ordering
+	UINT8   quant_index[64];			   // array to reorder from raster to zig zag
+
+	// used by the dequantizer 
+	Q_LIST_ENTRY * dequant_coeffs[2];	   // pointer to current dequantization tables
+	Q_LIST_ENTRY * dequant_coeffsAlloc[2]; // alloc so we can keep aligned
+
+	INT32 QuantCoeffs[2][64];			   // Quantizer values table
+	INT32 QuantRound[2][64];			   // Quantizer rounding table
+	INT32 ZeroBinSize[2][64];			   // Quantizer zero bin table
+	INT32 ZlrZbinCorrections[2][64];	   // Zbin corrections based upon zero run length.
+
+} QUANTIZER;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern const UINT8 VP6_QTableSelect[6];
+extern const Q_LIST_ENTRY VP6_DcQuant[Q_TABLE_SIZE];
+
+extern void (*VP6_quantize) ( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void (*VP6_BuildQuantIndex)( QUANTIZER * pbi);
+extern void VP6_InitQTables ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern void VP6_UpdateQ ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern void VP6_UpdateQC ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+extern QUANTIZER * VP6_CreateQuantizer ( void );
+extern void VP6_DeleteQuantizer ( QUANTIZER **pbi );          
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h
new file mode 100644
index 00000000..116fd54a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/vp5d.h
@@ -0,0 +1,112 @@
+#ifndef vp5d_h
+#define vp5d_h 1
+
+// Interface between vp3d.dll and Albany's DXV adaptor/blitter.
+// Timothy S. Murphy 13 September 1999.
+
+
+// The main object "defined" here.
+
+struct VP3decompressor;
+
+
+// Some conveniences.
+
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+
+// FourCC codes.  Should agree with microsoft's definition
+// sans their stupid types and include files.
+
+typedef ulong FourCC;
+
+#define MakeFourCC( a, b, c, d) ( /* a -> bits 0-7, b -> bits 8-15, c -> bits 16-23, d -> bits 24-31; each argument is truncated to uchar */ \
+	(ulong) (uchar) (a) /* arguments are parenthesized so the casts and shifts apply to the whole argument expression */ \
+	| (ulong) (uchar) (b) << 8 \
+	| (ulong) (uchar) (c) << 16 \
+	| (ulong) (uchar) (d) << 24 \
+)
+
+// A temporary fourCC for Eric & I to use til the bit stream stabilizes.
+// (Eric - "hurl4cc" should NOT appear anywhere in your code, I just put it
+// here so you can check the fourCC representations in memory and files.)
+
+#define hurl4cc MakeFourCC( 'H', 'U', 'R', 'L')
+
+// The actual fourCC for now; similar remarks apply.
+
+#define VP30 1
+
+#if VP30
+#	define wilk4cc MakeFourCC( 'V', 'P', '3', '0')
+#else
+#	define wilk4cc MakeFourCC( 'W', 'I', 'L', 'K')
+#endif
+
+
+// Array of fourCC codes, has length _and_ is null-terminated.
+// As Donald Knuth once said,
+// "Some people occasionally like a little extra redundancy sometimes."
+
+typedef struct { const FourCC * codes;  uint numCodes;}  FourCClist;
+
+
+// YUV buffer configuration.
+
+typedef struct {
+	// Dimensions of the Y plane and of the U/V planes (units not stated here - presumably pixels, TODO confirm)
+	ulong Ywidth, Yheight, UVwidth, UVheight;
+	// Strides are signed long; a single UVstride covers both Ubuf and Vbuf
+	long Ystride, UVstride;
+	// Read-only pointers to the three planes
+	const uchar *Ybuf, *Ubuf, *Vbuf;
+
+} YUVbufferLayout;
+
+
+#if __cplusplus
+#	define Decompressor VP3decompressor
+	extern "C" {
+#else
+#	define Decompressor struct VP3decompressor
+#endif
+
+#if defined(MACPPC)
+#define _stdcall 
+#endif
+
+
+// Return array of fourCC codes supported.
+// Declared with (void): in C an empty "()" is a non-prototype declaration, so stray arguments would not be diagnosed.
+const FourCClist * _stdcall VP3DfourCClist(void);
+
+
+// Create a decompressor for a particular supported stream type.
+// Returns 0 on failure.
+
+Decompressor * _stdcall VP3DcreateDecompressor( FourCC streamType);
+
+void _stdcall VP3DdestroyDecompressor( Decompressor *);
+
+
+// Advance to next frame, returning reference to updated YUV buffer.
+
+const YUVbufferLayout * _stdcall VP3DnextFrame
+( 
+	Decompressor *, const uchar * CXdata, ulong CXdataLengthInBytes
+);
+
+void _stdcall VP3DblitBGR(
+	const Decompressor *, uchar * outRGB, long outStride, long outHeight
+);
+
+
+#if __cplusplus
+	}
+#endif
+
+#undef Decompressor
+
+#endif	// vp5d_h
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h
new file mode 100644
index 00000000..fb9d4c14
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Include/xprintf.h
@@ -0,0 +1,31 @@
+/****************************************************************************
+*
+*   Module Title :     xprintf.h
+*
+*   Description  :     Debug print interface header file.
+*
+****************************************************************************/
+#ifndef __INC_XPRINTF_H
+#define __INC_XPRINTF_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+
+/****************************************************************************
+*  Functions
+****************************************************************************/
+#if __cplusplus
+extern "C"
+{
+#endif
+
+// Display a printf style message on the current video frame
+extern int vp6_xprintf(const PB_INSTANCE* ppbi, long pixel, const char* format, ...);
+
+#if __cplusplus
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile
new file mode 100644
index 00000000..7bf40ed2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/Makefile
@@ -0,0 +1,72 @@
+## Target to build
+
+TARGET 			=libvp6e
+
+## TOOLS
+CC      		= ecc
+LD      		= ecc
+AR      		= ar
+OBJDUMP 		= objdump
+RM      		= rm -f
+
+## Directories
+TOPDIR  		=C:\DuckSoft
+PRIVATEINCLUDE  =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include 
+VP6INCLUDE      =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\include
+CXGENERIC       =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\cx\generic
+OBJDIR          =${TOPDIR}\ObjectCode\bspvp6e
+CURRENTDIR 		=${TOPDIR}\private\corelibs\cdxv\vp60\vp60 
+LIBDIR			=${TOPDIR}\private\corelibs\lib\mapca 
+
+## Compile Flags
+ALLINCLUDES     =-I${CXGENERIC} -I${VP6INCLUDE} -I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2} 
+VP6DEFINES		=-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES      =-DMAPCA
+ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
+DEBUG			=-O2
+CFLAGS 			=-msvc -align 8 -ms -etswp -mP3OPT_nonlocal_calls_through_register=true \
+				-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+				-magen_interroutine_padding
+ALLFLAGS 		= $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS		= 	cx\bsp\PackVideo.o		 \
+				cx\bsp\PickModes.o		 \
+				cx\generic\RawBuffer.o	 \
+				cx\bsp\bspTokenize.o	 \
+				cx\generic\Transform.o	 \
+				cx\bsp\encode.o		     \
+				cx\bsp\encodembs.o		 \
+				cx\bsp\encodemode.o		 \
+				cx\generic\encodemv.o	 \
+				cx\bsp\mcomp.o			 \
+				cx\generic\misc_common.o \
+				cx\generic\twopass.o	 \
+				cx\bsp\vfwcomp.o		 \
+				cx\generic\vfwcomp_if.o	 \
+				cx\bsp\bspComp_Globals.o \
+				cx\bsp\mcompopt.o		 \
+				cx\bsp\bsptransform.o	 \
+				cx\bsp\CSystemDependant.o 
+
+SRCS			= $(OBJS:.o=.c)
+ 
+ARTARGET		= ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+			${AR} -cr ${ARTARGET} ${OBJS}
+			mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+			$(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+			${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h
new file mode 100644
index 00000000..d2ceb50d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CFrameW.h
@@ -0,0 +1,21 @@
+/****************************************************************************
+*
+*   Module Title :     CFrameW.h
+*
+*   Description  :     Frame writing functions.
+*
+****************************************************************************/
+#ifndef __INC_CFRAMEW_H
+#define __INC_CFRAMEW_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+*  Functions
+****************************************************************************/
+extern void WriteFrameHeader ( CP_INSTANCE *cpi );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c
new file mode 100644
index 00000000..4ba43888
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/CSystemDependant.c
@@ -0,0 +1,79 @@
+/****************************************************************************
+*
+*   Module Title :     CSystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking. */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <string.h>
+#include <time.h>
+#include <stdlib.h>   
+#include <stdio.h>  
+
+#include "pbdll.h"
+#include "compdll.h"
+#include "mcomp.h"
+#include "quantize.h"
+#include "resource.h"    /* Resource IDs. */  
+
+/****************************************************************************
+*  Explicit imports
+****************************************************************************/
+#if defined(POSTPROCESS)
+extern void FDct1d4 (INT16 *InputData, INT16 * OutputData);
+extern void IDct4( INT16 *InputData, INT16 *OutputData);
+#endif 
+
+extern UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp);
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8  * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetIntraErrorC( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 GetInterErr(  UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1,  UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 GetSumAbsDiffs( UINT8 * NewDataPtr, INT32 SourceStride, UINT8  * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 GetHalfPixelSumAbsDiffs( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern void VP6_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern UINT32 GetMBFieldVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 FiltBlockBilGetSad_C(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CMachineSpecificConfig
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None (the global function pointers listed below are written).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Binds the encoder's global function pointers to the plain "C" routines.
+ *
+ *  SPECIAL NOTES :     Called from VPEInitLibrary() in Comp_Globals.c.
+ *
+ ****************************************************************************/
+void CMachineSpecificConfig ( void )
+{
+    
+        GetSAD16           = GetSumAbsDiffs16;              // sum-of-absolute-differences routines
+        GetSadHalfPixel16  = GetHalfPixelSumAbsDiffs16; 
+
+        GetSAD             = GetSumAbsDiffs;
+        GetSadHalfPixel    = GetHalfPixelSumAbsDiffs;
+        GetInterError      = GetInterErr;
+		GetIntraError      = GetIntraErrorC;
+		fdct_short         = fdct_short_C;                  // not declared in this file; presumably from an included header
+		VP6_quantize       = VP6_quantize_c;
+        Sub8               = SUB8;                          // SUB8* are not declared here either - confirm which header provides them
+        Sub8_128           = SUB8_128;
+        Sub8Av2            = SUB8AV2;
+        GetMBFrameVertVar  = GetMBFrameVerticalVariance;
+        GetMBFieldVertVar  = GetMBFieldVerticalVariance;
+        FiltBlockBilGetSad = FiltBlockBilGetSad_C;
+        GetBlockReconErr   = ComputeBlockReconError;
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c
new file mode 100644
index 00000000..a58c8aff
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Comp_Globals.c
@@ -0,0 +1,371 @@
+/****************************************************************************
+*
+*   Module Title :     Comp_Globals.c
+*
+*   Description  :     Global compressor functions & declarations.
+* 
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/ 
+#include <math.h>       // For Abs()
+#include "compdll.h"
+#include "mcomp.h" 
+
+/****************************************************************************
+*  Macros
+****************************************************************************/ 
+#define ROUNDUP32(X) ( ( ( (unsigned long)(X) ) + 31UL ) & ~31UL )   // round X up to a multiple of 32; X is parenthesized and the mask is full-width
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+INT32 *XX_LUT;                      // set by VPEInitLibrary() to &XSquaredTable[255]; XX_LUT[d] == d*d for d in -255..255
+static INT32 XSquaredTable[511];    // backing store for XX_LUT (2*255 + 1 entries)
+
+// Motion compensation related variables
+INT32 *AbsX_LUT = NULL;             // set by VPEInitLibrary() to &AbsXTable[255]; AbsX_LUT[d] == abs(d) for d in -255..255
+static INT32 AbsXTable[511];        // backing store for AbsX_LUT (2*255 + 1 entries)
+// Function pointers below are bound to C routines by CMachineSpecificConfig() (CSystemDependant.c)
+UINT32 (*FiltBlockBilGetSad)(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *,  UINT8 *, INT32 );
+UINT32 (*GetIntraError)( UINT8 *, INT32 );
+void   (*fdct_short) ( INT16 * InputData, INT16 * OutputData );
+void   (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride);
+void   (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+void   (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+
+/****************************************************************************
+*  Explicit Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+extern void VP6_DeleteTmpBuffers(PB_INSTANCE * pbi);
+extern BOOL VP6_AllocateTmpBuffers(PB_INSTANCE * pbi);
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+extern void FillValueTokens ( void );
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     EDeleteFragmentInfo
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None (the freed members of cpi are reset to 0).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Frees DCT_codes, DCTDataBuffer, quantized_list and MbBestErr
+ *                      (plus FDCTCoeffs when FULLFRAMEFDCT is defined).
+ *  SPECIAL NOTES :     Each pointer is tested before duck_free() and zeroed afterwards,
+ *                      so the routine can be called again on the same instance.
+ ****************************************************************************/
+void EDeleteFragmentInfo ( CP_INSTANCE *cpi )
+{
+    if( cpi->DCT_codes )
+        duck_free( cpi->DCT_codes );
+    cpi->DCT_codes = 0;
+
+    if( cpi->DCTDataBuffer )
+        duck_free( cpi->DCTDataBuffer);
+    cpi->DCTDataBuffer = 0;
+
+    if( cpi->quantized_list)
+        duck_free( cpi->quantized_list);
+    cpi->quantized_list = 0;
+
+    if( cpi->MbBestErr )
+        duck_free(cpi->MbBestErr);
+    cpi->MbBestErr = 0;
+
+#if defined FULLFRAMEFDCT
+    if( cpi->FDCTCoeffs)
+        duck_free(cpi->FDCTCoeffs);
+    cpi->FDCTCoeffs = 0;
+#endif
+
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     EAllocateFragmentInfo
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None (buffer pointers inside cpi are replaced).
+ *
+ *  RETURNS       :     BOOL: TRUE on success, FALSE if allocation failure.
+ *
+ *  FUNCTION      :     Allocates memory for encoder data structures.
+ *
+ *  SPECIAL NOTES :     Buffers are requested 32-byte aligned from duck_memalign().
+ *                      On any failure every fragment buffer is released again.
+ ****************************************************************************/
+BOOL EAllocateFragmentInfo ( CP_INSTANCE *cpi )
+{
+    // De-allocate existing memory
+    EDeleteFragmentInfo(cpi);
+
+    // Allocate new memory
+    cpi->DCT_codes = duck_memalign(32, 64*sizeof(INT16), DMEM_GENERAL);
+    if(!cpi->DCT_codes) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    cpi->quantized_list = duck_memalign(32, 64*sizeof(Q_LIST_ENTRY), DMEM_GENERAL);
+    if(!cpi->quantized_list) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    cpi->DCTDataBuffer = duck_memalign(32, 64*sizeof(INT16), DMEM_GENERAL);
+    if(!cpi->DCTDataBuffer) { EDeleteFragmentInfo(cpi); return FALSE; }
+
+    cpi->MbBestErr = (UINT32 *) duck_memalign(32, cpi->pb.MacroBlocks * sizeof(UINT32), DMEM_GENERAL);
+    if(!cpi->MbBestErr) { EDeleteFragmentInfo(cpi); return FALSE; }   // was EDeleteFrameInfo(): leaked the buffers above and freed the frame buffers
+
+#if defined FULLFRAMEFDCT
+    cpi->FDCTCoeffs= (Q_LIST_ENTRY(*)[64]) duck_memalign(32, sizeof(Q_LIST_ENTRY)*64* cpi->pb.UnitFragments , DMEM_GENERAL);
+    if(!cpi->FDCTCoeffs) {EDeleteFragmentInfo(cpi); return FALSE;}
+#endif
+
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     EDeleteFrameInfo
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None (the freed members of cpi are reset to 0).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Frees yuv0ptr, yuv1ptr, CoeffTokens and OutputBuffer2.
+ *
+ *  SPECIAL NOTES :     Each pointer is tested before duck_free() and zeroed afterwards.
+ *
+ ****************************************************************************/
+void EDeleteFrameInfo ( CP_INSTANCE *cpi )
+{
+    if(cpi->yuv0ptr)
+        duck_free(cpi->yuv0ptr);
+    cpi->yuv0ptr = 0;
+
+    if(cpi->yuv1ptr)
+        duck_free(cpi->yuv1ptr);
+    cpi->yuv1ptr = 0;
+
+    if( cpi->CoeffTokens )
+        duck_free(cpi->CoeffTokens);
+    cpi->CoeffTokens = 0;
+
+    if( cpi->OutputBuffer2 )
+        duck_free(cpi->OutputBuffer2);
+    cpi->OutputBuffer2 = 0;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     EAllocateFrameInfo
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None (buffer pointers inside cpi are replaced).
+ *
+ *  RETURNS       :     BOOL: TRUE on success, FALSE if allocation failure.
+ *
+ *  FUNCTION      :     Allocates memory for frame buffers.
+ *
+ *  SPECIAL NOTES :     Buffers are requested 32-byte aligned from duck_memalign().
+ *                      On any failure EDeleteFrameInfo() releases what was allocated.
+ ****************************************************************************/
+BOOL EAllocateFrameInfo ( CP_INSTANCE *cpi )
+{
+    int FrameSize = cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize;   // one Y plane plus two UV planes
+
+    // De-allocate existing memory
+    EDeleteFrameInfo ( cpi );
+
+    // Allocate frame buffers aligned to 32-byte boundaries
+    cpi->yuv0ptr = duck_memalign(32, FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if(!cpi->yuv0ptr) { EDeleteFrameInfo(cpi); return FALSE; }
+
+    cpi->yuv1ptr = duck_memalign(32, FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if(!cpi->yuv1ptr) { EDeleteFrameInfo(cpi); return FALSE; }
+
+    cpi->CoeffTokens = duck_memalign(32, FrameSize*sizeof(TOKENEXTRA), DMEM_GENERAL);
+    if(!cpi->CoeffTokens) { EDeleteFrameInfo(cpi); return FALSE; }
+
+    // Allocate the temporary output buffer for packed dct data (FrameSize bytes)
+    cpi->OutputBuffer2 = duck_memalign(32, FrameSize, DMEM_GENERAL);
+    if(!cpi->OutputBuffer2) { EDeleteFrameInfo(cpi); return FALSE; }
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     DeleteCPInstance
+ *
+ *  INPUTS        :     CP_INSTANCE **cpi  : Pointer to pointer to encoder instance.
+ *
+ *  OUTPUTS       :     *cpi is set to NULL once the instance has been freed.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Deletes memory allocated for encoder instance and sets
+ *                      encoder instance pointer to NULL.
+ *
+ *  SPECIAL NOTES :     Does nothing when *cpi is already NULL.
+ *
+ ****************************************************************************/
+void DeleteCPInstance ( CP_INSTANCE **cpi )
+{
+    if ( *cpi != NULL )
+    {
+		DeletePreProc ( &(*cpi)->preproc );
+        VP6_DeleteTmpBuffers ( &(*cpi)->pb );
+        duck_free ( *cpi );
+        *cpi = NULL;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     CreateCPInstance
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     CP_INSTANCE *: Pointer to new encoder instance or NULL.
+ *
+ *  FUNCTION      :     Creates and initializes an encoder instance.
+ *
+ *  SPECIAL NOTES :     The instance is zero-filled before the defaults below are set.
+ *                      Release it with DeleteCPInstance().
+ ****************************************************************************/
+CP_INSTANCE *CreateCPInstance ( void )
+{
+    UINT32  i;
+    CP_INSTANCE *cpi;
+
+    // Allocate encoder data structure
+    int cpi_size = sizeof( CP_INSTANCE );
+    cpi = duck_malloc ( cpi_size, DMEM_GENERAL );
+    if ( !cpi )
+        return NULL;
+
+    // Initialize: zero every member so the failure path below sees a clean instance
+    memset ( (unsigned char *)cpi, 0, cpi_size );
+    
+    // Allocate decoder buffers
+    if ( !VP6_AllocateTmpBuffers(&cpi->pb) )
+    {
+        DeleteCPInstance(&cpi);     // frees the instance and sets cpi to NULL
+        return NULL;
+    }
+
+    // Initialise Configuration structure to legal values
+    cpi->Configuration.BaseQ                = 32;
+    cpi->Configuration.FirstFrameQ          = 32;
+    cpi->Configuration.WorstQuality         = 32;
+    cpi->Configuration.ActiveWorstQuality   = 8;
+	cpi->Configuration.ActiveBestQuality    = Q_TABLE_SIZE - 4;
+    cpi->Configuration.OutputFrameRate      = 30;
+    cpi->Configuration.TargetBandwidth      = 100*1024;
+
+    cpi->MVChangeFactor                 = 14;
+    cpi->FourMvChangeFactor             = 8;
+    cpi->ExhaustiveSearchThresh         = 2500;
+    cpi->MinImprovementForFourMV        = 100;
+    cpi->FourMVThreshold                = 10000;
+    cpi->IntraThresh                    = 25;
+    cpi->InterTripOutThresh             = 5000;
+    cpi->BpbCorrectionFactor            = 1.0;
+	cpi->KeyFrameBpbCorrectionFactor    = 1.0;
+    cpi->GoldenFrameEnabled             = TRUE;
+    cpi->InterPrediction                = TRUE;
+    cpi->MotionCompensation             = TRUE;
+    cpi->ThreshMapThreshold             = 5;
+    cpi->QuickCompress                  = TRUE;
+	cpi->RdOpt                          = 0;
+    cpi->PreProcFilterLevel             = 4;
+	cpi->FixedQ                         = -1;
+    cpi->pb.idct                        = idctc;
+	cpi->pb.ProcessorFrequency          = CPUFrequency;
+
+    memset ( cpi->pb.DcProbs, 0, sizeof(cpi->pb.DcProbs) );
+    memset ( cpi->pb.AcProbs, 0, sizeof(cpi->pb.AcProbs) );
+
+    cpi->nExperimentals = 0;
+    for ( i=0; i<C_SET_EXPERIMENTAL_MAX-C_SET_EXPERIMENTAL_MIN+1; i++ )
+        cpi->Experimental[i] = 0;
+
+	// Access pointers to MV cost array: each points at the middle entry of its row
+	cpi->EstMvCostPtrX = &cpi->EstMVCost[0][MV_ENTROPY_TOKENS / 2];
+	cpi->EstMvCostPtrY = &cpi->EstMVCost[1][MV_ENTROPY_TOKENS / 2];
+
+    return cpi;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VPEInitLibrary
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None (AbsX_LUT and XX_LUT are set and their tables filled).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initializes the decompressor library, binds the C function
+ *                      pointers and builds the encoder lookup tables.
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+void VPEInitLibrary ( void )
+{
+    int i;
+
+    // Initialise the decompressor
+    VP6_VPInitLibrary();
+    CMachineSpecificConfig();
+
+    // Prepare Abs difference lookup table
+    AbsX_LUT = &AbsXTable[255];     // centre of the 511-entry table, so indices -255..255 are valid
+    for ( i=(-255); i<=255; i++ )
+        AbsX_LUT[i] = abs(i);
+
+    // Prepare table of squared error values
+    XX_LUT = &XSquaredTable[255];   // centre of the 511-entry table, so indices -255..255 are valid
+    for ( i=(-255); i<=255; i++ )
+        XX_LUT[i] = i*i;
+
+    // Prepare table of tokens for fast look-up
+    FillValueTokens();
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     VPEDeInitLibrary
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     De-initializes the decompressor library via VP6_VPDeInitLibrary().
+ *
+ *  SPECIAL NOTES :     The lookup tables built by VPEInitLibrary() are static; nothing is freed here.
+ *
+ ****************************************************************************/
+void VPEDeInitLibrary ( void )
+{
+    VP6_VPDeInitLibrary();
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c
new file mode 100644
index 00000000..da2a1c5c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PackVideo.c
@@ -0,0 +1,1840 @@
+/****************************************************************************
+*
+*   Module Title :     PackVideo.c
+*
+*   Description  :     Bitstream Packing Routines for VP6.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "encodemode.h"
+#include "encodemv.h"
+#include "TokenEntropy.h"
+#include "systemdependant.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define PROB_UPDATE_CORECTION	(-1)		
+
+#define MAX_DC_ZRL  74      // Maximum run of zeros at DC position (11 + 63)
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+UINT32 scanupdates[64][2];
+// Five-entry DC zero-run tables: entry b spans values DcZrlExtraOffset[b] .. DcZrlExtraOffset[b] + 2^DcZrlExtraLength[b] - 1
+const UINT8 DcZrlHuffCode[5]    = { 0, 1, 2, 6, 7  };
+const UINT8 DcZrlHuffLength[5]  = { 2, 2, 2, 3, 3  };
+const UINT8 DcZrlExtraOffset[5] = { 1, 2, 3, 7, 11 };
+const UINT8 DcZrlExtraLength[5] = { 0, 0, 2, 2, 6  };
+// Lookup for values 0..MAX_DC_ZRL giving the index into the four tables above (presumably keyed by run length - confirm)
+const UINT8 DcZrlHuffBand[MAX_DC_ZRL+1] = 
+{
+    0, 0, 1, 2, 2, 2, 2, 3,
+    3, 3, 3, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4
+};
+
+/****************************************************************************
+*  Imports
+****************************************************************************/ 
+extern void ConvertBoolTrees ( PB_INSTANCE *pbi );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : GetOptimalFrameZrlProbs
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :	cpi->FrameZrlProbs[][] and cpi->FrameZrlBranchHits[][][] (nodes 0..13 per band).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Calculate optimal ZRL node probabilities and
+ *                  hit counts from ZRL distribution data.
+ *
+ *  SPECIAL NOTES : A node's probability is left unchanged when its Sum is 0;
+ *                  probabilities are (Sum2 * 255) / Sum with truncating division.
+ ****************************************************************************/
+void GetOptimalFrameZrlProbs ( CP_INSTANCE *cpi )
+{
+	UINT32 i,j;
+	UINT32 Sum, Sum2;
+	UINT32 BitSums[ZRL_BANDS][6][2];
+	UINT32 RunLength;
+	UINT32 Count;
+	UINT32 Index;
+
+	// Clear down BitSums workspace
+	memset ( BitSums, 0, sizeof(BitSums) );
+
+	// Work out the optimised nodes probabilities relating to explicit values
+	for ( i=0; i<ZRL_BANDS; i++ )
+	{
+		// branch hits and probability for the top node ( runs 1-4 vs Run > 4 )
+		Sum = cpi->FrameZeroCount[i];
+		Sum2 = cpi->FrameZrlDist[i][1] + cpi->FrameZrlDist[i][2] + cpi->FrameZrlDist[i][3] + cpi->FrameZrlDist[i][4];
+		cpi->FrameZrlBranchHits[i][0][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][0][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][0] = (Sum2 * 255) / Sum;
+
+		// Second Node 1,2 vs 3,4
+		Sum = cpi->FrameZrlDist[i][1] + cpi->FrameZrlDist[i][2] + cpi->FrameZrlDist[i][3] + cpi->FrameZrlDist[i][4];
+		Sum2 = cpi->FrameZrlDist[i][1] + cpi->FrameZrlDist[i][2];
+		cpi->FrameZrlBranchHits[i][1][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][1][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][1] = (Sum2 * 255) / Sum;
+
+		// Third Node 1 vs 2
+		Sum = cpi->FrameZrlDist[i][1] + cpi->FrameZrlDist[i][2];
+		Sum2 = cpi->FrameZrlDist[i][1];
+		cpi->FrameZrlBranchHits[i][2][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][2][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][2] = (Sum2 * 255) / Sum;
+
+		// Fourth Node 3 vs 4
+		Sum = cpi->FrameZrlDist[i][3] + cpi->FrameZrlDist[i][4];
+		Sum2 = cpi->FrameZrlDist[i][3];
+		cpi->FrameZrlBranchHits[i][3][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][3][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][3] = (Sum2 * 255) / Sum;
+
+		// Fifth Node 5-8 vs >8 (Sum = band total minus the runs 1-4 counted above)
+		Sum = cpi->FrameZeroCount[i] - 
+			  (cpi->FrameZrlDist[i][1] + cpi->FrameZrlDist[i][2] + cpi->FrameZrlDist[i][3] + cpi->FrameZrlDist[i][4]);
+		Sum2 = cpi->FrameZrlDist[i][5] + cpi->FrameZrlDist[i][6] + cpi->FrameZrlDist[i][7] + cpi->FrameZrlDist[i][8];
+		cpi->FrameZrlBranchHits[i][4][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][4][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][4] = (Sum2 * 255) / Sum;
+
+		// Sixth Node 5,6 vs 7,8
+		Sum = cpi->FrameZrlDist[i][5] + cpi->FrameZrlDist[i][6] + cpi->FrameZrlDist[i][7] + cpi->FrameZrlDist[i][8];
+		Sum2 = cpi->FrameZrlDist[i][5] + cpi->FrameZrlDist[i][6];
+		cpi->FrameZrlBranchHits[i][5][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][5][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][5] = (Sum2 * 255) / Sum;
+
+  		// Seventh Node 5 vs 6
+		Sum = cpi->FrameZrlDist[i][5] + cpi->FrameZrlDist[i][6];
+		Sum2 = cpi->FrameZrlDist[i][5];
+		cpi->FrameZrlBranchHits[i][6][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][6][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][6] = (Sum2 * 255) / Sum;
+
+  		// Eighth Node 7 vs 8
+		Sum = cpi->FrameZrlDist[i][7] + cpi->FrameZrlDist[i][8];
+		Sum2 = cpi->FrameZrlDist[i][7];
+		cpi->FrameZrlBranchHits[i][7][0] = Sum2;
+		cpi->FrameZrlBranchHits[i][7][1] = Sum - Sum2;
+		if ( Sum )
+			cpi->FrameZrlProbs[i][7] = (Sum2 * 255) / Sum;
+	}
+
+	// Work out the bit probabilities for the remaining nodes (runs 9..63, coded as 6 bits of run-9)
+	for ( i=0; i<ZRL_BANDS; i++ )
+	{
+		for ( j=9; j<64; j++ )
+		{
+			RunLength = j - 9;
+			Count = cpi->FrameZrlDist[i][j];
+			// Tally, for each of the 6 bit positions, how often that bit of RunLength is 0 or 1
+			BitSums[i][5][((RunLength >> 5) & 1)] += Count;
+			BitSums[i][4][((RunLength >> 4) & 1)] += Count;
+			BitSums[i][3][((RunLength >> 3) & 1)] += Count;
+			BitSums[i][2][((RunLength >> 2) & 1)] += Count;
+			BitSums[i][1][((RunLength >> 1) & 1)] += Count;
+			BitSums[i][0][(RunLength & 1)] += Count;
+		}
+
+		for ( j=0; j<6; j++ )
+		{
+			Index = j + 8;					// Bit j maps to node 8+j of FrameZrlProbs[] / FrameZrlBranchHits[]
+			Sum = BitSums[i][j][0] + BitSums[i][j][1];
+			Sum2 = BitSums[i][j][0];
+			cpi->FrameZrlBranchHits[i][Index][0] = Sum2;
+			cpi->FrameZrlBranchHits[i][Index][1] = Sum - Sum2;
+			if ( Sum )
+				cpi->FrameZrlProbs[i][Index] = (Sum2 * 255) / Sum;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ConvertDistribution
+ *
+ *  INPUTS        : CP_INSTANCE *cpi          : Pointer to encoder instance (NOT USED).
+ *                  UINT32 *Distribution      : Token histogram array (MAX_ENTROPY_TOKENS entries are read).
+ *
+ *  OUTPUTS       :	UINT8 *Probabilities      : Pointer to array of node probs (MAX_ENTROPY_TOKENS-1 entries written).
+ *                  UINT32 BranchChoices[][2] : Histogram of 1/0 branch decisions.
+ * 
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Converts a token distribution array into a set of tree
+ *                  node probabilities.
+ *
+ *  SPECIAL NOTES : The format of the binary decision tree is fixed. 
+ *                  Results are clamped to 1..MAX_PROB, then the low bit is cleared (0 becomes 1).
+ ****************************************************************************/
+void ConvertDistribution
+(
+    CP_INSTANCE *cpi,
+    UINT32 *Distribution,
+    UINT8 *Probabilities, 
+    UINT32 BranchChoices[][2] 
+)
+{
+	UINT32 i;
+	UINT32 Tmp1;
+	UINT32 Tmp2;
+	UINT32 SumTokens = 0;
+
+	// Count the tokens in the histogram
+	for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+		SumTokens += Distribution[i];
+
+	// Set the default output probabilities (128) and clear the branch counts
+	for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+	{
+		Probabilities[i]    = 128;
+		BranchChoices[i][0] = 0;
+		BranchChoices[i][1] = 0;
+	}
+
+	// Trap cases where there are no tokens (defaults above are kept)
+	if ( SumTokens > 0 )
+	{
+		// The first probability we are interested in is the 0 context (truncating division, unlike the rounded nodes below)
+		Probabilities[ZERO_CONTEXT_NODE] = (UINT8)(((Distribution[DCT_EOB_TOKEN]+Distribution[ZERO_TOKEN]) * 255)/SumTokens);
+		BranchChoices[ZERO_CONTEXT_NODE][0] = (Distribution[DCT_EOB_TOKEN] + Distribution[ZERO_TOKEN]);
+		BranchChoices[ZERO_CONTEXT_NODE][1] = SumTokens - (Distribution[DCT_EOB_TOKEN] + Distribution[ZERO_TOKEN]);
+		if ( Probabilities[ZERO_CONTEXT_NODE] == 0 )
+			Probabilities[ZERO_CONTEXT_NODE] = 1;
+		else if ( Probabilities[ZERO_CONTEXT_NODE] > MAX_PROB )
+			Probabilities[ZERO_CONTEXT_NODE] = MAX_PROB;
+
+		// Next the Zero/EOB split (Tmp1 = hits on branch 0, Tmp2 = hits reaching the node; rounded division)
+		Tmp1 = Distribution[DCT_EOB_TOKEN];
+		Tmp2 = Distribution[DCT_EOB_TOKEN]+Distribution[ZERO_TOKEN];
+		BranchChoices[EOB_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[EOB_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2  )
+		{
+			Probabilities[EOB_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[EOB_CONTEXT_NODE] == 0 )
+				Probabilities[EOB_CONTEXT_NODE] = 1;
+			else if ( Probabilities[EOB_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[EOB_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the One Context (ONE_TOKEN vs every other non-zero, non-EOB token)
+		Tmp1 = Distribution[ONE_TOKEN];
+		Tmp2 = SumTokens - (Distribution[DCT_EOB_TOKEN]+Distribution[ZERO_TOKEN]);
+		BranchChoices[ONE_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[ONE_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[ONE_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[ONE_CONTEXT_NODE] == 0 )
+				Probabilities[ONE_CONTEXT_NODE] = 1;
+			else if ( Probabilities[ONE_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[ONE_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the LowVal Context (relies on Tmp2 from the One Context step above)
+		Tmp1 = Distribution[TWO_TOKEN] + Distribution[THREE_TOKEN] + Distribution[FOUR_TOKEN];
+		Tmp2 = Tmp2 - Distribution[ONE_TOKEN];
+		BranchChoices[LOW_VAL_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[LOW_VAL_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[LOW_VAL_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[LOW_VAL_CONTEXT_NODE] == 0 )
+				Probabilities[LOW_VAL_CONTEXT_NODE] = 1;
+			else if ( Probabilities[LOW_VAL_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[LOW_VAL_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the TWO Context (TWO_TOKEN vs THREE_TOKEN/FOUR_TOKEN)
+		Tmp1 = Distribution[TWO_TOKEN];
+		Tmp2 = Distribution[TWO_TOKEN] + Distribution[THREE_TOKEN] + Distribution[FOUR_TOKEN];
+		BranchChoices[TWO_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[TWO_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[TWO_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[TWO_CONTEXT_NODE] == 0 )
+				Probabilities[TWO_CONTEXT_NODE] = 1;
+			else if ( Probabilities[TWO_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[TWO_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the Three Context (THREE_TOKEN vs FOUR_TOKEN)
+		Tmp1 = Distribution[THREE_TOKEN];
+		Tmp2 = Distribution[THREE_TOKEN] + Distribution[FOUR_TOKEN];
+		BranchChoices[THREE_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[THREE_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[THREE_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[THREE_CONTEXT_NODE] == 0 )
+				Probabilities[THREE_CONTEXT_NODE] = 1;
+			else if ( Probabilities[THREE_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[THREE_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the HighLowVal Context (categories 1-2 vs categories 3-6)
+		Tmp1 = Distribution[DCT_VAL_CATEGORY1] + Distribution[DCT_VAL_CATEGORY2];
+		Tmp2 = Distribution[DCT_VAL_CATEGORY1] + Distribution[DCT_VAL_CATEGORY2] + Distribution[DCT_VAL_CATEGORY3] + Distribution[DCT_VAL_CATEGORY4] + Distribution[DCT_VAL_CATEGORY5] + Distribution[DCT_VAL_CATEGORY6];
+		BranchChoices[HIGH_LOW_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[HIGH_LOW_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[HIGH_LOW_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[HIGH_LOW_CONTEXT_NODE] == 0 )
+				Probabilities[HIGH_LOW_CONTEXT_NODE] = 1;
+			else if ( Probabilities[HIGH_LOW_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[HIGH_LOW_CONTEXT_NODE] = MAX_PROB;
+		}	
+
+		// Now the Cat1 Context (category 1 vs category 2)
+		Tmp1 = Distribution[DCT_VAL_CATEGORY1];
+		Tmp2 = Distribution[DCT_VAL_CATEGORY1] + Distribution[DCT_VAL_CATEGORY2];
+		BranchChoices[CAT_ONE_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[CAT_ONE_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[CAT_ONE_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[CAT_ONE_CONTEXT_NODE] == 0 )
+				Probabilities[CAT_ONE_CONTEXT_NODE] = 1;
+			else if ( Probabilities[CAT_ONE_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[CAT_ONE_CONTEXT_NODE] = MAX_PROB;
+		}
+		
+		// Now the Cat3/4 Context (categories 3-4 vs categories 5-6)
+		Tmp1 = Distribution[DCT_VAL_CATEGORY3] + Distribution[DCT_VAL_CATEGORY4];
+		Tmp2 = Distribution[DCT_VAL_CATEGORY3] + Distribution[DCT_VAL_CATEGORY4] + Distribution[DCT_VAL_CATEGORY5] + Distribution[DCT_VAL_CATEGORY6];
+		BranchChoices[CAT_THREEFOUR_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[CAT_THREEFOUR_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[CAT_THREEFOUR_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[CAT_THREEFOUR_CONTEXT_NODE] == 0 )
+				Probabilities[CAT_THREEFOUR_CONTEXT_NODE] = 1;
+			else if ( Probabilities[CAT_THREEFOUR_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[CAT_THREEFOUR_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the Cat3 Context (category 3 vs category 4)
+		Tmp1 = Distribution[DCT_VAL_CATEGORY3];
+		Tmp2 = Distribution[DCT_VAL_CATEGORY3] + Distribution[DCT_VAL_CATEGORY4];
+		BranchChoices[CAT_THREE_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[CAT_THREE_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[CAT_THREE_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[CAT_THREE_CONTEXT_NODE] == 0 )
+				Probabilities[CAT_THREE_CONTEXT_NODE] = 1;
+			else if ( Probabilities[CAT_THREE_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[CAT_THREE_CONTEXT_NODE] = MAX_PROB;
+		}
+
+		// Now the Cat5 Context (category 5 vs category 6)
+		Tmp1 = Distribution[DCT_VAL_CATEGORY5];
+		Tmp2 = Distribution[DCT_VAL_CATEGORY5] + Distribution[DCT_VAL_CATEGORY6];
+		BranchChoices[CAT_FIVE_CONTEXT_NODE][0] = Tmp1;
+		BranchChoices[CAT_FIVE_CONTEXT_NODE][1] = Tmp2-Tmp1;
+		if ( Tmp2 )
+		{
+			Probabilities[CAT_FIVE_CONTEXT_NODE] = (UINT8)(((Tmp1 * 255) + (Tmp2 >> 1))/Tmp2);
+			if ( Probabilities[CAT_FIVE_CONTEXT_NODE] == 0 )
+				Probabilities[CAT_FIVE_CONTEXT_NODE] = 1;
+			else if ( Probabilities[CAT_FIVE_CONTEXT_NODE] > MAX_PROB )
+				Probabilities[CAT_FIVE_CONTEXT_NODE] = MAX_PROB;
+		}
+	}
+
+	// Adjust the probabilities to a 7 bit resolution (clear the low bit)
+	for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+	{
+		Probabilities[i] &= ~1;
+		if ( Probabilities[i] == 0 )	// 0 not legal.
+			Probabilities[i] = 1;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : AddBitsToBuffer
+ *
+ *  INPUTS        :	BOOL_CODER *bc : Pointer to a bool coder instance.
+ * 					UINT32 data    : Data value to be encoded by bc.
+ *                  UINT32 bits    : Number of bits of data to be encoded.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Uses the specified Bool Coder to encode the specified
+ *                  data value which has the specified number of bits.
+ *
+ *  SPECIAL NOTES : Fixed probability of 128 (0x80) is used to encode
+ *                  each bit in turn. Bit (bits-1) is encoded first and
+ *                  bit 0 last, i.e. most-significant bit first.
+ *
+ ****************************************************************************/
+void AddBitsToBuffer ( BOOL_CODER *bc, UINT32 data, UINT32 bits )
+{
+	int bit;
+
+    for( bit=bits-1; bit>=0; bit-- )                    // bits == 0 yields bit == -1, so nothing is encoded
+		VP6_EncodeBool ( bc, (1&(data>>bit)), 0x80 );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WfhWriteFilterSelection
+ *
+ *  INPUTS        : BOOL_CODER *bc   : Bool coder the bits are written with.
+ *                  PB_INSTANCE *pbi : Supplies PredictionFilterMode and
+ *                                     PredictionFilterMvSizeThresh.
+ *                  UINT32 VarThresh : Variance threshold value written in
+ *                                     the AUTO_SELECT_PM case.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes the prediction filter selection. For
+ *                  AUTO_SELECT_PM: a 1 bit, VarThresh (5 bits) and the
+ *                  MV size threshold (3 bits). For any other mode: a 0 bit
+ *                  followed by one bit that is 1 only for BICUBIC_ONLY_PM.
+ *
+ *  SPECIAL NOTES : Helper for WriteFrameHeader.
+ *
+ ****************************************************************************/
+static void WfhWriteFilterSelection ( BOOL_CODER *bc, PB_INSTANCE *pbi, UINT32 VarThresh )
+{
+	if ( pbi->PredictionFilterMode != AUTO_SELECT_PM )
+	{
+		AddBitsToBuffer( bc, (UINT32)0, 1 );
+		AddBitsToBuffer( bc, (pbi->PredictionFilterMode == BICUBIC_ONLY_PM) ? 1 : 0, 1 );
+		return;
+	}
+
+	AddBitsToBuffer( bc, (UINT32)1, 1 );
+	AddBitsToBuffer( bc, VarThresh, 5 );
+	AddBitsToBuffer( bc, (UINT32)(pbi->PredictionFilterMvSizeThresh), 3 );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WfhLatchFilterSettings
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies the four prediction filter settings held in
+ *                  cpi->pb into the matching cpi->LastPredictionFilter*
+ *                  fields, which WriteFrameHeader compares against on
+ *                  inter frames.
+ *
+ *  SPECIAL NOTES : Helper for WriteFrameHeader.
+ *
+ ****************************************************************************/
+static void WfhLatchFilterSettings ( CP_INSTANCE *cpi )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	cpi->LastPredictionFilterMode         = pbi->PredictionFilterMode;
+	cpi->LastPredictionFilterVarThresh    = pbi->PredictionFilterVarThresh;
+	cpi->LastPredictionFilterMvSizeThresh = pbi->PredictionFilterMvSizeThresh;
+	cpi->LastPredictionFilterAlpha        = pbi->PredictionFilterAlpha;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WriteFrameHeader
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes a frame header to the bitstream. The frame type,
+ *                  quantizer index, multi-stream flag and (on key frames)
+ *                  version, profile and interlace flag are written as raw
+ *                  bits to cpi->RawBuffer. Everything else is written
+ *                  through the bool coder cpi->bc.
+ *
+ *  SPECIAL NOTES : Updates the cpi->LastPredictionFilter* fields whenever
+ *                  prediction filter settings are transmitted.
+ *
+ ****************************************************************************/
+void WriteFrameHeader ( CP_INSTANCE *cpi )
+{
+	PB_INSTANCE *pbi     = &cpi->pb;
+	BOOL_CODER  *bc      = &cpi->bc;
+	RAW_BUFFER  *RawBits = &cpi->RawBuffer;
+	int          ScaledOutput;
+	int          FilterChanged;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+	// Frame type (base/key frame or inter frame), then the quantizer index
+	AddRawBitsToBuffer( RawBits, (UINT32)pbi->FrameType, 1 );
+	AddRawBitsToBuffer( RawBits, pbi->quantizer->FrameQIndex, 6 );
+
+	// Two bool coder streams flag. Always written as 1 in SIMPLE_PROFILE,
+	// where the decoder ignores it because two streams are implicit.
+	AddRawBitsToBuffer ( RawBits, (UINT32)((pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE)) ? 1 : 0), 1);
+
+	if ( pbi->FrameType == BASE_FRAME )
+	{
+		// Key frame: version, profile and interlace flag go out as raw bits
+		AddRawBitsToBuffer ( RawBits, (UINT32)pbi->Vp3VersionNo, 5 );
+		AddRawBitsToBuffer ( RawBits, (UINT32)pbi->VpProfile, 2 );
+		AddRawBitsToBuffer ( RawBits, (UINT32)(pbi->Configuration.Interlaced), 1);
+
+		// Encoded size, vertical then horizontal
+		AddBitsToBuffer( bc, (UINT32)(pbi->VFragments>>1), 8 );
+		AddBitsToBuffer( bc, (UINT32)(pbi->HFragments>>1), 8 );
+
+		// Output size in macroblocks, vertical then horizontal. The scaled
+		// figure is used only when a scale factor above 1 is configured and
+		// neither spatial resampling nor a forced internal size is enabled.
+		ScaledOutput = ( (pbi->Configuration.HScale > 1) || (pbi->Configuration.VScale > 1) ) &&
+					   ( cpi->AllowSpatialResampling == 0 ) &&
+					   ( !cpi->ForceInternalSize );
+		if ( !ScaledOutput )
+		{
+			AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YHeight >> 4), 8 );
+			AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YWidth >> 4), 8 );
+		}
+		else
+		{
+			AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YHeight * pbi->Configuration.VScale / pbi->Configuration.VRatio >> 4), 8 );
+			AddBitsToBuffer( bc, (UINT32)(cpi->YuvInputData.YWidth * pbi->Configuration.HScale / pbi->Configuration.HRatio >> 4), 8 );
+		}
+
+		// Scaling mode
+		AddBitsToBuffer( bc, (UINT32)(pbi->Configuration.ScalingMode), 2);
+
+		// Fractional pel filter strategy is not transmitted in SIMPLE_PROFILE
+		if ( pbi->VpProfile != SIMPLE_PROFILE )
+		{
+			// Versions up to 7 send the variance threshold shifted down by 5
+			WfhWriteFilterSelection( bc, pbi,
+				(UINT32)(pbi->PredictionFilterVarThresh >> ((pbi->Vp3VersionNo > 7) ? 0 : 5)) );
+
+			// Filter alpha is only present for versions above 7
+			if ( pbi->Vp3VersionNo > 7 )
+				AddBitsToBuffer( bc,  (UINT32)pbi->PredictionFilterAlpha, 4 );
+
+			WfhLatchFilterSettings( cpi );
+		}
+	}
+	else
+	{
+		// Inter frame: golden frame refresh flag
+		AddBitsToBuffer( bc, (pbi->RefreshGoldenFrame) ? 1 : 0, 1 );
+
+		// Loop filter and filter updates are not transmitted in SIMPLE_PROFILE
+		if ( pbi->VpProfile != SIMPLE_PROFILE )
+		{
+			// Loop filter: 0 = none, 1 0 = basic, 1 1 = any other setting
+			if ( pbi->UseLoopFilter == NO_LOOP_FILTER )
+			{
+				AddBitsToBuffer( bc, 0, 1 );
+			}
+			else
+			{
+				AddBitsToBuffer( bc, 1, 1 );
+				AddBitsToBuffer( bc, (pbi->UseLoopFilter == LOOP_FILTER_BASIC) ? 0 : 1, 1 );
+			}
+
+			// Versions above 7 may change the prediction filter settings
+			if ( pbi->Vp3VersionNo > 7 )
+			{
+				FilterChanged = (pbi->PredictionFilterMode != cpi->LastPredictionFilterMode) ||
+								(pbi->PredictionFilterVarThresh != cpi->LastPredictionFilterVarThresh) ||
+								(pbi->PredictionFilterMvSizeThresh != cpi->LastPredictionFilterMvSizeThresh) ||
+								(pbi->PredictionFilterAlpha != cpi->LastPredictionFilterAlpha);
+
+				if ( !FilterChanged )
+				{
+					// No change this frame
+					AddBitsToBuffer( bc, 0, 1 );
+				}
+				else
+				{
+					// Change flag, new selection, new alpha
+					AddBitsToBuffer( bc, 1, 1 );
+					WfhWriteFilterSelection( bc, pbi, (UINT32)pbi->PredictionFilterVarThresh );
+					AddBitsToBuffer( bc,  (UINT32)pbi->PredictionFilterAlpha, 4 );
+
+					WfhLatchFilterSettings( cpi );
+				}
+			}
+		}
+	}
+
+	// All frames (key frame and inter): Huffman flag
+	AddBitsToBuffer( bc, (pbi->UseHuffman) ? 1 : 0, 1 );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6AddHuffmanToken
+ *
+ *  INPUTS        : CP_INSTANCE *cpi       : Pointer to encoder instance.
+ *	                TOKENEXTRA *TokenExtra : Token & extrabits to be encoded.
+ *                  UINT32 *HuffCode       : Array of Huffman codes for tokens.
+ *                  UINT8  *HuffLength     : Array of lengths of each HuffCode entry.
+ *                  UINT32 *ZeroCode       : Array of Huffman codes for zero runs.
+ *                  UINT8  *ZeroLength     : Array of lengths of each ZeroCode entry.
+ *	                UINT8  *CoefIndex      : DCT coeff position token occurs at.
+ *                                           Advanced by Extra for a zero
+ *                                           token at a position above 0.
+ * 
+ *  OUTPUTS       : None
+ * 
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Adds a single token and any associated extra-bits
+ *                  to cpi->pb.HuffBuffer using Huffman codes.
+ *
+ *  SPECIAL NOTES : Nothing is written for a zero token at position 0, or
+ *                  for an EOB token at position 0 or 1, when Extra is 0. 
+ *
+ ****************************************************************************/
+INLINE 
+void VP6AddHuffmanToken
+(
+	CP_INSTANCE *cpi,
+	TOKENEXTRA *TokenExtra,
+    UINT32 *HuffCode,
+    UINT8  *HuffLength,
+    UINT32 *ZeroCode,
+    UINT8  *ZeroLength,
+	UINT8 *CoefIndex
+)
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    INT32  Token     = TokenExtra->Token;
+    UINT32 Extra     = TokenExtra->Extra;
+    UINT32 RunBand;
+
+    // Ordinary value token: Huffman code then any extra bits for the token
+    if ( (Token != ZERO_TOKEN) && (Token != DCT_EOB_TOKEN) )
+    {
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+	    if ( ExtraBitLengths_VP6[Token] )
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra, ExtraBitLengths_VP6[Token] );
+        return;
+    }
+
+    // Zero token beyond position 0: token code followed by the run length
+    if ( (Token == ZERO_TOKEN) && (*CoefIndex > 0) )
+    {
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+        if ( Extra < 8 )
+        {
+            // Short run: Huffman code indexed directly by Extra
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, ZeroCode[Extra], ZeroLength[Extra] );
+        }
+        else
+        {
+            // Long run: escape code (entry 8) then Extra-8 in a fixed 6 bits
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, ZeroCode[8], ZeroLength[8] );
+            AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra-8, 6 );
+        }
+
+        // Step the coefindex on by run length - 1 for AC zero runs
+        // Note that TokenExtra->Extra = run length - 1
+        *CoefIndex += Extra;
+        return;
+    }
+
+    // EOB token beyond first AC in scan: token code only
+    if ( (Token == DCT_EOB_TOKEN) && (*CoefIndex > 1) )
+    {
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+        return;
+    }
+
+    // What remains is a zero token at position 0 or an EOB token at
+    // position 0 or 1. Both share the DC zero run code tables, and neither
+    // writes anything when Extra is 0.
+    if ( Extra == 0 )
+        return;
+
+    RunBand = DcZrlHuffBand[Extra];
+
+    // Token code
+    AddRawBitsToBuffer ( &pbi->HuffBuffer, HuffCode[Token], HuffLength[Token] );
+
+    // Run length band code & extra bits
+    AddRawBitsToBuffer ( &pbi->HuffBuffer, DcZrlHuffCode[RunBand], DcZrlHuffLength[RunBand] );
+    if ( DcZrlExtraLength[RunBand] )
+        AddRawBitsToBuffer ( &pbi->HuffBuffer, Extra-DcZrlExtraOffset[RunBand], DcZrlExtraLength[RunBand] );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6AddToken
+ *
+ *  INPUTS        : CP_INSTANCE *cpi        : Pointer to encoder instance (NOT USED).
+ *                  BOOL_CODER *bc          : Pointer to Bool Coder to be used.
+ *	                TOKENEXTRA *TokenExtra  : Token & extrabits to be encoded.
+ *	                UINT8 *BaselineProbsPtr : Array of tree node probs used for
+ *                                            the THREE, HIGH_LOW, CAT_ONE,
+ *                                            CAT_THREEFOUR, CAT_THREE and
+ *                                            CAT_FIVE context nodes.
+ *	                UINT8 *ContextProbsPtr  : Array of tree node probs used for
+ *                                            the ZERO, EOB, ONE, LOW_VAL and
+ *                                            TWO context nodes.
+ *	                UINT8 *ZeroRunProbsPtr  : Array of probs for zero run lengths.
+ *	                UINT8 *CoefIndex        : DCT coeff position token occurs at.
+ *                                            Advanced by Extra for a zero
+ *                                            token at a position above 0.
+ *	                BOOL  NonZeroImplicit   : When set, the ZERO_CONTEXT_NODE
+ *                                            decision is not written for
+ *                                            non-zero tokens.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Adds a single token and any associated extra-bits
+ *                  to the bitstream using a Bool Coder.
+ *
+ *  SPECIAL NOTES : For a zero token with Extra >= 8 at a position above 0,
+ *                  TokenExtra->Extra is reduced by 8 in place. Tokens not
+ *                  listed in the first switch write nothing. 
+ *
+ ****************************************************************************/
+void VP6AddToken
+(
+	CP_INSTANCE *cpi,
+    BOOL_CODER *bc,
+	TOKENEXTRA *TokenExtra,
+	UINT8 *BaselineProbsPtr,
+	UINT8 *ContextProbsPtr,
+	UINT8 *ZeroRunProbsPtr,
+	UINT8 *CoefIndex,
+	BOOL  NonZeroImplicit
+)
+{
+	// Fixed probabilities for the extra bits of each category token, listed
+	// from the most significant data bit down to the sign bit (always 128).
+	static const UINT8 Cat1BitProbs[2]  = { 159, 128 };
+	static const UINT8 Cat2BitProbs[3]  = { 165, 145, 128 };
+	static const UINT8 Cat3BitProbs[4]  = { 173, 148, 140, 128 };
+	static const UINT8 Cat4BitProbs[5]  = { 176, 155, 140, 135, 128 };
+	static const UINT8 Cat5BitProbs[6]  = { 180, 157, 141, 134, 130, 128 };
+	static const UINT8 Cat6BitProbs[12] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 128 };
+
+	const UINT8 *ExtraProbs = 0;	// Stays 0 for tokens that carry only a sign bit
+	INT32 ExtraCount = 0;
+	INT32 Bit;
+
+	// EOB and zero tokens are coded completely here. Unknown tokens write nothing.
+	switch ( TokenExtra->Token )
+	{
+	case DCT_EOB_TOKEN:		// 00
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[ZERO_CONTEXT_NODE] );	// Zero value branch
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[EOB_CONTEXT_NODE ] );	// EOB vs 0 branch
+		return;
+
+	case ZERO_TOKEN:		// 01
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[ZERO_CONTEXT_NODE] );	// Zero value branch
+
+		// At position 0 there is no run length and no EOB decision
+		if ( *CoefIndex == 0 )
+			return;
+
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[EOB_CONTEXT_NODE] );	// EOB vs 0 branch
+
+		// Step the coefindex on by run length - 1
+		// Note that TokenExtra->Extra = run length - 1
+		*CoefIndex += TokenExtra->Extra;
+
+		if ( TokenExtra->Extra < 8 )
+		{
+			UINT32 Run = TokenExtra->Extra;
+
+			if ( Run < 4 )
+			{
+				// 0, bit 1 of Run, then bit 0 using node 2 or 3
+				VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[0] );
+				VP6_EncodeBool ( bc, (Run>>1), ZeroRunProbsPtr[1] );
+				VP6_EncodeBool ( bc, (Run&1), ZeroRunProbsPtr[2+(Run>>1)] );
+			}
+			else if ( Run < 8 )
+			{
+				// 1 0, bit 1 of Run, then bit 0 using node 6 or 7
+				VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+				VP6_EncodeBool ( bc, 0, ZeroRunProbsPtr[4] );
+				VP6_EncodeBool ( bc, ((Run>>1)&1), ZeroRunProbsPtr[5] );
+				VP6_EncodeBool ( bc, (Run&1), ZeroRunProbsPtr[6+((Run>>1)&1)] );
+			}
+		}
+		else
+		{
+			// Extra of 8 or more: 1 1 then (Extra - 8) as six bits, least
+			// significant bit first, using nodes 8 to 13
+			TokenExtra->Extra -= 8;
+
+			VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[0] );
+			VP6_EncodeBool ( bc, 1, ZeroRunProbsPtr[4] );
+
+			for ( Bit=0; Bit<6; Bit++ )
+				VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>Bit)), ZeroRunProbsPtr[8+Bit] );
+		}
+		return;
+
+	case ONE_TOKEN:
+	case TWO_TOKEN:
+	case THREE_TOKEN:
+	case FOUR_TOKEN:
+	case DCT_VAL_CATEGORY1:
+	case DCT_VAL_CATEGORY2:
+	case DCT_VAL_CATEGORY3:
+	case DCT_VAL_CATEGORY4:
+	case DCT_VAL_CATEGORY5:
+	case DCT_VAL_CATEGORY6:
+		break;
+
+	default:
+		return;
+	}
+
+	// Every non-zero token starts with the (optional) zero branch and the one branch
+	if ( !NonZeroImplicit )
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[ZERO_CONTEXT_NODE] );				// Zero value branch
+	VP6_EncodeBool ( bc, (TokenExtra->Token == ONE_TOKEN) ? 0 : 1, ContextProbsPtr[ONE_CONTEXT_NODE] );	// One Branch
+
+	// Remainder of the token tree
+	switch ( TokenExtra->Token )
+	{
+	case TWO_TOKEN:				// 1100 X
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] );		// Low Value Branch
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[TWO_CONTEXT_NODE] );			// 2 Branch
+		break;
+
+	case THREE_TOKEN:			// 11010 X
+	case FOUR_TOKEN:			// 11011 X
+		VP6_EncodeBool ( bc, 0, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] );		// Low Value Branch
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[TWO_CONTEXT_NODE] );			// 2 Branch
+		VP6_EncodeBool ( bc, (TokenExtra->Token == FOUR_TOKEN) ? 1 : 0, BaselineProbsPtr[THREE_CONTEXT_NODE] );	// Three Branch
+		break;
+
+	case DCT_VAL_CATEGORY1:		// 11100 XX
+	case DCT_VAL_CATEGORY2:		// 11101 XXX
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] );		// Low Value Branch
+		VP6_EncodeBool ( bc, 0, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] );		// HighLow Value Branch
+		if ( TokenExtra->Token == DCT_VAL_CATEGORY1 )
+		{
+			VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE] );	// Cat1 Value Branch
+			ExtraProbs = Cat1BitProbs;
+			ExtraCount = 2;
+		}
+		else
+		{
+			VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE] );	// Cat1 Value Branch
+			ExtraProbs = Cat2BitProbs;
+			ExtraCount = 3;
+		}
+		break;
+
+	case DCT_VAL_CATEGORY3:		// 111100 XXXX
+	case DCT_VAL_CATEGORY4:		// 111101 XXXXX
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] );		// Low Value Branch
+		VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] );		// HighLow Value Branch
+		VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] );	// Cat3/4 Value Branch
+		if ( TokenExtra->Token == DCT_VAL_CATEGORY3 )
+		{
+			VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE] );	// Cat3 Value Branch
+			ExtraProbs = Cat3BitProbs;
+			ExtraCount = 4;
+		}
+		else
+		{
+			VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE] );	// Cat3 Value Branch
+			ExtraProbs = Cat4BitProbs;
+			ExtraCount = 5;
+		}
+		break;
+
+	case DCT_VAL_CATEGORY5:		// 111110 XXXXXX
+	case DCT_VAL_CATEGORY6:		// 111111 XXXXXXXXXXXX
+		VP6_EncodeBool ( bc, 1, ContextProbsPtr[LOW_VAL_CONTEXT_NODE] );		// Low Value Branch
+		VP6_EncodeBool ( bc, 1, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE] );		// HighLow Value Branch
+		VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE] );	// Cat3/4 Value Branch
+		if ( TokenExtra->Token == DCT_VAL_CATEGORY5 )
+		{
+			VP6_EncodeBool ( bc, 0, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE] );	// Cat5/6 Value Branch
+			ExtraProbs = Cat5BitProbs;
+			ExtraCount = 6;
+		}
+		else
+		{
+			VP6_EncodeBool ( bc, 1, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE] );	// Cat5/6 Value Branch
+			ExtraProbs = Cat6BitProbs;
+			ExtraCount = 12;
+		}
+		break;
+
+	default:					// ONE_TOKEN: 10 X, nothing further in the tree
+		break;
+	}
+
+	// ONE..FOUR tokens: Extra is passed unmasked as the sign decision
+	if ( ExtraCount == 0 )
+	{
+		VP6_EncodeBool ( bc, TokenExtra->Extra, 128 );							// Sign
+		return;
+	}
+
+	// Category tokens: data bits from the top down, sign bit (bit 0) last
+	for ( Bit=ExtraCount-1; Bit>=0; Bit-- )
+		VP6_EncodeBool ( bc, (1&(TokenExtra->Extra>>Bit)), ExtraProbs[ExtraCount-1-Bit] );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateContextProbs
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Determines which context probabilities to update and 
+ *                  encodes the changes to the bitstream through cpi->bc.
+ *                  In order it writes:
+ *                    1. DC node probabilities for each of the two planes.
+ *                    2. A flag, and when set, per-coefficient scan band
+ *                       updates for positions 1..BLOCK_SIZE-1.
+ *                    3. Zero run probabilities for each ZRL band.
+ *                    4. AC node probabilities for each Prec / plane / band.
+ *                  For DC, AC and zero run probabilities an update is sent
+ *                  only when the estimated saving (OldBits - NewBits)
+ *                  exceeds ProbUpdateCost. Each decision is coded as one
+ *                  bool, followed when set by the new probability >> 1 in
+ *                  PROB_UPDATE_BASELINE_COST bits.
+ *
+ *  SPECIAL NOTES : Updates pbi->DcProbs, pbi->AcProbs, pbi->ZeroRunProbs,
+ *                  pbi->ScanBands, cpi->FrameZrlProbs and the scanupdates
+ *                  counters. On key frames the DC/AC tables are first reset
+ *                  from LastProb, which starts at 128 and tracks the most
+ *                  recently sent value for each node. In error resilient
+ *                  mode on frame 1 the resulting DC and AC tables are copied
+ *                  to cpi->FirstFrameDcProbs / cpi->FirstFrameAcProbs.
+ *
+ ****************************************************************************/
+void UpdateContextProbs ( CP_INSTANCE *cpi )
+{
+	UINT32	i,j;
+	UINT32	Plane;
+	UINT32	Band;
+	INT32   Prec;
+	INT32   OldBits;
+	INT32   NewBits;
+	UINT8   PrecNonZero;
+	INT32   ProbUpdateCost;
+	UINT8   Probs[MAX_ENTROPY_TOKENS-1];
+	UINT8   LastProb[MAX_ENTROPY_TOKENS-1];
+	UINT32  BranchChoices[MAX_ENTROPY_TOKENS-1][2];
+	
+	BOOL_CODER  *bc  = &cpi->bc;
+    PB_INSTANCE *pbi = &cpi->pb;
+	UINT8 FrameType = VP6_GetFrameType ( pbi );
+
+	// Start the LastProb record at 128 for every node
+	memset ( LastProb, 128, MAX_ENTROPY_TOKENS-1 );
+
+	// Baseline DC probabilities for Y and then UV Planes.
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		if ( FrameType == BASE_FRAME ) 
+			memcpy ( pbi->DcProbs+DCProbOffset(Plane,0), LastProb, MAX_ENTROPY_TOKENS-1 ); 
+
+	    ConvertDistribution ( cpi, cpi->FrameDcTokenDist[Plane], Probs, BranchChoices );
+
+		// For each node decide whether an update saves more than it costs.
+		for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+		{
+			OldBits = ((BranchChoices[i][0] * VP6_ProbCost[pbi->DcProbs[DCProbOffset(Plane,i)]])/256) +
+					  ((BranchChoices[i][1] * VP6_ProbCost[255 - pbi->DcProbs[DCProbOffset(Plane,i)]])/256);
+			NewBits = ((BranchChoices[i][0] * VP6_ProbCost[Probs[i]])/256) +
+					  ((BranchChoices[i][1] * VP6_ProbCost[255 - Probs[i]])/256);
+
+			ProbUpdateCost = PROB_UPDATE_BASELINE_COST + PROB_UPDATE_CORECTION + (VP6_ProbCost[255 - VP6_DcUpdateProbs[Plane][i]]/256);
+
+            if ( (OldBits - NewBits) > ProbUpdateCost )
+			{
+				// Probabilities sent
+				VP6_EncodeBool ( bc, 1, VP6_DcUpdateProbs[Plane][i] );
+
+				AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+				// Update the last probability records
+				pbi->DcProbs[DCProbOffset(Plane,i)] = Probs[i];
+				LastProb[i] = Probs[i];
+			}
+			else
+			{
+				// Probabilities not sent
+				VP6_EncodeBool ( bc, 0, VP6_DcUpdateProbs[Plane][i] );
+			}
+		}
+	}
+
+	// If we are in Error resilient mode and this was the first frame then take a copy of the 
+	// entropy probabilities used for re-use on subsequent key frames.
+	if ( (cpi->ErrorResilliantMode) && (cpi->CurrentFrame == 1) )
+	{
+		memcpy( cpi->FirstFrameDcProbs, pbi->DcProbs, sizeof(cpi->FirstFrameDcProbs) );
+	}
+
+	// Are we supporting dynamic scan order updates
+	if ( ( (pbi->Configuration.Interlaced) || (cpi->AllowScanOrderUpdates) ) &&
+		 ( !cpi->ErrorResilliantMode ) )
+	{
+		VP6_EncodeBool ( bc, 1, 128 );
+
+		// Transmit changes to the AC scan order banding
+		for ( i=1; i<BLOCK_SIZE; i++ )
+		{
+			// Should we update the coeff's band
+			if ( cpi->NewScanOrderBands[i] != pbi->ScanBands[i] )
+			{
+				VP6_EncodeBool ( bc, 1, ScanBandUpdateProbs[i] );
+				AddBitsToBuffer ( bc, cpi->NewScanOrderBands[i], SCAN_BAND_UPDATE_BITS );
+				pbi->ScanBands[i] = cpi->NewScanOrderBands[i];
+				scanupdates[i][1]++;
+			}
+			else
+			{
+				VP6_EncodeBool ( bc, 0, ScanBandUpdateProbs[i] );
+				scanupdates[i][0]++;
+			}
+		}
+	}
+	else
+	{
+		VP6_EncodeBool ( bc, 0, 128 );
+	}
+
+	// Reset Zero run probabilities to default values for key frames
+	if ( FrameType == BASE_FRAME )
+	{
+		memcpy ( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+	}
+
+	// Candidate Zero Run probabilities start as a copy of the current ones and are
+	// only re-estimated outside error resilient mode
+	memcpy ( cpi->FrameZrlProbs, pbi->ZeroRunProbs, sizeof(cpi->FrameZrlProbs) );
+	if ( !cpi->ErrorResilliantMode )
+		GetOptimalFrameZrlProbs( cpi );
+
+	// Transmit any changes needed
+	for ( i=0; i<ZRL_BANDS; i++ )
+	{
+		for ( j=0; j<ZERO_RUN_PROB_CASES; j++ )
+		{
+			// Work out whether the saving is enough to justify an update
+			OldBits = ((cpi->FrameZrlBranchHits[i][j][0] * VP6_ProbCost[pbi->ZeroRunProbs[i][j]])/256) +
+					  ((cpi->FrameZrlBranchHits[i][j][1] * VP6_ProbCost[255 - pbi->ZeroRunProbs[i][j]])/256);
+			NewBits = ((cpi->FrameZrlBranchHits[i][j][0] * VP6_ProbCost[cpi->FrameZrlProbs[i][j]])/256) +
+					  ((cpi->FrameZrlBranchHits[i][j][1] * VP6_ProbCost[255 - cpi->FrameZrlProbs[i][j]])/256);
+
+			ProbUpdateCost = PROB_UPDATE_BASELINE_COST + (VP6_ProbCost[255 - ZrlUpdateProbs[i][j]]/256);
+
+			if ( (OldBits - NewBits) > ProbUpdateCost )
+			{
+				// Probabilities sent
+				VP6_EncodeBool ( bc, 1, ZrlUpdateProbs[i][j] );
+				AddBitsToBuffer( bc, cpi->FrameZrlProbs[i][j] >> 1, PROB_UPDATE_BASELINE_COST );
+				pbi->ZeroRunProbs[i][j] = (cpi->FrameZrlProbs[i][j] & ~1);			// Store with the low bit cleared
+				pbi->ZeroRunProbs[i][j] += (pbi->ZeroRunProbs[i][j] == 0) ? 1 : 0;	// A stored value of 0 becomes 1
+			}
+			else 
+			{
+				// Probability not sent
+				VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[i][j] );
+			}
+		}		
+	}
+
+	// Baseline probabilities for each AC band.
+	// Prec=0 means last token in current block was 0: Prec=1 means it was !0
+	for ( Prec=0; Prec<PREC_CASES; Prec++ )
+	{
+		PrecNonZero = (Prec > 0) ? 1 : 0;	// NOTE(review): assigned but never read in this function
+
+		// Baseline probabilities for each AC band.
+		for ( Plane=0; Plane<2; Plane++ )
+		{
+			for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+			{
+				// Candidate probabilities and branch counts for this Prec / plane / band
+				ConvertDistribution ( cpi, cpi->FrameAcTokenDist[Prec][Plane][Band], Probs, BranchChoices );
+
+				if ( FrameType == BASE_FRAME )
+					memcpy( pbi->AcProbs+ACProbOffset(Plane,Prec,Band,0), LastProb, MAX_ENTROPY_TOKENS-1 ); 
+
+				for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+				{
+					OldBits = ((BranchChoices[i][0] * VP6_ProbCost[pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)]])/256) +
+							  ((BranchChoices[i][1] * VP6_ProbCost[255 - pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)]])/256);
+					NewBits = ((BranchChoices[i][0] * VP6_ProbCost[Probs[i]])/256) +
+							  ((BranchChoices[i][1] * VP6_ProbCost[255 - Probs[i]])/256);
+
+					ProbUpdateCost = PROB_UPDATE_BASELINE_COST + PROB_UPDATE_CORECTION + (VP6_ProbCost[255 - VP6_AcUpdateProbs[Prec][Plane][Band][i]]/256);
+
+                    if ( (OldBits - NewBits) > ProbUpdateCost )
+					{
+						// Probabilities sent
+						VP6_EncodeBool ( bc, 1, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+						AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+						// Update the last probability records
+						pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = Probs[i];
+						LastProb[i] = Probs[i];
+					}
+					else
+					{
+						// Probabilities not sent
+						VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+					}
+				}
+			}
+		}
+	}
+
+	// If we are in Error resilient mode and this was the first frame then take a copy of the 
+	// entropy probabilities used for re-use on subsequent key frames.
+	if ( cpi->ErrorResilliantMode && (cpi->CurrentFrame == 1) )
+		memcpy ( cpi->FirstFrameAcProbs, pbi->AcProbs, sizeof(cpi->FirstFrameAcProbs) );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateContextProbs2
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sends a selected set of updated context info to the bitstream.
+ *                  A fixed number of leading DC nodes per plane, and of
+ *                  leading AC nodes in the leading AC bands per Prec/plane,
+ *                  are always updated. Every other node is flagged as not
+ *                  updated, as are the scan order banding and all zero run
+ *                  probabilities.
+ *
+ *  SPECIAL NOTES : Writes one update flag per node in the same order as
+ *                  UpdateContextProbs: DC nodes, scan order flag, zero run
+ *                  probabilities for ZRL_BANDS bands, then AC nodes.
+ *                  Updates pbi->DcProbs and pbi->AcProbs for the nodes sent. 
+ *
+ ****************************************************************************/
+void UpdateContextProbs2 ( CP_INSTANCE *cpi )
+{
+	UINT32 i,j;
+	UINT32 Plane;
+	UINT32 Band;
+	INT32  Prec;
+	UINT8  Probs[MAX_ENTROPY_TOKENS-1];
+	UINT32 BranchChoices[MAX_ENTROPY_TOKENS-1][2];
+
+	BOOL_CODER  *bc  = &cpi->bc;
+	PB_INSTANCE *pbi = &cpi->pb;
+	static const UINT8 ActiveDcNodes[2]    = { 3, 2 };		                  // Y, UV
+	static const UINT8 ActiveAcNodes[3][2] = { { 1, 1 },{ 2, 0 },{ 1, 0 } };  // {Y, UV} for each prec case
+	static const UINT8 ActiveAcBands[3][2] = { { 1, 1 },{ 1, 0 },{ 1, 0 } };  // {Y, UV} for each prec case
+
+	// Baseline DC probabilities for Y and then UV Planes.
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		ConvertDistribution ( cpi, cpi->FrameDcTokenDist[Plane], Probs, BranchChoices );
+
+		// The first ActiveDcNodes[Plane] nodes are always updated
+		for ( i=0; i<ActiveDcNodes[Plane]; i++ )
+		{
+			// Probabilities sent
+			VP6_EncodeBool ( bc, 1, VP6_DcUpdateProbs[Plane][i] );
+			AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+			// Update the last probability records
+			pbi->DcProbs[DCProbOffset(Plane,i)] = Probs[i];
+		}
+
+		// The rest are never updated here and keep their existing values
+		for ( i=ActiveDcNodes[Plane]; i<MAX_ENTROPY_TOKENS-1; i++ )
+		{
+			// Probabilities not sent
+			VP6_EncodeBool ( bc, 0, VP6_DcUpdateProbs[Plane][i] );
+		}
+	}
+
+	// Do not change the scan order banding in error resilient mode
+	VP6_EncodeBool ( bc, 0, 128 );
+
+	// For now do not update ZRL probabilities in error resilient mode.
+	// One "not sent" flag is needed per band and case, so the band count is
+	// ZRL_BANDS as in UpdateContextProbs rather than a literal.
+	for ( i=0; i<ZRL_BANDS; i++ )
+	{
+		for ( j=0; j<ZERO_RUN_PROB_CASES; j++ )
+		{
+			// Probability not sent
+			VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[i][j] );
+		}
+	}
+
+	// Baseline probabilities for each AC band.
+	// Prec=0 means last token in current block was 0: Prec=1 means it was !0
+	for ( Prec=0; Prec<PREC_CASES; Prec++ )
+	{
+		// Baseline probabilities for each AC band.
+		for ( Plane=0; Plane<2; Plane++ )
+		{
+			// For the first ActiveAcBands[Prec][Plane] bands we always update
+			// the first few probabilities.
+			for ( Band=0; Band<ActiveAcBands[Prec][Plane]; Band++ )
+			{
+				// Candidate probabilities for this Prec / plane / band
+				ConvertDistribution ( cpi, cpi->FrameAcTokenDist[Prec][Plane][Band], Probs, BranchChoices );
+
+				for ( i=0; i<ActiveAcNodes[Prec][Plane]; i++ )
+				{
+					// Probabilities sent
+					VP6_EncodeBool ( bc, 1, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+					AddBitsToBuffer ( bc, Probs[i] >> 1, PROB_UPDATE_BASELINE_COST );
+
+					// Update the last probability records
+					pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = Probs[i];
+				}
+
+				for ( i=ActiveAcNodes[Prec][Plane]; i<MAX_ENTROPY_TOKENS-1; i++ )
+				{
+					// Probabilities not sent
+					VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+				}
+			}
+
+			// For the higher AC bands we never update probabilities
+			for ( Band=ActiveAcBands[Prec][Plane]; Band<VP6_AC_BANDS; Band++ )
+			{
+				for ( i=0; i<MAX_ENTROPY_TOKENS-1; i++ )
+				{
+					// Probabilities not sent
+					VP6_EncodeBool ( bc, 0, VP6_AcUpdateProbs[Prec][Plane][Band][i] );
+				}
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateContextProbs3
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Updates all baseline probabilities (except first frame
+ *					when in error resilient mode).
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void UpdateContextProbs3(CP_INSTANCE *cpi)
+{
+	BOOL_CODER  *bc  = &cpi->bc;
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	UINT32	Node;
+	UINT32	Plane;
+	UINT32	Band;
+	UINT32	ZrlSet, ZrlCase;
+	INT32   Prec;
+	UINT8	Prob;
+	int     Changed;
+	UINT8   LastProb[MAX_ENTROPY_TOKENS-1];
+
+	// Every node starts out with a "last sent" value of 128. The same record is
+	// carried across both planes and on into the AC section below.
+	memset ( LastProb, 128, sizeof(LastProb) );
+
+	// Reinstate the DC probabilities that were used for the first frame
+	memcpy ( pbi->DcProbs, cpi->FirstFrameDcProbs, sizeof(pbi->DcProbs) );
+
+	// DC section: Y plane first, then UV.
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		for ( Node=0; Node<MAX_ENTROPY_TOKENS-1; Node++ )
+		{
+			Prob    = pbi->DcProbs[DCProbOffset(Plane,Node)];
+			Changed = ( Prob != LastProb[Node] );
+
+			// The update flag is always written; the value follows only when it differs
+			VP6_EncodeBool ( bc, Changed, VP6_DcUpdateProbs[Plane][Node] );
+			if ( Changed )
+			{
+				AddBitsToBuffer ( bc, Prob >> 1, PROB_UPDATE_BASELINE_COST );
+				LastProb[Node] = Prob;
+			}
+		}
+	}
+
+	// Zero run probabilities go back to their default values
+	memcpy ( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+
+	// Scan order banding is left unchanged
+	VP6_EncodeBool ( bc, 0, 128 );
+
+	// No ZRL probability is updated: write a 0 flag for every entry
+	for ( ZrlSet=0; ZrlSet<2; ZrlSet++ )
+	{
+		for ( ZrlCase=0; ZrlCase<ZERO_RUN_PROB_CASES; ZrlCase++ )
+			VP6_EncodeBool ( bc, 0, ZrlUpdateProbs[ZrlSet][ZrlCase] );
+	}
+
+	// Reinstate the AC probabilities that were used for the first frame
+	memcpy ( pbi->AcProbs, cpi->FirstFrameAcProbs, sizeof(pbi->AcProbs) );
+
+	// AC section: preceding-token case, then plane, then band, then tree node.
+	for ( Prec=0; Prec<PREC_CASES; Prec++ )
+	{
+		for ( Plane=0; Plane<2; Plane++ )
+		{
+			for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+			{
+				for ( Node=0; Node<MAX_ENTROPY_TOKENS-1; Node++ )
+				{
+					Prob    = pbi->AcProbs[ACProbOffset(Plane,Prec,Band,Node)];
+					Changed = ( Prob != LastProb[Node] );
+
+					VP6_EncodeBool ( bc, Changed, VP6_AcUpdateProbs[Prec][Plane][Band][Node] );
+					if ( Changed )
+					{
+						AddBitsToBuffer ( bc, Prob >> 1, PROB_UPDATE_BASELINE_COST );
+						LastProb[Node] = Prob;
+					}
+				}
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PackHuffmanCoeffs
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Outputs the list of tokens generated for the frame
+ *                  using Huffman coding.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void PackHuffmanCoeffs ( CP_INSTANCE *cpi )
+{
+	UINT32	Plane;
+	UINT8   PrecTokenIndex;
+	UINT8   coef;
+	unsigned int MBrow, MBcol, block;
+	TOKENEXTRA *j;
+
+	BOOL_CODER  *bc  = &cpi->bc;
+	PB_INSTANCE *pbi = &cpi->pb;
+	UINT8 FrameType = VP6_GetFrameType ( pbi );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+	// Work out which context probabilities need to be updated
+	// and output the changes to the bitstream.
+	//
+	// Error resilient mode uses a fixed probability update pattern to make the entropy
+	// code more resilient to dropped frames
+	if ( cpi->ErrorResilliantMode )
+	{
+		// In "error resilient / VC" mode use an update mechanism that is more tolerant of dropped frames.
+		if ( FrameType == BASE_FRAME )
+		{
+			if ( cpi->CurrentFrame == 1 )
+				UpdateContextProbs( cpi );
+			else
+				UpdateContextProbs3( cpi );
+		}
+		else
+			UpdateContextProbs2( cpi );
+	}
+	else
+	{
+		UpdateContextProbs( cpi );
+	}
+
+	// Create all the context specific probabilities
+	VP6_ConfigureContexts ( pbi );
+
+	// probability that the macroblock is interlaced
+	if ( pbi->Configuration.Interlaced )
+		AddBitsToBuffer ( bc, (UINT32)(pbi->probInterlaced), 8 );
+
+	// Create Huffman codes for tokens based on tree probabilities
+	ConvertBoolTrees ( pbi );
+
+	// Encode coefficients: tokens are consumed sequentially from the frame's token list
+	j = cpi->CoeffTokens;
+
+	for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows - BORDER_MBS; MBrow++ )
+	{
+		for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ )
+		{
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = HEADER_SECTION;
+#endif
+
+			// Per-macroblock interlaced decision. The context is only needed (and
+			// MBInterlaced only read) when the stream is actually interlaced.
+			if ( pbi->Configuration.Interlaced )
+			{
+				UINT8 prob = pbi->probInterlaced;
+
+				// super simple context adjustment based on the MB to the left
+				if ( MBcol>BORDER_MBS )
+				{
+					if ( pbi->MBInterlaced[MBOffset(MBrow,MBcol-1)] )
+						prob = prob - (prob>>1);
+					else
+						prob = prob + ((256-prob)>>1);
+				}
+
+				VP6_EncodeBool ( bc, pbi->MBInterlaced[MBOffset(MBrow,MBcol)], prob );
+			}
+
+			if ( pbi->FrameType != BASE_FRAME )
+				encodeModeAndMotionVector ( cpi, MBrow, MBcol );
+
+			for ( block=0 ; block<6 ; block++ )
+			{
+				// Blocks 0-3 use plane index 0, blocks 4 and 5 use plane index 1
+				Plane = block>3;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = DC_SECTION;
+#endif
+
+				// DC Token
+				coef = 0;
+				VP6AddHuffmanToken ( cpi, j,
+					pbi->DcHuffCode[Plane],
+					pbi->DcHuffLength[Plane],
+					pbi->ZeroHuffCode[0],
+					pbi->ZeroHuffLength[0],
+					&coef );
+
+				PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+				j++;
+
+				// AC tokens. coef is passed by address, so the callee may move it
+				// (presumably to skip over zero runs - confirm in VP6AddHuffmanToken).
+				for ( coef=1; coef<64; coef++ )
+				{
+					UINT32 ZrlBand = (coef >= ZRL_BAND2) ? 1 : 0;
+
+					// Restrict to 4 AC bands when using Huffman
+					UINT32 AcBand = VP6_CoeffToBand[coef];
+					AcBand = (AcBand < 4) ? AcBand : 3;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = AC_SECTION;
+#endif
+
+					VP6AddHuffmanToken ( cpi, j,
+						pbi->AcHuffCode[PrecTokenIndex][Plane][AcBand],
+						pbi->AcHuffLength[PrecTokenIndex][Plane][AcBand],
+						pbi->ZeroHuffCode[ZrlBand],
+						pbi->ZeroHuffLength[ZrlBand], &coef );
+
+					PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+
+					// End of block: force the loop to terminate after this token
+					if ( j->Token == DCT_EOB_TOKEN )
+						coef=64;
+					j++;
+				}
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PackArithmeticCoeffs
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Outputs the list of tokens generated for the frame
+ *                  using a Bool Coder.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void PackArithmeticCoeffs ( CP_INSTANCE *cpi )
+{
+	UINT32	Plane;
+	UINT8   PrecTokenIndex;
+	UINT8   coef;
+	unsigned int MBrow,MBcol,block;
+	TOKENEXTRA *j;
+	BOOL_CODER *nbc;
+
+	BOOL_CODER  *bc  = &cpi->bc;
+	PB_INSTANCE *pbi = &cpi->pb;
+	UINT8	FrameType = VP6_GetFrameType ( pbi );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = CONTEXT_OVERHEADS_SECTION;
+#endif
+
+	// Select which bool coder partition receives the coefficient tokens.
+	// Header/mode data written below always goes to the first coder (bc).
+	if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+		nbc = &cpi->bc2;
+	else
+		nbc = &cpi->bc;
+
+	// Work out which context probabilities need to be updated
+	// and output the changes to the bitstream.
+	//
+	// Error resilient mode uses a fixed probability update pattern to make the entropy
+	// code more resilient to dropped frames
+	if ( cpi->ErrorResilliantMode )
+	{
+		// In "error resilient / VC" mode use an update mechanism that is more tolerant of dropped frames.
+		if ( FrameType == BASE_FRAME )
+		{
+			if ( cpi->CurrentFrame == 1 )
+				UpdateContextProbs( cpi );
+			else
+				UpdateContextProbs3( cpi );
+		}
+		else
+			UpdateContextProbs2( cpi );
+	}
+	else
+	{
+		UpdateContextProbs( cpi );
+	}
+
+	// Create all the context specific probabilities
+	VP6_ConfigureContexts ( pbi );
+
+	// probability that the macroblock is interlaced
+	if ( pbi->Configuration.Interlaced )
+		AddBitsToBuffer ( bc, (UINT32)(pbi->probInterlaced), 8 );
+
+	// Encode coefficients: tokens are consumed sequentially from the frame's token list
+	j = cpi->CoeffTokens;
+
+	for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows - BORDER_MBS; MBrow++ )
+	{
+		for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ )
+		{
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = CONTEXT_OVERHEADS_SECTION;
+#endif
+
+			// Per-macroblock interlaced decision. The context is only needed (and
+			// MBInterlaced only read) when the stream is actually interlaced.
+			if ( pbi->Configuration.Interlaced )
+			{
+				UINT8 prob = pbi->probInterlaced;
+
+				// super simple context adjustment based on the MB to the left
+				if ( MBcol>BORDER_MBS )
+				{
+					if ( pbi->MBInterlaced[MBOffset(MBrow,MBcol-1)] )
+						prob = prob - (prob>>1);
+					else
+						prob = prob + ((256-prob)>>1);
+				}
+
+				VP6_EncodeBool ( bc, pbi->MBInterlaced[MBOffset(MBrow,MBcol)], prob );
+			}
+
+			if ( pbi->FrameType != BASE_FRAME )
+				encodeModeAndMotionVector ( cpi, MBrow, MBcol );
+
+			for ( block=0 ; block<6 ; block++ )
+			{
+				// Blocks 0-3 use plane index 0, blocks 4 and 5 use plane index 1
+				Plane = block>3;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = DC_SECTION;
+#endif
+
+				// DC Token: the node context is selected by the sum of the token's
+				// LastTokenL and LastTokenA fields.
+				coef = 0;
+				VP6AddToken ( cpi, nbc,
+					j,
+					pbi->DcProbs+DCProbOffset(Plane,0),
+					(pbi->DcNodeContexts + DcNodeOffset(Plane, (j->LastTokenL + j->LastTokenA), 0)),
+					pbi->ZeroRunProbs[0], &coef, FALSE );
+
+				PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+				j++;
+
+				// AC tokens. coef is passed by address, so the callee may move it
+				// (presumably to skip over zero runs - confirm in VP6AddToken).
+				for ( coef=1; coef<64; coef++ )
+				{
+					UINT32   band = VP6_CoeffToBand[coef];
+					UINT8	*AcProbsPtr = pbi->AcProbs + ACProbOffset(Plane,PrecTokenIndex,band,0 );
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = AC_SECTION;
+#endif
+
+					VP6AddToken ( cpi, nbc,
+						j, AcProbsPtr, AcProbsPtr, pbi->ZeroRunProbs[(coef >= ZRL_BAND2) ? 1 : 0],
+						&coef, ((coef>1) && (PrecTokenIndex == 0)) );
+
+					PrecTokenIndex = VP6_PrevTokenIndex[j->Token];
+
+					// End of block: force the loop to terminate after this token
+					if ( j->Token == DCT_EOB_TOKEN )
+						coef=64;
+					j++;
+				}
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PackCodedVideo
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Outputs the compressed frame to the bitstream: writes
+ *                  a frame header and entropy codes associated lists
+ *                  of tokens.
+ *
+ *  SPECIAL NOTES : Uses either Huffman or Bool coding depending on 
+ *                  pbi->UseHuffman flag. 
+ *
+ ****************************************************************************/
+extern double ModeBits;
+extern double ModeBits2;
+
+void PackCodedVideo ( CP_INSTANCE *cpi )
+{
+	UINT32 Buffer2Offset;
+	unsigned int starttsc,endtsc;
+
+	BOOL_CODER  *bc  = &cpi->bc;
+	PB_INSTANCE *pbi = &cpi->pb;
+	BOOL KeyFrame = (pbi->FrameType == BASE_FRAME);
+
+	VP6_readTSC ( &starttsc );
+
+	// Initialise the raw buffer i/o and the two bool coders.
+	InitAddRawBitsToBuffer ( &cpi->RawBuffer, pbi->DataOutputInPtr );
+
+	// Start the bool coder or coders
+	if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+	{
+		// Start the first bool coder: Allow for the raw header bytes.
+		VP6_StartEncode ( bc, (pbi->DataOutputInPtr + ((KeyFrame) ? 4 : 3)) );
+
+		// Create either second Bool or Huffman coded partition
+		if ( pbi->UseHuffman )
+			InitAddRawBitsToBuffer ( &pbi->HuffBuffer, cpi->OutputBuffer2 );
+		else
+			VP6_StartEncode ( &cpi->bc2, cpi->OutputBuffer2 );
+	}
+	else
+	{
+		// Start the first bool coder: Allow for the raw header bytes.
+		VP6_StartEncode( bc, (pbi->DataOutputInPtr + ((KeyFrame) ? 2 : 1)) );
+	}
+
+	// Set flag to ensure output to the bitstream rather than simulated cost analysis
+	bc->MeasureCost = FALSE;
+
+	if ( pbi->UseHuffman )
+	{
+		// AWG Using runs so copy correct distribution
+		memcpy ( cpi->FrameDcTokenDist, cpi->FrameDcTokenDist2, sizeof(cpi->FrameDcTokenDist2) );
+		memcpy ( cpi->FrameAcTokenDist, cpi->FrameAcTokenDist2, sizeof(cpi->FrameAcTokenDist2) );
+	}
+
+	// Write out the frame header information including size.
+	WriteFrameHeader ( cpi );
+
+	// The tree is not needed (implicit) for key frames
+	if ( !KeyFrame )
+	{
+		// Mode probabilities are updated the same way in both modes
+		UpdateModeProbs(cpi);
+
+		// Error resilient mode uses a fixed probability update pattern to make the entropy
+		// code more resilient to dropped frames
+		if ( cpi->ErrorResilliantMode )
+			BuildandPackMvTree2( cpi );
+		else
+			BuildandPackMvTree( cpi );
+	}
+
+	if ( pbi->UseHuffman )
+		PackHuffmanCoeffs ( cpi );
+	else
+		PackArithmeticCoeffs ( cpi );
+
+	// Stop the first bool coder and work out this frame size.
+	VP6_StopEncode ( bc );
+
+	// ThisFrameSize is measured in bits
+	if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+	{
+		// Offset to second bitstream partition from start of buffer.
+		// NOTE(review): the first coder was started at offset 4 (key frame) or 3
+		// (inter frame), yet 4 is used here for both - presumably this leaves one
+		// unused byte on inter frames; confirm against the decoder before changing.
+		Buffer2Offset = 4 + bc->pos;
+
+		// Write offset to second bitstream partition
+		AddRawBitsToBuffer ( &cpi->RawBuffer, Buffer2Offset, 16 );
+
+		if ( pbi->UseHuffman )
+		{
+			// Flush buffer for second Huffman coded output partition
+			EndAddRawBitsToBuffer ( &pbi->HuffBuffer );
+
+			// ThisFrameSize is measured in bits
+			cpi->ThisFrameSize = (Buffer2Offset + pbi->HuffBuffer.pos)*8;
+
+			// Append the Huffman partition after the first partition
+			memcpy ( &cpi->RawBuffer.Buffer[Buffer2Offset], pbi->HuffBuffer.Buffer, pbi->HuffBuffer.pos );
+		}
+		else
+		{
+			// Stop the second bool coder
+			VP6_StopEncode ( &cpi->bc2);
+
+			// Work out the frame size
+			cpi->ThisFrameSize = (Buffer2Offset + cpi->bc2.pos)*8;
+
+			// Assemble output bitstream from two bitstream partitions
+			memcpy ( &pbi->DataOutputInPtr[Buffer2Offset], cpi->bc2.buffer, cpi->bc2.pos );
+		}
+	}
+	else
+	{
+		// Raw header bits + coded bits
+		cpi->ThisFrameSize = ((KeyFrame ? 2 : 1) + bc->pos)*8;
+	}
+
+	// Stop and flush the raw bits encoder used for the first part of the header
+	EndAddRawBitsToBuffer ( &cpi->RawBuffer );
+
+	// Get time & compute duration
+	VP6_readTSC ( &endtsc );
+
+	// Only update the timing statistics when a processor frequency is known;
+	// a zero frequency would otherwise be a division by zero.
+	if ( pbi->ProcessorFrequency )
+	{
+		unsigned int duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ;
+
+		// Running average weighted 7:1 towards the previous value
+		if( cpi->avgPackVideoTime == 0)
+			cpi->avgPackVideoTime = duration;
+		else
+			cpi->avgPackVideoTime = ( 7 * cpi->avgPackVideoTime + duration ) >> 3;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c
new file mode 100644
index 00000000..9573d1c2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/PickModes.c
@@ -0,0 +1,2190 @@
+/****************************************************************************
+*
+*   Module Title :     PickModes.c
+*
+*   Description  :     Coding mode selection functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>           // For abs()
+#include "mcomp.h"
+#include "tokenentropy.h"
+#include "compdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "encodemode.h"
+
+/****************************************************************************
+*  Imports
+****************************************************************************/ 
+extern void PredictBlock ( 	CP_INSTANCE *cpi, 	BLOCK_POSITION bp );
+extern UINT8 TokenizeFrag_RD (	CP_INSTANCE *cpi, INT16 * RawData, UINT32 Plane, UINT32 *MbCost );
+extern INT32 *XX_LUT;
+extern void GetQuantizedCoeffsMSE_RD( INT16 * DctCodes,INT16 * Coeffs,INT16 * DequantMatrix,UINT32 *MSE);
+
+extern void PredictDCE 
+( 
+	CP_INSTANCE *cpi,
+	BLOCK_POSITION bp
+);
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#define KF_INDICATOR_THRESH (5 << 12)           //was 12800 (3 << 12)
+
+#define EPB					(cpi->ErrorPerBit)
+
+#define MIN_ERR             100
+#define MAX_ERR             20000
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/         
+static const UINT32 IntraThreshTable[Q_TABLE_SIZE] =
+{
+    47, 46, 45, 40, 39, 38, 37, 36,
+    35, 34, 33, 32, 31, 30, 29, 28,
+    27, 26, 25, 25, 24, 24, 23, 23,
+    22,	21, 21, 20,	19, 19, 18, 18,  
+    17, 17, 17, 16, 16, 15, 15, 14,  
+    14, 13, 13, 12, 12, 11, 11, 10,
+     9,  9,  9,  7,  6,  6,  5,  4, 
+     4,  3,  3,  2,  1,  0,  0,  0 
+};
+
+static const UINT32 IntraFactors[Q_TABLE_SIZE] = 
+{
+    128, 128, 128, 128, 128, 128, 128, 128,		
+    128, 128, 128, 128, 128, 128, 128, 128,	
+    128, 128, 128, 128, 128, 128, 128, 128,	
+    128, 128, 128, 128, 128, 128, 128, 128,	
+    128, 128, 128, 128, 128, 128, 126, 122,	
+    120, 118, 116, 114, 112, 110, 108, 106,
+    104, 102, 100,  98,  94,  90,  88,  84,
+     80,  76,  72,  64,  56,  48,  32,  32
+};
+
+static const UINT32 ErrorPerBit[Q_TABLE_SIZE] = 
+{
+    300, 250, 200, 180, 170, 160, 150, 145,
+    140, 130, 120, 114, 110, 102,  98,  95,
+     90,  85,  80,  78,  76,  74,  72,  70,
+     68,  64,  62,  58,  56,  54,  52,  50,
+     49,  48,  47,  46,  45,  44,  43,  42, 	 
+     41,  40,  39,  38,  37,  36,  35,  34,
+     33,  33,  32,  31,  30,  27,  24,  19, 
+     17,  15,  12,   9,   7,   4,   2,   1
+};
+
+static const UINT32 FourModeImprovement[Q_TABLE_SIZE] = 
+{
+    250, 225, 210, 200, 195, 180, 165, 150,   
+    140, 130, 120, 114, 110, 102,  98,  95,
+     90,  85,  80,  78,  76,  74,  72,  70,
+     68,  64,  62,  58,  56,  54,  52,  50,
+     49,  48,  47,  46,  45,  44,  43,  42, 	 
+     41,  40,  39,  38,  37,  36,  35,  34,
+     33,  33,  32,  31,  30,  27,  24,  19, 
+     17,  15,  12,   9,   7,   4,   2,   1
+};
+
+static const UINT32 MvEpbCorrectionTable[10] = 
+{
+	650, 500, 400, 300, 250, 200, 150, 100, 75, 50 
+};
+
+/***************** RATE DISTORTION STATIC TABLES *****************/
+// Rate multiplier, one entry per quantizer index. RdFunction loads it as
+// A = RateMult[FrameQIndex] alongside B = RateDiv[FrameQIndex].
+// NOTE(review): the last row is not monotonic on its own (..., 2, 3, 1, 1, 1),
+// but divided entry-by-entry by RateDiv it reads 2, 1.5, 1, 0.5, 0.25 - presumably
+// the pair encodes a fractional multiplier A/B; confirm in RdFunction.
+static const UINT32 RateMult[Q_TABLE_SIZE] = 
+{
+     700, 650, 600, 550, 450, 450, 400, 375,
+     350, 325, 300, 275, 250, 225, 200, 190,
+	 180, 170, 160, 151, 142, 134, 126, 119,
+	 112, 106, 100,  95,  90,  85,  80,  75,
+	  70,  66,  62,  58,  54,  50,  47,  44,
+	  41,  38,  35,  33,  31,  29,  27,  25,
+	  23,  21,  19,  17,  15,  13,  11,   9,
+	  7,    5,   3,   2,   3,   1,   1,   1
+};
+
+
+// Rate divisor, one entry per quantizer index; RdFunction loads it as
+// B = RateDiv[FrameQIndex]. Every entry is 1 except near the end of the table.
+static const UINT32 RateDiv[Q_TABLE_SIZE] = 
+{
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 2, 1, 2, 4
+};
+
+// Using the proportion of new mvs in the last frame as a measure of complexity
+// this table is used to apply a correction to the rate multiplier used in RD.
+// 128 is neutral, higher prefers rate, lower prefers dist.
+static const UINT32 RateMultCorrection[10] = 
+{
+	120,  125,  130,   140,   150,   165,   180,   195,   200,   220
+};
+
+static const INT32 RdMvCostCorrection[10] = 
+{
+
+	36,  15,  12,   4,   3,   2,   1,   0,   0,   0
+};
+
+/****************************************************************************
+*
+*			RD SPECIFIC CODE
+*
+*****************************************************************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ComputeBlockReconError
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 bp        : Index of the block within the macro-block
+ *                                     (indexes cpi->pb.mbi.blockDxInfo).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Distortion metric for the block 
+ *
+ *  FUNCTION      : Computes a reconstruction distortion metric for a block.
+ *
+ *  SPECIAL NOTES : None 
+
+ *
+ ****************************************************************************/
+UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp)
+{
+	UINT32 i, j;
+
+	// Source pixels and reconstructed pixels for block bp, with their row strides
+	UINT8 *NewDataPtr   = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[bp].Source];
+	UINT8 *RefDataPtr1  = &cpi->pb.ThisFrameRecon[cpi->pb.mbi.blockDxInfo[bp].thisRecon];
+	INT32  SourceStride = cpi->pb.mbi.blockDxInfo[bp].CurrentSourceStride;
+	INT32  ReconStride  = cpi->pb.mbi.blockDxInfo[bp].CurrentReconStride;
+
+	INT32  XXDiff;
+	INT32  XXSum = 0;
+	INT32  MaxXXDiff = 0;
+
+	// Accumulate the per-pixel error over the whole block and track the largest
+	// single-pixel error.
+	for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+	{
+		for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+		{
+			// The index is a signed difference of two bytes (-255..255).
+			// NOTE(review): this relies on XX_LUT pointing into the middle of its
+			// table so that negative indices are valid - confirm where it is set up.
+			XXDiff =  XX_LUT[(int)NewDataPtr[j] - (int)RefDataPtr1[j]];
+			XXSum += XXDiff;
+
+			if ( XXDiff > MaxXXDiff )
+				MaxXXDiff = XXDiff;
+		}
+
+		// Step to next row of block.
+		NewDataPtr  += SourceStride;
+		RefDataPtr1 += ReconStride;
+	}
+
+	// Distortion = (sum of errors + twice the worst single error), scaled by 64
+	return  (UINT32)(XXSum + (2 * MaxXXDiff)) << 6;
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : RdSaveMbContext
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 MBcol     : Macroblock column number.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Saves the context information for a macro-block.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void RdSaveMbContext ( CP_INSTANCE *cpi, UINT32 MBcol )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	// Snapshot of the macro-block info, put back by RdRestoresMbContext
+	memcpy ( &cpi->CopyMbi, &pbi->mbi, sizeof(MACROBLOCK_INFO) );
+
+	// Y plane: two above entries for this MB column, two left entries, last DC list
+	memcpy ( cpi->AboveCopyY,  pbi->fc.AboveY + (MBcol*2), 2*sizeof(BLOCK_CONTEXT) );
+	memcpy ( cpi->LeftYCopy,   pbi->fc.LeftY,              2*sizeof(BLOCK_CONTEXT) );
+	memcpy ( cpi->LastDcYCopy, pbi->fc.LastDcY,            3*sizeof(Q_LIST_ENTRY) );
+
+	// U plane: one above entry, one left entry, last DC list
+	memcpy ( &cpi->AboveCopyU, pbi->fc.AboveU + MBcol,     sizeof(BLOCK_CONTEXT) );
+	memcpy ( &cpi->LeftUCopy,  &pbi->fc.LeftU,             sizeof(BLOCK_CONTEXT) );
+	memcpy ( cpi->LastDcUCopy, pbi->fc.LastDcU,            3*sizeof(Q_LIST_ENTRY) );
+
+	// V plane: one above entry, one left entry, last DC list
+	memcpy ( &cpi->AboveCopyV, pbi->fc.AboveV + MBcol,     sizeof(BLOCK_CONTEXT) );
+	memcpy ( &cpi->LeftVCopy,  &pbi->fc.LeftV,             sizeof(BLOCK_CONTEXT) );
+	memcpy ( cpi->LastDcVCopy, pbi->fc.LastDcV,            3*sizeof(Q_LIST_ENTRY) );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : RdRestoresMbContext
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 MBcol     : Macroblock column number.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Restores the contexts for a macro-block.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void RdRestoresMbContext( CP_INSTANCE *cpi, UINT32 MBcol )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	// Put the frame DC context back exactly as RdSaveMbContext captured it.
+
+	// Y plane: two above entries for this MB column, two left entries, last DC list
+	memcpy( pbi->fc.AboveY + (MBcol*2), cpi->AboveCopyY,  2*sizeof(BLOCK_CONTEXT) );
+	memcpy( pbi->fc.LeftY,              cpi->LeftYCopy,   2*sizeof(BLOCK_CONTEXT) );
+	memcpy( pbi->fc.LastDcY,            cpi->LastDcYCopy, 3*sizeof(Q_LIST_ENTRY) );
+
+	// U plane: one above entry, one left entry, last DC list
+	memcpy( pbi->fc.AboveU + MBcol,     &cpi->AboveCopyU, sizeof(BLOCK_CONTEXT) );
+	memcpy( &pbi->fc.LeftU,             &cpi->LeftUCopy,  sizeof(BLOCK_CONTEXT) );
+	memcpy( pbi->fc.LastDcU,            cpi->LastDcUCopy, 3*sizeof(Q_LIST_ENTRY) );
+
+	// V plane: one above entry, one left entry, last DC list
+	memcpy( pbi->fc.AboveV + MBcol,     &cpi->AboveCopyV, sizeof(BLOCK_CONTEXT) );
+	memcpy( &pbi->fc.LeftV,             &cpi->LeftVCopy,  sizeof(BLOCK_CONTEXT) );
+	memcpy( pbi->fc.LastDcV,            cpi->LastDcVCopy, 3*sizeof(Q_LIST_ENTRY) );
+
+	// Finally reinstate the saved macro-block info for the rest of mode selection
+	memcpy( &pbi->mbi, &cpi->CopyMbi, sizeof(MACROBLOCK_INFO) );
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : EncodeBlock_RD
+ *
+ *  INPUTS        : CP_INSTANCE *cpi        : Pointer to encoder instance.
+ *	                UINT32 MBrow            : Macro-block row number.
+ *	                UINT32 MBcol            : Macro-block column number.
+ *	                BLOCK_POSITION bp       : Position of block in MB (0-5).
+ *	                BOOL SaveBlockDcContext : Flag whether to save block context.
+ *
+ *  OUTPUTS       : UINT32 *Rate            : Approximation of number of bits required to code block.
+ *                  UINT32 *Dist            : Distortion of the encoded block.   
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Encodes a block in rate-distortion mode.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+// Runs one block through predict -> DCT -> DC prediction -> quantize -> tokenize,
+// adds the block's quantization error to *Dist, then updates the above/left DC
+// contexts and optionally records them in cpi->MbDcContexts for the current mode.
+// NOTE: MBrow and MBcol are not referenced anywhere in this function body.
+void EncodeBlock_RD
+( 
+	CP_INSTANCE *cpi,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	BLOCK_POSITION bp,
+    UINT32 *Rate,
+    UINT32 *Dist,
+	BOOL SaveBlockDcContext
+)
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+
+    UINT32 T_Error1;
+
+    // build a block predictor & subtract predictor from source we are trying to compress
+	PredictBlock ( cpi, bp );
+	
+    // forward DCT
+    fdct_short(cpi->DCTDataBuffer, cpi->DCT_codes); 
+    
+    // predict our dc values from the surrounding guys
+	PredictDCE (cpi, bp);
+
+	// quantize the coefficients
+	VP6_quantize ( pbi->quantizer, cpi->DCT_codes, pbi->mbi.blockDxInfo[bp].coeffsPtr, (UINT8)bp );   
+
+	// convert coefficients to tokens
+	// Rate is handed over as the MbCost argument; the UINT8 return value is discarded.
+	// NOTE(review): presumably TokenizeFrag_RD adds this block's cost to *Rate - confirm.
+	//pbi->FragCoefEOB = (UINT8)
+    TokenizeFrag_RD ( cpi, pbi->mbi.blockDxInfo[bp].coeffsPtr, pbi->mbi.blockDxInfo[bp].Plane, Rate );
+
+    
+    // Error between the DCT output and its quantized form, returned through T_Error1
+    GetQuantizedCoeffsMSE_RD(cpi->DCT_codes, 
+                                pbi->mbi.blockDxInfo[bp].coeffsPtr, 
+                                pbi->mbi.blockDxInfo[bp].dequantPtr,
+                                &T_Error1);
+    // Accumulate into the caller's running distortion total
+    *Dist += T_Error1;
+    
+    // predict our dc values from the surrounding guys
+	VP6_PredictDC ( pbi, bp );
+
+	// update the context info for the next block
+	VP6_UpdateContextA ( pbi, pbi->mbi.blockDxInfo[bp].Above, bp );
+	VP6_UpdateContext  ( pbi, pbi->mbi.blockDxInfo[bp].Left,  bp );
+
+    
+	// If requested then save the DC context for this block in a data structure indexed by mode and block position.
+	// The saved values are used to update the DC context once the best coding method has been decided.
+	// Both the contents and the address of each context entry are recorded.
+	if ( SaveBlockDcContext )
+	{
+		memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].Above, pbi->mbi.blockDxInfo[bp].Above, sizeof(BLOCK_CONTEXT) );
+		cpi->MbDcContexts[pbi->mbi.Mode][bp].AbovePtr = pbi->mbi.blockDxInfo[bp].Above;
+
+		memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].Left, pbi->mbi.blockDxInfo[bp].Left, sizeof(BLOCK_CONTEXT) );
+		cpi->MbDcContexts[pbi->mbi.Mode][bp].LeftPtr = pbi->mbi.blockDxInfo[bp].Left;
+
+		memcpy ( &cpi->MbDcContexts[pbi->mbi.Mode][bp].LastDc, pbi->mbi.blockDxInfo[bp].LastDc, sizeof(Q_LIST_ENTRY) );
+		cpi->MbDcContexts[pbi->mbi.Mode][bp].LastDcPtr = pbi->mbi.blockDxInfo[bp].LastDc;
+	}
+    
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : EncodeMacroBlock_RD
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *	                UINT32 *FragsToCheck : Pointer to list of blocks in the MB.
+ *                  UINT32 MBrow         : Macro-block row number.
+ *	                UINT32 MBcol         : Macro-block column number.
+ *
+ *  OUTPUTS       : UINT32 *Rate         : Pointer to Rate value (in bits).
+ *                  UINT32 *Dist         : Pointer to Distortion value.
+ *
+ *  RETURNS       : None.
+ *
+ *  FUNCTION      : Encodes the macro-block to the point where an estimate
+ *                  of the cost of coding and reconstruction error may be
+ *                  made.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void EncodeMacroBlock_RD
+(
+	CP_INSTANCE *cpi,
+	UINT32 *FragsToCheck,
+	UINT32 MBrow,
+	UINT32 MBcol,
+    UINT32 *Rate,
+    UINT32 *Dist
+)
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+	UINT32 Blk;
+
+	// Snapshot the macro-block info and DC context; both are put back before returning
+	RdSaveMbContext ( cpi, MBcol );
+
+	// Start the rate and distortion totals from zero
+	*Rate = 0;
+	*Dist = 0;
+
+	// Load the motion vector of each of the six blocks from its fragment record
+	for ( Blk=0; Blk<6; Blk++ )
+	{
+		UINT32 Frag = FragsToCheck[Blk];
+
+		pbi->mbi.Mv[Blk].x = pbi->FragInfo[Frag].MVectorX;
+		pbi->mbi.Mv[Blk].y = pbi->FragInfo[Frag].MVectorY;
+	}
+
+	// Luma blocks 0/2 share the even above-context entry for this column, 1/3 the odd one
+	for ( Blk=0; Blk<4; Blk++ )
+		pbi->mbi.blockDxInfo[Blk].Above = &pbi->fc.AboveY[MBcol*2 + (Blk & 1)];
+
+	// Chroma blocks each have a single above-context entry per column
+	pbi->mbi.blockDxInfo[4].Above = &pbi->fc.AboveU[MBcol];
+	pbi->mbi.blockDxInfo[5].Above = &pbi->fc.AboveV[MBcol];
+
+	// Encode the six blocks in order, saving each block's DC context as we go
+	for ( Blk=0; Blk<6; Blk++ )
+		EncodeBlock_RD ( cpi, MBrow, MBcol, (BLOCK_POSITION)Blk, Rate, Dist, TRUE );
+
+	// Restore the MB and dc context
+	RdRestoresMbContext ( cpi, MBcol );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : RdFunction
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 Rate      : Rate value (in bits).
+ *                  UINT32 Dist      : Distortion value.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Dist + (Multiplier * Rate) / Divisor.
+ *
+ *  FUNCTION      : Combines a rate and a distortion into a single
+ *                  rate-distortion value. The multiplier and divisor are
+ *                  looked up by the frame Q index; the multiplier is then
+ *                  corrected according to cpi->LastFrameNewMvUsage.
+ *
+ *  SPECIAL NOTES : All arithmetic is carried out in UINT32. 
+ *
+ ****************************************************************************/
+UINT32 RdFunction ( CP_INSTANCE *cpi, UINT32 Rate, UINT32 Dist )
+{
+	UINT32 Divisor    = RateDiv[cpi->pb.quantizer->FrameQIndex];
+	UINT32 Multiplier = RateMult[cpi->pb.quantizer->FrameQIndex];
+
+	// Correct the multiplier using the table entry selected by the last
+	// frame's new-MV usage; the product is scaled back down by 128.
+	Multiplier = (Multiplier*RateMultCorrection[cpi->LastFrameNewMvUsage]) >> 7;
+
+	// The multiplier is never allowed to drop to zero
+	if ( Multiplier == 0 )
+		Multiplier = 1;
+
+	return Dist + ((Multiplier * Rate) / Divisor);
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : RdModeCost
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 MBrow     : Macro-block row number.
+ *                  UINT32 MBcol     : Macro-block column number.
+ *                  UINT8 Mode       : Coding mode for MB.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Approximate cost of coding mode (in bits).
+ *
+ *  FUNCTION      : Estimates the cost (in bits) of coding a mode when
+ *                  rate-distortion optimisation is in use.
+ *
+ *  SPECIAL NOTES : Simply forwards to modeCost(). 
+ *
+ ****************************************************************************/
+UINT32 RdModeCost ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol, UINT8 Mode )
+{
+	UINT32 Cost;
+
+	// The RD estimate is the same value modeCost() reports
+	Cost = modeCost ( cpi, MBrow, MBcol, Mode );
+
+	return Cost;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetFragMotionVectorAndMode
+ *
+ *  INPUTS        : PB_INSTANCE *pbi                : Pointer to decoder instance.
+ *                  INT32 FragIndex                 : Index of the block in pbi->FragInfo.
+ *                  MOTION_VECTOR *ThisMotionVector : MV to record for the block.
+ *                  CODING_MODE mode                : Coding mode to record for the block.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Stores the given coding mode and motion vector in the
+ *                  FragInfo entry of the specified block.
+ *
+ ****************************************************************************/
+void SetFragMotionVectorAndMode
+( 
+    PB_INSTANCE *pbi,
+    INT32 FragIndex,
+    MOTION_VECTOR *ThisMotionVector,
+	CODING_MODE mode
+)
+{
+    // Motion vector components for the block
+    pbi->FragInfo[FragIndex].MVectorX = ThisMotionVector->x;
+    pbi->FragInfo[FragIndex].MVectorY = ThisMotionVector->y;
+
+    // Coding mode for the block
+    pbi->FragInfo[FragIndex].FragCodingMode = mode;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickIntra
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Total intra-error for the frame.
+ *
+ *  FUNCTION      : Selects INTRA coding mode for all macro-blocks in the
+ *                  frame. This is a suitable way to code key-frames as
+ *                  there is then no dependency on previously decoded data.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+UINT32 PickIntra ( CP_INSTANCE *cpi )
+{
+    UINT32  Temp;
+	UINT32  i;
+	UINT32	B;
+    UINT32	MBrow;
+    UINT32	MBcol;
+    UINT32	UVRow;
+    UINT32	UVColumn;
+	INT32	FragIndex;
+	UINT32  IntraError;
+    UINT32	UVFragOffset;
+	INT32   TopLeftIndex = 0;
+	UINT32  TotIntraError = 0;
+	UINT32  CountInterlaced = 0;
+	PB_INSTANCE *pbi = &cpi->pb;
+    // Offsets of the four Y blocks of an MB relative to its top-left fragment.
+    // Filled in by assignment below rather than by an initialiser:
+    //UINT32	BlockOffset[4] = { 0, 1, pbi->HFragments, pbi->HFragments+1 };
+    UINT32	BlockOffset[4];
+
+    BlockOffset[0] = 0;
+    BlockOffset[1] = 1;
+    BlockOffset[2] = pbi->HFragments;
+    BlockOffset[3] = pbi->HFragments+1;
+
+	// Preset all 128 error bins to pbi->UnitFragments / 4
+	for ( i=0; i<128; i++ )
+		cpi->ErrorBins[i] = pbi->UnitFragments / 4;
+
+	// Reset the mode+mv frame cost estimate (no modes or mvs for a key frame).
+	cpi->ModeMvCostEstimate = 0;
+
+	// Visit every MB except the BORDER_MBS rows/columns on each edge
+	for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows-BORDER_MBS; MBrow++ )
+	{
+		for ( MBcol=BORDER_MBS; MBcol<pbi->MBCols-BORDER_MBS; MBcol++ )
+		{
+			cpi->MBCodingMode = CODE_INTRA;
+            
+            // Source offset uses border-relative MB coordinates and the video frame width;
+            // the reconstruction offset uses the full MB coordinates and the Y stride.
+            pbi->mbi.blockDxInfo[0].Source = pbi->YDataOffset + 16*(MBrow-BORDER_MBS) *pbi->Configuration.VideoFrameWidth + 16*(MBcol-BORDER_MBS);
+			pbi->mbi.blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + 16*MBrow * pbi->Configuration.YStride + 16*MBcol;
+
+			// The per-MB frame/field variance test is commented out, so the choice
+			// depends only on the Interlaced configuration flag.
+			if ( pbi->Configuration.Interlaced /*&& GetMBFrameVertVar(cpi) > GetMBFieldVertVar(cpi)*/ )
+			{
+                // Code MB as two separate fields
+				pbi->mbi.Interlaced = 1;
+				pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 1;
+
+				// Source stride for the four Y blocks spans two frame lines
+				pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[3].CurrentSourceStride = 2 * pbi->Configuration.VideoFrameWidth;
+
+                // Blocks 2 and 3 start one frame line below blocks 0 and 1
+                pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+                pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + pbi->Configuration.VideoFrameWidth;
+                pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+
+				CountInterlaced++;
+			}
+			else
+			{
+                // Code MB as a single progressive-scan MB
+				pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 0;
+				pbi->mbi.Interlaced = 0;
+
+				// Source stride for the four Y blocks is one frame line
+				pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+                pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+
+                // Blocks 2 and 3 start 8 frame lines below blocks 0 and 1
+                pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+                pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3);
+                pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+
+			}
+
+			// Mark the four Y fragments of this MB with the MB coding mode.
+			// On exit FragIndex holds the last (B == 3) fragment index, which
+			// the U/V calculation below depends on.
+			for ( B=0; B<4; B++ )
+			{
+				FragIndex = TopLeftIndex + BlockOffset[B];
+				pbi->FragInfo[FragIndex].FragCodingMode = cpi->MBCodingMode;
+			}
+
+			// Matching fragments in the U and V planes
+			UVRow        = (FragIndex / (pbi->HFragments * 2));
+			UVColumn     = (FragIndex % pbi->HFragments) / 2;
+			UVFragOffset = (UVRow * (pbi->HFragments / 2)) + UVColumn;
+        
+			// U fragments follow the Y plane fragments; V fragments follow the U fragments
+			pbi->FragInfo[pbi->YPlaneFragments + UVFragOffset].FragCodingMode = cpi->MBCodingMode;
+			pbi->FragInfo[pbi->YPlaneFragments + pbi->UVPlaneFragments + UVFragOffset].FragCodingMode  = cpi->MBCodingMode;
+
+			// Keep a note of the total error score for the Y macro blocks for rate targeting purposes
+			IntraError = GetMBIntraError( cpi );
+
+        	// Scale the MB error down by 256 and clamp it to [MIN_ERR, MAX_ERR] before summing
+        	Temp = (IntraError>>8);
+        	if ( Temp < MIN_ERR )
+        		Temp = MIN_ERR;
+	        else if (  Temp > MAX_ERR )
+		        Temp = MAX_ERR;
+	        TotIntraError += Temp;
+
+			// The next MB's top-left fragment is two fragments further along
+			TopLeftIndex += 2;
+		}
+
+		// Step over one further row of fragments at the end of each MB row.
+		// NOTE(review): TopLeftIndex starts at 0 and advances only for the MBs visited
+		// above, although the loops start at BORDER_MBS - presumably FragInfo indexing
+		// excludes the border MBs; confirm.
+		TopLeftIndex += pbi->HFragments;
+	}
+	
+    // 255 when no MB was counted as interlaced; decreases as CountInterlaced grows
+    pbi->probInterlaced = 256-(1+254*CountInterlaced/pbi->MacroBlocks);
+
+    return TotIntraError;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetMBMotionVectorsAndMode
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  UINT32 *FragIndexes         : Pointer to list of the 6 blocks in the MB.
+ *                  UINT32 Mode                 : Coding mode for the MB.
+ *                  MOTION_VECTOR *MotionVector : MV for the MB.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets the coding mode for the macro-block, and the coding
+ *                  mode and motion vector for each of its 6 constituent
+ *                  blocks.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void SetMBMotionVectorsAndMode
+( 
+    CP_INSTANCE *cpi,     
+    UINT32 *FragIndexes,
+    UINT32 Mode,
+    MOTION_VECTOR *MotionVector
+)
+{
+	UINT32 i;
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	// MB-level mode
+	pbi->mbi.Mode = Mode;
+
+	// Same mode and vector for every block of the MB, in list order
+	for ( i=0; i<6; i++ )
+		SetFragMotionVectorAndMode ( pbi, FragIndexes[i], MotionVector, Mode );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickBetterMBMode
+ *
+ *  INPUTS        : CP_INSTANCE *cpi           : Pointer to encoder instance.
+ *                  UINT32 *FragsToCheck       : Pointer to list of 6 blocks in this MB.
+ *                  CODING_MODE mode           : Coding mode to evaluate.
+ *                  MOTION_VECTOR *ThisMVector : Pointer to MV associated with this mode.
+ *                  UINT32 MBrow               : MB row.
+ *                  UINT32 MBcol               : MB column.
+ *                  UINT8 *Frame               : Pointer to MB in previous frame reconstruction.
+ *                  CODING_MODE *BestMode      : Pointer to best mode found so far.
+ *                  UINT32 *Error              : Pointer to best error found so far.
+ *                  MOTION_VECTOR *mv          : Pointer to MV for best mode found so far.
+ *                  UINT32 *BestRate           : Pointer to best rate found so far.
+ *                  UINT32 *BestDist           : Pointer to best distortion found so far.
+ *                  UINT32 *BestRd             : Pointer to best RD-value found so far.
+ *
+ *  OUTPUTS       : CODING_MODE *BestMode      : Updated if this mode is accepted.
+ *                  UINT32 *Error              : Updated if this mode is accepted.
+ *                  MOTION_VECTOR *mv          : Updated if this mode is accepted.
+ *                  UINT32 *FourError          : Passed to GetMBInterError (errors for 4 Y-blocks in MB).
+ *                  UINT32 *BestRate           : Updated if this mode is accepted (RD only).
+ *                  UINT32 *BestDist           : Updated if this mode is accepted (RD only).
+ *                  UINT32 *BestRd             : Updated if this mode is accepted (RD only).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Evaluates the specified coding mode and if better than
+ *                  the best mode found so far, updates the relevant variables.
+ *
+ *  SPECIAL NOTES : If rate-distortion mode is enabled then an estimate of
+ *                  the rate & distortion is found by a dummy coding of the
+ *                  MB that does not output to the bitstream. In that case
+ *                  the mode and MV are also written to the MB's blocks via
+ *                  SetMBMotionVectorsAndMode before the dummy coding.
+ *
+ ****************************************************************************/
+void PickBetterMBMode
+(
+	CP_INSTANCE *cpi,
+	UINT32 * FragsToCheck,
+	CODING_MODE mode,
+	MOTION_VECTOR *ThisMVector, 
+	UINT32 MBrow,
+	UINT32 MBcol,
+	UINT8 *Frame,
+	CODING_MODE *BestMode,
+	UINT32 *Error,
+	MOTION_VECTOR *mv,
+	UINT32 *FourError,
+	UINT32 *BestRate,
+	UINT32 *BestDist,
+	UINT32 *BestRd
+)
+{
+	UINT32 ModeBits;
+	UINT32 Score;
+	UINT32 Rate;
+	UINT32 Dist;
+	UINT32 RdValue;
+
+	if ( !cpi->RdOpt )
+	{
+		// ---- Prediction-error path ----
+		ModeBits = modeCost ( cpi, MBrow, MBcol, mode );
+		Score    = ModeBits * EPB;
+
+		// The mode cost alone already rules this mode out
+		if ( Score > *Error )
+			return;
+
+		Score += GetMBInterError ( cpi, cpi->yuv1ptr, Frame, ThisMVector, FourError );
+
+		if ( Score < *Error )
+		{
+			*BestMode = mode;
+			*Error    = Score;
+			mv->x     = ThisMVector->x;
+			mv->y     = ThisMVector->y;
+		}
+		return;
+	}
+
+	// ---- Rate-distortion path ----
+	ModeBits = RdModeCost ( cpi, MBrow, MBcol, mode );
+	Score    = ModeBits * EPB;
+	Score   += GetMBInterError ( cpi, cpi->yuv1ptr, Frame, ThisMVector, FourError );
+
+	// Dummy-code the MB with this mode and vector to obtain rate and distortion
+	SetMBMotionVectorsAndMode ( cpi, FragsToCheck, mode, ThisMVector );
+	EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+	Rate += ModeBits;
+
+	RdValue = RdFunction ( cpi, Rate, Dist );
+
+	// Accept on a lower RD value, or when both the error score and the distortion
+	// are below half of the current best values.
+	if ( (RdValue < *BestRd) || ( (Score<(*Error >> 1)) && (Dist<(*BestDist >> 1)) ) )
+	{
+		*BestRd   = RdValue;
+		*BestRate = Rate;
+		*BestDist = Dist;
+		*BestMode = mode;
+		*Error    = Score;
+		mv->x     = ThisMVector->x;
+		mv->y     = ThisMVector->y;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickBetterMBModeAndMV
+ *
+ *  INPUTS        : CP_INSTANCE *cpi           : Pointer to encoder instance.
+ *                  UINT32 *FragsToCheck       : Pointer to list of 6 blocks in this MB.
+ *                  CODING_MODE mode           : Coding mode to evaluate.
+ *                  UINT8 *Frame               : Pointer to MB in previous frame reconstruction.
+ *                  UINT32 MBrow               : MB row.
+ *                  UINT32 MBcol               : MB column.
+ *                  CODING_MODE *BestMode      : Pointer to best mode found so far.
+ *                  UINT32 *Error              : Pointer to best error found so far.
+ *                  MOTION_VECTOR *BestMV      : Pointer to MV for best mode found so far.
+ *                  BOOL FullSearchEnabled     : Flag as to whether exhaustive MV search is enabled.
+ *                  UINT32 *BestRate           : Pointer to best rate found so far.
+ *                  UINT32 *BestDist           : Pointer to best distortion found so far.
+ *                  UINT32 *BestRd             : Pointer to best RD-value found so far.
+ *
+ *  OUTPUTS       : CODING_MODE *BestMode      : Updated if this mode is accepted.
+ *                  UINT32 *Error              : Updated if this mode is accepted.
+ *                  MOTION_VECTOR *BestMV      : Updated if this mode is accepted.
+ *                  UINT32 *FourErrors         : Passed to the MV searches (errors for 4 Y-blocks in MB).
+ *                  UINT32 *BestRate           : Updated if this mode is accepted (RD only).
+ *                  UINT32 *BestDist           : Updated if this mode is accepted (RD only).
+ *                  UINT32 *BestRd             : Updated if this mode is accepted (RD only).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Evaluates the specified coding mode and if better than
+ *                  the best mode found so far, updates the relevant variables.
+ *                  As part of the evaluation of the mode a motion vector
+ *                  search is carried out.
+ *
+ *  SPECIAL NOTES : If rate-distortion mode is enabled then an estimate of
+ *                  the rate & distortion is found by a dummy coding of the
+ *                  MB that does not output to the bitstream.
+ *                  cpi->bc.BitCounter is reset to 0 once the search is done.
+ *
+ ****************************************************************************/
+void PickBetterMBModeAndMV
+(
+	CP_INSTANCE *cpi,
+	UINT32 *FragsToCheck,
+	CODING_MODE mode,
+	UINT8 *Frame,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	CODING_MODE *BestMode,
+	UINT32 *Error,
+	MOTION_VECTOR *BestMV,
+	BOOL FullSearchEnabled,
+	UINT32 *FourErrors,
+	UINT32 *BestRate,
+	UINT32 *BestDist,
+	UINT32 *BestRd
+)
+{
+	UINT32 Score;
+	UINT32 MvBits;
+	UINT32 ModeBits;
+	MOTION_VECTOR FoundMV;
+	MOTION_VECTOR ExhaustiveMV;
+	MOTION_VECTOR DeltaMV;
+
+	// Estimated cost of signalling the mode
+	if ( cpi->RdOpt )
+		ModeBits = RdModeCost ( cpi, MBrow, MBcol, mode );
+	else
+		ModeBits = modeCost ( cpi, MBrow, MBcol, mode );
+
+	// Without RD, give up at once if the mode cost alone exceeds the best error
+	if ( !cpi->RdOpt && ((ModeBits * EPB) > *Error) )
+		return;
+
+	// First-pass motion vector search for this mode
+	Score = GetMBMVInterError ( cpi, mode, Frame, &FoundMV, FourErrors );
+
+	// Fall back on an exhaustive search when enabled and neither this result
+	// nor the best error so far is within the exhaustive search threshold.
+	if ( FullSearchEnabled &&
+		 (Score > cpi->ExhaustiveSearchThresh) &&
+		 (*Error > cpi->ExhaustiveSearchThresh) )
+	{
+		UINT32 ExhaustiveScore;
+		ExhaustiveScore = GetMBMVExhaustiveSearch ( cpi, mode, Frame, &ExhaustiveMV, FourErrors );
+
+		// Keep the exhaustive result only if it is strictly better
+		if ( ExhaustiveScore < Score )
+		{
+			Score     = ExhaustiveScore;
+			FoundMV.x = ExhaustiveMV.x;
+			FoundMV.y = ExhaustiveMV.y;
+		}
+	}
+
+	cpi->bc.BitCounter = 0;
+
+	// Express the vector relative to the "nearest" vector when one is usable;
+	// otherwise the differential is the vector itself.
+	DeltaMV.x = FoundMV.x;
+	DeltaMV.y = FoundMV.y;
+	if ( mode == CODE_INTER_PLUS_MV )
+	{
+		if ( cpi->pb.mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			DeltaMV.x -= cpi->pb.mbi.NearestInterMVect.x;
+			DeltaMV.y -= cpi->pb.mbi.NearestInterMVect.y;
+		}
+	}
+	else if ( cpi->pb.mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+	{
+		// Any other mode is treated as a golden frame mode
+		DeltaMV.x -= cpi->pb.mbi.NearestGoldMVect.x;
+		DeltaMV.y -= cpi->pb.mbi.NearestGoldMVect.y;
+	}
+
+	// The MV error adjustment comprises an error-per-bit term (cpi->MVErrorPerBit
+	// plus a fraction of the error itself) multiplied by the estimated cost of
+	// the differential vector.
+	MvBits = cpi->EstMvCostPtrX[DeltaMV.x] + cpi->EstMvCostPtrY[DeltaMV.y];
+	Score += (cpi->MVErrorPerBit + (Score >> 13)) * MvBits;
+	Score += ModeBits * EPB;
+
+	if ( cpi->RdOpt )
+	{
+		UINT32 Rate;
+		UINT32 Dist;
+		UINT32 RdValue;
+
+		// Dummy-code the MB with this mode and vector to obtain rate and distortion
+		SetMBMotionVectorsAndMode ( cpi, FragsToCheck, mode, &FoundMV );
+		EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+		Rate += ModeBits;
+		Rate += MvBits;
+		// Apply mv re-use estimate correction.
+		// NOTE(review): Rate is unsigned - confirm the correction can never exceed it.
+		Rate -= RdMvCostCorrection[cpi->LastFrameNewMvUsage];
+
+		RdValue = RdFunction ( cpi, Rate, Dist );
+
+		// Accept on a lower RD value, or when both the error score and the
+		// distortion are below half of the current best values.
+		if ( (RdValue < *BestRd) || ( (Score<(*Error >> 1)) && (Dist<(*BestDist >> 1)) ) )
+		{
+			*BestRd   = RdValue;
+			*BestRate = Rate;
+			*BestDist = Dist;
+			*BestMode = mode;
+			*Error    = Score;
+			BestMV->x = FoundMV.x;
+			BestMV->y = FoundMV.y;
+		}
+	}
+	else
+	{
+		// Is the improvement, if any, good enough to justify a new MV
+		if ( Score < *Error )
+		{
+			*BestMode = mode;
+			*Error    = Score;
+			BestMV->x = FoundMV.x;
+			BestMV->y = FoundMV.y;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickBetterBMode
+ *
+ *  INPUTS        : CP_INSTANCE *cpi           : Pointer to encoder instance.
+ *                  UINT8 *Frame               : Pointer to block in previous frame reconstruction (NOT USED).
+ *                  UINT32 MBrow               : MB row of parent MB.
+ *                  UINT32 MBcol               : MB column of parent MB.
+ *                  UINT32 Block               : Block number in its parent MB (0-3).
+ *                  CODING_MODE ThisMode       : Coding mode to evaluate.
+ *                  MOTION_VECTOR *ThisMv      : Pointer to MV for the mode being evaluated.
+ *                  UINT32 ThisError           : Error for the mode being evaluated (before mode cost).
+ *                  CODING_MODE *BestMode      : Pointer to best mode found so far.
+ *                  UINT32 *BestError          : Pointer to best error found so far.
+ *                  MOTION_VECTOR *BestMv      : Pointer to MV for best mode found so far.
+ *                  UINT32 *BestRdValue        : Pointer to best RD-value found so far.
+ *
+ *  OUTPUTS       : CODING_MODE *BestMode      : Updated if this mode is accepted.
+ *                  UINT32 *BestError          : Updated if this mode is accepted (see SPECIAL NOTES).
+ *                  MOTION_VECTOR *BestMv      : Updated if this mode is accepted.
+ *                  UINT32 *BestRdValue        : Updated if this mode is accepted (RD only).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Evaluates the specified coding mode for coding the block
+ *                  and if better than the best mode found so far, updates 
+ *                  the relevant variables.
+ *
+ *  SPECIAL NOTES : If rate-distortion mode is enabled (cpi->RdOpt > 1) then
+ *                  an estimate of the rate & distortion is found by a dummy
+ *                  coding of the block that does not output to the bitstream.
+ *                  In that case *BestError receives the distortion of the
+ *                  accepted mode; otherwise it receives the error score
+ *                  (ThisError plus mode cost).
+ *
+ ****************************************************************************/
+void PickBetterBMode
+(
+	CP_INSTANCE *cpi,
+	UINT8 *Frame,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	UINT32 Block,
+	CODING_MODE ThisMode,
+	MOTION_VECTOR *ThisMv,
+	UINT32 ThisError,
+	CODING_MODE *BestMode,
+	UINT32 *BestError,
+	MOTION_VECTOR *BestMv,
+	UINT32 *BestRdValue
+)
+{
+	UINT32 EstModeCost;
+
+	// Add the estimated cost of signalling the mode to the error score
+	EstModeCost = blockModeCost ( cpi, MBrow, MBcol, ThisMode );
+	ThisError += EstModeCost * EPB;
+
+	// Are we using RD or modified prediction error
+	if ( cpi->RdOpt > 1 )
+	{
+		UINT32 Rate    = 0;
+		UINT32 Dist    = 0;
+		UINT32 RdValue = 0;
+		PB_INSTANCE *pbi = &cpi->pb;
+		
+		// Save the Macro Block and DC context
+		RdSaveMbContext ( cpi, MBcol );
+
+		// Set up relevant parts of the mbi structure
+		pbi->mbi.Mode        = ThisMode;
+		pbi->mbi.Mv[Block].x = ThisMv->x;
+		pbi->mbi.Mv[Block].y = ThisMv->y;
+
+		// Point the block at its "above" and "left" context entries:
+		// blocks 0 & 2 are the left column, 1 & 3 the right column;
+		// blocks 0 & 1 are the top row, 2 & 3 the bottom row.
+		switch ( Block )
+		{
+		case 0:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[0];
+			break;
+		case 1:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[0];
+			break;
+		case 2:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[1];
+			break;
+		case 3:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[1];
+			break;
+		default:
+			// Block should always be in range 0-3. For any other value the
+			// context pointers of block 0 are set up and coding carries on.
+			// NOTE(review): this masks a caller error rather than reporting it.
+			pbi->mbi.blockDxInfo[0].Above = &pbi->fc.AboveY[1+MBcol*2];
+			pbi->mbi.blockDxInfo[0].Left  = &pbi->fc.LeftY[0];
+			break;
+		}
+
+		// Encode the block to get a rate and a distortion value
+		EncodeBlock_RD ( cpi, MBrow, MBcol, Block, &Rate, &Dist, FALSE );
+
+		// Restore the MB and dc context
+		RdRestoresMbContext ( cpi, MBcol );
+
+		// Add in the mode cost to the rate.
+		Rate += EstModeCost;
+
+		// Calculate Best RD value
+		RdValue = RdFunction ( cpi, Rate, Dist );
+
+		// Accept on a lower RD value, or when the error score is below half
+		// of the best error recorded so far.
+		if ( (RdValue < *BestRdValue) || (ThisError < (*BestError >> 1)) )
+		{
+			*BestMode    = ThisMode;
+			// A preceding "*BestError = ThisError" was a dead store, overwritten
+			// immediately by this assignment; it has been removed.
+			// NOTE(review): confirm that the distortion, not the error score,
+			// is the value intended to be kept here.
+			*BestError   = Dist;
+			*BestRdValue = RdValue;
+			BestMv->x    = ThisMv->x;
+			BestMv->y    = ThisMv->y;
+		}
+	}
+	else
+	{
+		// Non RD case.
+		if ( ThisError < *BestError )
+		{
+			*BestMode  = ThisMode;
+			*BestError = ThisError;
+			BestMv->x  = ThisMv->x;
+			BestMv->y  = ThisMv->y;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickBetterBModeAndMV
+ *
+ *  INPUTS        : CP_INSTANCE *cpi           : Pointer to encoder instance.
+ *                  UINT8 *Frame               : Pointer to block in previous frame reconstruction.
+ *                  UINT32 MBrow               : MB row of parent MB.
+ *                  UINT32 MBcol               : MB column of parent MB.
+ *                  UINT32 Block               : Block number in its parent MB (0-3).
+ *                  CODING_MODE ThisMode       : Coding mode to evaluate.
+ *                  CODING_MODE *BestMode      : Pointer to best mode found so far.
+ *                  UINT32 *BestError          : Pointer to best error found so far.
+ *                  MOTION_VECTOR *BestMV      : Pointer to MV for best mode found so far.
+ *                  UINT32 *BestRdValue        : Pointer to best RD-value found so far.
+ *                  BOOL FullSearchEnabled     : Flag as to whether exhaustive MV search is enabled (NOT USED).
+ *
+ *  OUTPUTS       : CODING_MODE *BestMode      : Updated if this mode is accepted.
+ *                  UINT32 *BestError          : Updated if this mode is accepted (see SPECIAL NOTES).
+ *                  MOTION_VECTOR *BestMV      : Updated if this mode is accepted.
+ *                  UINT32 *BestRdValue        : Updated if this mode is accepted (RD only).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Evaluates the specified coding mode for coding the block
+ *                  and if better than the best mode found so far, updates 
+ *                  the relevant variables. As part of the evaluation of the
+ *                  mode a motion vector search is carried out.
+ *
+ *  SPECIAL NOTES : If rate-distortion mode is enabled (cpi->RdOpt > 1) then
+ *                  an estimate of the rate & distortion is found by a dummy
+ *                  coding of the block that does not output to the bitstream.
+ *                  In that case *BestError receives the distortion of the
+ *                  accepted mode; otherwise it receives the error score.
+ *
+ ****************************************************************************/
+void PickBetterBModeAndMV
+(
+	CP_INSTANCE *cpi,
+	UINT8 *Frame,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	UINT32 Block,
+	CODING_MODE ThisMode,
+	CODING_MODE *BestMode,
+	UINT32 *BestError,
+	MOTION_VECTOR *BestMV,
+	UINT32 *BestRdValue,
+	BOOL FullSearchEnabled
+)
+{
+	UINT32 ThisError;
+	UINT32 EstMvCost;
+	UINT32 EstModeCost;
+	MOTION_VECTOR ThisMV;
+	MOTION_VECTOR DifferentialVector;
+
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	// Start the error score from the estimated cost of signalling the mode
+	EstModeCost = blockModeCost ( cpi, MBrow, MBcol, ThisMode );
+	ThisError = EstModeCost * EPB;
+	
+	// With RD off, give up at once if the mode cost alone exceeds the best error
+    if ( !cpi->RdOpt && (ThisError > *BestError) )
+		return;
+
+    // Use the exhaustive block MV search while the best error is above the
+    // threshold, otherwise the ordinary block MV search.
+	if ( *BestError > cpi->BlockExhaustiveSearchThresh )
+	{
+		ThisError += GetBMVExhaustiveSearch( cpi, Frame, &ThisMV, Block);
+    }
+    else
+    {
+		ThisError += GetBMVSearch( cpi, Frame, &ThisMV, Block );
+    }
+
+	// Express the vector relative to the "nearest" vector when one is usable
+	DifferentialVector.x = ThisMV.x;
+	DifferentialVector.y = ThisMV.y;
+	if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+	{
+		DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+		DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+	}
+
+	// Weight the estimated cost of the differential vector into the error score
+	EstMvCost  = cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y];
+	ThisError += (cpi->MVErrorPerBit + (ThisError >> 13)) * EstMvCost;
+
+	// Are we using RD or modified prediction error
+	if ( cpi->RdOpt > 1 )
+	{
+		UINT32 Rate    = 0;
+		UINT32 Dist    = 0;
+		UINT32 RdValue = 0;
+		
+		// Save the Macro Block and DC context
+		RdSaveMbContext ( cpi, MBcol );
+
+		// Set up relevant parts of the mbi structure
+		pbi->mbi.Mode        = ThisMode;
+		pbi->mbi.Mv[Block].x = ThisMV.x;
+		pbi->mbi.Mv[Block].y = ThisMV.y;
+
+		// Point the block at its "above" and "left" context entries:
+		// blocks 0 & 2 are the left column, 1 & 3 the right column;
+		// blocks 0 & 1 are the top row, 2 & 3 the bottom row.
+		switch ( Block )
+		{
+		case 0:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[0];
+			break;
+		case 1:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[0];
+			break;
+		case 2:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[1];
+			break;
+		case 3:
+			pbi->mbi.blockDxInfo[Block].Above = &pbi->fc.AboveY[1+MBcol*2+1];
+			pbi->mbi.blockDxInfo[Block].Left  = &pbi->fc.LeftY[1];
+			break;
+		default:
+			// Block should always be in range 0-3; any other value leaves
+			// the context pointers as they were.
+			break;
+		}
+
+		// Encode the block to get a rate and a distortion value
+		EncodeBlock_RD ( cpi, MBrow, MBcol, Block, &Rate, &Dist, FALSE );
+
+		// Restore the MB and dc context
+		RdRestoresMbContext ( cpi, MBcol );
+
+		// Add in the mode and mv costs to the rate.
+		Rate += EstModeCost;
+		Rate += EstMvCost;
+
+		// Calculate Best RD value
+		RdValue = RdFunction ( cpi, Rate, Dist );
+
+		// Accept on a lower RD value, or when the error score is below half
+		// of the best error recorded so far.
+		if ( (RdValue < *BestRdValue) || (ThisError < (*BestError >> 1)) )
+		{
+			*BestMode    = ThisMode;
+			// A preceding "*BestError = ThisError" was a dead store, overwritten
+			// immediately by this assignment; it has been removed.
+			// NOTE(review): confirm that the distortion, not the error score,
+			// is the value intended to be kept here.
+			*BestError   = Dist;
+			*BestRdValue = RdValue;
+			BestMV->x    = ThisMV.x;
+			BestMV->y    = ThisMV.y;
+		}
+	}
+	else
+	{
+		// Non RD case.
+		if ( ThisError < *BestError )
+		{
+			*BestMode  = ThisMode;
+			*BestError = ThisError;
+			BestMV->x  = ThisMV.x;
+			BestMV->y  = ThisMV.y;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickBlockMode
+ *
+ *  INPUTS        : CP_INSTANCE *cpi           : Pointer to encoder instance.
+ *                  UINT32 MBrow               : MB row of parent MB.
+ *                  UINT32 MBcol               : MB column of parent MB.
+ *                  UINT32 Block               : Block number in its parent MB (0-3).
+ *
+ *  OUTPUTS       : CODING_MODE *BestMode      : Best coding mode found for the block.
+ *                  MOTION_VECTOR *BestMVect   : MV for the best mode found.
+ *                  UINT32 *BestError          : Error recorded for the best mode found.
+ *
+ *  RETURNS       : void 
+ *
+ *  FUNCTION      : Picks the best coding mode for a block. Candidates are
+ *                  tried in the order: no MV, nearest MV (if non-zero),
+ *                  near MV (if non-zero) and finally a new MV found by
+ *                  search.
+ *
+ *  SPECIAL NOTES : The new-MV search is always tried when cpi->RdOpt > 1,
+ *                  otherwise only when the best error so far exceeds
+ *                  cpi->MinErrorForBlockMVSearch. 
+ *
+ ****************************************************************************/
+void PickBlockMode
+( 
+	CP_INSTANCE	  *cpi, 
+	UINT32        MBrow,
+	UINT32        MBcol,
+	UINT32        Block,
+	CODING_MODE	  *BestMode,
+	MOTION_VECTOR *BestMVect,
+	UINT32        *BestError
+)
+{
+	PB_INSTANCE   *pbi = &cpi->pb;
+	MOTION_VECTOR NoMotion   = { 0, 0 };
+	MOTION_VECTOR WinnerMV   = { 0, 0 };
+	CODING_MODE	  WinnerMode = CODE_INTER_NO_MV;
+	UINT32        WinnerErr  = HUGE_ERROR;
+	UINT32        WinnerRd   = HUGE_ERROR;
+
+	// Candidate 1: zero motion vector. Always evaluated.
+	PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block,
+	                  CODE_INTER_NO_MV, &ZERO_MV_PLACEHOLDER_UNUSED_GUARD(NoMotion), cpi->ZeroError[Block],
+	                  &WinnerMode, &WinnerErr, &WinnerMV, &WinnerRd );
+
+	// Candidate 2: the "nearest" vector, unless it is the zero vector
+	if ( (pbi->mbi.NearestInterMVect.x != 0) || (pbi->mbi.NearestInterMVect.y != 0) )
+	{
+		PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block,
+		                  CODE_INTER_NEAREST_MV, &pbi->mbi.NearestInterMVect, cpi->NearestError[Block],
+		                  &WinnerMode, &WinnerErr, &WinnerMV, &WinnerRd );
+	}
+
+	// Candidate 3: the "near" vector, unless it is the zero vector
+	if ( (pbi->mbi.NearInterMVect.x != 0) || (pbi->mbi.NearInterMVect.y != 0) )
+	{
+		PickBetterBMode ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block,
+		                  CODE_INTER_NEAR_MV, &pbi->mbi.NearInterMVect, cpi->NearError[Block],
+		                  &WinnerMode, &WinnerErr, &WinnerMV, &WinnerRd );
+	}
+
+	// Candidate 4: a new vector found by search
+	if ( (cpi->RdOpt > 1) || (WinnerErr > cpi->MinErrorForBlockMVSearch) )
+	{
+		PickBetterBModeAndMV ( cpi, pbi->LastFrameRecon, MBrow, MBcol, Block,
+		                       CODE_INTER_PLUS_MV, &WinnerMode, &WinnerErr, &WinnerMV, &WinnerRd, TRUE );
+	}
+
+	// Hand the winner back to the caller
+	*BestMode    = WinnerMode;
+	*BestError   = WinnerErr;
+	BestMVect->x = WinnerMV.x;
+	BestMVect->y = WinnerMV.y;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickMacroBlockMode
+ *
+ *  INPUTS        : CP_INSTANCE	*cpi   : Pointer to encoder instance.
+ *	                UINT32 MBrow       : MB row number.
+ *	                UINT32 MBcol       : MB column number.
+ *	                
+ *  OUTPUTS       : UINT32 *InterError : Running total; this MB's clipped best-mode error is added to it.
+ *	                UINT32 *IntraError : Running total; this MB's clipped intra error is added to it.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Picks the best coding mode for a macro-block.
+ *                  
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void PickMacroBlockMode
+( 
+	CP_INSTANCE	  *cpi, 
+	UINT32		  MBrow, 
+	UINT32		  MBcol,
+	UINT32        *InterError,
+	UINT32        *IntraError
+)
+{
+	UINT32 i;
+	UINT32 Temp;
+	UINT32 TempError[4];
+	UINT32 BestRate;    // The "rate" of the current best mode choice (when RD enabled else unused)
+	UINT32 BestDist;    // The "distortion" of the current best mode choice (when RD enabled else unused)
+	UINT32 BestRd;	    // The best RD compromise so far
+	int type, type2;
+	UINT32 EstModeCost;
+	UINT32 ThisError;
+	UINT32 ThisIntraError;
+	UINT32 FragsToCheck[6];
+    MOTION_VECTOR FourMVect[6];
+	CODING_MODE   FourMode[6];
+	MOTION_VECTOR DifferentialVector;
+
+    MOTION_VECTOR MVect     = { 0, 0 };
+    MOTION_VECTOR ZeroMVect = { 0, 0 };
+    UINT32		  BestError = HUGE_ERROR;
+	CODING_MODE	  BestMode = CODE_INTRA;
+	PB_INSTANCE   *pbi = &cpi->pb;
+ 
+	UINT32 YFragIndex   = (MBrow-BORDER_MBS) * pbi->HFragments * 2 + (MBcol-BORDER_MBS)*2;
+	UINT32 UVFragOffset = (MBrow-BORDER_MBS) * pbi->HFragments / 2 + (MBcol-BORDER_MBS);
+	UINT32 UFragIndex   = pbi->YPlaneFragments + UVFragOffset;
+	UINT32 VFragIndex   = pbi->YPlaneFragments + pbi->UVPlaneFragments + UVFragOffset;
+
+	// Intra and inter errors for this mb ignoring mode cost corrections etc.
+	UINT32 ThisMbIntraErr;
+	UINT32 ThisMbInterErr;
+
+//note: should be able to move FragsToCheck into the blockDxInfo struct
+//then in the MB loop, we should be able to inc the values instead of doing these multiplies
+//it may not affect the pc performance, but it may help other processors
+	FragsToCheck[0] = YFragIndex;
+	FragsToCheck[1] = YFragIndex+1;
+	FragsToCheck[2] = YFragIndex+pbi->HFragments;
+	FragsToCheck[3] = YFragIndex+pbi->HFragments+1;
+	FragsToCheck[4] = UFragIndex;
+	FragsToCheck[5] = VFragIndex;
+
+	// Root offsets for this MB
+    pbi->mbi.blockDxInfo[0].Source = pbi->YDataOffset + 16*(MBrow-BORDER_MBS) *pbi->Configuration.VideoFrameWidth + 16*(MBcol-BORDER_MBS);
+	pbi->mbi.blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + 16*MBrow * pbi->Configuration.YStride + 16*MBcol;
+
+    // AWG Add function here to compute variance for each block in MB
+    // in progressive & interlaced mode. Use the resulting values to 
+    // determine which coding pattern to use from (initially):
+    // (P,P,P,P), (P,I,P,I), (I,P,I,P), (I,I,I,I)
+    // Selected pattern encoded instead of interlaced flag.
+
+	// Values that depend on whether or not we are coding an interlaced block.
+	if ( pbi->Configuration.Interlaced /*&& GetMBFrameVertVar(cpi) > GetMBFieldVertVar(cpi)*/ )
+	{
+		pbi->mbi.Interlaced = 1;
+		pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 1;
+
+	    pbi->mbi.blockDxInfo[0].CurrentReconStride =
+        pbi->mbi.blockDxInfo[1].CurrentReconStride =
+        pbi->mbi.blockDxInfo[2].CurrentReconStride =
+        pbi->mbi.blockDxInfo[3].CurrentReconStride = 2 * pbi->Configuration.YStride;
+
+	    pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[3].CurrentSourceStride = 2 * pbi->Configuration.VideoFrameWidth;
+
+        pbi->mbi.blockDxInfo[1].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + 8;
+        pbi->mbi.blockDxInfo[2].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + pbi->Configuration.YStride;
+        pbi->mbi.blockDxInfo[3].thisRecon = pbi->mbi.blockDxInfo[2].thisRecon + 8;
+
+        pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+        pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + pbi->Configuration.VideoFrameWidth;
+        pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+	} 
+	else	
+	{
+		pbi->mbi.Interlaced = 0;
+		pbi->MBInterlaced[MBOffset(MBrow,MBcol)] = 0;
+
+	    pbi->mbi.blockDxInfo[0].CurrentReconStride =
+        pbi->mbi.blockDxInfo[1].CurrentReconStride =
+        pbi->mbi.blockDxInfo[2].CurrentReconStride =
+        pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+
+	    pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+        pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+
+        pbi->mbi.blockDxInfo[1].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + 8;
+        pbi->mbi.blockDxInfo[2].thisRecon = pbi->mbi.blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3);
+        pbi->mbi.blockDxInfo[3].thisRecon = pbi->mbi.blockDxInfo[2].thisRecon + 8;
+
+        pbi->mbi.blockDxInfo[1].Source = pbi->mbi.blockDxInfo[0].Source + 8;
+        pbi->mbi.blockDxInfo[2].Source = pbi->mbi.blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3);
+        pbi->mbi.blockDxInfo[3].Source = pbi->mbi.blockDxInfo[2].Source + 8;
+	}
+
+	// Calculate the U and V pointers (not affected by interlaced mode) for use in Rd code.
+	if ( cpi->RdOpt )
+	{
+        pbi->mbi.blockDxInfo[4].Source = pbi->UDataOffset + ((MBrow-BORDER_MBS) * 8) * (pbi->Configuration.VideoFrameWidth/2) + ((MBcol-BORDER_MBS) * 8);
+        pbi->mbi.blockDxInfo[5].Source = pbi->VDataOffset + ((MBrow-BORDER_MBS) * 8) * (pbi->Configuration.VideoFrameWidth/2) + ((MBcol-BORDER_MBS) * 8);
+
+        pbi->mbi.blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + (MBrow * 8) * pbi->Configuration.UVStride + (MBcol * 8);
+        pbi->mbi.blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + (MBrow * 8) * pbi->Configuration.UVStride + (MBcol * 8);
+	}
+
+	// What are the two nearest motion vectors.
+	VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 1, &type  );
+	VP6_FindNearestandNextNearest ( pbi, MBrow, MBcol, 2, &type2 );	// type2 is not read again in this function
+
+	// Look at the intra coding error. 
+	ThisIntraError = GetMBIntraError ( cpi );
+
+	// Keep a cumulative Intra error score for the frame (clip individual values to an allowed range)
+	Temp = ThisIntraError >> 8;
+	if ( Temp < MIN_ERR )
+		Temp = MIN_ERR;
+	else if (  Temp > MAX_ERR )
+		Temp = MAX_ERR;
+	*IntraError += Temp;
+	ThisMbIntraErr = Temp;
+
+	EstModeCost = RdModeCost ( cpi, MBrow, MBcol, CODE_INTRA );
+	ThisError = EstModeCost*EPB;
+	ThisIntraError += ThisError;
+
+	// To start with set best mode etc to Intra values
+	BestMode  = CODE_INTRA;
+	BestError = ThisIntraError;	// placeholder only: replaced by the weighted value just below
+
+	// Apply Intra weighting factors to best error
+	BestError = (ThisIntraError >> 7) * IntraFactors[pbi->quantizer->FrameQIndex];
+	if ( cpi->MBCodingMode != CODE_INTRA ) 
+		BestError += (cpi->IntraThresh); 
+	else
+		BestError += (cpi->IntraThresh >> 1);
+
+	// Set Best Rate and Dist if appropriate.
+	if ( cpi->RdOpt )
+	{
+		SetMBMotionVectorsAndMode ( cpi, FragsToCheck, CODE_INTRA, &ZeroMVect ); 
+		EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &BestRate, &BestDist );
+		BestRate += EstModeCost;
+		
+		// Calculate a BestRd value for Intra
+		BestRd = RdFunction ( cpi, BestRate, BestDist );
+	}
+ 
+	// pick the best of the set of inter modes with known motion vectors
+	if ( !cpi->GfRecoveryFrame )
+    {
+        PickBetterMBMode ( cpi, 
+                           FragsToCheck, 
+                           CODE_INTER_NO_MV, 
+                           &ZeroMVect, 
+                           MBrow, 
+                           MBcol, 
+                           pbi->LastFrameRecon, 
+                           &BestMode, 
+                           &BestError, 
+                           &MVect, 
+                           cpi->ZeroError, 
+                           &BestRate, 
+                           &BestDist, 
+                           &BestRd );
+    }
+
+	if( (!cpi->GfRecoveryFrame) && (pbi->mbi.NearestInterMVect.x || pbi->mbi.NearestInterMVect.y) )
+	{
+		PickBetterMBMode ( cpi,
+                          FragsToCheck,
+                          CODE_INTER_NEAREST_MV, 
+                          &pbi->mbi.NearestInterMVect, 
+                          MBrow, 
+                          MBcol, 
+                          pbi->LastFrameRecon, 
+                          &BestMode, 
+                          &BestError, 
+                          &MVect, 
+                          cpi->NearestError, 
+                          &BestRate, 
+                          &BestDist, 
+                          &BestRd );
+	}
+
+	if( (!cpi->GfRecoveryFrame) && ( pbi->mbi.NearInterMVect.x || pbi->mbi.NearInterMVect.y) )
+	{
+		PickBetterMBMode ( cpi,
+                           FragsToCheck,
+                           CODE_INTER_NEAR_MV,
+                           &pbi->mbi.NearInterMVect,
+                           MBrow,
+                           MBcol,
+                           pbi->LastFrameRecon,
+                           &BestMode,
+                           &BestError,
+                           &MVect,
+                           cpi->NearError, 
+                           &BestRate, 
+                           &BestDist, 
+                           &BestRd );
+	}
+
+	PickBetterMBMode ( cpi,
+                       FragsToCheck,
+                       CODE_USING_GOLDEN,
+                       &ZeroMVect,
+                       MBrow,
+                       MBcol,
+                       pbi->GoldenFrame,
+                       &BestMode,
+                       &BestError,
+                       &MVect,
+                       TempError, 
+                       &BestRate, 
+                       &BestDist, 
+                       &BestRd );
+
+	if(pbi->mbi.NearestGoldMVect.x || pbi->mbi.NearestGoldMVect.y)
+	{
+		PickBetterMBMode ( cpi,
+                           FragsToCheck,
+                           CODE_GOLD_NEAREST_MV,
+                           &pbi->mbi.NearestGoldMVect,
+                           MBrow,
+                           MBcol,
+                           pbi->GoldenFrame,
+                           &BestMode,
+                           &BestError,
+                           &MVect,
+                           TempError, 
+                           &BestRate, 
+                           &BestDist, 
+                           &BestRd );
+	}
+
+	if ( pbi->mbi.NearGoldMVect.x || pbi->mbi.NearGoldMVect.y )
+	{
+		PickBetterMBMode ( cpi,
+                           FragsToCheck,
+                           CODE_GOLD_NEAR_MV,
+                           &pbi->mbi.NearGoldMVect,
+                           MBrow,
+                           MBcol,
+                           pbi->GoldenFrame,
+                           &BestMode,
+                           &BestError,
+                           &MVect,
+                           TempError, 
+                           &BestRate, 
+                           &BestDist, 
+                           &BestRd );
+	}
+
+    // Labelled debug code, but the ErrorBins histogram (BestError>>17, top bin 127) is read by PickModes when QuickCompress==2.
+	{
+		int a = (BestError >> 17);
+		if ( a>127 )
+			cpi->ErrorBins[127]++;
+		else
+			cpi->ErrorBins[a]++;
+	}
+
+	// (Note: ignoring this threshold for RD doesn't seem to help much)
+	if ( (!cpi->GfRecoveryFrame) && (BestError > cpi->MinErrorForMacroBlockMVSearch) ) 
+	{
+		PickBetterMBModeAndMV ( cpi,
+                                FragsToCheck,
+                                CODE_INTER_PLUS_MV,
+                                pbi->LastFrameRecon,
+                                MBrow,
+                                MBcol,
+                                &BestMode,
+                                &BestError, 
+                                &MVect, 
+                                TRUE, 
+                                cpi->BestError, 
+                                &BestRate, 
+                                &BestDist, 
+                                &BestRd );
+	}
+	
+	// (Note: ignoring this threshold for RD doesn't seem to help much)
+	if ( BestError > cpi->MinErrorForGoldenMVSearch ) 
+	{
+		PickBetterMBModeAndMV ( cpi,
+                                FragsToCheck,
+                                CODE_GOLDEN_MV,
+                                pbi->GoldenFrame,
+                                MBrow,
+                                MBcol,
+                                &BestMode,
+                                &BestError, 
+                                &MVect, 
+                                FALSE, 
+                                TempError, 
+                                &BestRate, 
+                                &BestDist, 
+                                &BestRd );
+	}
+ 	
+	// Finally... If the best error is still too high then consider the 4MV mode
+	EstModeCost = RdModeCost(cpi,MBrow,MBcol,CODE_INTER_FOURMV);
+	ThisError   = EstModeCost * EPB;
+
+	// Only consider 4-Mode mode if the best prediction error so far is above a threshold
+	// (Note that ignoring this threshold for RD doesn't seem to help much)
+	if ( (!cpi->GfRecoveryFrame) && ((ThisError + cpi->MinImprovementForFourMV) < BestError) )
+	{
+		UINT32 Error;
+		UINT32 RdValue;
+		UINT32 Rate = 0;
+		UINT32 Dist = 0;
+
+		for ( i=0; i<4; i++ )
+		{
+			PickBlockMode ( cpi, MBrow, MBcol, i, &FourMode[i], &FourMVect[i], &Error );
+			ThisError += Error;
+		}
+		
+		// Calculate the UV vectors as the average of the Y plane ones.
+		// First .x component
+		FourMVect[4].x = FourMVect[0].x + FourMVect[1].x + FourMVect[2].x + FourMVect[3].x;
+		if ( FourMVect[4].x >= 0 )
+			FourMVect[4].x = (FourMVect[4].x + 2) / 4;
+		else
+			FourMVect[4].x = (FourMVect[4].x - 2) / 4;
+		FourMVect[5].x = FourMVect[4].x;
+		
+		// Then .y component
+		FourMVect[4].y = FourMVect[0].y + FourMVect[1].y + FourMVect[2].y + FourMVect[3].y;
+		if ( FourMVect[4].y >= 0 )
+			FourMVect[4].y = (FourMVect[4].y + 2) / 4;
+		else
+			FourMVect[4].y = (FourMVect[4].y - 2) / 4;
+		FourMVect[5].y = FourMVect[4].y;
+
+		// Do Rd for selected modes
+		if ( cpi->RdOpt )
+		{
+			// Set up the individual block modes and motion vector structures
+			pbi->mbi.Mode = CODE_INTER_FOURMV;
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[0], &FourMVect[0], FourMode[0] );
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[1], &FourMVect[1], FourMode[1] );
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[2], &FourMVect[2], FourMode[2] );
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[3], &FourMVect[3], FourMode[3] );
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[4], &FourMVect[4], CODE_INTER_FOURMV );
+			SetFragMotionVectorAndMode ( pbi, FragsToCheck[5], &FourMVect[5], CODE_INTER_FOURMV );
+
+			// Now calculate Rate and distortion
+			EncodeMacroBlock_RD ( cpi, FragsToCheck, MBrow, MBcol, &Rate, &Dist );
+			Rate += EstModeCost;    // Add in the cost of specifying 4-Mode mode in the first place
+			
+			// Add in the cost of the 4 individual modes and Mvs
+			for ( i=0; i<4; i++ )
+			{
+				Rate += blockModeCost ( cpi, MBrow, MBcol, FourMode[i] );
+				if ( FourMode[i] == CODE_INTER_PLUS_MV )
+				{
+					DifferentialVector.x = FourMVect[i].x;
+					DifferentialVector.y = FourMVect[i].y;
+					if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+					{
+						DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+						DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+					}
+
+					Rate += cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y];
+				}
+			}
+
+			// Finally plug the combined Rate and distortion values into the RD function.
+			RdValue = RdFunction ( cpi, Rate, Dist );
+
+			if ( RdValue < BestRd )
+			{
+				BestRd    = RdValue;
+				BestRate  = Rate;
+				BestDist  = Dist;
+				BestError = ThisError;
+				BestMode  = CODE_INTER_FOURMV;
+			}
+		}
+		else if ( (ThisError + cpi->MinImprovementForFourMV) < BestError )	// re-test: ThisError now also includes the four block errors added above
+		{
+			BestError = ThisError;
+			BestMode  = CODE_INTER_FOURMV;
+		}
+	}
+
+	// Keep a cumulative best error score for the frame (clip individual values to an allowed range)
+	// For The Intra mode case use ThisIntraError not BestError because BestError has been modified
+	// by intra weighting factors and could be less than mode cost.
+	if ( BestMode != CODE_INTRA )
+		Temp = ( (BestError - (modeCost( cpi, MBrow, MBcol, BestMode )*EPB)) >>8 );
+	else 
+		Temp = ( (ThisIntraError - (modeCost( cpi, MBrow, MBcol, BestMode )*EPB)) >>8 );
+
+	if ( Temp < MIN_ERR )
+		Temp = MIN_ERR;
+	else if ( Temp > MAX_ERR )
+		Temp = MAX_ERR;
+	*InterError += Temp;
+	ThisMbInterErr = Temp;
+
+	// Record of intra and inter error for motion modes
+	if ( (BestMode != CODE_INTRA) && (BestMode != CODE_INTER_NO_MV) && (BestMode != CODE_USING_GOLDEN) )
+	{
+		// Keep a record of motion related intra and inter prediction errors
+		cpi->MotionIntraErr += ThisMbIntraErr;
+		cpi->MotionInterErr += ThisMbInterErr;
+	}
+
+	// Keep running total of the approximate cost of the chosen mode / MVs etc
+	cpi->ModeMvCostEstimate += modeCost ( cpi, MBrow, MBcol, BestMode );
+
+	// keep track of how many times this mode is the same as the last one we encountered ('type' is recomputed here from the nearest/near vectors)
+	if ( (pbi->mbi.NearestInterMVect.x == 0) && (pbi->mbi.NearestInterMVect.y == 0) )
+		type = NONEAREST_MACROBLOCK;
+	else if ( (pbi->mbi.NearInterMVect.x == 0) && (pbi->mbi.NearInterMVect.y == 0) )
+		type = NONEAR_MACROBLOCK;
+	else
+		type = MACROBLOCK;
+
+	//type = 0;
+	cpi->CountModeSameAsLast[type][BestMode] += (cpi->MBCodingMode == BestMode);
+	cpi->CountModeDiffFrLast[type][BestMode] += (cpi->MBCodingMode != BestMode);
+	cpi->MBModeCount[type][BestMode]++;
+	cpi->MBCodingMode = BestMode;
+
+	switch ( BestMode )
+	{
+	case CODE_INTER_FOURMV:
+
+		for ( i=0; i<4; i++ )
+		{
+			cpi->BModeCount[FourMode[i]]++;
+
+			// Running total modeMv costs
+			cpi->ModeMvCostEstimate += blockModeCost ( cpi, MBrow, MBcol, FourMode[i] );
+		}
+
+		// Set up mb mode and mv structures for four mv
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[0], &FourMVect[0], FourMode[0] );
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[1], &FourMVect[1], FourMode[1] );
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[2], &FourMVect[2], FourMode[2] );
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[3], &FourMVect[3], FourMode[3] );
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[4], &FourMVect[4], CODE_INTER_FOURMV );
+		SetFragMotionVectorAndMode ( pbi, FragsToCheck[5], &FourMVect[5], CODE_INTER_FOURMV );
+		
+		for ( i=0; i<4; i++ )
+		{
+			if ( FourMode[i] == CODE_INTER_PLUS_MV )
+			{
+				DifferentialVector.x = FourMVect[i].x;
+				DifferentialVector.y = FourMVect[i].y;
+				if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+				{
+					DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+					DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+				}
+
+				cpi->MvBaselineDist[0][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.x]++;
+				cpi->MvBaselineDist[1][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.y]++;
+
+				// Running total of estimated mode+mv costs
+				cpi->ModeMvCostEstimate += (cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y]);
+
+				// Store mv stats 
+				cpi->FrameMvStats.NumMvs++;
+				cpi->FrameMvStats.SumAbsX += abs(FourMVect[i].x);
+				cpi->FrameMvStats.SumAbsY += abs(FourMVect[i].y);
+				cpi->FrameMvStats.SumX    += FourMVect[i].x;
+				cpi->FrameMvStats.SumY    += FourMVect[i].y;
+				cpi->FrameMvStats.SumXSq  += FourMVect[i].x * FourMVect[i].x;
+				cpi->FrameMvStats.SumYSq  += FourMVect[i].y * FourMVect[i].y;
+			}
+		}
+		
+		// Update the new MV and Mode counters
+		cpi->FrameNewMvCounter += 4;
+		cpi->FrameModeCounter += 4;
+
+		// Update KeyFrameIndicator
+		if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) &&						// Exclude top and bottom for "letterbox in 4:3" video
+			 (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+			cpi->MotionScore ++;
+		break;
+
+	case CODE_INTRA:
+		SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &ZeroMVect );
+
+		// Update KeyFrameIndicator
+		if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) )						// Exclude top and bottom for "letterbox in 4:3" video
+			cpi->MotionScore++;
+
+		// Update the Mode counter
+		cpi->FrameModeCounter++;
+
+		break;
+
+	case CODE_INTER_PLUS_MV:
+	case CODE_GOLDEN_MV:
+
+		DifferentialVector.x = MVect.x;
+		DifferentialVector.y = MVect.y;
+		if ( BestMode == CODE_INTER_PLUS_MV )
+		{
+			if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+			{
+				DifferentialVector.x -= pbi->mbi.NearestInterMVect.x;
+				DifferentialVector.y -= pbi->mbi.NearestInterMVect.y;
+			}
+			else
+			{
+				DifferentialVector.x = MVect.x;	// no change: already set from MVect above
+				DifferentialVector.y = MVect.y;
+			}
+		}
+		else
+		{
+			if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+			{
+				DifferentialVector.x -= pbi->mbi.NearestGoldMVect.x;
+				DifferentialVector.y -= pbi->mbi.NearestGoldMVect.y;
+			}
+			else
+			{
+				DifferentialVector.x = MVect.x;	// no change: already set from MVect above
+				DifferentialVector.y = MVect.y;
+			}
+		}
+
+		SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &MVect );
+
+		// Update KeyFrameIndicator
+		if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) &&						// Exclude top and bottom for "letterbox in 4:3" video
+			 (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+			cpi->MotionScore++;
+
+		cpi->MvBaselineDist[0][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.x]++;
+		cpi->MvBaselineDist[1][(MV_ENTROPY_TOKENS >> 1) + DifferentialVector.y]++;
+
+		// Update the new MV and Mode counters
+		cpi->FrameNewMvCounter++;
+		cpi->FrameModeCounter++;
+
+		// Running total of estimated mode+mv costs
+		cpi->ModeMvCostEstimate += (cpi->EstMvCostPtrX[DifferentialVector.x] + cpi->EstMvCostPtrY[DifferentialVector.y]);
+
+		// Store mv stats (exclude GF)
+		if ( BestMode == CODE_INTER_PLUS_MV)
+		{
+			cpi->FrameMvStats.NumMvs++;
+			cpi->FrameMvStats.SumAbsX += abs(MVect.x);
+			cpi->FrameMvStats.SumAbsY += abs(MVect.y);
+			cpi->FrameMvStats.SumX    += MVect.x;
+			cpi->FrameMvStats.SumY    += MVect.y;
+			cpi->FrameMvStats.SumXSq  += MVect.x * MVect.x;
+			cpi->FrameMvStats.SumYSq  += MVect.y * MVect.y;
+		}
+		break;
+
+	default:
+		SetMBMotionVectorsAndMode ( cpi, FragsToCheck, BestMode, &MVect );
+		
+        // Update KeyFrameIndicator
+		if ( (MBrow >= cpi->FirstSixthBoundary) && (MBrow < cpi->LastSixthBoundary) &&						// Exclude top and bottom for "letterbox in 4:3" video
+			 (BestError > KF_INDICATOR_THRESH) && ((ThisIntraError * 2) < (BestError * 5)) )
+			cpi->MotionScore++;
+
+		// Update the Mode counters
+		cpi->FrameModeCounter++;
+
+		// Store mv stats (exclude GF mv modes)
+		if ( (BestMode == CODE_INTER_NEAREST_MV) || (BestMode == CODE_INTER_NEAR_MV) )
+		{
+			cpi->FrameMvStats.NumMvs++;
+			cpi->FrameMvStats.SumAbsX += abs(MVect.x);
+			cpi->FrameMvStats.SumAbsY += abs(MVect.y);
+			cpi->FrameMvStats.SumX    += MVect.x;
+			cpi->FrameMvStats.SumY    += MVect.y;
+			cpi->FrameMvStats.SumXSq  += MVect.x * MVect.x;
+			cpi->FrameMvStats.SumYSq  += MVect.y * MVect.y;
+		}
+
+		break;
+	}
+
+	// Keep a record of the distribution of mode choices in this frame
+	cpi->ModeDist[BestMode]++;
+	pbi->predictionMode[MBOffset(MBrow,MBcol)]   = BestMode;
+	pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = pbi->FragInfo[FragsToCheck[3]].MVectorX;	// MB-level MV is copied from the fourth Y fragment
+	pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = pbi->FragInfo[FragsToCheck[3]].MVectorY;
+
+	// If Rd Opt is enabled then restore the macro block Dc Prediction context for chosen mode.
+	if ( cpi->RdOpt )
+	{
+		for ( i=0; i<6; i++ )
+		{
+			memcpy ( cpi->MbDcContexts[BestMode][i].AbovePtr,  &cpi->MbDcContexts[BestMode][i].Above,  sizeof(BLOCK_CONTEXT) );
+			memcpy ( cpi->MbDcContexts[BestMode][i].LeftPtr,   &cpi->MbDcContexts[BestMode][i].Left,   sizeof(BLOCK_CONTEXT) );
+			memcpy ( cpi->MbDcContexts[BestMode][i].LastDcPtr, &cpi->MbDcContexts[BestMode][i].LastDc, sizeof(Q_LIST_ENTRY)  );
+
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PickModes
+ *
+ *  INPUTS        : CP_INSTANCE	*cpi   : Pointer to encoder instance.
+ *	                
+ *  OUTPUTS       : UINT32 *InterError : Zeroed here, then receives the frame total of per-MB best-mode errors.
+ *	                UINT32 *IntraError : Zeroed here, then receives the frame total of per-MB intra errors.
+ *
+ *  RETURNS       : UINT32: 0 Always.
+ *
+ *  FUNCTION      : Picks the best coding mode for each macro-block in
+ *                  the frame.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+UINT32 PickModes ( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError )
+{
+    UINT8   QIndex;
+	UINT32  MBrow, MBcol;
+	unsigned int duration;
+	unsigned int starttsc,endtsc;
+	UINT32 CountInterlaced = 0;
+	PB_INSTANCE *pbi = &cpi->pb;
+
+    // Record start time
+    VP6_readTSC ( &starttsc );
+
+	// Work new motion vector cost weighting based upon the frequency of new motion vectors in the last frame.
+	if ( cpi->FrameModeCounter )
+	{
+		cpi->LastFrameNewMvUsage = (cpi->FrameNewMvCounter * 10)/cpi->FrameModeCounter; 
+		cpi->MvEpbCorrection     = MvEpbCorrectionTable[cpi->LastFrameNewMvUsage];
+	}
+	else
+	{
+		cpi->LastFrameNewMvUsage = 0;
+		cpi->MvEpbCorrection = MvEpbCorrectionTable[0];
+	}
+
+	cpi->FrameModeCounter  = 0;
+	cpi->FrameNewMvCounter = 0;
+
+	// Set flag to allow bit cost analysis without actual output.
+	cpi->bc.MeasureCost = TRUE;
+    QIndex = pbi->quantizer->FrameQIndex;
+
+	memset ( (void *)cpi->MvBaselineDist,      0, sizeof(cpi->MvBaselineDist) );
+	memset ( (void *)cpi->MBModeCount,         0, sizeof(cpi->MBModeCount) );
+	memset ( (void *)cpi->CountModeSameAsLast, 0, sizeof(cpi->CountModeSameAsLast) );
+	memset ( (void *)cpi->CountModeDiffFrLast, 0, sizeof(cpi->CountModeDiffFrLast) );
+	memset ( (void *)cpi->BModeCount,          0, sizeof(cpi->BModeCount) );
+
+	// Clear down record of frame coding mode distribution
+	memset ( cpi->ModeDist, 0, sizeof(cpi->ModeDist) );
+
+	// Clear down frame average abs MV data structure
+	memset ( &cpi->FrameMvStats, 0, sizeof(cpi->FrameMvStats) );
+
+	// Clear the mode+mv frame cost estimate
+	cpi->ModeMvCostEstimate = 0;
+
+	cpi->ErrorPerBit = ErrorPerBit[QIndex];
+
+	if ( cpi->ErrorPerBit < 1 )
+		cpi->ErrorPerBit = 1;
+
+	// Calculate a provisional mv-epb using epb and a correction that depends on 
+	// frequency of mv's in last frame.
+	cpi->MVErrorPerBit = (ErrorPerBit[QIndex] << 8) / cpi->MvEpbCorrection;
+
+    // initialize error scores
+    *InterError = 0;
+    *IntraError = 0;
+
+	// Initialise key frame indicator.
+	cpi->MotionScore = 0;
+
+	// Initialise mode variable for use in mode weighting tests
+	cpi->MBCodingMode = CODE_INTER_NO_MV;
+
+	// Error threshold where we consider forcing INTRA mode.
+    cpi->InterTripOutThresh = (5000<<12);
+
+	// Test Values
+	cpi->IntraThresh = (IntraThreshTable[QIndex] << 12); 
+
+    switch ( cpi->QuickCompress )	// no default case: any other value leaves the thresholds below untouched
+    {
+	case 2:  
+		{
+			// this auto speed selection code needs some work !!!
+			UINT32 millisecondsForCompress = 1000000 / cpi->Configuration.OutputFrameRate;	// NOTE(review): 1000000/fps looks like microseconds despite the name; no guard for OutputFrameRate == 0 - confirm
+			millisecondsForCompress = millisecondsForCompress * (16-cpi->CPUUsed) / 16;
+			
+			if ( cpi->avgEncodeTime+cpi->avgPackVideoTime < millisecondsForCompress )
+			{
+				millisecondsForCompress -= cpi->avgEncodeTime + cpi->avgPackVideoTime;
+			
+				if ( cpi->avgPickModeTime == 0 )
+				{
+					cpi->Speed = 4;
+				}
+				else
+				{
+					// why just go up by 1 and not try to calculate the value 
+					// that would compress fast enough (etc)??
+				    if ( millisecondsForCompress <  cpi->avgPickModeTime )
+                    {
+                        cpi->Speed          += 3;
+                        cpi->avgPickModeTime = 0;                        
+                    }                    
+                    else if ( millisecondsForCompress*100 > cpi->avgPickModeTime*130 )
+                    {
+                        cpi->Speed          -= 1;
+                        cpi->avgPickModeTime = 0;
+                    }
+
+                    if ( cpi->Speed < 4 )
+                        cpi->Speed = 4;
+					else if ( cpi->Speed > 16 )
+						cpi->Speed = 16;
+				}
+			}
+			else
+			{
+				cpi->Speed = 16;
+			}
+
+            cpi->MinErrorForMacroBlockMVSearch = 25   << 12;
+            cpi->MinErrorForGoldenMVSearch     = 40   << 12;
+            cpi->ExhaustiveSearchThresh        = 1000 << 12;
+            cpi->MinErrorForBlockMVSearch      = 50   << 12;
+            cpi->FindMvViaSearch        = FindMvVia3StepSearch;
+            cpi->FindBestHalfPixelMv    = FindBestFractionalPixelStep;
+            cpi->FindBestQuarterPixelMv = FindBestFractionalPixelStep;
+            cpi->BlockExhaustiveSearchThresh   = HUGE_ERROR;
+			
+            if ( cpi->Speed >= 1 )	// Speed was forced into 4..16 above, so the >=1 .. >=4 tests here always pass
+                cpi->FindMvViaSearch = FindMvViaDiamondSearch;
+            if ( cpi->Speed >= 2 )
+                cpi->FindBestQuarterPixelMv = SkipFractionalPixelStep;
+            if ( cpi->Speed >= 3 )
+                cpi->MinErrorForGoldenMVSearch = HUGE_ERROR;
+            if ( cpi->Speed >= 4 )
+                cpi->MinErrorForBlockMVSearch = HUGE_ERROR;
+            if ( cpi->Speed >= 14 )
+                cpi->FindBestHalfPixelMv = SkipFractionalPixelStep;
+            if ( cpi->Speed >= 5 )
+            {
+                unsigned int i, sum=0;
+
+				for ( i=0; i<128; i++ )
+				{
+					sum += cpi->ErrorBins[i];
+					if ( 10*sum>(cpi->Speed-6)*(pbi->MBRows-4)*(pbi->MBCols-4) )	// NOTE(review): at Speed==5 (Speed-6) is negative or wraps - confirm intent
+						break;
+				}
+				++i;
+				cpi->MinErrorForMacroBlockMVSearch = i << 17;
+                cpi->ExhaustiveSearchThresh        = i << 23;
+			}
+            if ( cpi->Speed >= 12 )
+                cpi->ExhaustiveSearchThresh = HUGE_ERROR;
+
+            memset ( cpi->ErrorBins, 0, sizeof(cpi->ErrorBins) );
+		}
+		break;
+	
+    case 1: 
+		cpi->MinErrorForMacroBlockMVSearch = 25   << 12;
+		cpi->MinErrorForGoldenMVSearch     = 25   << 12;
+		cpi->ExhaustiveSearchThresh        = 1000  << 12;
+		cpi->MinErrorForBlockMVSearch      = 50   << 12;
+        cpi->BlockExhaustiveSearchThresh   = HUGE_ERROR;
+		break;
+
+	case 3: 
+		cpi->MinErrorForMacroBlockMVSearch = 25   << 12;
+		cpi->MinErrorForGoldenMVSearch     = 25   << 12;
+		cpi->ExhaustiveSearchThresh        = 1000 << 12;
+		cpi->MinErrorForBlockMVSearch      = 50   << 12;
+        cpi->BlockExhaustiveSearchThresh   = HUGE_ERROR;
+		cpi->RdOpt = 2;				// written into cpi; cases 1 and 2 of this switch do not touch RdOpt
+        break;
+
+	case 0: 
+		cpi->MinErrorForMacroBlockMVSearch = 25   << 12;
+		cpi->MinErrorForGoldenMVSearch     = 25   << 12;
+		cpi->ExhaustiveSearchThresh        = 300   << 12;
+        cpi->BlockExhaustiveSearchThresh   = 40  << 12;
+		cpi->MinErrorForBlockMVSearch      = 20  << 12;
+		cpi->RdOpt = 2;				
+		break;
+  } 
+
+	// Extra cost penalty to prevent spurious use of 4mv mode.
+	// The reason this is needed probably has something to do with 
+	// poorer dc prediction with a 4mv macro block than within a 
+	// macro block where all are coded with the same mode.
+	cpi->MinImprovementForFourMV = FourModeImprovement[QIndex]<<12;
+
+	// Define boundaries to be used in key frame selection process
+	cpi->FirstSixthBoundary = (pbi->MBRows-(2*BORDER_MBS))/6+2;			// Macro block index marking the first sixth of the image
+	cpi->LastSixthBoundary  = ((pbi->MBRows-(2*BORDER_MBS))*5)/6+2;		// Macro block index marking the last sixth of the image
+
+	// If we are using RdOpt then reset the Above dc context data structure
+	if ( cpi->RdOpt )
+		VP6_ResetAboveContext ( pbi );
+
+	// decide what block type and motion vectors to use for every non-border macro block of the frame
+	for ( MBrow=BORDER_MBS; MBrow<pbi->MBRows-BORDER_MBS; MBrow++ )
+	{
+		// If we are using RdOpt then reset the Left dc context data structure for each row of MBs
+		if ( cpi->RdOpt )
+			VP6_ResetLeftContext ( pbi );
+
+		for ( MBcol=BORDER_MBS; MBcol < pbi->MBCols-BORDER_MBS; MBcol++ )
+		{
+			// Try to pick the best mode for the macro block
+			PickMacroBlockMode ( cpi, MBrow, MBcol,InterError, IntraError );
+
+			if ( pbi->MBInterlaced[MBOffset(MBrow,MBcol)] )
+				CountInterlaced++;
+		}
+	}
+
+	pbi->probInterlaced = 256-(1+254*CountInterlaced/((pbi->MBRows-(2*BORDER_MBS))*(pbi->MBCols-(2*BORDER_MBS))));
+
+    // system state should be cleared here....
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    // Record end time & compute duration
+	VP6_readTSC(&endtsc);
+	duration = (endtsc - starttsc) / pbi->ProcessorFrequency;	// NOTE(review): no guard against ProcessorFrequency == 0 - confirm it is always set
+	
+    if ( cpi->avgPickModeTime == 0)
+		cpi->avgPickModeTime = duration;
+	else
+		cpi->avgPickModeTime = (7*cpi->avgPickModeTime+duration)>>3;	// running average: 7/8 previous + 1/8 this frame
+
+	return 0;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c
new file mode 100644
index 00000000..3d222359
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/RawBuffer.c
@@ -0,0 +1,130 @@
+/****************************************************************************
+*
+*   Module Title :     RawBuffer.c
+*
+*   Description  :     Functions to handle bit-wise writing to raw buffer.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "RawBuffer.h"
+#include "codec_common.h"
+/****************************************************************************
+ * 
+ *  ROUTINE       : WriteLongToBuffer
+ *
+ *  INPUTS        : RAW_BUFFER *buf : Buffer instance whose DataBlock is to be flushed.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Appends buf->DataBlock to buf->Buffer as four bytes, most
+ *                  significant byte first, advancing buf->pos by four.
+ *
+ *  SPECIAL NOTES : DataBlock itself is left unchanged; no bounds check is made.
+ *
+ ***************************************************************************/
+INLINE
+void WriteLongToBuffer ( RAW_BUFFER *buf )
+{
+    int Shift;
+
+    for ( Shift=24; Shift>=0; Shift-=8 )    // bits 31..24 first, 7..0 last
+        buf->Buffer[buf->pos++] = (buf->DataBlock>>Shift) & 0x000000FF;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : InitAddRawBitsToBuffer
+ *
+ *  INPUTS        : RAW_BUFFER *buf : Pointer to the buffer instance to be initialized.
+ *                  UINT8 *Buffer   : Byte array that subsequent writes will fill.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Resets a Raw Buffer instance to the empty state and attaches
+ *                  the given array of UINT8s as its storage buffer.
+ *
+ *  SPECIAL NOTES : Only the pointer is stored; Buffer is not copied.
+ *
+ ****************************************************************************/
+void InitAddRawBitsToBuffer ( RAW_BUFFER *buf, UINT8 *Buffer )
+{
+    buf->pos             = 0;       // next byte goes to Buffer[0]
+    buf->DataBlock       = 0;       // accumulator starts empty...
+    buf->byte_bit_offset = 32;      // ...with all 32 bit positions free
+    buf->Buffer          = Buffer;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : AddRawBitsToBuffer
+ *
+ *  INPUTS        : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ *                  UINT32 data     : Bit pattern to be written (right-justified).
+ *                  UINT32 bits     : Number of significant bits of data (0..32).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Appends the low 'bits' bits of data to the bit stream,
+ *                  flushing the 32-bit accumulator to the byte buffer when full.
+ *
+ *  SPECIAL NOTES : data is ORed in unmasked, so bits above 'bits' must be clear.
+ *
+ ****************************************************************************/
+void AddRawBitsToBuffer( RAW_BUFFER *buf, UINT32 data, UINT32 bits )
+{
+	if ( bits == 0 )    // nothing to add (and data<<32 below would be undefined)
+		return;
+	// how many bits should we shift by?
+	buf->byte_bit_offset -= bits;
+	if ( buf->byte_bit_offset < 0 )
+	{
+		// only the left most bits fit in this datablock; at -32 none of them do
+		if ( buf->byte_bit_offset > -32 )
+			buf->DataBlock |= (data >> (-buf->byte_bit_offset));
+		WriteLongToBuffer ( buf );      // output block
+		buf->DataBlock = 0;
+		buf->byte_bit_offset += 32;
+	}
+	// note we may have bits getting shifted off the left side (like in above case)
+	buf->DataBlock |= (data << buf->byte_bit_offset);
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : EndAddRawBitsToBuffer
+ *
+ *  INPUTS        : RAW_BUFFER *buf : Pointer to the buffer instance to be written to.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Flushes the bits still pending in buf->DataBlock to
+ *                  buf->Buffer, a byte at a time, then resets the accumulator.
+ *
+ *  SPECIAL NOTES : A trailing partial byte is written out as a whole byte.
+ *
+ ****************************************************************************/
+void EndAddRawBitsToBuffer ( RAW_BUFFER *buf )
+{
+	UINT8 Shift;
+
+	// One byte, most significant first, per (possibly partial) group of
+	// 8 bits still held in the accumulator
+	for ( Shift=24; buf->byte_bit_offset < 32; Shift-=8, buf->byte_bit_offset+=8 )
+		buf->Buffer[buf->pos++] = (buf->DataBlock>>Shift) & 0xff;
+
+	// Leave the instance empty, ready for further writes
+	buf->DataBlock = 0;
+	buf->byte_bit_offset = 32;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c
new file mode 100644
index 00000000..432164ed
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Tokenize.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+*   Module Title :     Tokenize.C
+*
+*   Description  :     Tokenizing fragments for output by pack video
+*
+****************************************************************************/
+#define STRICT         /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>       // For abs()
+#include "compdll.h"
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static TOKENEXTRA DctValueTokens[DCT_MAX_VALUE*2];
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FillValueTokens
+ *
+ *  INPUTS        :     None.
+ *						
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Fills in the DctValueTokens array used during
+ *                      compression for fast look-up of token and extra-bits
+ *                      information for every value -DCT_MAX_VALUE..DCT_MAX_VALUE-1.
+ *
+ *  SPECIAL NOTES :     Entry [DCT_MAX_VALUE + v] describes coefficient value v.
+ *
+ ****************************************************************************/
+void FillValueTokens ( void )
+{
+	INT32 i;
+
+	for ( i=-DCT_MAX_VALUE; i<DCT_MAX_VALUE; i++ )
+	{
+		UINT32 AbsDataVal = abs ( i );
+		TOKENEXTRA *TokenExtra = DctValueTokens + DCT_MAX_VALUE + i;
+
+		// Values are tokenised as category value and a number of 
+		// additional bits that define the position within the category.
+		if ( i == 0 )
+		{
+			TokenExtra->Token = 0;
+		} 
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY1] )
+		{
+			TokenExtra->Token = AbsDataVal; 
+            TokenExtra->Extra = (i < 0);
+		}
+		// Category 1: Extra = (offset within category << 1) | sign
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY2] )
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY1; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY1]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+		// Category 2: Extra = (offset within category << 1) | sign
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY3] )
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY2; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY2]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+		// Category 3: Extra = (offset within category << 1) | sign
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY4] )
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY3; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY3]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+		// Category 4: Extra = (offset within category << 1) | sign
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY5] )
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY4; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY4]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+		// Category 5: Extra = (offset within category << 1) | sign
+		else if ( AbsDataVal < VP6_DctRangeMinVals[DCT_VAL_CATEGORY6] )
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY5; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY5]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+		// Category 6 (everything larger): same layout, sign in bit 0
+		else 
+		{
+			TokenExtra->Token = DCT_VAL_CATEGORY6; 
+            TokenExtra->Extra = (AbsDataVal - VP6_DctRangeMinVals[DCT_VAL_CATEGORY6]);
+            TokenExtra->Extra <<=1;
+            TokenExtra->Extra |= (i < 0);
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : TokenizeFrag
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *	                INT16 *RawData       : Array of quantized DCT coefficients.
+ *	                UINT32 Plane         : Plane block belongs to (Y=0, UV=1)
+ *	                BLOCK_CONTEXT *Above : Pointer to an above context.
+ *	                BLOCK_CONTEXT *Left  : Pointer to a left context.
+ *						
+ *  OUTPUTS       : Tokens are written via cpi->CoeffTokenPtr; Above/Left->Token updated.
+ *
+ *  RETURNS       : UINT32: cpi->pb.EobOffsetTable[] entry for the last token position.
+ *
+ *  FUNCTION      : Takes a set of quantized DCT coefficients for a block
+ *                  and produces a set of representative tokens. Each token
+ *                  consists of a token identifier and, for most tokens, a 
+ *                  set of 'extra-bits'.
+ *
+ *  SPECIAL NOTES : Also updates the cpi->Frame* statistics and the DC-zero / AC1-EOB run state.
+ *
+ ****************************************************************************/
+UINT32 TokenizeFrag
+(
+	CP_INSTANCE *cpi, 
+	INT16 *RawData, 
+	UINT32 Plane,
+	BLOCK_CONTEXT *Above,
+	BLOCK_CONTEXT *Left
+)
+{
+    INT32 i;
+	UINT32 Token;
+	INT32  Offset;
+	INT32  ZeroCount;
+    INT32  LastNonZeroCoeff=0;          
+    
+    UINT32 token_pos = 0;
+	UINT32 PlaneX = Plane;
+	UINT32 PrevTokenIndex;
+    UINT32 LastTokenNonZero;
+
+	// Find the last non-zero AC position in scan order and count zero / non-zero coeffs
+	for ( i=1; i<64; i++ )
+	{
+        // j is the coefficient index that scan position i maps to
+        int j = cpi->pb.ModifiedScanOrder[i];
+
+		if ( RawData[j] )
+        {
+		    LastNonZeroCoeff = i;
+            cpi->FrameNzCount[j][1]++;
+        }
+		else
+			cpi->FrameNzCount[j][0]++;
+	}
+
+	// Tokenize the DC value
+	if ( RawData[0] )
+	{
+        if ( cpi->CurrentDcZeroRun[PlaneX] > 0 )
+        {
+            // Termination of run of zeros in DC positions
+            cpi->DcZeroRunStartPtr[PlaneX]->Extra = cpi->CurrentDcZeroRun[PlaneX];
+            cpi->CurrentDcZeroRun[PlaneX] = 0;
+        }
+
+        Offset = DCT_MAX_VALUE + RawData[0];
+		
+		Token = DctValueTokens[Offset].Token;
+		cpi->CoeffTokenPtr->Token = Token;
+		cpi->CoeffTokenPtr->Extra = DctValueTokens[Offset].Extra;
+
+        cpi->FrameDcTokenDist2[Plane][Token]++;
+	}
+	else
+	{
+		Token = ZERO_TOKEN;
+		cpi->CoeffTokenPtr->Token = Token;
+        cpi->CoeffTokenPtr->Extra = 0;
+
+        // Check for run of zeros at DC position (Huffman mode)
+        if  ( cpi->CurrentDcZeroRun[PlaneX] == 0 )
+        {
+            //  New run starts
+            cpi->DcZeroRunStartPtr[PlaneX] = cpi->CoeffTokenPtr;
+            cpi->FrameDcTokenDist2[Plane][Token]++;
+        }
+        
+        cpi->CurrentDcZeroRun[PlaneX]++;
+
+        if ( cpi->CurrentDcZeroRun[PlaneX] >= 74/*11+63*/ )
+        {
+            // Maximum run-length is 11+63
+            cpi->DcZeroRunStartPtr[PlaneX]->Extra = cpi->CurrentDcZeroRun[PlaneX];
+            cpi->CurrentDcZeroRun[PlaneX] = 0;
+        }
+	}
+
+    cpi->CoeffTokenPtr->LastTokenL = Left->Token;
+	cpi->CoeffTokenPtr->LastTokenA = Above->Token;
+	cpi->FrameDcTokenDist[Plane][Token]++;
+	PrevTokenIndex = VP6_PrevTokenIndex[Token];
+	cpi->CoeffTokenPtr++; 
+	token_pos++;
+
+	// Update the context
+    LastTokenNonZero = (Token != ZERO_TOKEN);
+	Left->Token  = LastTokenNonZero;
+	Above->Token = LastTokenNonZero;
+
+	// Tokenize the rest of the block 
+	for ( i=1; i<=LastNonZeroCoeff; i++ )
+	{   
+		// Count the zeros before the next non-zero coeff (one exists at LastNonZeroCoeff)
+        UINT32 Band;        
+		ZeroCount = 0;
+		while ( !RawData[cpi->pb.ModifiedScanOrder[i]] )
+		{
+			i++;
+			ZeroCount++;
+
+		}
+		//  Trap the end of a run of EOBs at AC1
+        if ( cpi->CurrentAc1EobRun[PlaneX] > 0 )
+        {
+            // End of run of EOBs at first AC position
+            cpi->Ac1EobRunStartPtr[PlaneX]->Extra = cpi->CurrentAc1EobRun[PlaneX];
+            cpi->CurrentAc1EobRun[PlaneX] = 0;
+        }
+
+        // Code the zero token and zero run length
+		if ( ZeroCount > 0 )
+		{
+            int ZeroBand;
+
+            Band = VP6_CoeffToBand[token_pos];
+			cpi->CoeffTokenPtr->Token = ZERO_TOKEN;
+			cpi->CoeffTokenPtr->Extra = ZeroCount - 1;
+
+			cpi->FrameAcTokenDist [PrevTokenIndex][Plane][Band][ZERO_TOKEN]++;
+            cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][ZERO_TOKEN]++;
+
+			PrevTokenIndex = VP6_PrevTokenIndex[ZERO_TOKEN];
+
+            // ZeroBand = 0:1
+            ZeroBand = (token_pos >= ZRL_BAND2);    
+
+			cpi->FrameZrlDist[ZeroBand][ZeroCount]++;
+			cpi->FrameZeroCount[ZeroBand]++;
+			
+			// Update token_pos 
+			token_pos += ZeroCount;
+
+			// Step on to next token
+			cpi->CoeffTokenPtr++;
+		}
+
+		// Code the non zero value
+		Offset = DCT_MAX_VALUE + RawData[cpi->pb.ModifiedScanOrder[i]];
+		cpi->CoeffTokenPtr->Token = DctValueTokens[Offset].Token;
+		cpi->CoeffTokenPtr->Extra = DctValueTokens[Offset].Extra;
+        Band = VP6_CoeffToBand[token_pos];
+
+		cpi->FrameAcTokenDist [PrevTokenIndex][Plane][Band][cpi->CoeffTokenPtr->Token]++;
+        cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][cpi->CoeffTokenPtr->Token]++;
+		PrevTokenIndex = VP6_PrevTokenIndex [cpi->CoeffTokenPtr->Token];
+
+		cpi->CoeffTokenPtr++; 
+		token_pos++;
+	}
+
+    // Code an EOB unless the last non-zero coeff was at the final scan position
+    if ( i < BLOCK_SIZE  )
+    {
+        UINT32 Band;
+        cpi->CoeffTokenPtr->Token = DCT_EOB_TOKEN;
+        cpi->CoeffTokenPtr->Extra = 0;
+        Band = VP6_CoeffToBand[token_pos];
+        
+        // if EOB at first AC pos
+        if ( token_pos == 1 )
+        {
+            // The start of an EOB run
+            if ( cpi->CurrentAc1EobRun[PlaneX] == 0 )
+            {
+                cpi->Ac1EobRunStartPtr[PlaneX] = cpi->CoeffTokenPtr;
+                cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+            }
+            
+            cpi->CurrentAc1EobRun[PlaneX]++;
+            
+            if ( cpi->CurrentAc1EobRun[PlaneX] >= 74 /*11+63*/ )
+            {
+                cpi->Ac1EobRunStartPtr[PlaneX]->Extra = cpi->CurrentAc1EobRun[PlaneX];
+                cpi->CurrentAc1EobRun[PlaneX] = 0;
+            }
+        }
+        else
+        {
+            cpi->FrameAcTokenDist2[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+        }
+        
+        cpi->FrameAcTokenDist[PrevTokenIndex][Plane][Band][DCT_EOB_TOKEN]++;
+        PrevTokenIndex = VP6_PrevTokenIndex [DCT_EOB_TOKEN];
+        
+        cpi->CoeffTokenPtr++;
+        token_pos++;
+        
+    }
+
+	// Step back from "next free position" to the position of the last token coded
+	token_pos--;
+
+    // Return the EobOffsetTable entry for the position of the last token. 
+	return cpi->pb.EobOffsetTable[token_pos];
+}
+
+/****************************************************************************
+*
+*	Rate Distortion Specific Code...
+*
+****************************************************************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : TokenCost_RD
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *	                UINT8 Token      : Token to be costed.
+ *	                int Band         : Band that the token belongs in (-1 selects DC).
+ *	                UINT8 Plane      : Plane that the token belongs in.
+ *	                UINT8 PrecCase   : Previous token context type (AC only).
+ *						
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Estimated cost of coding this token.
+ *
+ *  FUNCTION      : Looks up the estimated cost of the token in the DC table
+ *                  (Band == -1) or the AC table (any other Band) and adds the
+ *                  token's extra-bit count scaled by 64 (<< 6).
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ***************************************************************************/
+UINT32 TokenCost_RD ( CP_INSTANCE *cpi, UINT8 Token, int Band, UINT8 Plane, UINT8 PrecCase )
+{
+    UINT32 ExtraCost = ExtraBitLengths_VP6[Token] << 6;
+    if ( Band != -1 )
+        return cpi->EstAcTokenCosts[PrecCase][Plane][Band][Token] + ExtraCost;
+    return cpi->EstDcTokenCosts[Plane][Token] + ExtraCost;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : TokenizeFrag_RD
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *	                INT16 *RawData   : Array of quantized DCT coeffs to be tokenized.
+ *	                UINT32 Plane     : Plane that the block belongs to.
+ *	                						
+ *  OUTPUTS       : UINT32 *MbCost   : Estimated cost of tokenizing the block
+ *                                     is ADDED to the value already held here.
+ *
+ *  RETURNS       : UINT8: Position of the last token produced for the block.
+ *
+ *  FUNCTION      : Cut down RD version of the tokenize block function that
+ *					only estimates cost; it does not update the context stuff.
+ *
+ *  SPECIAL NOTES : Costs come from cpi->EstDcTokenCosts/EstAcTokenCosts/EstZrlCosts.
+ *
+ ****************************************************************************/
+UINT8 TokenizeFrag_RD
+(
+	CP_INSTANCE *cpi, 
+	INT16 *RawData, 
+	UINT32 Plane,
+    UINT32 *MbCost
+)
+{
+    UINT32	i;
+	UINT8   Token;
+	INT32	ZeroCount;
+
+    UINT8   TokenPos = 1;	
+    INT32   Band;
+    INT32   PrevTokenCase ;
+    
+
+	// Tokenize the DC value	
+    Token = DctValueTokens[DCT_MAX_VALUE + RawData[0]].Token;
+    *MbCost += cpi->EstDcTokenCosts[Plane][Token] + (ExtraBitLengths_VP6[Token] << 6);    
+	PrevTokenCase =VP6_PrevTokenIndex[Token];    
+
+
+	// Tokenize the rest of the block 
+	for ( i=1; i<BLOCK_SIZE; i++ )
+	{   
+		// Count the zero run; the bound is tested BEFORE the scan order is indexed
+		ZeroCount = 0;
+		while ( (i < BLOCK_SIZE) && !RawData[cpi->pb.ModifiedScanOrder[i]] )
+		{
+			i++;
+			ZeroCount++;
+		}
+
+		// If we have reached the end of the block then code EOB 
+		if ( i == BLOCK_SIZE  )
+		{
+			Token = DCT_EOB_TOKEN;
+            Band  = VP6_CoeffToBand[TokenPos];
+		    *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);             
+            PrevTokenCase =VP6_PrevTokenIndex[Token];    
+			TokenPos++;
+		}
+		else
+		{
+			INT32 Offset = DCT_MAX_VALUE + RawData[cpi->pb.ModifiedScanOrder[i]];
+
+			if ( ZeroCount > 0 )
+			{
+                // Zero run length band, 0:1
+				UINT8 ZBand = (TokenPos >= ZRL_BAND2);
+
+				Token = ZERO_TOKEN;
+                Band  = VP6_CoeffToBand[TokenPos];
+		        *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);             
+                PrevTokenCase =VP6_PrevTokenIndex[Token];
+				TokenPos += ZeroCount;
+
+				// Get estimated cost of zero run bits (based upon previous frame stats)
+				*MbCost += cpi->EstZrlCosts[ZBand][ZeroCount];
+			}
+
+			Token = DctValueTokens[Offset].Token;
+            Band  = VP6_CoeffToBand[TokenPos];
+            *MbCost += cpi->EstAcTokenCosts[PrevTokenCase][Plane][Band][Token] + (ExtraBitLengths_VP6[Token] << 6);             
+            PrevTokenCase =VP6_PrevTokenIndex[Token];
+            TokenPos++;
+		}
+	}
+
+	TokenPos--;
+
+    // Return the position of the last token. 
+    return TokenPos;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c
new file mode 100644
index 00000000..6ee4f44a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/Transform.c
@@ -0,0 +1,361 @@
+/****************************************************************************
+* 
+*   Module Title :     Transform.c
+*
+*   Description  :     DCT transform & inverse transform functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>           // For Abs()
+#include "type_aliases.h"
+#include "codec_common.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SUB8
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Pointer to 8x8 source block.
+ *                  UINT8 *ReconPtr    : Pointer to 8x8 block to be subtracted from FiltPtr.
+ *                  UINT8 *old_ptr1    : NOT USED.
+ *                  UINT8 *new_ptr1    : NOT USED.
+ *                  INT32 SourceStride : Stride of FiltPtr.
+ *                  INT32 ReconStride  : Stride of ReconPtr.
+ *
+ *  OUTPUTS       : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Computes FiltPtr[x] - ReconPtr[x] for every pixel of the
+ *                  two 8x8 blocks and stores the signed differences in
+ *                  DctInputPtr.
+ *
+ *  SPECIAL NOTES : Eight pixels are processed on each of BLOCK_HEIGHT_WIDTH
+ *                  rows. The two inputs advance by their own strides, the
+ *                  output advances by BLOCK_HEIGHT_WIDTH entries per row.
+ *                  Each difference lies in the range -255..255.
+ *
+ ****************************************************************************/
+void SUB8 
+( 
+    UINT8 *FiltPtr,
+    UINT8 *ReconPtr,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1,     /* NOT USED */
+    UINT8 *new_ptr1,     /* NOT USED */
+    INT32 SourceStride,
+    INT32 ReconStride
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Difference for one row of eight pixels; both operands are taken
+        // as int so that the result is signed
+        for ( Col=0; Col<8; Col++ )
+            DctInputPtr[Col] = (INT16)((int)FiltPtr[Col] - (int)ReconPtr[Col]);
+
+        // Step each pointer on to the start of its next row
+        FiltPtr     += SourceStride;
+        ReconPtr    += ReconStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SUB8_128
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Pointer to 8x8 source block.
+ *                  UINT8 *old_ptr1    : NOT USED.
+ *                  UINT8 *new_ptr1    : NOT USED.
+ *                  INT32 SourceStride : Stride of FiltPtr.
+ *
+ *  OUTPUTS       : INT16 *DctInputPtr : Pointer to 8x8 array to hold modified block.
+ *
+ *  RETURNS       : None.
+ *
+ *  FUNCTION      : Subtracts the value 128 from each pixel value in the
+ *                  input block FiltPtr and stores the signed results in
+ *                  DctInputPtr.
+ *
+ *  SPECIAL NOTES : Used when coding a block in INTRA mode to convert the
+ *                  pixel range (0,255) to (-128,127). This reduces the 
+ *                  internal precision required by the DCT transform.
+ *                  Eight pixels are processed on each of BLOCK_HEIGHT_WIDTH
+ *                  rows; the output advances BLOCK_HEIGHT_WIDTH entries per
+ *                  row, independently of SourceStride.
+ *
+ ****************************************************************************/
+void SUB8_128
+( 
+    UINT8 *FiltPtr,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1,    /* NOT USED */
+    UINT8 *new_ptr1,    /* NOT USED */
+    INT32 SourceStride 
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Re-centre one row of eight pixels about zero
+        for ( Col=0; Col<8; Col++ )
+            DctInputPtr[Col] = (INT16)((int)FiltPtr[Col] - 128);
+
+        // Step each pointer on to the start of its next row
+        FiltPtr     += SourceStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SUB8AV2
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Pointer to 8x8 source block.
+ *                  UINT8 *ReconPtr1   : Pointer to first 8x8 reference block.
+ *                  UINT8 *ReconPtr2   : Pointer to second 8x8 reference block.
+ *                  UINT8 *old_ptr1    : NOT USED.
+ *                  UINT8 *new_ptr1    : NOT USED.
+ *                  INT32 SourceStride : Stride of FiltPtr.
+ *                  INT32 ReconStride  : Stride of ReconPtr1 & ReconPtr2.
+ *
+ *  OUTPUTS       : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Subtracts the average of the two reconstruction blocks
+ *                  from the FiltPtr block, pixel by pixel.
+ *
+ *  SPECIAL NOTES : The average is (ReconPtr1[x] + ReconPtr2[x]) / 2, so a
+ *                  half is rounded down.
+ *
+ ****************************************************************************/
+void SUB8AV2
+( 
+    UINT8 *FiltPtr,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT16 *DctInputPtr,
+    UINT8 *old_ptr1,    /* NOT USED */
+    UINT8 *new_ptr1,    /* NOT USED */
+    INT32 SourceStride,
+    INT32 ReconStride 
+)
+{
+    int Row;
+    int Col;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        for ( Col=0; Col<8; Col++ )
+        {
+            // Mean of the two reference pixels, rounded down
+            int Predictor = ((int)ReconPtr1[Col] + (int)ReconPtr2[Col]) / 2;
+
+            DctInputPtr[Col] = (INT16)((int)FiltPtr[Col] - Predictor);
+        }
+
+        // Next row: both reference blocks share ReconStride
+        FiltPtr     += SourceStride;
+        ReconPtr1   += ReconStride;
+        ReconPtr2   += ReconStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : AllZeroDctData
+ *
+ *  INPUTS        : Q_LIST_ENTRY *QuantList : Array of 64 quantized DCT coefficients.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : BOOL: TRUE if all 64 entries are zero, FALSE otherwise.
+ *
+ *  FUNCTION      : Scans QuantList and stops at the first non-zero entry.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+BOOL AllZeroDctData ( Q_LIST_ENTRY * QuantList )
+{
+    Q_LIST_ENTRY *Coeff = QuantList;
+    Q_LIST_ENTRY *End   = QuantList + 64;
+
+    while ( (Coeff < End) && (*Coeff == 0) )
+        Coeff++;
+    return (Coeff == End) ? TRUE : FALSE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : Sub8Filtered
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Pointer to 8x8 source block.
+ *                  UINT8 *ReconPtr    : Pointer to 8x8 block to be subtracted from FiltPtr.
+ *                  INT32 SourceStride : Stride of FiltPtr.
+ *                  INT32 ReconStride  : Stride of ReconPtr.
+ *                  INT32 *Kernel      : Pointer to filter taps to filter source.
+ *
+ *  OUTPUTS       : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Does a pixel-by-pixel subtraction of the two 8x8 blocks
+ *                  and stores the results in DctInputPtr. At any pixel where
+ *                  the absolute difference exceeds 4 a 3x3 filtered source
+ *                  value is tried, and kept only if it is better by more than 4.
+ *
+ *  SPECIAL NOTES : The Kernel has 10 entries: the first 9 are the taps of the
+ *                  3x3 filter, the last is the divisor (must be non-zero). The
+ *                  filter reads one pixel beyond the source block on each side.
+ *
+ ****************************************************************************/
+void Sub8Filtered
+(  
+    UINT8 *FiltPtr, 
+    UINT8 *ReconPtr, 
+    INT16 *DctInputPtr, 
+    INT32 SourceStride, 
+    INT32 ReconStride, 
+    INT32 *Kernel 
+)
+{
+    int i,j;
+	INT32 Tmp;
+	INT32 Diff;
+	UINT8 *SrcPtr;
+
+    // For each row (i) and each pixel (j) within the row...
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+		for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+		{
+			Diff = (INT32)((INT32)FiltPtr[j] - (INT32)ReconPtr[j]);
+			
+            if ( abs( Diff ) > 4 )
+			{
+                // Filter source
+
+                // Top row of filter...
+				SrcPtr = &FiltPtr[j-SourceStride];
+				Tmp  = (UINT32)SrcPtr[-1] * Kernel[0];
+				Tmp += (UINT32)SrcPtr[0] * Kernel[1];
+				Tmp += (UINT32)SrcPtr[1] * Kernel[2];
+				
+                // Middle row of filter...
+                SrcPtr = &FiltPtr[j];
+				Tmp += (UINT32)SrcPtr[-1] * Kernel[3];
+				Tmp +=  (UINT32)SrcPtr[0] * Kernel[4];
+				Tmp += (UINT32)SrcPtr[1] * Kernel[5];
+				
+                // Bottom row of filter...
+                SrcPtr = &FiltPtr[j+SourceStride];
+				Tmp += (UINT32)SrcPtr[-1] * Kernel[6];
+				Tmp += (UINT32)SrcPtr[0] * Kernel[7];
+				Tmp += (UINT32)SrcPtr[1] * Kernel[8];
+				
+                // Normalize filter output...
+                Tmp = Tmp / Kernel[9];
+
+                // Subtract...
+				Tmp = (Tmp - (INT32)ReconPtr[j]);
+
+				// Use the filtered result only if it beats the unfiltered one by more than 4
+                if ( abs(Tmp)+4 < abs(Diff) )
+					DctInputPtr[j] = (INT16)Tmp;
+				else  
+					DctInputPtr[j] = (INT16)Diff;
+			}
+			else
+				DctInputPtr[j] = (INT16)Diff;
+		}
+
+        // Next row...
+        FiltPtr     += SourceStride;
+        ReconPtr    += ReconStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : Sub8_128Filtered
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Pointer to 8x8 source block.
+ *                  INT32 SourceStride : Stride of FiltPtr.
+ *                  INT32 *Kernel      : Pointer to filter taps to filter source.
+ *
+ *  OUTPUTS       : INT16 *DctInputPtr : Pointer to 8x8 array to hold difference.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3x3 filter to the source data and then subtracts
+ *                  128 from each pixel value. The resulting block is stored in
+ *                  DctInputPtr.
+ *
+ *  SPECIAL NOTES : The Kernel has 10 entries: the first 9 are the taps of the
+ *                  3x3 filter, the last is the divisor (must be non-zero). The
+ *                  filter reads one pixel beyond the source block on each side.
+ *
+ ****************************************************************************/
+void Sub8_128Filtered
+(  
+    UINT8 *FiltPtr,
+    INT16 *DctInputPtr,
+    INT32 SourceStride,
+    INT32 *Kernel
+)
+{
+    int   i, j;
+	INT32 Tmp;
+	UINT8 *SrcPtr;
+
+    // For each row (i) and each pixel (j) within the row...
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+		for ( j=0; j<BLOCK_HEIGHT_WIDTH; j++ )
+		{
+            // Filter source
+
+            // Top row of filter...
+			SrcPtr = &FiltPtr[j-SourceStride];
+			Tmp  = (UINT32)SrcPtr[-1] * Kernel[0];
+			Tmp += (UINT32)SrcPtr[0] * Kernel[1];
+			Tmp += (UINT32)SrcPtr[1] * Kernel[2];
+			
+            // Middle row of filter...
+            SrcPtr = &FiltPtr[j];
+			Tmp += (UINT32)SrcPtr[-1] * Kernel[3];
+			Tmp +=  (UINT32)SrcPtr[0] * Kernel[4];
+			Tmp += (UINT32)SrcPtr[1] * Kernel[5];
+			
+            // Bottom row of filter...
+            SrcPtr = &FiltPtr[j+SourceStride];
+			Tmp += (UINT32)SrcPtr[-1] * Kernel[6];
+			Tmp += (UINT32)SrcPtr[0] * Kernel[7];
+			Tmp += (UINT32)SrcPtr[1] * Kernel[8];
+			
+            // Normalize filter output...
+            Tmp = Tmp / Kernel[9];
+
+            // Subtract 128 to re-centre the filtered pixel about zero...
+			DctInputPtr[j] = (INT16)(Tmp - (INT32)128);
+		}
+
+        // Next row...
+        FiltPtr     += SourceStride;
+        DctInputPtr += BLOCK_HEIGHT_WIDTH;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c
new file mode 100644
index 00000000..c663cfef
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encode.c
@@ -0,0 +1,527 @@
+/****************************************************************************
+*
+*   Module Title :     Encode.c
+*
+*   Description  :     Main encode function.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+#include "misc_common.h"
+#include "encodemv.h"
+#include "encodemode.h"
+
+/****************************************************************************
+*  Explicit imports
+****************************************************************************/ 
+extern void PackCodedVideo ( CP_INSTANCE *cpi );
+extern void InitLoopDeringThresholds ( PB_INSTANCE *pbi );
+
+#if defined FULLFRAMEFDCT
+extern void BuildFrameMbs ( CP_INSTANCE *cpi );
+extern void FDCTFrameMbs ( CP_INSTANCE *cpi );
+#endif 
+
+extern const UINT32 VP6_QThreshTable[Q_TABLE_SIZE];
+extern const UINT32 VP6_ZBinTable[Q_TABLE_SIZE];
+extern const UINT32 VP6_RTable[Q_TABLE_SIZE];
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ShannonCost
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     cpi->EstDcTokenCosts, EstAcTokenCosts, EstZrlCosts.
+ *
+ *  RETURNS       :     UINT32: Computed Shannon cost.
+ *
+ *  FUNCTION      :     Computes the Shannon cost of coding the frame based
+ *                      on the observed distribution of tokens for the frame.
+ *
+ *  SPECIAL NOTES :     A context with no observations gets default costs. 
+ *
+ ****************************************************************************/
+UINT32 VP6_ShannonCost ( CP_INSTANCE *cpi )
+{
+	UINT32 Cost = 0;
+	UINT32 i, j;
+	UINT32 Sum;
+	UINT32 Band;
+	UINT32 Plane;
+	UINT32 Prob;
+
+	// First cost the DC tokens...
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		Sum = 0;
+		for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+		{
+			Sum += cpi->FrameDcTokenDist[Plane][i];
+		}
+
+		if ( Sum>0 )
+		{
+			for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+			{
+				Prob = (cpi->FrameDcTokenDist[Plane][i] * 255) / Sum;
+				if ( Prob > 254 )
+					Prob = 254;
+				else if ( Prob == 0 )
+					Prob = 1;
+
+				Cost += (VP6_ProbCost[Prob] * cpi->FrameDcTokenDist[Plane][i])/256;
+				Cost += cpi->FrameDcTokenDist[Plane][i] * ExtraBitLengths_VP6[i];
+
+                // Save individual token costs for use in next frames RD code
+                // Cost in bits x 256.... convert to bits x 64
+                cpi->EstDcTokenCosts[Plane][i] = VP6_ProbCost[Prob] >> 2; 
+				if ( cpi->EstDcTokenCosts[Plane][i] == 0 )
+					cpi->EstDcTokenCosts[Plane][i] = 1;
+			}
+		}
+		// Set defaults for predictive cost tables used in RD code
+        else
+        {
+			for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+			{
+                cpi->EstDcTokenCosts[Plane][i] = 4 << 6;  
+            }
+        }
+	}
+
+	// Then cost the AC tokens...
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		for ( Band=0; Band<VP6_AC_BANDS; Band++ )
+		{
+			for ( j=0; j<PREC_CASES; j++ )
+			{
+				Sum = 0;
+				for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+				{
+					Sum += cpi->FrameAcTokenDist[j][Plane][Band][i];
+				}
+
+				if ( Sum>0 )
+				{
+					for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+					{
+						Prob = (cpi->FrameAcTokenDist[j][Plane][Band][i] * 255) / Sum;
+						if ( Prob > 254 )
+							Prob = 254;
+						else if ( Prob == 0 )
+							Prob = 1;
+
+						Cost += (VP6_ProbCost[Prob] * cpi->FrameAcTokenDist[j][Plane][Band][i])/256;
+						Cost += cpi->FrameAcTokenDist[j][Plane][Band][i] * ExtraBitLengths_VP6[i];
+                    
+                        // Save individual token costs for use in next frames RD code
+                        // Cost in bits x 256.... convert to bits x 64
+                        cpi->EstAcTokenCosts[j][Plane][Band][i] = VP6_ProbCost[Prob] >> 2; 
+						if ( cpi->EstAcTokenCosts[j][Plane][Band][i] == 0 )
+							cpi->EstAcTokenCosts[j][Plane][Band][i] = 1;
+					}
+				}
+				// Set defaults for predictive cost tables used in RD code
+                else
+                {
+			        for ( i=0; i<MAX_ENTROPY_TOKENS; i++ )
+			        {
+                        cpi->EstAcTokenCosts[j][Plane][Band][i] = 4 << 6;  
+                    }
+                }
+			}
+		}
+	}
+
+	// Finally cost the zero run lengths...
+    for ( i=0; i<ZRL_BANDS; i++ )
+	{
+		Sum = 0;
+		for ( j=0; j<64; j++ )
+		{
+			Sum += cpi->FrameZrlDist[i][j];
+		}
+
+		// Now work out Shannon cost approximations for each run length
+		if ( Sum>0 )
+		{
+			for ( j=0; j<64; j++ )
+			{
+				Prob = (cpi->FrameZrlDist[i][j] * 255) / Sum;
+				if ( Prob > 255 )	// NOTE(review): cannot trigger (Prob <= 255); the token loops clamp to 254 - confirm intent
+					Prob = 255;
+				else if ( Prob == 0 )
+					Prob = 1;
+				// Add in to our total cost estimate
+				Cost += (VP6_ProbCost[Prob] * cpi->FrameZrlDist[i][j])/256;
+
+                // Cost in bits x 256.... convert to bits x 64
+				cpi->EstZrlCosts[i][j] = VP6_ProbCost[Prob] >> 2;
+			}
+		}
+		// No runs seen in this band: every run length gets the default cost
+		// (j is 64 after the summing loop, so it must be restarted here)
+		else
+		{
+			for ( j=0; j<64; j++ )
+				cpi->EstZrlCosts[i][j] = 3 << 6;
+		}
+	}
+	return Cost;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     EncodeData
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: Always TRUE (This needs fixing!)
+ *
+ *  FUNCTION      :     Encodes the frame, re-coding at an adjusted Q while the
+ *                      estimated size misses the limits, then packs the result.
+ *
+ *  SPECIAL NOTES :     Applies rate targeting heuristics. 
+ *
+ ****************************************************************************/
+UINT32 EncodeData ( CP_INSTANCE *cpi )
+{                                                       
+    unsigned char *tmp;
+	BOOL	RedoY = FALSE;
+	UINT32	FrameOverShootLimit;
+	UINT32  FrameUnderShootLimit;
+	UINT32  ShannonBits = 0;		// Stays 0 if the re-code loop is left before the frame is costed
+	UINT32  TopIndex;
+	UINT32  BottomIndex;
+	INT32   ModeMvCost = cpi->ModeMvCostEstimate/64;		// Estimated overhead in bits for modes and MVs
+	INT32   QuantizePasses = 0;
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	// Set tolerance values for frame overshoot and undershoot.
+	if ( VP6_GetFrameType(pbi) == BASE_FRAME )
+	{
+		if ( cpi->BufferedMode )
+		{
+			if ( cpi->BufferLevel < cpi->OptimalBufferLevel )
+			{
+				FrameOverShootLimit = cpi->ThisFrameTarget * 10/8;
+				FrameUnderShootLimit = 0;
+			}
+			else
+			{
+				FrameOverShootLimit = cpi->ThisFrameTarget * 14/8;
+				FrameUnderShootLimit = 0;
+			}
+		}
+		// Unbuffered video mode (eg video conferencing)
+		else
+		{
+			FrameOverShootLimit = cpi->ThisFrameTarget * 10/8;
+			FrameUnderShootLimit = 0;
+		}
+
+		// Limit Q range for the adaptive loop.
+		BottomIndex = cpi->Configuration.ActiveWorstQuality;
+		if ( BottomIndex < 20)
+			BottomIndex = 20;
+		TopIndex = cpi->Configuration.ActiveBestQuality;
+		if ( TopIndex > 60 )
+			TopIndex = 60;
+	}
+	else
+	{
+		// Normal streamed video mode
+		if ( cpi->BufferedMode && cpi->pass != 2 )
+		{
+			if ( cpi->BufferLevel < cpi->OptimalBufferLevel )
+			{
+				// Looser frame size constraints for local file playback
+				if ( cpi->EndUsage == USAGE_LOCAL_FILE_PLAYBACK )
+				{
+					if ( cpi->ThisFrameTarget > cpi->PerFrameBandwidth )
+						FrameOverShootLimit = cpi->ThisFrameTarget * 2;
+					else
+						FrameOverShootLimit = cpi->PerFrameBandwidth * 2;
+
+					FrameUnderShootLimit = cpi->ThisFrameTarget * 3/8;
+				}
+				else
+				{
+					if ( cpi->MaxAllowedDatarate > 125 )
+						FrameOverShootLimit = (cpi->ThisFrameTarget * cpi->MaxAllowedDatarate)/100;
+					else 
+						FrameOverShootLimit = cpi->ThisFrameTarget * 125/100;
+
+					FrameUnderShootLimit = cpi->ThisFrameTarget * 3/8;
+				}
+			}
+			else
+			{
+				// Looser frame size constraints for local file playback
+				if ( cpi->EndUsage == USAGE_LOCAL_FILE_PLAYBACK)
+				{
+					if ( cpi->ThisFrameTarget > cpi->PerFrameBandwidth )
+						FrameOverShootLimit = cpi->ThisFrameTarget * 2;
+					else
+						FrameOverShootLimit = cpi->PerFrameBandwidth * 2;
+
+					FrameUnderShootLimit = cpi->ThisFrameTarget * 4/8;
+				}
+				else
+				{
+					if ( cpi->MaxAllowedDatarate > 150 )
+						FrameOverShootLimit = (cpi->ThisFrameTarget * cpi->MaxAllowedDatarate)/100;
+					else 
+						FrameOverShootLimit = cpi->ThisFrameTarget * 150/100;
+
+					FrameUnderShootLimit = cpi->ThisFrameTarget * 5/8;
+				}
+			}
+		}
+
+		// Unbuffered video mode (eg video conferencing)
+		// jbb upped this from 10/8 to 14/8 and shut off lower 
+        //   limit.  This basically eliminated the multiple 
+        //   go round issue?
+        else
+		{
+			FrameOverShootLimit = cpi->ThisFrameTarget * 14/8;
+			FrameUnderShootLimit = cpi->ThisFrameTarget * 0/8;
+		}
+
+		// Limit Q range for the adaptive loop.
+		BottomIndex = cpi->Configuration.ActiveWorstQuality;
+		TopIndex = cpi->Configuration.ActiveBestQuality;
+		if ( TopIndex > 60)
+			TopIndex = 60;
+	}
+
+
+	// Q adjustment loop (Only loops around if our rate targeting heuristic is badly off). 
+	do 
+	{
+        #if defined FULLFRAMEFDCT
+            FDCTFrameMbs ( cpi );
+        #endif
+
+		// Zero down the structures used to count token distributions
+		memset ( cpi->FrameDcTokenDist,  0, sizeof(cpi->FrameDcTokenDist)  );	
+		memset ( cpi->FrameDcTokenDist2, 0, sizeof(cpi->FrameDcTokenDist2) );	
+		memset ( cpi->FrameAcTokenDist,  0, sizeof(cpi->FrameAcTokenDist)  );
+		memset ( cpi->FrameAcTokenDist2, 0, sizeof(cpi->FrameAcTokenDist2) );
+		memset ( cpi->FrameNzCount,      0, sizeof(cpi->FrameNzCount)      );
+	
+		// Zero down run distribution counts
+		memset( cpi->FrameZrlDist, 0, sizeof(cpi->FrameZrlDist) );
+		cpi->FrameZeroCount[0] = 0;
+		cpi->FrameZeroCount[1] = 0;
+
+		// Pack DC tokens and adjust the ones we couldn't predict 2d
+		pbi->CodedBlockIndex = 0;
+
+		// reset our token list
+		cpi->CoeffTokenPtr = cpi->CoeffTokens;
+
+		// Set loop/predictionfilter thresholds based upon Q
+		if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+			InitLoopDeringThresholds( pbi );
+
+#if defined FULLFRAMEFDCT
+        BuildFrameMbs ( cpi );
+#else
+        // Encode frame MB-by-MB
+		EncodeFrameMbs(cpi);
+#endif
+		// Increment the counter on the number of passes through the dct quantize loop
+		QuantizePasses++;
+
+		// Clear MMX state so floating point can work again
+#if defined(_MSC_VER)
+	    ClearSysState();
+#endif
+
+		// If we are in buffered (streaming) mode and have selected fastest speed 
+		// then disallow the re-code loop (the frame is not costed: ShannonBits keeps its initial 0)
+		if ( (cpi->QuickCompress == 2) && (cpi->BufferedMode) )
+			break;
+
+		// Test for severe over-run or under-run conditions. If necessary adjust Q and try again.
+		ShannonBits = VP6_ShannonCost(cpi) + ModeMvCost;
+
+
+		// Are we overshooting and up against the limit of active max Q.
+		if ( (pbi->quantizer->FrameQIndex == cpi->Configuration.ActiveWorstQuality) &&
+			 (cpi->Configuration.ActiveWorstQuality > cpi->Configuration.WorstQuality) &&
+			 (ShannonBits > FrameOverShootLimit) )
+		{
+			INT32 OverSizePercent = ((ShannonBits - FrameOverShootLimit) * 100) / FrameOverShootLimit;
+
+			// If so is there any scope for relaxing it
+			while ( (cpi->Configuration.ActiveWorstQuality > cpi->Configuration.WorstQuality) &&
+				    (OverSizePercent > 0) )
+			{
+
+				cpi->Configuration.ActiveWorstQuality --;
+				BottomIndex = cpi->Configuration.ActiveWorstQuality;
+
+				OverSizePercent -= 6;		// Assume 1 qstep = about 6% on frame size.
+			}
+		}
+
+		// Should we try and recode
+		if ( ((ShannonBits > FrameOverShootLimit) && (pbi->quantizer->FrameQIndex > BottomIndex)) || 
+			 ((ShannonBits < FrameUnderShootLimit) && (pbi->quantizer->FrameQIndex < TopIndex)) )
+		{
+			UINT32 LastQIndex = pbi->quantizer->FrameQIndex;
+			
+			if ( ShannonBits > FrameOverShootLimit )
+			{  
+				// Truncate TmpBottomIndex
+				UINT32 TmpBottomIndex = (pbi->quantizer->FrameQIndex + BottomIndex) >> 1;
+
+				if ( pbi->quantizer->FrameQIndex > 0 )
+					TopIndex = pbi->quantizer->FrameQIndex - 1;
+				else
+					TopIndex = 0;
+
+  				// Tweak the appropriate BpbCorrectionFactor.
+				UpdateBpbCorrectionFactor( cpi, ShannonBits );
+				
+				if ( VP6_GetFrameType(pbi) == BASE_FRAME )
+					RegulateQ(cpi, FrameOverShootLimit );
+				else
+					RegulateQ(cpi, cpi->ThisFrameTarget );
+
+				// Do not allow jumps to be too large and to go out of range.
+				if ( pbi->quantizer->FrameQIndex < TmpBottomIndex )
+					ClampAndUpdateQ ( cpi, (UINT32)TmpBottomIndex );
+				else if ( pbi->quantizer->FrameQIndex > TopIndex )
+					ClampAndUpdateQ ( cpi, (UINT32)TopIndex );
+			}
+			else
+			{
+				// Round TmpTopIndex Up
+				UINT32 TmpTopIndex = (TopIndex + pbi->quantizer->FrameQIndex + 1) >> 1;
+
+				if ( pbi->quantizer->FrameQIndex < (Q_TABLE_SIZE-1) )
+					BottomIndex = pbi->quantizer->FrameQIndex + 1;
+				else
+					BottomIndex = (Q_TABLE_SIZE-1);
+
+  				// Tweak the appropriate BpbCorrectionFactor.
+				UpdateBpbCorrectionFactor( cpi, ShannonBits );
+				RegulateQ(cpi, cpi->ThisFrameTarget );
+
+				// Clamp Q to upper and lower limits
+				if ( pbi->quantizer->FrameQIndex < BottomIndex )
+					ClampAndUpdateQ ( cpi, (UINT32)BottomIndex );
+				else if ( pbi->quantizer->FrameQIndex > TmpTopIndex )
+					ClampAndUpdateQ ( cpi, (UINT32)TmpTopIndex );
+			}
+
+			// If we were able to adjust Q index 
+			// given current constraints, then cycle round again.
+			if ( pbi->quantizer->FrameQIndex != LastQIndex )	
+			{
+				// Loop round and try again at the modified Q
+				RedoY = TRUE;
+			}
+			else
+			{
+				RedoY = FALSE;
+			}
+		}
+		else
+			RedoY = FALSE;
+	}
+    while ( RedoY );
+
+
+	// Optimize the scan order and then repeat dct and tokenize phases
+	if ( ( (cpi->pb.Configuration.Interlaced) || (cpi->AllowScanOrderUpdates) ) && 
+		   (!cpi->ErrorResilliantMode) &&
+		   (cpi->QuickCompress !=2) )
+	{
+		// Work out the optimal scan bands based upon the frame zero counts for this frame
+		PredictScanOrder( cpi );
+
+		// Build the scan order
+		BuildScanOrder( &(cpi->pb), cpi->NewScanOrderBands );
+
+		// Zero down the structures used to count token distributions
+		memset ( cpi->FrameDcTokenDist,  0, sizeof(cpi->FrameDcTokenDist)  );	
+		memset ( cpi->FrameDcTokenDist2, 0, sizeof(cpi->FrameDcTokenDist2) );	
+		memset ( cpi->FrameAcTokenDist,  0, sizeof(cpi->FrameAcTokenDist)  );
+		memset ( cpi->FrameAcTokenDist2, 0, sizeof(cpi->FrameAcTokenDist2) );
+		memset ( cpi->FrameNzCount,      0, sizeof(cpi->FrameNzCount)      );
+	
+		// Zero run distribution counts
+		memset( cpi->FrameZrlDist, 0, sizeof(cpi->FrameZrlDist) );
+		cpi->FrameZeroCount[0] = 0;
+		cpi->FrameZeroCount[1] = 0;
+
+		// Pack DC tokens and adjust the ones we couldn't predict 2d
+		pbi->CodedBlockIndex = 0;
+
+		// reset our token list
+		cpi->CoeffTokenPtr = cpi->CoeffTokens;
+
+		// Set loop/prediction filter thresholds based upon Q
+		if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+			InitLoopDeringThresholds( pbi );
+		
+        // Encode frame MB-by-MB
+#if defined FULLFRAMEFDCT
+        BuildFrameMbs ( cpi );
+#else
+		EncodeFrameMbs(cpi);
+#endif
+
+		// Clear MMX state so floating point can work again
+#if defined(_MSC_VER)
+	    ClearSysState();
+#endif
+	}
+    // Decide whether to drop back to using Huffman entropy coding or not (ShannonBits is 0 if the frame was never costed)
+    if ( cpi->pb.VpProfile == SIMPLE_PROFILE )
+	{
+        if( ShannonBits > 9000*8 )
+            pbi->UseHuffman = TRUE;
+        else 
+            pbi->UseHuffman = FALSE; 
+	}
+
+    // Entropy code the tokens generated & output bits to the bitstream
+    PackCodedVideo(cpi);
+
+    // switch pointers so that this frame recon becomes last frame recon
+    tmp = pbi->LastFrameRecon;
+    pbi->LastFrameRecon = pbi->ThisFrameRecon;
+    pbi->ThisFrameRecon = tmp;
+	
+	// update UMV border 
+	UpdateUMVBorder ( pbi->postproc, pbi->LastFrameRecon );
+	
+	// Update the golden frame buffer.
+	if( (pbi->FrameType == BASE_FRAME) || pbi->RefreshGoldenFrame )
+		memcpy ( pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize ); 
+
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    BuildMVCostEstimates(cpi);
+	BuildModeCostEstimates(cpi);
+
+    // AWG This function returns a UINT32 __NOT__ a BOOL !!
+	return TRUE;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c
new file mode 100644
index 00000000..91b0a642
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodembs.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+*   Module Title :     Encodembs.c 
+*
+*   Description  :     Compressor functions for block order transmittal
+*
+*   AUTHOR       :     Paul Wilkins
+*
+****************************************************************************/
+#define STRICT               /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "misc_common.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "quantize.h"
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictBlock
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                      BLOCK_POSITION bp : Position of block in MB (0-5)
+ *
+ *  OUTPUTS       :     cpi->DCTDataBuffer receives the prediction error.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Forms the prediction error for an 8x8 block given the
+ *                      coding mode and other data stored at the MB level.
+ *
+ *  SPECIAL NOTES :     Modes matching none of the cases leave the buffer alone. 
+ *
+ ****************************************************************************/
+void PredictBlock ( CP_INSTANCE *cpi, BLOCK_POSITION bp)
+{
+    PB_INSTANCE *pbi   = &cpi->pb;
+    INT32  ReconStride = pbi->mbi.blockDxInfo[bp].CurrentReconStride;
+    INT32  SrcStride   = pbi->mbi.blockDxInfo[bp].CurrentSourceStride;
+    UINT32 ReconOffset = pbi->mbi.blockDxInfo[bp].thisRecon;
+    UINT32 SrcOffset   = pbi->mbi.blockDxInfo[bp].Source;
+
+    // Motion compensated modes: build the predictor into the DCT buffer, then subtract
+    if ( VP6_ModeUsesMC[pbi->mbi.Mode] )
+    {
+        VP6_PredictFilteredBlock ( pbi, cpi->DCTDataBuffer, bp );
+        SubtractBlock ( &cpi->yuv1ptr[SrcOffset], cpi->DCTDataBuffer, SrcStride );
+        return;
+    }
+    // The remaining modes take the source against a co-located reference (or 128)
+    if ( pbi->mbi.Mode == CODE_INTER_NO_MV )
+    {
+        Sub8 ( &cpi->yuv1ptr[SrcOffset], &pbi->LastFrameRecon[ReconOffset], cpi->DCTDataBuffer, 0, 0, SrcStride, ReconStride );
+    }
+    else if ( pbi->mbi.Mode == CODE_USING_GOLDEN )
+    {
+        Sub8 ( &cpi->yuv1ptr[SrcOffset], &pbi->GoldenFrame[ReconOffset], cpi->DCTDataBuffer, 0, 0, SrcStride, ReconStride );
+    }
+    else if ( pbi->mbi.Mode == CODE_INTRA )
+    {
+        Sub8_128 ( &cpi->yuv1ptr[SrcOffset], cpi->DCTDataBuffer, 0, 0, SrcStride );
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictDCE
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *	                    BLOCK_POSITION bp    : Position of block in MB (0-5)
+ *	                    (The last DC values and the above/left contexts are not
+ *	                    parameters: they are read from pbi->mbi.blockDxInfo[bp],
+ *	                    the last DC array being indexed by prediction frame.)
+ *	
+ *  OUTPUTS       :     cpi->DCT_codes[0] has the scaled predictor subtracted.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Computes a DC predictor for the block based on two
+ *                      contexts, one above and one to the left.
+ *
+ *  SPECIAL NOTES :     A context is only used if its Frame matches the mode's. 
+ *
+ ****************************************************************************/
+void PredictDCE
+( 
+	CP_INSTANCE *cpi,
+	BLOCK_POSITION bp
+)
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+	UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode];	// Prediction frame looked up from the MB coding mode
+
+	Q_LIST_ENTRY *LastDC = pbi->mbi.blockDxInfo[bp].LastDc;
+	BLOCK_CONTEXT *Above = pbi->mbi.blockDxInfo[bp].Above;
+	BLOCK_CONTEXT *Left = pbi->mbi.blockDxInfo[bp].Left;
+
+	INT32 Avg;
+
+	Avg = LastDC[Frame];	// Fallback: used when neither neighbour matches Frame
+
+	if(Frame == Left->Frame) 
+	{
+		Avg = Left->Dc;
+	}
+	if(Frame == Above->Frame) 
+	{
+		Avg = Above->Dc;
+        if(Frame == Left->Frame)	// Both neighbours match: average the two
+        {
+            #define HIGHBITDUPPED(X) (((signed short) X)  >> 15)
+            Avg += Left->Dc;
+            Avg += (HIGHBITDUPPED(Avg)&1);	// +1 when Avg is negative as a 16-bit value, so the halving rounds towards zero (assumes arithmetic right shift)
+			Avg >>= 1;
+
+        }
+	}
+
+//Jim says that y,u,v all use the same quantizer so we probably do not need to have a separate dequant ptr
+	// Take the predictor, scaled by dequantPtr[0], out of the DC coefficient
+	cpi->DCT_codes[0] -= ((Avg * pbi->mbi.blockDxInfo[bp].dequantPtr[0]));
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     EncodeMacroBlock
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                      UINT32 MBrow      : MB row.
+ *                      UINT32 MBcol      : MB column.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Encodes a single macro-block by coding each of
+ *                      its six constituent blocks in turn. Every block is
+ *                      predicted, transformed, quantized and tokenized, and
+ *                      then reconstructed again.
+ *
+ *  SPECIAL NOTES :     MBrow and MBcol are used with BORDER_MBS subtracted
+ *                      when forming fragment indices. For an interlaced MB
+ *                      the Y block strides and the offsets of blocks 2 & 3
+ *                      are modified for the duration of the call and
+ *                      restored before returning. 
+ *
+ ****************************************************************************/
+void EncodeMacroBlock (	CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol )
+{
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 Block;
+    UINT32 EobIndex;
+    UINT32 BlockFrag[6];
+    UINT32 FirstYFrag;
+
+    // Fragment index of Y block 0 (two fragment rows and columns per MB)
+    FirstYFrag = 2*(MBrow-BORDER_MBS) * pbi->HFragments + 2*(MBcol-BORDER_MBS);
+
+    pbi->mbi.Interlaced = pbi->MBInterlaced[MBOffset(MBrow,MBcol)];
+
+    // Interlaced MB: re-aim the four Y blocks for the duration of this call.
+    // The matching block at the bottom of the function undoes every change.
+    if ( pbi->mbi.Interlaced == 1 )
+    {
+        // Y blocks step two lines at a time in both recon and source
+        for ( Block=0; Block<4; Block++ )
+        {
+            pbi->mbi.blockDxInfo[Block].CurrentReconStride  = pbi->Configuration.YStride * 2;
+            pbi->mbi.blockDxInfo[Block].CurrentSourceStride = pbi->Configuration.VideoFrameWidth * 2;
+        }
+
+        // Blocks 2 & 3 start seven lines further up than usual
+        for ( Block=2; Block<4; Block++ )
+        {
+            pbi->mbi.blockDxInfo[Block].thisRecon -= (pbi->Configuration.YStride * 7);
+            pbi->mbi.blockDxInfo[Block].Source    -= (pbi->Configuration.VideoFrameWidth * 7);
+        }
+    }
+
+    // Fragment indices of the six blocks.
+    // note: these could move into the blockDxInfo struct and be incremented in
+    // the MB loop instead of being rebuilt with multiplies for every MB.
+    BlockFrag[0] = FirstYFrag;
+    BlockFrag[1] = FirstYFrag+1;
+    BlockFrag[2] = FirstYFrag+pbi->HFragments;
+    BlockFrag[3] = FirstYFrag+pbi->HFragments+1;
+    BlockFrag[4] = pbi->YPlaneFragments + (MBrow-BORDER_MBS) * (pbi->HFragments / 2) + MBcol-BORDER_MBS;
+    BlockFrag[5] = pbi->YPlaneFragments + pbi->UVPlaneFragments + (MBrow-BORDER_MBS) * ( pbi->HFragments / 2 ) + MBcol-BORDER_MBS;
+
+    // Fetch mode and motion vectors from the fragment info. Mode is rewritten
+    // on every pass, so it is left holding the value read for block 5.
+    pbi->mbi.Mode = -1;
+    for ( Block=0; Block<6; Block++ )
+    {
+        pbi->mbi.Mode        = pbi->FragInfo[BlockFrag[Block]].FragCodingMode;
+        pbi->mbi.Mv[Block].x = pbi->FragInfo[BlockFrag[Block]].MVectorX;
+        pbi->mbi.Mv[Block].y = pbi->FragInfo[BlockFrag[Block]].MVectorY;
+    }
+
+    // Code the blocks in index order (the caller sets blocks 0-3 up from the
+    // Y plane, block 4 from U and block 5 from V).
+    for ( Block=0; Block<6; Block++ )
+    {
+        // Build a block predictor, subtract from source to get prediction error
+        PredictBlock ( cpi, Block );
+
+        // Forward DCT of the error signal gives the transform coefficients
+        fdct_short ( cpi->DCTDataBuffer, cpi->DCT_codes );
+
+        // Predict the DCT DC value from those in surrounding blocks
+        PredictDCE ( cpi, Block );
+
+        // Quantize the resulting DCT coefficients at prevailing Q
+        VP6_quantize ( pbi->quantizer, cpi->DCT_codes, pbi->mbi.blockDxInfo[Block].coeffsPtr, (UINT8)Block );
+
+        // Tokenize the quantized coefficients; the value returned picks the
+        // inverse transform used below.
+        EobIndex = (UINT8)TokenizeFrag ( cpi,
+                                         pbi->mbi.blockDxInfo[Block].coeffsPtr,
+                                         pbi->mbi.blockDxInfo[Block].Plane,
+                                         pbi->mbi.blockDxInfo[Block].Above,
+                                         pbi->mbi.blockDxInfo[Block].Left );
+
+        // Produce reconstructed block so encoder has __exactly__ the same
+        // data for last frame reconstruction as the decoder
+
+        // Re-form the DC value from the prediction
+        VP6_PredictDC ( pbi, Block );
+
+        // Invert the transform to re-create the prediction error
+        pbi->idct[EobIndex]( pbi->mbi.blockDxInfo[Block].coeffsPtr,
+                             pbi->mbi.blockDxInfo[Block].dequantPtr,
+                             pbi->ReconDataBuffer[Block] );
+
+        // Add prediction error to predictor to re-create block as it appears at decoder
+        VP6_ReconstructBlock ( pbi, Block );
+
+        // DEBUG Code: Store prediction block in Post-processing buffer
+        //PredictBlockToPostProcessBuffer ( pbi, Block );
+
+        // Update the context info for the next block
+        pbi->CodedBlockIndex++;
+        VP6_UpdateContextA ( pbi, pbi->mbi.blockDxInfo[Block].Above, Block );
+        VP6_UpdateContext  ( pbi, pbi->mbi.blockDxInfo[Block].Left,  Block );
+    }
+
+    // Back to the non-interlaced layout
+    if ( pbi->mbi.Interlaced == 1 )
+    {
+        // Single line stride again for the Y blocks
+        for ( Block=0; Block<4; Block++ )
+        {
+            pbi->mbi.blockDxInfo[Block].CurrentReconStride  = pbi->Configuration.YStride;
+            pbi->mbi.blockDxInfo[Block].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+        }
+
+        // Blocks 2 & 3 return to their normal start lines
+        for ( Block=2; Block<4; Block++ )
+        {
+            pbi->mbi.blockDxInfo[Block].thisRecon += (pbi->Configuration.YStride * 7);
+            pbi->mbi.blockDxInfo[Block].Source    += (pbi->Configuration.VideoFrameWidth * 7);
+        }
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     EncodeFrameMbs
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Encodes a frame by encoding each of its constituent
+ *                      macro-blocks in turn.
+ *
+ *  SPECIAL NOTES :     Also times the pass and updates cpi->avgEncodeTime. 
+ *
+ ****************************************************************************/
+void EncodeFrameMbs ( CP_INSTANCE *cpi )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+
+	unsigned int duration;
+	unsigned int starttsc;
+    unsigned int endtsc;
+	
+    // Record start time
+    VP6_readTSC ( &starttsc );
+
+    // Reset Dc zero & Ac EOB run counters
+    cpi->CurrentDcZeroRun[0]  = 0;
+    cpi->CurrentDcZeroRun[1]  = 0;
+	cpi->DcZeroRunStartPtr[0] = NULL;
+	cpi->DcZeroRunStartPtr[1] = NULL;
+    cpi->CurrentAc1EobRun[0]  = 0;
+    cpi->CurrentAc1EobRun[1]  = 0;
+    cpi->Ac1EobRunStartPtr[0] = NULL;
+    cpi->Ac1EobRunStartPtr[1] = NULL;
+
+	if ( cpi->pb.FrameType == BASE_FRAME )
+	{
+        // Initialise probability distributions with baseline default values
+		memcpy ( cpi->pb.IsMvShortProb,    DefaultIsShortProbs,      sizeof(cpi->pb.IsMvShortProb) );
+		memcpy ( cpi->pb.MvShortProbs,     DefaultMvShortProbs,      sizeof(cpi->pb.MvShortProbs) );
+		memcpy ( cpi->pb.MvSignProbs,      DefaultSignProbs,         sizeof(cpi->pb.MvSignProbs) );
+		memcpy ( cpi->pb.MvSizeProbs,      DefaultMvLongProbs,       sizeof(cpi->pb.MvSizeProbs) );
+		memcpy ( cpi->pb.probXmitted,      VP6_BaselineXmittedProbs, sizeof(cpi->pb.probXmitted) );
+		memset ( cpi->pb.MBModeProb,       128,                      sizeof(cpi->pb.MBModeProb) );
+		memset ( cpi->pb.BModeProb,        128,                      sizeof(cpi->pb.BModeProb) );
+		memset ( cpi->pb.probModeSame,     128,                      sizeof(cpi->pb.probModeSame) );
+		memset ( cpi->pb.probMode,         128,                      sizeof(cpi->pb.probMode) );
+		memset ( cpi->pb.predictionMode,   1,                        sizeof(char)*cpi->pb.MacroBlocks );
+
+        memset ( cpi->MBModeCostNoNearest, 0,                        sizeof(cpi->MBModeCostNoNearest) );
+		memset ( cpi->MBModeCostNoNear,    0,                        sizeof(cpi->MBModeCostNoNear) );
+		memset ( cpi->MBModeCostBoth,      0,                        sizeof(cpi->MBModeCostBoth) );
+		memset ( cpi->BModeCost,           0,                        sizeof(cpi->BModeCost) );
+	}
+	else
+	{
+	    cpi->pb.LastMode = CODE_INTER_NO_MV;  
+	}
+
+	// since we are on a new frame reset the above contexts 
+	VP6_ResetAboveContext( &cpi->pb );
+
+    {
+	    UINT32 MBrow;
+	    UINT32 MBRows = cpi->pb.MBRows; 
+	    UINT32 MBCols = cpi->pb.MBCols;
+
+        MBCols -= BORDER_MBS;	// Loop limits: both loops start at BORDER_MBS and stop
+        MBRows -= BORDER_MBS;	// BORDER_MBS short of the full MB counts
+
+        // AWG Code Added: Initialize strides for source & recon
+    	pbi->mbi.blockDxInfo[0].CurrentReconStride =
+	    pbi->mbi.blockDxInfo[1].CurrentReconStride =
+	    pbi->mbi.blockDxInfo[2].CurrentReconStride =
+	    pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+
+	    pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+	    pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+	    pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+	    pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+        // AWG End Added Code
+
+        // for each row of macroblocks 
+	    MBrow=BORDER_MBS;
+        do	// NOTE: do/while, so the body always runs for at least one row
+	    {
+            MACROBLOCK_INFO *mbi = &cpi->pb.mbi;
+            UINT32 MBcol;
+
+		    VP6_ResetLeftContext(&cpi->pb);
+
+		    // Point every block at the first MB of the row: above contexts, recon offsets, source offsets
+
+	        mbi->blockDxInfo[0].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+	        mbi->blockDxInfo[1].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+	        mbi->blockDxInfo[2].Above = &pbi->fc.AboveY[BORDER_MBS*2];	// Same above entry as block 0
+	        mbi->blockDxInfo[3].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];	// Same above entry as block 1
+	        mbi->blockDxInfo[4].Above = &pbi->fc.AboveU[BORDER_MBS];
+	        mbi->blockDxInfo[5].Above = &pbi->fc.AboveV[BORDER_MBS];
+
+
+	        mbi->blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + ((MBrow * pbi->Configuration.YStride) << 4) + (BORDER_MBS * 16);
+	        mbi->blockDxInfo[1].thisRecon = mbi->blockDxInfo[0].thisRecon + 8;
+	        mbi->blockDxInfo[2].thisRecon = mbi->blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3);
+	        mbi->blockDxInfo[3].thisRecon = mbi->blockDxInfo[1].thisRecon + (pbi->Configuration.YStride << 3);
+	        mbi->blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+	        mbi->blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+
+
+            mbi->blockDxInfo[0].Source = pbi->YDataOffset + ((MBrow * 16) - UMV_BORDER) * pbi->Configuration.VideoFrameWidth;
+            mbi->blockDxInfo[1].Source = mbi->blockDxInfo[0].Source + 8;
+	        mbi->blockDxInfo[2].Source = mbi->blockDxInfo[0].Source + (pbi->Configuration.VideoFrameWidth << 3);
+	        mbi->blockDxInfo[3].Source = mbi->blockDxInfo[1].Source + (pbi->Configuration.VideoFrameWidth << 3);
+	        mbi->blockDxInfo[4].Source = pbi->UDataOffset + ((MBrow * 8) - (UMV_BORDER>>1)) * (pbi->Configuration.VideoFrameWidth/2);
+	        mbi->blockDxInfo[5].Source = pbi->VDataOffset + ((MBrow * 8) - (UMV_BORDER>>1)) * (pbi->Configuration.VideoFrameWidth/2);
+
+            MBcol=BORDER_MBS;
+            do	// NOTE: do/while, so at least one MB is encoded per row
+            {
+
+			    // Encode the macroblock
+			    EncodeMacroBlock(cpi, MBrow, MBcol);   
+
+
+	            mbi->blockDxInfo[0].Above += 2;	// Step everything on to the next MB in the row
+	            mbi->blockDxInfo[1].Above += 2;
+	            mbi->blockDxInfo[2].Above += 2;
+	            mbi->blockDxInfo[3].Above += 2;
+	            mbi->blockDxInfo[4].Above += 1;
+	            mbi->blockDxInfo[5].Above += 1;
+
+                mbi->blockDxInfo[0].thisRecon += 16;
+                mbi->blockDxInfo[1].thisRecon += 16;
+                mbi->blockDxInfo[2].thisRecon += 16;
+                mbi->blockDxInfo[3].thisRecon += 16;
+                mbi->blockDxInfo[4].thisRecon += 8;
+                mbi->blockDxInfo[5].thisRecon += 8;
+
+                mbi->blockDxInfo[0].Source += 16;
+                mbi->blockDxInfo[1].Source += 16;
+                mbi->blockDxInfo[2].Source += 16;
+                mbi->blockDxInfo[3].Source += 16;
+                mbi->blockDxInfo[4].Source += 8;
+                mbi->blockDxInfo[5].Source += 8;
+
+		    } while(++MBcol < MBCols);
+
+
+	    } while(++MBrow < MBRows);
+    }
+
+
+    // Terminate current DC run of zeros or AC run of EOB
+    if ( cpi->CurrentDcZeroRun[0] > 0 )
+    {
+        cpi->DcZeroRunStartPtr[0]->Extra = cpi->CurrentDcZeroRun[0];
+        cpi->CurrentDcZeroRun[0] = 0;
+    }
+    if ( cpi->CurrentDcZeroRun[1] > 0 )
+    {
+        cpi->DcZeroRunStartPtr[1]->Extra = cpi->CurrentDcZeroRun[1];
+        cpi->CurrentDcZeroRun[1] = 0;
+    }
+    if ( cpi->CurrentAc1EobRun[0] > 0 )
+    {
+        cpi->Ac1EobRunStartPtr[0]->Extra = cpi->CurrentAc1EobRun[0];
+        cpi->CurrentAc1EobRun[0] = 0;
+    }
+    if ( cpi->CurrentAc1EobRun[1] > 0 )
+    {
+        cpi->Ac1EobRunStartPtr[1]->Extra = cpi->CurrentAc1EobRun[1];
+        cpi->CurrentAc1EobRun[1] = 0;
+    }
+
+    // Record end time and compute duration
+    VP6_readTSC ( &endtsc );
+	duration = (endtsc - starttsc)/cpi->pb.ProcessorFrequency;	// NOTE(review): presumes ProcessorFrequency is non-zero - confirm
+
+	if( cpi->avgEncodeTime==0 )	// 0 is treated as "no average yet"
+		cpi->avgEncodeTime = duration;
+	else
+		cpi->avgEncodeTime = ( 7 * cpi->avgEncodeTime + duration ) >> 3;	// Running average, 7/8 history + 1/8 this frame
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c
new file mode 100644
index 00000000..0dc81031
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.c
@@ -0,0 +1,764 @@
+/****************************************************************************
+*        
+*   Module Title :	   encodemode.c
+*
+*   Description  :     Functions for encoding modes and motion vectors.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+ 
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>           // For abs()
+#include "compdll.h"
+#include "boolhuff.h"
+#include "decodemode.h"
+#include "encodemv.h"
+#include "decodemv.h"
+
+/****************************************************************************
+*  Explicit Imports
+****************************************************************************/        
+extern void AddBitsToBuffer( BOOL_CODER *bc, UINT32 data, UINT32 bits );
+
+/****************************************************************************
+*  Module statics.
+****************************************************************************/        
+static const HNODE CodingMode[9] =	// mode decision tree stepped through by encodeMode / encodeModeTest
+{
+	{ // 0: root; first field 0 => second field is the index of the next node
+		{	0,	1	},   
+		{   0,	2	},
+	},
+	{ // 1: reached from node 0 on a 0 decision
+		{	0,	3	},
+		{   0,	4	},
+	},
+	{ // 2: reached from node 0 on a 1 decision
+		{	0,	5	},
+		{   0,	6	},
+	},
+	{ // 3: first field 1 => second field is a coding mode (leaf)
+		{	1,	CODE_INTER_NO_MV	},
+		{   1,	CODE_INTER_PLUS_MV	},
+	},
+	{ // 4: leaf pair
+		{	1,	CODE_INTER_NEAREST_MV	},
+		{   1,	CODE_INTER_NEAR_MV	},
+	},
+	{ // 5: leaf pair
+		{	1,	CODE_INTRA	},
+		{   1,	CODE_INTER_FOURMV	},
+	},
+	{ // 6: interior node leading to the golden-frame modes
+		{	0,	7	},
+		{   0,	8	},
+	},
+	{ // 7: leaf pair
+		{	1,	CODE_USING_GOLDEN	},
+		{   1,	CODE_GOLDEN_MV	},
+	},
+	{ // 8: leaf pair
+		{	1,	CODE_GOLD_NEAREST_MV},
+		{   1,	CODE_GOLD_NEAR_MV	},
+	},
+};
+
+// NOTE: 
+// ModeCodeArray holds the branch decisions needed to walk the binary
+// decision tree used to code a coding mode (the CodingMode table above;
+// the form of the tree is documented in decodemode.c). Each row is indexed
+// by the mode being coded (encodeMode reads ModeCodeArray[mode][i]) and each
+// column gives the successive 0/1 decision to take, starting at the root.
+// An entry whose value is 9 indicates that we have reached a leaf node.
+// Only the first three or four columns of any row are real decisions.
+static const UINT32 ModeCodeArray[MAX_MODES][7] =
+{
+	0, 0, 0, 9, 9, 9, 9,   // CODE_INTER_NO_MV		
+	1, 0, 0, 9, 9, 9, 9,   // CODE_INTRA				
+	0, 0, 1, 9, 9, 9, 9,   // CODE_INTER_PLUS_MV		
+    0, 1, 0, 9, 9, 9, 9,   // CODE_INTER_NEAREST_MV	
+	0, 1, 1, 9, 9, 9, 9,   // CODE_INTER_NEAR_MV		
+	1, 1, 0, 0, 9, 9, 9,   // CODE_USING_GOLDEN		
+	1, 1, 0, 1, 9, 9, 9,   // CODE_GOLDEN_MV			
+	1, 0, 1, 9, 9, 9, 9,   // CODE_INTER_FOURMV		
+    1, 1, 1, 0, 9, 9, 9,   // CODE_GOLD_NEAREST_MV	
+	1, 1, 1, 1, 9, 9, 9    // CODE_GOLD_NEAR_MV		
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeBlockMode
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      CODING_MODE mode : Block mode to be written.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Writes a block's coding mode as a fixed 2-bit code.
+ *
+ *  SPECIAL NOTES :     Any mode other than PLUS_MV/NEAREST_MV/NEAR_MV is sent as 0.
+ *
+ ****************************************************************************/
+void encodeBlockMode ( CP_INSTANCE *cpi, CODING_MODE mode )
+{
+	UINT32 code = 0;	// CODE_INTER_NO_MV, and any mode not listed below
+
+#if defined MEASURE_SECTION_COSTS
+	ActiveSection = MODE_SECTION;	// stats to measure section costs
+#endif
+
+	if ( mode == CODE_INTER_PLUS_MV )
+		code = 1;
+	else if ( mode == CODE_INTER_NEAREST_MV )
+		code = 2;
+	else if ( mode == CODE_INTER_NEAR_MV )
+		code = 3;
+
+	// Each block mode occupies exactly 2 bits
+	AddBitsToBuffer ( &cpi->bc, code, 2 );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeMode
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                      CODING_MODE lastmode : Mode of the last coded macroblock.
+ *                      CODING_MODE mode     : Mode to be written.
+ *                      UINT32 type          : MODE_TYPE (all modes available, no nearest,
+ *                                             no near macroblock)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Writes a macroblock's coding mode to the bitstream:
+ *                      -- A first decision says whether mode==lastmode; it is
+ *                         coded with the probability probModeSame[type][lastmode].
+ *                      -- If the mode differs, the 0/1 decisions listed in
+ *                         ModeCodeArray[mode] are coded one after another, up
+ *                         to the 9 terminator. Each decision uses the probability
+ *                         probMode[type][lastmode][node], where node is the
+ *                         current position in the CodingMode tree (root first),
+ *                         advanced left on a 0 and right on a 1.
+ *
+ *  SPECIAL NOTES :     Uses VP6_EncodeBool to encode the bits to the bitstream.
+ *
+ ****************************************************************************/
+void encodeMode ( CP_INSTANCE *cpi, CODING_MODE lastmode, CODING_MODE mode, UINT32 type )
+{
+	const UINT32 *path;
+	UINT8 node = 0;
+	UINT8 step;
+
+#if defined MEASURE_SECTION_COSTS
+	ActiveSection = MODE_SECTION;	// stats to measure section costs
+#endif
+
+	// Mode repeats: the "same as last" decision is all that is sent
+	if ( mode == lastmode )
+	{
+		VP6_EncodeBool ( &cpi->bc, 1, cpi->pb.probModeSame[type][lastmode] );
+		return;
+	}
+
+	// Mode changed: send "not same", then each decision on the way to mode's leaf
+	VP6_EncodeBool ( &cpi->bc, 0, cpi->pb.probModeSame[type][lastmode] );
+
+	path = ModeCodeArray[mode];
+	for ( step = 0; path[step] != 9; step++ )
+	{
+		UINT32 bit  = path[step];
+		UINT8  prob = cpi->pb.probMode[type][lastmode][node];
+
+		VP6_EncodeBool ( &cpi->bc, bit, (int)prob );
+		if ( bit == 0 )
+			node = CodingMode[node].left.value;
+		else
+			node = CodingMode[node].right.value;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeModeTest
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                      CODING_MODE lastmode : Mode of the last coded macroblock.
+ *                      CODING_MODE mode     : Mode whose cost is being measured.
+ *                      UINT32 type          : MODE_TYPE (all modes available, no nearest,
+ *                                             no near macroblock)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Makes exactly the same coding decisions as encodeMode
+ *                      above, but passes them to VP6_EncodeBool2 instead of
+ *                      VP6_EncodeBool. Callers read the resulting estimate
+ *                      from cpi->bc.BitCounter (see BuildModeCostEstimates).
+ *
+ *  SPECIAL NOTES :     Must be kept in step with encodeMode. No section-cost
+ *                      statistics are recorded here.
+ *
+ ****************************************************************************/
+void encodeModeTest ( CP_INSTANCE *cpi, CODING_MODE lastmode, CODING_MODE mode, UINT32 type )
+{
+	const UINT32 *path;
+	UINT8 node = 0;
+	UINT8 step;
+
+	// Mode repeats: only the "same as last" decision is costed
+	if ( mode == lastmode )
+	{
+		VP6_EncodeBool2 ( &cpi->bc, 1, cpi->pb.probModeSame[type][lastmode] );
+		return;
+	}
+
+	// Mode changed: cost "not same", then each decision on the way to mode's leaf
+	VP6_EncodeBool2 ( &cpi->bc, 0, cpi->pb.probModeSame[type][lastmode] );
+
+	path = ModeCodeArray[mode];
+	for ( step = 0; path[step] != 9; step++ )
+	{
+		UINT32 bit  = path[step];
+		UINT8  prob = cpi->pb.probMode[type][lastmode][node];
+		VP6_EncodeBool2 ( &cpi->bc, bit, (int)prob );
+		if ( bit == 0 )
+			node = CodingMode[node].left.value;
+		else
+			node = CodingMode[node].right.value;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : encodeModeDiff
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  int diff         : Probability difference value to encode.
+ *						
+ *  OUTPUTS       : None.    
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Takes a differential probability value in the range 
+ *                  -256 to +256 in steps of 4 and encodes it using a fixed
+ *                  tree with hard-coded probabilities.
+ *
+ *  SPECIAL NOTES : The hard coded probabilities for the difference tree
+ *                  were calculated by taking the average number of times a 
+ *                  branch was taken on some sample material i.e. 
+ *                  (bond, bike, beautifulmind). Mirrored by estimateModeDiffCost.
+ *
+ ****************************************************************************/
+void encodeModeDiff ( CP_INSTANCE *cpi, int diff )
+{
+	if ( diff==0 )
+	{
+		// zero difference: a single decision and we are done
+		VP6_EncodeBool ( &cpi->bc, 0, 205 );
+	}
+	else
+	{
+		// non-zero difference
+		VP6_EncodeBool ( &cpi->bc, 1, 205 );
+
+		// transmit sign of difference (1 = negative)
+		VP6_EncodeBool ( &cpi->bc, diff<0, 128 );
+
+		// the rest of the tree codes the magnitude only
+		diff = abs(diff);
+
+		if ( diff<12 )
+		{
+			VP6_EncodeBool ( &cpi->bc, 0, 171 );
+			VP6_EncodeBool ( &cpi->bc, diff==4, 83 );	// 1 => 4, 0 => the other value below 12 (8 for multiples of 4)
+		}
+		else
+		{
+			VP6_EncodeBool ( &cpi->bc, 1, 171 );
+
+			if ( diff<28 ) 
+			{
+				VP6_EncodeBool ( &cpi->bc, 0, 199 );
+				VP6_EncodeBool ( &cpi->bc, diff==12, 140 );	// stop at the first 1: 12, then 16, then 20
+				if ( diff>12 ) 
+				{
+					VP6_EncodeBool ( &cpi->bc, diff==16, 125 );
+					if ( diff>16 )
+						VP6_EncodeBool ( &cpi->bc, diff==20, 104 );	// 0 here => remaining value below 28 (24 for multiples of 4)
+				}
+			}
+			else 
+			{
+				VP6_EncodeBool ( &cpi->bc, 1, 199 );
+				AddBitsToBuffer ( &cpi->bc, diff>>2, 7 );	// escape: magnitude/4 sent as 7 raw bits
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : estimateModeDiffCost
+ *
+ *  INPUTS        : int diff : Probability difference value to encode.
+ *						
+ *  OUTPUTS       : None.   
+ *
+ *  RETURNS       : int: Estimated cost of coding diff, in units of 1/64 bit.
+ *
+ *  FUNCTION      : Same as encodeModeDiff above but rather than outputting
+ *                  bits to the bitstream it estimates the number of bits
+ *                  that will be generated.
+ *
+ *  SPECIAL NOTES : Must follow encodeModeDiff branch for branch.
+ *
+ ****************************************************************************/
+int estimateModeDiffCost ( int diff )
+{
+	int cost = 0;
+
+	if ( diff==0 )
+	{
+		cost += (VP6_ProbCost[205]+128) >> 8;
+	}
+	else
+	{
+		cost += (VP6_ProbCost[255-205]+128) >> 8;
+		cost += 64;	// sign decision, coded at probability 128 by encodeModeDiff: charged as one bit
+
+		// go to abs value
+		diff = abs(diff);
+		if ( diff<12 )
+		{
+			// < 12
+    		cost += (VP6_ProbCost[171]+128) >> 8;
+
+			if ( diff==4 )
+        		cost += (VP6_ProbCost[255-83]+128) >> 8;
+			else
+        		cost += (VP6_ProbCost[83]+128) >> 8;
+		}
+		else
+		{
+			// >= 12
+    		cost += (VP6_ProbCost[255-171]+128) >> 8;
+
+			if ( diff<28 ) 
+			{
+				// < 28
+          		cost += (VP6_ProbCost[199]+128) >> 8;
+
+				if ( diff==12 )
+            		cost += (VP6_ProbCost[255-140]+128) >> 8;
+				else
+				{
+    		        cost += (VP6_ProbCost[140]+128) >> 8;
+
+					if ( diff==16 )
+                		cost += (VP6_ProbCost[255-125]+128) >> 8;
+					else
+					{
+                		cost += (VP6_ProbCost[125]+128) >> 8;
+						if ( diff==20 )
+                    		cost += (VP6_ProbCost[255-104]+128) >> 8;
+						else
+                    		cost += (VP6_ProbCost[104]+128) >> 8;
+					}
+				}
+			}
+			else 
+			{
+				// >= 28: escape decision plus 7 raw bits at 64 units each
+        		cost += (VP6_ProbCost[255-199]+128) >> 8;
+				cost += 7*64;
+			}
+        }
+    }
+	return cost;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateModeProbs
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance (mode counts are
+ *                                     read; cpi->pb.probXmitted and cpi->bc are updated).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : For each mode type, decides which mode probabilities to
+ *                  use for coding macroblock modes and writes the update:
+ *                  a) Pick the cheapest vector from the VP6_ModeVq codebook.
+ *                  b) Send it if it beats last frame's vector (always sent
+ *                     in error resilient mode), else flag "reuse".
+ *                  c) Optionally refine each probability towards its ideal
+ *                     value in steps of 4, if that lowers the total cost.
+ *
+ *  SPECIAL NOTES : Costs are compared in whole bits (cost units / 64).
+ *
+ ****************************************************************************/
+void UpdateModeProbs ( CP_INSTANCE *cpi )
+{
+	int i, j, k;
+	int diff, costToIdeal;
+	unsigned int thisCost, bestCost;
+	unsigned int lowestCost = 0x7fffffff;
+	unsigned int lastCost   = 0x7fffffff;
+	unsigned int whichVector = 0;
+	UINT32 total, round, ideal;
+	UINT8 proposedProb[2][10];	// NOTE(review): 10 is presumably MAX_MODES -- confirm
+	PB_INSTANCE *pbi = &cpi->pb;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MODE_SECTION;
+#endif
+
+	// For each mode type (all modes available, no nearest, no near mode)
+	for ( j=0; j<MODETYPES; j++ )
+	{
+		// sum the totals for each of the modes (slot MAX_MODES of each row holds the sum)
+		cpi->MBModeCount[j][MAX_MODES]         = 0;
+		cpi->CountModeSameAsLast[j][MAX_MODES] = 0;
+		cpi->CountModeDiffFrLast[j][MAX_MODES] = 0;	// cleared like its siblings so an old sum cannot leak into 'total'
+		for ( i=0; i<MAX_MODES; i++ )
+		{
+			cpi->MBModeCount[j][MAX_MODES]         += cpi->MBModeCount[j][i];
+			cpi->CountModeSameAsLast[j][MAX_MODES] += cpi->CountModeSameAsLast[j][i];
+			cpi->CountModeDiffFrLast[j][MAX_MODES] += cpi->CountModeDiffFrLast[j][i];
+		}
+
+		// estimate the cost of using the cheapest vector from our vq codebook
+		whichVector = 0;
+		lowestCost  = 0x7fffffff;
+		for ( k=0; k<MODEVECTORS; k++ )
+		{
+			thisCost = 0;
+			for ( i=0; i<MAX_MODES; i++ )
+			{
+				thisCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[VP6_ModeVq[j][k][i*2]]  +128)>>8);
+				thisCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[VP6_ModeVq[j][k][i*2+1]]+128)>>8);
+			}
+			if ( thisCost<lowestCost )
+			{
+				whichVector = k;
+				lowestCost  = thisCost;
+			}
+		}
+
+		// In the error resilient mode / VC mode we discount the "last frame values" as
+		// a candidate vector in order to improve the resilience to dropped/corrupt frames.
+		if ( !cpi->ErrorResilliantMode )
+		{
+			// estimate the cost of using the vector we have from the last frame
+			lastCost = 0;
+			for ( i=0; i<MAX_MODES; i++ )
+			{
+				lastCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[pbi->probXmitted[j][1][i]] + 128)>>8);
+				lastCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[pbi->probXmitted[j][0][i]] + 128)>>8);
+			}
+		}
+
+		// if the best from our vq book + the cost of transmitting the vector is cheaper
+		// than our current vector use it. OR... if we are running in error resilient mode.
+		if( cpi->ErrorResilliantMode ||
+			 ( (lastCost / 64) > (((VP6_ProbCost[255-PROBVECTORXMIT]+128)>>8) + lowestCost) / 64 + 4 ) /* for the vector itself */ )
+		{
+			// transmit that we are transmitting a new vector
+			VP6_EncodeBool ( &cpi->bc,1,PROBVECTORXMIT );
+
+			// transmit which vector to use here
+			AddBitsToBuffer ( &cpi->bc, whichVector, 4 );
+
+			// adjust the vector
+			for ( i=0; i<MAX_MODES; i++ )
+			{
+				pbi->probXmitted[j][1][i] = VP6_ModeVq[j][whichVector][i*2];
+				pbi->probXmitted[j][0][i] = VP6_ModeVq[j][whichVector][i*2+1];
+			}
+		}
+		else
+		{
+			lowestCost = lastCost;
+
+			// transmit that we are reusing the last vector
+			VP6_EncodeBool ( &cpi->bc, 0, PROBVECTORXMIT );
+		}
+
+		// calculate the ideal vector and how much it would cost to go to it.
+		bestCost    = 0;
+		costToIdeal = 0;
+		total = 1 + cpi->CountModeSameAsLast[j][MAX_MODES]+cpi->CountModeDiffFrLast[j][MAX_MODES];
+		round = total/2;
+		for ( i=0; i<10; i++ )
+		{
+			// ideal "same as last" probability; the rounded ratio can reach 256, so cap it to fit a UINT8
+			ideal = (round+256*cpi->CountModeSameAsLast[j][i]) / total;
+			proposedProb[1][i] = (UINT8)( ideal>255 ? 255 : ideal );
+			// calculate the truncated difference between the ideal and where we are now
+			diff = 4*((proposedProb[1][i] - pbi->probXmitted[j][1][i]) / 4);
+			costToIdeal += estimateModeDiffCost(diff);
+			diff += pbi->probXmitted[j][1][i];
+			proposedProb[1][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+			// update the cost of our ideal choice and of moving to our ideal values
+			bestCost += cpi->CountModeSameAsLast[j][i]*((VP6_ProbCost[proposedProb[1][i]]+128)>>8);
+
+			// ideal "different from last" probability, capped the same way
+			ideal = (round+256*cpi->CountModeDiffFrLast[j][i]) / total;
+			proposedProb[0][i] = (UINT8)( ideal>255 ? 255 : ideal );
+			// calculate the truncated difference between the ideal and where we are now
+			diff = 4*((proposedProb[0][i] - pbi->probXmitted[j][0][i]) / 4);
+			costToIdeal += estimateModeDiffCost(diff);
+			diff += pbi->probXmitted[j][0][i];
+			proposedProb[0][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+			// update the cost of our ideal choice and of moving to our ideal values
+			bestCost += cpi->CountModeDiffFrLast[j][i]*((VP6_ProbCost[proposedProb[0][i]]+128)>>8);
+		}
+
+		// if updating our vector to be closer to the ideal is cheaper than going with what we have now
+		if ( (costToIdeal + bestCost + ((VP6_ProbCost[255-PROBIDEALXMIT]+128)>>8)) / 64 < lowestCost / 64 )
+		{
+			// transmit that we are updating the mode probabilities
+			VP6_EncodeBool ( &cpi->bc, 1, PROBIDEALXMIT );
+
+			// encode the differences and adjust the ideal values
+			for ( i=0; i<10; i++ )
+			{
+				diff = proposedProb[1][i]-pbi->probXmitted[j][1][i];
+				encodeModeDiff(cpi,diff);
+				diff += pbi->probXmitted[j][1][i];
+				pbi->probXmitted[j][1][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+
+				diff = proposedProb[0][i]- pbi->probXmitted[j][0][i];
+				encodeModeDiff(cpi,diff);
+				diff += pbi->probXmitted[j][0][i];
+				pbi->probXmitted[j][0][i] = ( diff<0 ? 0 : (diff>255 ? 255 : diff) );
+			}
+		}
+		else
+		{
+			// transmit that we are not updating the mode probabilities
+			VP6_EncodeBool ( &cpi->bc, 0, PROBIDEALXMIT );
+		}
+	}
+
+	VP6_BuildModeTree ( &cpi->pb );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeModeAndMotionVector
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                      UINT32 MBrow      : MB row.
+ *                      UINT32 MBcol      : MB column.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Writes a macroblock's coding mode, followed by any block
+ *                      modes and motion vectors that mode requires.
+ *
+ *  SPECIAL NOTES :     Updates pbi->LastMode to the mode just written.
+ *
+ ****************************************************************************/
+void encodeModeAndMotionVector ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+	int width = pbi->HFragments;
+	UINT32 FragIndex = (MBrow-BORDER_MBS)*width*2 + (MBcol-BORDER_MBS)*2;
+	UINT32 blockFrag[4];
+	CODING_MODE mode;
+	int type, type2;
+	UINT32 b;
+
+	// 'type' picks the mode probability set passed to encodeMode. The second
+	// call's result (type2) is not used in this function.
+	VP6_FindNearestandNextNearest ( &cpi->pb, MBrow, MBcol, 1, &type  );
+	VP6_FindNearestandNextNearest ( &cpi->pb, MBrow, MBcol, 2, &type2 );
+
+	// The macroblock's four fragments: a 2x2 group, HFragments apart vertically
+	blockFrag[0] = FragIndex;
+	blockFrag[1] = FragIndex+1;
+	blockFrag[2] = FragIndex+pbi->HFragments;
+	blockFrag[3] = FragIndex+pbi->HFragments+1;
+
+	// Code the macroblock mode against the previously coded one
+	mode = pbi->predictionMode[MBOffset(MBrow,MBcol)];
+	encodeMode ( cpi, pbi->LastMode, mode, type );
+	pbi->LastMode = mode;
+
+	if ( mode==CODE_INTER_PLUS_MV || mode==CODE_GOLDEN_MV )
+	{
+		// A single vector, taken from the macroblock's first fragment
+		encodeMotionVector ( cpi, pbi->FragInfo[FragIndex].MVectorX, pbi->FragInfo[FragIndex].MVectorY, mode );
+	}
+	else if ( mode==CODE_INTER_FOURMV )
+	{
+		// First the four per-block modes ...
+		for ( b=0; b<4; b++ )
+			encodeBlockMode ( cpi, pbi->FragInfo[blockFrag[b]].FragCodingMode );
+
+		// ... then a vector for every block coded as CODE_INTER_PLUS_MV
+		for ( b=0; b<4; b++ )
+		{
+			if ( pbi->FragInfo[blockFrag[b]].FragCodingMode!=CODE_INTER_PLUS_MV )
+				continue;
+
+			encodeMotionVector ( cpi, pbi->FragInfo[blockFrag[b]].MVectorX,
+			                     pbi->FragInfo[blockFrag[b]].MVectorY, CODE_INTER_PLUS_MV );
+		}
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EstimateCost
+ *
+ *  INPUTS        :     BOOL_CODER *bc : Pointer to a BoolCoder (UNUSED). 
+ *                      HUFF_NODE *hn  : Pointer to a Huffman tree.
+ *                      int value      : Value to be encoded.
+ *                      int length     : Length in bits of value.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: Sum of the per-bit cost estimates for value.
+ *
+ *  FUNCTION      :     Walks the tree from node 0, most significant bit first,
+ *                      adding a VP6_ProbCost-based cost for every bit of value.
+ *
+ *  SPECIAL NOTES :     Returns 0 when length is not positive.
+ *
+ ****************************************************************************/
+UINT32 VP6_EstimateCost ( BOOL_CODER *bc, HUFF_NODE *hn, int value, int length )
+{
+	UINT32 cost = 0;
+	int node = 0;
+	int bitPos;
+
+	// Visit bits length-1 down to 0, following the tree branch each bit selects
+	for ( bitPos = length; bitPos-- > 0; )
+	{
+		if ( (value>>bitPos) & 1 )
+		{
+			cost += (VP6_ProbCost[255-hn[node].freq]+128)>>8;
+			node = hn[node].rightunion.right.value;
+		}
+		else
+		{
+			cost += (VP6_ProbCost[hn[node].freq]+128)>>8;
+			node = hn[node].leftunion.left.value;
+		}
+	}
+
+	return cost;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     modeCost
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      UINT32 MBrow     : MB row.
+ *                      UINT32 MBcol     : MB column.
+ *                      CODING_MODE mode : Mode to be costed.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: cpi->EstModeCost entry for mode (in bits*64).
+ *
+ *  FUNCTION      :     Looks up the estimated cost of coding mode after the previous MB's mode.
+ *
+ *  SPECIAL NOTES :     The first MB of all is costed against CODE_INTER_NO_MV.
+ *
+ ****************************************************************************/
+UINT32 modeCost ( CP_INSTANCE *cpi, UINT32 MBrow, UINT32 MBcol, CODING_MODE mode )
+{
+	PB_INSTANCE *pbi = &cpi->pb;
+	int width = pbi->HFragments;	// NOTE(review): not referenced below -- confirm no macro relies on it
+	CODING_MODE lastmode;
+	int type;	// filled in by the call below; its value is not used here
+
+	VP6_FindNearestandNextNearest ( &cpi->pb, MBrow, MBcol, 1, &type );
+
+	if ( MBcol!=BORDER_MBS )	// usual case: the MB to the left
+		lastmode = pbi->predictionMode[MBOffset(MBrow,MBcol-1)];
+	else if ( MBrow!=BORDER_MBS )	// first column: MB at column MBCols-(BORDER_MBS+1) of the row above
+		lastmode = pbi->predictionMode[MBOffset(MBrow-1,pbi->MBCols - (BORDER_MBS+1))];
+	else	// first column of the first row: nothing has been coded yet
+		lastmode = CODE_INTER_NO_MV;
+
+	return cpi->EstModeCost[(lastmode!=mode) ? 1 : 0][mode];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     blockModeCost
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance (NOT USED).
+ *                      UINT32 i         : Undefined (NOT USED).
+ *                      UINT32 j         : Undefined (NOT USED).
+ *                      CODING_MODE mode : Mode to be costed (NOT USED).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: Cost of coding a block mode (in bits*64); always 128.
+ *
+ *  FUNCTION      :     Returns the fixed cost of a block mode inside a four-MV macroblock.
+ *
+ *  SPECIAL NOTES :     The result does not depend on any of the arguments.
+ *
+ ****************************************************************************/
+UINT32 blockModeCost ( CP_INSTANCE *cpi, UINT32 i, UINT32 j, CODING_MODE mode )
+{
+	// Every block mode costs 2 bits; costs are expressed as bits * 64
+	return 2 * 64;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildModeCostEstimates
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void.
+ *
+ *  FUNCTION      :     Fills cpi->EstModeCost with a cost estimate for each mode.
+ *
+ *  SPECIAL NOTES :     Row 0: mode equals the last mode. Row 1: it differs.
+ *
+ ***************************************************************************/
+void BuildModeCostEstimates ( CP_INSTANCE *cpi )
+{
+	int mode;
+
+	for ( mode=0; mode<MAX_MODES; mode++ )
+	{
+		int other = ( mode==0 ) ? 1 : 0;	// a last mode that differs from mode
+
+		// Matching last mode case
+		cpi->bc.BitCounter = 0;
+		encodeModeTest ( cpi, mode, mode, MACROBLOCK );
+		cpi->EstModeCost[0][mode] = (cpi->bc.BitCounter) >> 2;
+
+		// Non matching last mode case
+		cpi->bc.BitCounter = 0;
+		encodeModeTest ( cpi, other, mode, MACROBLOCK );
+		cpi->EstModeCost[1][mode] = (cpi->bc.BitCounter) >> 2;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h
new file mode 100644
index 00000000..a8a6fa65
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemode.h
@@ -0,0 +1,24 @@
+/****************************************************************************
+*        
+*   Module Title :     encodemode.h
+*
+*   Description  :     Functions for encoding modes and Motion Vectors.
+*
+****************************************************************************/
+#ifndef __INC_ENCODEMODE_H
+#define __INC_ENCODEMODE_H
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking */
+#endif
+
+/****************************************************************************
+*  Exports
+****************************************************************************/        
+extern void encodeModeAndMotionVector(CP_INSTANCE* cpi, UINT32 MBrow, UINT32 MBcol);
+extern void UpdateModeProbs(CP_INSTANCE *cpi);
+extern UINT32 modeCost(CP_INSTANCE *cpi,UINT32 i,UINT32 j,CODING_MODE mode);
+extern UINT32 blockModeCost(CP_INSTANCE *cpi,UINT32 i,UINT32 j,CODING_MODE mode);
+extern void BuildModeCostEstimates( CP_INSTANCE *cpi );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c
new file mode 100644
index 00000000..080cf07f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.c
@@ -0,0 +1,720 @@
+/****************************************************************************
+*        
+*   Module Title :	   encodemv.c
+*
+*   Description  :     Functions for encoding modes and motion vectors
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+ 
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "boolhuff.h"
+#include "decodemv.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+// This small correction allows for the fact that an update to an MV probability
+// may have benefit in subsequent frames as well as the current one.
+#define MV_PROB_UPDATE_CORECTION	-1				
+
+/****************************************************************************
+*  Imports
+****************************************************************************/        
+extern void AddBitsToBuffer ( BOOL_CODER *bc, UINT32 data, UINT32 bits );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeMotionVectorComponent
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      int i            : Selector as to what set of probs to use.						
+ *                      INT32 Vector     : MV component to be coded.
+ *                      INT32 MvOffset   : Reference value to code Vector from.
+ *
+ *  OUTPUTS       :     None.    
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Encodes a motion vector component either outputting
+ *                      bits to the bitstream _or_ updating BitCounter in 
+ *                      cpi->bc with the estimated cost.
+ *
+ *  SPECIAL NOTES :     cpi->bc.MeasureCost determines whether bits are
+ *                      generated to the bitstream or not. 
+ *
+ ****************************************************************************/
+void encodeMotionVectorComponent ( CP_INSTANCE *cpi, int i, INT32 Vector, INT32 MvOffset )
+{
+	UINT8 SignBit;	// 1 when (Vector - MvOffset) is negative, otherwise 0
+	INT32 TmpVector;	// Difference to be coded; reduced to its magnitude below
+	void (*CodeBool)( BOOL_CODER *, int, int );	// Bool coding routine selected below
+
+    // Are we outputting bits to the bitstream or just estimating cost?
+	if ( cpi->bc.MeasureCost )
+		CodeBool = VP6_EncodeBool2;	// Used while cpi->bc.MeasureCost is set (cost estimation, see routine header)
+	else
+		CodeBool = VP6_EncodeBool;	// Used when MeasureCost is clear (bits go to the bitstream, see routine header)
+
+	// Code vector differentially
+	TmpVector = Vector - MvOffset;
+
+	// Convert vector to sign bit and magnitude
+	if ( TmpVector < 0 )
+	{
+		TmpVector = - TmpVector;
+		SignBit = 1;
+	}
+	else
+		SignBit = 0;
+
+	// Is this a small vector component? Magnitudes 0..7 take the short path (original note: currently < 2 whole pixels)
+	if ( TmpVector <= 7 )
+	{
+		// Small vector: a 0 is coded against IsMvShortProb[i]
+		CodeBool ( &cpi->bc, 0, cpi->pb.IsMvShortProb[i] );
+
+		// Code up the magnitude value: three decisions through the 7 entries of MvShortProbs[i][]
+		switch ( TmpVector )	// Entry 0 picks 0-3 vs 4-7, entries 1/4 pick the pair, entries 2/3/5/6 pick within the pair
+		{
+		case 0:
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][1] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][2] );
+			break;
+		case 1:
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][1] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][2] );
+			break;
+		case 2:
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][1] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][3] );
+			break;
+		case 3:
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][1] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][3] );
+			break;
+		case 4:
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][4] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][5] );
+			break;
+		case 5:
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][4] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][5] );
+			break;
+		case 6:
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][4] );
+			CodeBool ( &cpi->bc, 0, cpi->pb.MvShortProbs[i][6] );
+			break;
+		case 7:
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][0] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][4] );
+			CodeBool ( &cpi->bc, 1, cpi->pb.MvShortProbs[i][6] );
+			break;
+		}
+
+		// Code the sign bit
+        if ( TmpVector > 0 )	// A zero magnitude carries no sign bit
+			CodeBool ( &cpi->bc, SignBit, cpi->pb.MvSignProbs[i] );
+	}
+	else
+	{
+		// Indicate that we have a larger vector
+		CodeBool ( &cpi->bc, 1, cpi->pb.IsMvShortProb[i] );
+
+		// Code the magnitude. NOTE(review): only bits 0..7 are written, so magnitudes above 0xFF would lose their high bits -- presumably callers keep the difference in range; confirm
+		CodeBool ( &cpi->bc, ((TmpVector & 0x01) ? 1 : 0), cpi->pb.MvSizeProbs[i][0] );	// QPel
+		CodeBool ( &cpi->bc, ((TmpVector & 0x02) ? 1 : 0), cpi->pb.MvSizeProbs[i][1] );	// HPel
+		CodeBool ( &cpi->bc, ((TmpVector & 0x04) ? 1 : 0), cpi->pb.MvSizeProbs[i][2] );	// Pel
+
+		// At least one of the following must be non zero (or we would have coded a short vector)
+		// We code from least likely to be set to most likely. The last bit is thus implicit 
+		// if none of the others are set
+		CodeBool ( &cpi->bc, ((TmpVector & 0x80) ? 1 : 0), cpi->pb.MvSizeProbs[i][7] );
+		CodeBool ( &cpi->bc, ((TmpVector & 0x40) ? 1 : 0), cpi->pb.MvSizeProbs[i][6] );
+		CodeBool ( &cpi->bc, ((TmpVector & 0x20) ? 1 : 0), cpi->pb.MvSizeProbs[i][5] );
+		CodeBool ( &cpi->bc, ((TmpVector & 0x10) ? 1 : 0), cpi->pb.MvSizeProbs[i][4] );
+
+		// Only need to code if at least one of the others was set else it is implicit
+		if ( TmpVector & 0xF0 )	// Bit 3 (0x08) is written last, and only when one of bits 4..7 is set
+			CodeBool ( &cpi->bc, ((TmpVector & 0x08) ? 1 : 0), cpi->pb.MvSizeProbs[i][3] );
+
+		// Code the sign bit (always present on this path: the magnitude is at least 8)
+		CodeBool ( &cpi->bc, SignBit, cpi->pb.MvSignProbs[i] );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     encodeMotionVector 
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      INT32 MVectorX   : MV x-component to be coded.
+ *                      INT32 MVectorY   : MV y-component to be coded.
+ *  					CODING_MODE Mode : Coding mode for corresponding MB/Block.
+ *
+ *  OUTPUTS       :     None.     
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Encodes a motion vector to the bitstream.
+ *
+ *  SPECIAL NOTES :     None. 
+ * 
+ ****************************************************************************/
+void encodeMotionVector ( CP_INSTANCE *cpi, INT32 MVectorX, INT32 MVectorY, CODING_MODE Mode )
+{
+	// Origin the vector is coded against; stays (0,0) unless a nearest
+	// vector with an index below MAX_NEAREST_ADJ_INDEX is on record.
+	INT32  OriginX = 0;
+	INT32  OriginY = 0;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MV_SECTION;
+#endif
+
+	if ( (Mode == CODE_INTER_PLUS_MV) && (cpi->pb.mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX) )
+	{
+		// Normal Inter MV: code relative to the nearest inter vector
+		OriginX = cpi->pb.mbi.NearestInterMVect.x;
+		OriginY = cpi->pb.mbi.NearestInterMVect.y;
+	}
+	else if ( (Mode != CODE_INTER_PLUS_MV) && (cpi->pb.mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX) )
+	{
+		// Golden Frame MV: code relative to the nearest golden vector
+		OriginX = cpi->pb.mbi.NearestGoldMVect.x;
+		OriginY = cpi->pb.mbi.NearestGoldMVect.y;
+	}
+
+	// Component 0 carries x, component 1 carries y.
+	encodeMotionVectorComponent ( cpi, 0, MVectorX, OriginX );
+	encodeMotionVectorComponent ( cpi, 1, MVectorY, OriginY );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CalculateMvNodeProbabilities
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Build the MV entropy coding tree.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+***************************************************************************/
+void CalculateMvNodeProbabilities ( CP_INSTANCE *cpi )
+{
+	UINT32 Sum;	// Total count feeding the node currently being evaluated
+	UINT32 Sum2;	// Count on the "0" branch of that node
+	UINT32 Count;	// Occurrences of one vector value, read from cpi->MvBaselineDist
+	INT32  AbsVector;	// Magnitude of the vector value being binned
+	INT32  DistributionOffset;	// Index into cpi->MvBaselineDist[i][]
+	INT32  NewProb;
+	INT32  i;	// Component index (0 and 1; "X and Y" per the second loop below)
+	INT32  j;
+
+	UINT32 MvShortDist[2][2];	// [i][0] = short vectors, [i][1] = long vectors
+	UINT32 MvShortSizeDist[2][8];	// [i][magnitude 0..7] for short vectors
+	UINT32 MvSignDist[2][2];	// [i][0] = positive, [i][1] = negative (zero is not counted)
+	UINT32 MvSizeDist[2][LONG_MV_BITS][2];	// [i][bit][bit value] for long vectors
+
+	memset( MvShortDist,     0, sizeof(MvShortDist) );
+	memset( MvShortSizeDist, 0, sizeof(MvShortSizeDist) );
+	memset( MvSizeDist,      0, sizeof(MvSizeDist) );
+	memset( MvSignDist,      0, sizeof(MvSignDist) );
+
+	// Calculate the distributions for the MV nodes.
+	cpi->FrameMvCount = 0;
+	for ( i=0; i<2; i++ )
+	{
+		Sum = 0;
+		for ( j=-(MV_ENTROPY_TOKENS >> 1); j<0; j++ )
+		{
+            // -ve vectors
+			DistributionOffset = (MV_ENTROPY_TOKENS >> 1) + j;
+			Count = cpi->MvBaselineDist[i][DistributionOffset];
+			AbsVector = -j;
+			MvSignDist[i][1] += Count;
+
+            if ( AbsVector < 8 )
+			{
+				MvShortDist[i][0] += Count;			    // Short vector
+				MvShortSizeDist[i][AbsVector] += Count;	// Magnitude distribution
+			}
+			else 
+			{
+				MvShortDist[i][1] += Count;				// Long vector
+
+				MvSizeDist[i][0][(AbsVector & 0x01) ? 1 : 0] += Count;	// QPel
+				MvSizeDist[i][1][(AbsVector & 0x02) ? 1 : 0] += Count;	// HPel
+				MvSizeDist[i][2][(AbsVector & 0x04) ? 1 : 0] += Count;	// Bit1
+
+				MvSizeDist[i][3][(AbsVector & 0x08) ? 1 : 0] += Count;	// Bit2
+				MvSizeDist[i][4][(AbsVector & 0x10) ? 1 : 0] += Count;	// Bit3
+				MvSizeDist[i][5][(AbsVector & 0x20) ? 1 : 0] += Count;	// Bit4 
+				MvSizeDist[i][6][(AbsVector & 0x40) ? 1 : 0] += Count;	// Bit5
+				MvSizeDist[i][7][(AbsVector & 0x80) ? 1 : 0] += Count;	// Bit6
+			}
+			Sum += Count;
+		}
+
+		// Zero Vector component (counted as a short vector of magnitude 0; no sign is recorded)
+		Count = cpi->MvBaselineDist[i][(MV_ENTROPY_TOKENS >> 1)];
+		MvShortDist[i][0] += Count;							
+		MvShortSizeDist[i][0] += Count;
+		Sum += Count;
+
+		for ( j=1; j<(MV_ENTROPY_TOKENS >> 1); j++ )
+		{
+            // +ve vectors. NOTE(review): this stops one short of the -ve range above, so if MV_ENTROPY_TOKENS is odd the last table entry is never counted -- confirm this is intended
+			DistributionOffset = (MV_ENTROPY_TOKENS >> 1) + j;
+			Count = cpi->MvBaselineDist[i][DistributionOffset];
+			AbsVector = j;
+			MvSignDist[i][0] += Count;							
+
+			if ( AbsVector < 8 )
+			{
+				MvShortDist[i][0] += Count;						// Short vector
+				MvShortSizeDist[i][AbsVector] += Count;			// Magnitude distribution
+			}
+			else
+			{
+				MvShortDist[i][1] += Count;						// Long vector
+
+				MvSizeDist[i][0][(AbsVector & 0x01) ? 1 : 0] += Count;	// QPel
+				MvSizeDist[i][1][(AbsVector & 0x02) ? 1 : 0] += Count;	// HPel
+				MvSizeDist[i][2][(AbsVector & 0x04) ? 1 : 0] += Count;	// Bit1
+
+				MvSizeDist[i][3][(AbsVector & 0x08) ? 1 : 0] += Count;	// Bit2
+				MvSizeDist[i][4][(AbsVector & 0x10) ? 1 : 0] += Count;	// Bit3
+				MvSizeDist[i][5][(AbsVector & 0x20) ? 1 : 0] += Count;	// Bit4 
+				MvSizeDist[i][6][(AbsVector & 0x40) ? 1 : 0] += Count;	// Bit5
+				MvSizeDist[i][7][(AbsVector & 0x80) ? 1 : 0] += Count;	// Bit6
+			}
+			Sum += Count;
+		}
+	}
+	cpi->FrameMvCount = Sum;		// Note that Sum is reset to 0 for each "i" above, so this is the i==1 total only
+
+	for ( i=0; i<2; i++ )		// X and Y
+	{
+		// Convert the distributions to optimal node probabilities
+		Sum = MvShortDist[i][0] + MvShortDist[i][1];
+		
+        if ( Sum>0 )	// With no samples the New* fields below are left holding whatever they already contained
+		{
+			Sum2 = MvShortDist[i][0];
+
+			NewProb = (Sum2 * 255) / Sum;	// Share of the "0" branch scaled to 0..255
+			NewProb &= ~0x01;	// Force even: BuildandPackMvTree sends NewProb >> 1
+			if ( NewProb < 1 )
+				NewProb = 1;	// Never store a zero probability
+			cpi->NewIsMvShortProb[i] = NewProb;
+			cpi->NewIsMvShortHits[i][0] = Sum2;
+			cpi->NewIsMvShortHits[i][1] = Sum - Sum2;
+		}
+
+		// Sign (probability of a positive component among the non-zero ones)
+		Sum = ( MvSignDist[i][0] + MvSignDist[i][1] );
+		if ( Sum>0 )
+		{
+			Sum2 = MvSignDist[i][0];
+
+			NewProb = (Sum2 * 255) / Sum;
+			NewProb &= ~0x01;
+			if ( NewProb < 1 )
+				NewProb = 1;
+			cpi->NewMvSignProbs[i] = NewProb;
+			cpi->NewMvSignHits[i][0] = Sum2;
+			cpi->NewMvSignHits[i][1] = Sum - Sum2;
+		}
+
+		// Tree nodes for short vectors
+		for ( j=0; j<7; j++ )
+		{
+			// Node specific: Sum = everything reaching the node, Sum2 = the part taking the "0" branch
+			switch ( j )
+			{
+			case 0:
+				// Node 0: magnitudes 0-3 versus 4-7
+				Sum =  MvShortSizeDist[i][0] + MvShortSizeDist[i][1] + MvShortSizeDist[i][2] + MvShortSizeDist[i][3] +
+					   MvShortSizeDist[i][4] + MvShortSizeDist[i][5] + MvShortSizeDist[i][6] + MvShortSizeDist[i][7];
+				Sum2 = MvShortSizeDist[i][0] + MvShortSizeDist[i][1] + MvShortSizeDist[i][2] + MvShortSizeDist[i][3];
+				break;
+			case 1:
+				// Node 1: magnitudes 0-1 versus 2-3
+				Sum = Sum2;	// Carries over node 0's Sum2 (the 0-3 total) from the previous iteration
+				Sum2 = MvShortSizeDist[i][0] + MvShortSizeDist[i][1];
+				break;
+			case 2:
+				// Node 2: magnitude 0 versus 1
+				Sum = Sum2;	// Carries over node 1's Sum2 (the 0-1 total) from the previous iteration
+				Sum2 = MvShortSizeDist[i][0];
+				break;
+			case 3:
+				// Node 3: magnitude 2 versus 3
+				Sum = MvShortSizeDist[i][2] + MvShortSizeDist[i][3];
+				Sum2 = MvShortSizeDist[i][2];
+				break;
+			case 4:
+				// Node 4: magnitudes 4-5 versus 6-7
+				Sum = MvShortSizeDist[i][4] + MvShortSizeDist[i][5] + MvShortSizeDist[i][6] + MvShortSizeDist[i][7];
+				Sum2 = MvShortSizeDist[i][4] + MvShortSizeDist[i][5];
+				break;
+			case 5:
+				// Node 5: magnitude 4 versus 5
+				Sum = MvShortSizeDist[i][4] + MvShortSizeDist[i][5];
+				Sum2 = MvShortSizeDist[i][4];
+				break;
+			case 6:
+				// Node 6: magnitude 6 versus 7 (the High-High pair)
+				Sum = MvShortSizeDist[i][6] + MvShortSizeDist[i][7];
+				Sum2 = MvShortSizeDist[i][6];
+				break;
+			}
+
+			if ( Sum )
+			{ 
+				NewProb = (Sum2 * 255)/Sum;
+				NewProb &= ~0x01;
+				if ( NewProb < 1 )
+					NewProb = 1;
+				cpi->NewMvShortProbs[i][j] = NewProb;
+				cpi->NewMvShortHits[i][j][0] = Sum2;
+				cpi->NewMvShortHits[i][j][1] = Sum - Sum2;
+			}
+		}
+
+		// Long vectors: one independent probability per magnitude bit
+		for ( j=0; j<LONG_MV_BITS; j++ )
+		{
+			Sum	 = MvSizeDist[i][j][0] + MvSizeDist[i][j][1];
+			Sum2 = MvSizeDist[i][j][0];
+
+			if ( Sum )
+			{
+				NewProb = (Sum2 * 255)/Sum;
+				NewProb &= ~0x01;
+				if ( NewProb < 1 )
+					NewProb = 1;
+				cpi->NewMvSizeProbs[i][j] = NewProb;
+				cpi->NewMvSizeHits[i][j][0] = Sum2;
+				cpi->NewMvSizeHits[i][j][1] = Sum - Sum2;
+			}
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildandPackMvTree
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.     
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Build the MV entropy coding tree.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+***************************************************************************/
+void BuildandPackMvTree ( CP_INSTANCE *cpi )
+{
+	INT32  i;	// Component index (X then Y)
+	INT32  NewProb;	// Candidate probability from CalculateMvNodeProbabilities
+	INT32  OldProb;	// Probability currently held in cpi->pb
+	INT32  NewBits;	// Estimated cost of the recorded hits using NewProb
+	INT32  OldBits;	// Estimated cost of the recorded hits using OldProb
+	INT32  ProbUpdateCost;	// Saving that must be exceeded before an update is sent
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MV_SECTION;
+#endif
+
+	// calculate the MV node Probabilities
+	CalculateMvNodeProbabilities ( cpi );
+
+	// If appropriate update short, sign, qpel, half pixel and low order bit probabilities.
+	for ( i=0; i<2; i++ )
+	{
+		// Update the Short vector probability.
+		NewProb = cpi->NewIsMvShortProb[i];
+		OldProb = cpi->pb.IsMvShortProb[i];
+		OldBits = ((cpi->NewIsMvShortHits[i][0] * VP6_ProbCost[OldProb])/256) +	// hits[0] are costed at prob, hits[1] at 255 - prob
+				  ((cpi->NewIsMvShortHits[i][1] * VP6_ProbCost[255 - OldProb])/256);	// NOTE(review): the /256 suggests VP6_ProbCost is scaled by 256 -- confirm
+		NewBits = ((cpi->NewIsMvShortHits[i][0] * VP6_ProbCost[NewProb])/256) +
+				  ((cpi->NewIsMvShortHits[i][1] * VP6_ProbCost[255 - NewProb])/256);
+
+		ProbUpdateCost = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;	// Bits for the new value plus the cross-frame correction
+		ProbUpdateCost += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][0]] + 128) / 256;	// Plus the rounded cost of coding the flag as 1
+		ProbUpdateCost -= (VP6_ProbCost[VP6_MvUpdateProbs[i][0]] + 128) / 256;	// Minus the rounded cost of coding the flag as 0
+
+		if ( (OldBits - NewBits) > ProbUpdateCost )
+		{
+			cpi->pb.IsMvShortProb[i] = NewProb;
+			VP6_EncodeBool  ( &cpi->bc, 1, VP6_MvUpdateProbs[i][0] );	// Flag: an update follows
+			AddBitsToBuffer ( &cpi->bc, NewProb>>1, PROB_UPDATE_BASELINE_COST );	// Sends NewProb >> 1 in PROB_UPDATE_BASELINE_COST bits
+		}
+		else
+		{
+			VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[i][0] );	// Flag: keep the current probability
+		}
+
+		// Sign
+		NewProb = cpi->NewMvSignProbs[i];
+		OldProb = cpi->pb.MvSignProbs[i];
+
+		OldBits = ((cpi->NewMvSignHits[i][0] * VP6_ProbCost[OldProb])/256) +
+				  ((cpi->NewMvSignHits[i][1] * VP6_ProbCost[255 - OldProb])/256);
+		NewBits = ((cpi->NewMvSignHits[i][0] * VP6_ProbCost[NewProb])/256) +
+				  ((cpi->NewMvSignHits[i][1] * VP6_ProbCost[255 - NewProb])/256);
+
+		ProbUpdateCost = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+		ProbUpdateCost += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][1]] + 128) / 256;
+		ProbUpdateCost -= (VP6_ProbCost[VP6_MvUpdateProbs[i][1]] + 128) / 256;
+
+		if ( (OldBits - NewBits) > ProbUpdateCost )
+		{
+			cpi->pb.MvSignProbs[i] = NewProb;
+			VP6_EncodeBool ( &cpi->bc, 1, VP6_MvUpdateProbs[i][1] );
+			AddBitsToBuffer ( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+		}
+		else
+		{
+			VP6_EncodeBool ( &cpi->bc, 0, VP6_MvUpdateProbs[i][1] );
+		}
+	}
+
+	// If appropriate update the tree probabilities for short vector
+	for ( i = 0; i < 2; i++ )   // X then Y
+	{
+		INT32  j;
+		UINT32 MvUpdateProbsOffset = 2;				// Offset into VP6_MvUpdateProbs[i][]: entries 0 and 1 were used for short/sign above
+
+		// For each node in the tree
+		for ( j=0; j<7; j++ )
+		{
+			NewProb = cpi->NewMvShortProbs[i][j];
+			OldProb = cpi->pb.MvShortProbs[i][j];
+
+			OldBits = ((cpi->NewMvShortHits[i][j][0] * VP6_ProbCost[OldProb])/256) +
+					  ((cpi->NewMvShortHits[i][j][1] * VP6_ProbCost[255 - OldProb])/256);
+			NewBits = ((cpi->NewMvShortHits[i][j][0] * VP6_ProbCost[NewProb])/256) +
+					  ((cpi->NewMvShortHits[i][j][1] * VP6_ProbCost[255 - NewProb])/256);
+
+			ProbUpdateCost = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+			ProbUpdateCost += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][MvUpdateProbsOffset]] + 128) / 256;
+			ProbUpdateCost -= (VP6_ProbCost[VP6_MvUpdateProbs[i][MvUpdateProbsOffset]] + 128) / 256;
+
+			if ( (OldBits - NewBits) > ProbUpdateCost )
+			{
+				cpi->pb.MvShortProbs[i][j] = NewProb;
+				VP6_EncodeBool(&cpi->bc, 1, VP6_MvUpdateProbs[i][MvUpdateProbsOffset] );
+				AddBitsToBuffer( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+			}
+			else
+			{
+				VP6_EncodeBool(&cpi->bc, 0, VP6_MvUpdateProbs[i][MvUpdateProbsOffset] );
+			}
+
+			// Increment to next offset in VP6_MvUpdateProbs[];
+			MvUpdateProbsOffset++;
+		}
+	}
+
+	// If appropriate update the bit probabilities for long vectors
+	for ( i=0; i<2; i++ )   // X then Y
+	{
+		INT32  j;
+		UINT32 MvUpdateProbsOffset = 2 + 7;	// Skip the short/sign entries (2) and the seven short-tree entries
+	
+		// For each bit
+		for ( j=0; j<LONG_MV_BITS; j++ )
+		{
+			NewProb = cpi->NewMvSizeProbs[i][j];
+			OldProb = cpi->pb.MvSizeProbs[i][j];
+
+			OldBits = ((cpi->NewMvSizeHits[i][j][0] * VP6_ProbCost[OldProb])/256) +
+					  ((cpi->NewMvSizeHits[i][j][1] * VP6_ProbCost[255 - OldProb])/256);
+			NewBits = ((cpi->NewMvSizeHits[i][j][0] * VP6_ProbCost[NewProb])/256) +
+					  ((cpi->NewMvSizeHits[i][j][1] * VP6_ProbCost[255 - NewProb])/256);
+
+			ProbUpdateCost = PROB_UPDATE_BASELINE_COST + MV_PROB_UPDATE_CORECTION;
+			ProbUpdateCost += (VP6_ProbCost[255 - VP6_MvUpdateProbs[i][MvUpdateProbsOffset]] + 128) / 256;
+			ProbUpdateCost -= (VP6_ProbCost[VP6_MvUpdateProbs[i][MvUpdateProbsOffset]] + 128) / 256;
+
+			if ( (OldBits - NewBits) > ProbUpdateCost )
+			{
+				cpi->pb.MvSizeProbs[i][j] = NewProb;
+				VP6_EncodeBool(&cpi->bc, 1, VP6_MvUpdateProbs[i][MvUpdateProbsOffset] );
+				AddBitsToBuffer( &cpi->bc, NewProb >> 1, PROB_UPDATE_BASELINE_COST );
+			}
+			else
+			{
+				VP6_EncodeBool(&cpi->bc, 0, VP6_MvUpdateProbs[i][MvUpdateProbsOffset] );
+			}
+
+			// Increment to next offset in VP6_MvUpdateProbs[];
+			MvUpdateProbsOffset++;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildandPackMvTree2
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.     
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Build the MV entropy coding tree. This version is
+ *                      used when in unbuffered / VC mode to improve tolerance
+ *                      to dropped frames.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ***************************************************************************/
+void BuildandPackMvTree2 ( CP_INSTANCE *cpi )
+{
+	INT32  Comp;				// Component index, 0 then 1
+	INT32  Entry;				// Index into VP6_MvUpdateProbs[Comp][]
+	BOOL_CODER *bc = &cpi->bc;
+
+// Stats to measure section costs
+#if defined MEASURE_SECTION_COSTS
+ActiveSection = MV_SECTION;
+#endif
+
+	// Refresh the New* probabilities from the current MV distribution.
+	CalculateMvNodeProbabilities ( cpi );
+
+	// Entries 0 and 1: the short and sign probabilities are adopted and
+	// transmitted unconditionally (update flag coded as 1, then the value).
+	for ( Comp=0; Comp<2; Comp++ )
+	{
+		cpi->pb.IsMvShortProb[Comp] = cpi->NewIsMvShortProb[Comp];
+		VP6_EncodeBool ( bc, 1, VP6_MvUpdateProbs[Comp][0] );
+		AddBitsToBuffer( bc, cpi->pb.IsMvShortProb[Comp] >> 1, PROB_UPDATE_BASELINE_COST );
+
+		cpi->pb.MvSignProbs[Comp] = cpi->NewMvSignProbs[Comp];
+		VP6_EncodeBool ( bc, 1, VP6_MvUpdateProbs[Comp][1] );
+		AddBitsToBuffer( bc, cpi->pb.MvSignProbs[Comp] >> 1, PROB_UPDATE_BASELINE_COST );
+	}
+
+	// Entries 2..8: the seven short vector tree nodes, each flagged 0 (no update).
+	for ( Comp=0; Comp<2; Comp++ )
+	{
+		for ( Entry=2; Entry<=8; Entry++ )
+			VP6_EncodeBool ( bc, 0, VP6_MvUpdateProbs[Comp][Entry] );
+	}
+
+	// Entries 9..16: the eight long vector bit probabilities (QPel, HPel,
+	// Bit1..Bit6), each flagged 0 (no update).
+	for ( Comp=0; Comp<2; Comp++ )
+	{
+		for ( Entry=9; Entry<=16; Entry++ )
+			VP6_EncodeBool ( bc, 0, VP6_MvUpdateProbs[Comp][Entry] );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildMVCostEstimates
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Calculate a cost in bits of encoding a motion vector.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ***************************************************************************/
+void BuildMVCostEstimates ( CP_INSTANCE *cpi )
+{
+	int Slot;		// Counts the MV_ENTROPY_TOKENS table entries
+	int Value;		// Component value being costed: Slot - (MV_ENTROPY_TOKENS/2)
+
+	// Run the component coder with MeasureCost set for the whole scan.
+	cpi->bc.MeasureCost = TRUE;
+
+	for ( Slot=0, Value=-(MV_ENTROPY_TOKENS/2); Slot<MV_ENTROPY_TOKENS; Slot++, Value++ )
+	{
+		// Component 0, coded against a zero offset.
+		// (original note: keep all costs at 64 * actual number of bits)
+		cpi->bc.BitCounter = 0;
+		encodeMotionVectorComponent ( cpi, 0, Value, 0 );
+		cpi->EstMvCostPtrX[Value] = (cpi->bc.BitCounter) >> 2;
+
+		// Component 1, same treatment.
+		cpi->bc.BitCounter = 0;
+		encodeMotionVectorComponent ( cpi, 1, Value, 0 );
+		cpi->EstMvCostPtrY[Value] = (cpi->bc.BitCounter) >> 2;
+	}
+
+	cpi->bc.MeasureCost = FALSE;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h
new file mode 100644
index 00000000..41422d70
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/encodemv.h
@@ -0,0 +1,23 @@
+/****************************************************************************
+*        
+*   Module Title :     encodemv.h
+*
+*   Description  :     Functions for encoding motion vectors.
+*
+****************************************************************************/
+#ifndef __INC_ENCODEMV_H
+#define __INC_ENCODEMV_H
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking */
+#endif
+
+/****************************************************************************
+*  Exports
+****************************************************************************/        
+extern void BuildandPackMvTree( CP_INSTANCE *cpi );	// Recomputes the MV probabilities and sends an update only where the estimated saving exceeds its cost
+extern void BuildandPackMvTree2( CP_INSTANCE *cpi );	// Always sends the short and sign probabilities; flags every other entry as not updated
+extern void BuildMVCostEstimates( CP_INSTANCE *cpi );	// Fills cpi->EstMvCostPtrX / EstMvCostPtrY with a cost estimate per component value
+extern void encodeMotionVector ( CP_INSTANCE *cpi, INT32 MVectorX, INT32 MVectorY, CODING_MODE Mode );	// Codes x then y, relative to the nearest inter/golden MV when its index is below MAX_NEAREST_ADJ_INDEX
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c
new file mode 100644
index 00000000..20d023fd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/fullframefdct.c
@@ -0,0 +1,24 @@
+/****************************************************************************
+*
+*   Module Title :     FullFrameFDCT.c 
+*
+*   Description  :     Compressor functions for block order transmittal
+*
+*   AUTHOR       :     Paul Wilkins
+*
+****************************************************************************/
+#define STRICT               /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "misc_common.h"
+#include "decodemode.h"
+#include "decodemv.h"
+#include "quantize.h"
+extern void PredictBlock ( CP_INSTANCE *cpi, BLOCK_POSITION bp, UINT32 MBrow, UINT32 MBcol );
+extern void PredictDCE( CP_INSTANCE *cpi, BLOCK_POSITION bp);
+
+#if defined FULLFRAMEFDCT
+#endif
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c
new file mode 100644
index 00000000..70fd1072
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.c
@@ -0,0 +1,1906 @@
+/****************************************************************************
+*
+*   Module Title :     mcomp.c
+*
+*   Description  :     Motion compensation functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>       // For Abs()
+#include "mcomp.h"
+#include "compdll.h"
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern INT32 *XX_LUT;
+
+extern void VP6_PredictFiltered(PB_INSTANCE *pbi,UINT8 *SrcPtr,INT32 mx,INT32 my,INT32 MvShift) ;
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define HP_THRESH       0
+
+// The bias towards cheaper motion vectors should be tied to cpi->MVErrorPerBit,
+// but isn't (at least not yet). Setting this to 0 says don't bias at all.
+#define MVEPBSAD_MULT		1
+#define MVEPBSAD_RSHIFT		2
+#define MVEPBSAD_RSHIFT2	14
+
+/****************************************************************************
+*  Exports.
+****************************************************************************/
+UINT32  TotError = 0;
+UINT32  ErrCount = 0;
+
+UINT8  FilteredBlock[256];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitDSMotionCompensation
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Initialises data structures used by the diamond search.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void InitDSMotionCompensation ( CP_INSTANCE *cpi )
+{
+    // Unit (x,y) steps of the four sites generated per search step, in
+    // storage order: (0,-1), (-1,0), (1,0), (0,1).
+    static const int UnitX[4] = {  0, -1, 1, 0 };
+    static const int UnitY[4] = { -1,  0, 0, 1 };
+
+    int Radius;
+    int Point;
+    int Site = 0;
+    int Stride = (INT32)cpi->pb.Configuration.YStride;
+
+    // Number of search stages generated so far.
+    cpi->DSMVSearchSteps = 0;
+
+    // Start at (MAX_MV_EXTENT + 1)/4 and halve the radius for every step.
+    for ( Radius = (MAX_MV_EXTENT + 1)/4; Radius > 0; Radius /= 2 )
+    {
+        cpi->DSMVSearchSteps += 1;
+
+        for ( Point=0; Point<4; Point++ )
+        {
+            cpi->DSMVOffsetX[Site] = UnitX[Point] * Radius;
+            cpi->DSMVOffsetY[Site] = UnitY[Point] * Radius;
+            Site++;
+        }
+    }
+
+    // Fold each (x,y) offset into a single pixel index offset using the Y stride.
+    for ( Point=0; Point<Site; Point++ )
+        cpi->DSMVPixelOffsetY[Point] = (cpi->DSMVOffsetY[Point]*Stride) + cpi->DSMVOffsetX[Point];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :  InitMotionCompensation
+ *
+ *  INPUTS        :  CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :  None.    
+ *
+ *  RETURNS       :  void
+ *
+ *  FUNCTION      :  Initialises motion compensation data structures. 
+ *
+ *  SPECIAL NOTES :  None. 
+ *
+ ****************************************************************************/
+void InitMotionCompensation ( CP_INSTANCE *cpi )
+{
+    // Offsets used in fractional pel searches: entry 0 is the centre,
+    // followed by the eight surrounding positions.
+    static const int SubPelX[9] = { 0,  0, -1, 1, 0, -1,  1, -1, 1 };
+    static const int SubPelY[9] = { 0, -1,  0, 0, 1, -1, -1,  1, 1 };
+
+    // Unit (x,y) steps of the eight sites generated per search step,
+    // in storage order.
+    static const int RingX[8] = { -1,  0,  1, -1, 1, -1, 0, 1 };
+    static const int RingY[8] = { -1, -1, -1,  0, 0,  1, 1, 1 };
+
+    int Radius;
+    int Point;
+    int Site = 0;
+    int Stride = (INT32)cpi->pb.Configuration.YStride;
+
+    // Number of search stages generated so far.
+    cpi->MVSearchSteps = 0;
+
+    for ( Point=0; Point<9; Point++ )
+    {
+        cpi->SubPixelXOffset[Point] = SubPelX[Point];
+        cpi->SubPixelYOffset[Point] = SubPelY[Point];
+    }
+
+    // Start at (MAX_MV_EXTENT + 1)/4 and halve the radius for every step.
+    for ( Radius = (MAX_MV_EXTENT + 1)/4; Radius > 0; Radius /= 2 )
+    {
+        cpi->MVSearchSteps += 1;
+
+        for ( Point=0; Point<8; Point++ )
+        {
+            cpi->MVOffsetX[Site] = RingX[Point] * Radius;
+            cpi->MVOffsetY[Site] = RingY[Point] * Radius;
+            Site++;
+        }
+    }
+
+    // Fold each (x,y) offset into a single pixel index offset using the Y stride.
+    for ( Point=0; Point<Site; Point++ )
+        cpi->MVPixelOffsetY[Point] = (cpi->MVOffsetY[Point]*Stride) + cpi->MVOffsetX[Point];
+
+    // Search sites for the diamond search.
+    InitDSMotionCompensation(cpi);
+
+    // Default routines for block motion search and for the half and
+    // quarter pixel refinement steps.
+    cpi->FindMvViaSearch        = FindMvVia3StepSearch;
+    cpi->FindBestHalfPixelMv    = FindBestFractionalPixelStep;
+    cpi->FindBestQuarterPixelMv = FindBestFractionalPixelStep;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :  GetMBFrameVerticalVariance
+ *
+ *  INPUTS        :  CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: sum of squared vertical pixel differences for the MB.
+ *
+ *  FUNCTION      :  Walks 16 columns of the source image and sums the squared
+ *                   differences between each pair of vertically adjacent rows.
+ *
+ *  SPECIAL NOTES :  Only rows 0..14 are compared, so the difference between the
+ *                   last two rows of the macro-block is not accounted for!
+ *
+ ****************************************************************************/
+UINT32 GetMBFrameVerticalVariance ( CP_INSTANCE *cpi )
+{
+    int i, j;
+    UINT32 x, y, z;
+    UINT32  MBVariance = 0;
+    PB_INSTANCE *pbi = &cpi->pb;
+    // Start at the source offset recorded in blockDxInfo[0].Source.
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    INT32 SourceStride = pbi->Configuration.VideoFrameWidth;
+    INT32 Pitch2 = SourceStride*2;      // two image rows
+
+    for ( i=0; i<7; i++ )               // 7 passes, each advancing two rows
+    {
+        for ( j=0; j<16; j++ )
+        {
+            x = SrcPtr[j];
+            y = SrcPtr[j+SourceStride];
+            z = SrcPtr[j+Pitch2 ];
+            MBVariance +=(x-y)*(x-y) + (y-z)*(y-z);    // unsigned operands: squaring cancels any wrap-around
+        }
+        SrcPtr += Pitch2;
+    }
+    return MBVariance;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :  GetMBFieldVerticalVariance
+ *
+ *  INPUTS        :  CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: field vertical variance for a macroblock.
+ *
+ *  FUNCTION      :  Walks 16 columns of the source image and sums the squared
+ *                   differences between rows that are two lines apart, i.e.
+ *                   between neighbouring rows of the same field.
+ *
+ *  SPECIAL NOTES :  Rows 0..15 are read; row pitch is Configuration.VideoFrameWidth.
+ *
+ ****************************************************************************/
+UINT32 GetMBFieldVerticalVariance ( CP_INSTANCE *cpi )
+{
+    int i,j;
+    UINT32  x, y, z, w;
+    UINT32  MBFieldVariance = 0;
+    PB_INSTANCE *pbi = &cpi->pb;
+    // Start at the source offset recorded in blockDxInfo[0].Source.
+    UINT8 *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    INT32 SourceStride = pbi->Configuration.VideoFrameWidth;
+    INT32 Pitch2 = SourceStride*2;      // two image rows
+
+    for ( i=0; i<7; i++ )               // 7 passes, each advancing two rows
+    {
+        for ( j=0; j<16; j++ )
+        { 
+            x = SrcPtr[j];
+            y = SrcPtr[j+SourceStride];
+            z = SrcPtr[j+Pitch2 ];
+            w = SrcPtr[j+Pitch2 + SourceStride];
+            MBFieldVariance +=(x-z)*(x-z) + (y-w)*(y-w);    // pairs rows of equal parity; unsigned wrap is cancelled by squaring
+        }
+        SrcPtr += Pitch2;
+    }
+    return MBFieldVariance;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : GetReconReferencePoints
+ *  
+ *  INPUTS        : PB_INSTANCE *pbi     : Pointer to decoder instance.
+ *                  UINT8 *BufferPointer : Pointer to reference point in reference image.
+ *                  MOTION_VECTOR *MV    : Motion vector to be used.
+ *
+ *  OUTPUTS       : UINT8 **ReconPtr1    : Pointer-to-pointer to first block in ref frame.
+ *                  UINT8 **ReconPtr2    : Pointer-to-pointer to second block in ref frame.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Computes pointers to two blocks in the reference frame
+ *                  that bracket the fractional pixel position specified in MV.
+ *                  These two blocks will later be used to interpolate
+ *                  the prediction block.
+ *
+ *  SPECIAL NOTES : ReconPtr1 is offset using FrameReconStride while ReconPtr2 is
+ *                  stepped by CurrentReconStride -- NOTE(review): confirm intended.
+ ****************************************************************************/
+void GetReconReferencePoints
+( 
+    PB_INSTANCE *pbi,
+    UINT8 *BufferPointer,
+    UINT8 **ReconPtr1,
+    UINT8 **ReconPtr2,
+    MOTION_VECTOR *MV
+)
+{
+	INT32  mVx,  mVy;
+    INT32  ModX, ModY;
+
+    // Calculate full pixel motion vector position (magnitude is shifted, so it truncates toward zero)
+    if ( MV->x >= 0 )
+        mVx = (MV->x >> Y_MVSHIFT);
+    else 
+        mVx = -((-MV->x) >> Y_MVSHIFT);
+
+    if ( MV->y >= 0 )
+        mVy = (MV->y >> Y_MVSHIFT);
+    else
+        mVy = -((-MV->y) >> Y_MVSHIFT);
+
+	// Calculate the first pointer: the whole-pixel position of the vector.
+	*ReconPtr1 = BufferPointer + (pbi->mbi.blockDxInfo[0].FrameReconStride * mVy) + mVx;
+
+    // Calculate the second pointer: start from the first and step one pixel/row per fractional axis.
+    *ReconPtr2 = *ReconPtr1;
+	ModX = (MV->x & Y_MVMODMASK);
+	ModY = (MV->y & Y_MVMODMASK);
+    
+    if ( ModX )     // fractional horizontal part: move one pixel in the direction of MV->x
+	{
+		if ( MV->x > 0 )
+			*ReconPtr2 += 1;
+		else
+			*ReconPtr2 -= 1;
+	}
+
+    if ( ModY )     // fractional vertical part: move one row in the direction of MV->y
+	{
+		if ( MV->y > 0 )
+			*ReconPtr2 += pbi->mbi.blockDxInfo[0].CurrentReconStride;
+		else
+			*ReconPtr2 -= pbi->mbi.blockDxInfo[0].CurrentReconStride;
+	}
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterErrQPel
+ *
+ *  INPUTS        : PB_INSTANCE *pbi    : Pointer to decoder instance (not used here).
+ *                  UINT8 *NewDataPtr   : Pointer to source block.
+ *                  INT32 SourceStride  : Stride for NewDataPtr.
+ *                  UINT8 *RefDataPtr1  : Pointer to first block position in reference frame.
+ *                  UINT8 *RefDataPtr2  : Pointer to second block position in reference frame.
+ *                  INT32 ReconStride   : Stride handed to FilterBlockBil_8 for the two references.
+ *                  MOTION_VECTOR *MV   : Motion vector supplying the fractional offsets.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Variance of the prediction error (scaled by 2^12)
+ *
+ *  FUNCTION      : Calculates scaled prediction error variance for the
+ *                  QPel interpolated block.
+ *
+ *  SPECIAL NOTES : The interpolated block is written to FilteredBlock, a buffer
+ *                  declared outside this routine, and then read with a stride of 8.
+ ****************************************************************************/
+UINT32 GetInterErrQPel
+(
+    PB_INSTANCE *pbi,
+    UINT8 *NewDataPtr,
+    INT32 SourceStride,
+    UINT8 *RefDataPtr1,
+    UINT8 *RefDataPtr2,
+    INT32 ReconStride,
+    MOTION_VECTOR *MV
+)
+{
+    INT32 ModX, ModY;
+
+    // Compute fractional MV offsets (doubled to the 1/8 point precision required by FilterBlock)
+    ModX = (MV->x & Y_MVMODMASK) << 1;
+    ModY = (MV->y & Y_MVMODMASK) << 1;
+
+    // FilterBlockBil_8 filters the input data to produce an 8x8 Qpel precision prediction block.
+	FilterBlockBil_8 ( RefDataPtr1, RefDataPtr2, FilteredBlock, ReconStride, ModX, ModY );
+
+    // Compute and return population variance as mis-match metric (same buffer passed as both references).
+	return GetInterError ( NewDataPtr, SourceStride, FilteredBlock, FilteredBlock, 8 );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterError2
+ *
+ *  INPUTS        : PB_INSTANCE *pbi   : Pointer to decoder instance.
+ *                  UINT8 *NewDataPtr  : Pointer to current block.
+ *                  UINT8 *RefDataPtr1 : Pointer to reference block.
+ *                  MOTION_VECTOR *MV  : Pointer to motion vector.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error variance.
+ *
+ *  FUNCTION      : Calculates a difference error score between two blocks.
+ *
+ *  SPECIAL NOTES : Source and recon strides are always read from blockDxInfo[0],
+ *                  whichever block NewDataPtr refers to.
+ ****************************************************************************/
+UINT32 GetInterError2 ( PB_INSTANCE *pbi, UINT8 *NewDataPtr, UINT8 *RefDataPtr1, MOTION_VECTOR *MV )
+{
+	UINT8 *ReconDataPtr1;
+	UINT8 *ReconDataPtr2;
+	UINT32 err = 0;
+
+	// Get the two reference pointers that bracket the motion vector position
+	GetReconReferencePoints( pbi, RefDataPtr1, &ReconDataPtr1, &ReconDataPtr2, MV );
+
+	// Fractional vectors go through the filtered (QPel) path, whole-pixel vectors are compared directly
+	if ( (MV->x & Y_MVMODMASK) || (MV->y & Y_MVMODMASK) )
+	{
+		err = GetInterErrQPel ( pbi, NewDataPtr, pbi->mbi.blockDxInfo[0].CurrentSourceStride, ReconDataPtr1, ReconDataPtr2, pbi->mbi.blockDxInfo[0].CurrentReconStride, MV);
+	}
+	else
+	{
+		err = GetInterError ( NewDataPtr, pbi->mbi.blockDxInfo[0].CurrentSourceStride, ReconDataPtr1, ReconDataPtr2, pbi->mbi.blockDxInfo[0].CurrentReconStride ); 
+	}
+	return err;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterError2_slow
+ *
+ *  INPUTS        : PB_INSTANCE *pbi   : Pointer to decoder instance.
+ *                  UINT8 *NewDataPtr  : Pointer to current block.
+ *                  UINT8 *RefDataPtr1 : Pointer to reference block.
+ *                  MOTION_VECTOR *MV  : Pointer to motion vector.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error variance.
+ *
+ *  FUNCTION      : Calculates a difference error score between two blocks.
+ *
+ *  SPECIAL NOTES : This function works on a loop filtered version of the data,
+ *                  held in pbi->LoopFilteredBlock and addressed with a stride of 16.
+ *
+ ****************************************************************************/
+UINT32 GetInterError2_slow ( PB_INSTANCE *pbi, UINT8 *NewDataPtr, UINT8 *RefDataPtr1, MOTION_VECTOR *MV )
+{
+    INT32  ModX, ModY;
+	UINT32 err = 0;
+	UINT8 *TempPtr1 = pbi->LoopFilteredBlock + (2*16+2);	// presumably row 2, column 2 of the 16-wide buffer -- TODO confirm
+	UINT8 *TempPtr2 = TempPtr1;
+
+	// This function produces a filtered copy of the appropriate part of the 
+	// reconstruction buffer in pbi->LoopFilteredBlock[].
+	VP6_PredictFiltered ( pbi, RefDataPtr1, MV->x, MV->y, Y_MVSHIFT );
+
+	// Pull off the fractional bits
+	ModX = (MV->x & Y_MVMODMASK);
+	ModY = (MV->y & Y_MVMODMASK);
+
+	// Update the second reference pointer in respect of the fractional X bits.
+    if ( ModX )
+	{
+		if ( MV->x >= 0 )	// ModX != 0 implies MV->x != 0, so this behaves like "> 0"
+			TempPtr2 += 1;
+		else
+			TempPtr2 -= 1;
+	}
+
+	// Update the second reference pointer in respect of the fractional Y bits.
+    if ( ModY )
+	{
+		if ( MV->y > 0 )
+			TempPtr2 += 16;
+		else
+			TempPtr2 -= 16;
+	}
+
+	//  If any of the fractional bits are set use GetInterErrQPel() else GetInterError()
+	if ( ModX || ModY )
+		err = GetInterErrQPel(pbi,NewDataPtr,pbi->mbi.blockDxInfo[0].CurrentSourceStride, TempPtr1,TempPtr2,16, MV );
+	else
+		err = GetInterError(NewDataPtr,pbi->mbi.blockDxInfo[0].CurrentSourceStride, TempPtr1,TempPtr2,16);
+
+	return err;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetInterErr
+ *
+ *  INPUTS        : UINT8 *NewDataPtr  : Pointer to current block.
+ *                  INT32 SourceStride : Stride for NewDataPtr block.
+ *                  UINT8 *RefDataPtr1 : Pointer to reference block.
+ *                  UINT8 *RefDataPtr2 : Pointer to reference block.
+ *                  INT32 ReconStride  : Stride for RefDataPtr1 & RefDataPtr2.
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : UINT32: Error variance (scaled by 2^12).
+ *
+ *  FUNCTION      : Calculates the variance of the difference between the
+ *                  NewDataPtr block and the average of the RefDataPtr1 & 
+ *                  RefDataPtr2 blocks (RefDataPtr1 alone when both are equal).
+ *
+ *  SPECIAL NOTES : Computed error variance is multiplied by 2^12 (4096).
+ *                  XX_LUT is indexed with signed differences (may be negative).
+ ****************************************************************************/
+UINT32 GetInterErr
+(
+    UINT8 * NewDataPtr,
+    INT32 SourceStride,
+    UINT8 * RefDataPtr1,
+    UINT8 * RefDataPtr2,
+    INT32 ReconStride 
+)
+{
+    UINT32  i;
+    INT32   XSum=0;
+    INT32   XXSum=0;
+    INT32   DiffVal;
+    INT32   AbsRefOffset = abs((int)(RefDataPtr1 - RefDataPtr2));    // only ever tested against zero below
+
+    // Identical reference pointers: compare directly against RefDataPtr1 (no interpolation)
+    if ( AbsRefOffset == 0 )
+    {
+        for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+        {
+            DiffVal = ((int)NewDataPtr[0]) - (int)RefDataPtr1[0];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[1]) - (int)RefDataPtr1[1];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[2]) - (int)RefDataPtr1[2];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[3]) - (int)RefDataPtr1[3];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[4]) - (int)RefDataPtr1[4];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[5]) - (int)RefDataPtr1[5];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[6]) - (int)RefDataPtr1[6];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[7]) - (int)RefDataPtr1[7];
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            // Step to next row of block.
+            NewDataPtr += SourceStride;
+            RefDataPtr1 += ReconStride;
+        }
+    }
+    // Simple two reference interpolation: truncating average of the two reference pixels
+    else
+    {
+        for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+        {
+            DiffVal = ((int)NewDataPtr[0]) - (((int)RefDataPtr1[0] + (int)RefDataPtr2[0]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[1]) - (((int)RefDataPtr1[1] + (int)RefDataPtr2[1]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[2]) - (((int)RefDataPtr1[2] + (int)RefDataPtr2[2]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[3]) - (((int)RefDataPtr1[3] + (int)RefDataPtr2[3]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[4]) - (((int)RefDataPtr1[4] + (int)RefDataPtr2[4]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[5]) - (((int)RefDataPtr1[5] + (int)RefDataPtr2[5]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[6]) - (((int)RefDataPtr1[6] + (int)RefDataPtr2[6]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            DiffVal = ((int)NewDataPtr[7]) - (((int)RefDataPtr1[7] + (int)RefDataPtr2[7]) / 2);
+            XSum += DiffVal;
+            XXSum += XX_LUT[DiffVal];
+
+            // Step to next row of block.
+            NewDataPtr += SourceStride;
+            RefDataPtr1 += ReconStride;
+            RefDataPtr2 += ReconStride;
+        }
+    }
+
+    // 64*sum(d^2) - sum(d)^2: 4096x the population variance, assuming XX_LUT[d] == d*d and 64 samples -- TODO confirm.
+    return (( (XXSum<<6) - XSum*XSum ));
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :  GetSumAbsDiffs
+ *
+ *  INPUTS        :  UINT8 *NewDataPtr  : Pointer to current block.
+ *                   INT32 SourceStride : Stride for NewDataPtr block.
+ *                   UINT8 *RefDataPtr  : Pointer to reference block.
+ *                   INT32 ReconStride  : Stride for RefDataPtr.
+ *                   UINT32 ErrorSoFar  : Error for MB so far.
+ *                   UINT32 BestSoFar   : Best error found so far.
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  ErrorSoFar plus the sum of absolute differences (see notes).
+ *
+ *  FUNCTION      :  Calculates the sum of the absolute differences.
+ *
+ *  SPECIAL NOTES :  ErrorSoFar seeds the running total. The total is tested
+ *                   against BestSoFar after every row; once it is exceeded the
+ *                   routine bails out and returns the partial total reached.
+ *
+ ****************************************************************************/
+UINT32 GetSumAbsDiffs
+(
+    UINT8 * NewDataPtr,
+    INT32 SourceStride,
+    UINT8  * RefDataPtr,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32  i;
+    UINT32  DiffVal = ErrorSoFar;
+
+    for ( i=0; i < BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[0]) - ((int)RefDataPtr[0]) ];    // index is a signed difference
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[1]) - ((int)RefDataPtr[1]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[2]) - ((int)RefDataPtr[2]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[3]) - ((int)RefDataPtr[3]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[4]) - ((int)RefDataPtr[4]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[5]) - ((int)RefDataPtr[5]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[6]) - ((int)RefDataPtr[6]) ];
+        DiffVal += AbsX_LUT[ ((int)NewDataPtr[7]) - ((int)RefDataPtr[7]) ];
+
+        if ( DiffVal > BestSoFar )      // already worse than the best candidate: stop early
+            break;
+
+        // Step to next row of block.
+        NewDataPtr += SourceStride;
+        RefDataPtr += ReconStride;
+    }
+    return DiffVal;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :  GetHalfPixelSumAbsDiffs
+ *
+ *  INPUTS        :  UINT8 *SrcData     : Pointer to current block.
+ *                   INT32 SourceStride : Stride for SrcData block.
+ *                   UINT8 *RefDataPtr1 : Pointer to first reference block.
+ *                   UINT8 *RefDataPtr2 : Pointer to second reference block.
+ *                   INT32 ReconStride  : Stride for RefDataPtr1 & RefDataPtr2.
+ *                   UINT32 ErrorSoFar  : Error for MB so far.
+ *                   UINT32 BestSoFar   : Best error found so far.
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: Sum absolute differences at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      :  Calculates the sum of the absolute differences against
+ *                   the truncating average of the two reference blocks.
+ *
+ *  SPECIAL NOTES :  ErrorSoFar seeds the running total. The total is tested
+ *                   against BestSoFar after every row; once it is exceeded the
+ *                   routine bails out and returns the partial total reached.
+ *
+ ****************************************************************************/
+UINT32 GetHalfPixelSumAbsDiffs
+(
+    UINT8 * SrcData,
+    INT32 SourceStride,
+    UINT8 * RefDataPtr1,
+    UINT8 * RefDataPtr2,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+
+    UINT32  i;
+    UINT32  DiffVal = ErrorSoFar;
+
+    for ( i=0; i < BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        DiffVal += AbsX_LUT[ ((int)SrcData[0]) - (((int)RefDataPtr1[0] + (int)RefDataPtr2[0]) / 2) ];    // index is a signed difference
+        DiffVal += AbsX_LUT[ ((int)SrcData[1]) - (((int)RefDataPtr1[1] + (int)RefDataPtr2[1]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[2]) - (((int)RefDataPtr1[2] + (int)RefDataPtr2[2]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[3]) - (((int)RefDataPtr1[3] + (int)RefDataPtr2[3]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[4]) - (((int)RefDataPtr1[4] + (int)RefDataPtr2[4]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[5]) - (((int)RefDataPtr1[5] + (int)RefDataPtr2[5]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[6]) - (((int)RefDataPtr1[6] + (int)RefDataPtr2[6]) / 2) ];
+        DiffVal += AbsX_LUT[ ((int)SrcData[7]) - (((int)RefDataPtr1[7] + (int)RefDataPtr2[7]) / 2) ];
+
+        if ( DiffVal > BestSoFar )      // already worse than the best candidate: stop early
+            break;
+
+        // Step to next row of block.
+        SrcData += SourceStride;
+        RefDataPtr1 += ReconStride;
+        RefDataPtr2 += ReconStride;
+    }
+    return DiffVal;
+
+}
+/****************************************************************************
+ *
+ *  ROUTINE       :     GetIntraErrorC
+ *
+ *  INPUTS        :     UINT8 *DataPtr      : Pointer to intra block.
+ *                      INT32  SourceStride : Block stride.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Intra frame block variance (scaled by 2^12)
+ *
+ *  FUNCTION      :     Calculates a variance score for the block.
+ *
+ *  SPECIAL NOTES :     Computed variance value is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetIntraErrorC ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32  i;
+    UINT32  XSum=0;
+    UINT32  XXSum=0;
+    UINT8   *DiffPtr;
+
+    DiffPtr = DataPtr;
+
+    // Loop expanded out for speed.
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        // Accumulate sum and XX_LUT value for each of the 8 pixels in this row.
+        XSum  += DiffPtr[0];
+        XXSum += XX_LUT[DiffPtr[0]];
+        XSum  += DiffPtr[1];
+        XXSum += XX_LUT[DiffPtr[1]];
+        XSum  += DiffPtr[2];
+        XXSum += XX_LUT[DiffPtr[2]];
+        XSum  += DiffPtr[3];
+        XXSum += XX_LUT[DiffPtr[3]];
+        XSum  += DiffPtr[4];
+        XXSum += XX_LUT[DiffPtr[4]];
+        XSum  += DiffPtr[5];
+        XXSum += XX_LUT[DiffPtr[5]];
+        XSum  += DiffPtr[6];
+        XXSum += XX_LUT[DiffPtr[6]];
+        XSum  += DiffPtr[7];
+        XXSum += XX_LUT[DiffPtr[7]];
+
+        // Step to next row of block.
+        DiffPtr += SourceStride;
+    }
+
+    // 64*sum(x^2) - sum(x)^2: 4096x the population variance, assuming XX_LUT[x] == x*x and 64 samples -- TODO confirm.
+    return ((XXSum<<6) - XSum*XSum);
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :  GetSumAbsDiffs16
+ *
+ *  INPUTS        :  UINT8 *SrcPtr      : Pointer to current block.
+ *                   INT32 SourceStride : Stride for SrcPtr block.
+ *                   UINT8 *RefPtr      : Pointer to reference block.
+ *                   INT32 ReconStride  : Stride for RefPtr.
+ *                   UINT32 ErrorSoFar  : Error for MB so far (NOT USED).
+ *                   UINT32 BestSoFar   : Best error found so far (NOT USED).
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: SAD for the 16x16 block
+ *
+ *  FUNCTION      :  Calculates the sum of the absolute differences for the 16x16
+ *                   block by chaining GetSAD over its four 8x8 quadrants.
+ *
+ *  SPECIAL NOTES :  HUGE_ERROR (not BestSoFar) is passed as the limit to GetSAD.
+ *
+ ****************************************************************************/
+UINT32 GetSumAbsDiffs16
+(
+    UINT8 *SrcPtr,
+    INT32 SourceStride,
+    UINT8 *RefPtr,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 Error = 0;
+
+    Error = GetSAD ( SrcPtr,                  SourceStride, RefPtr,                 ReconStride, Error, HUGE_ERROR );    // top-left
+    Error = GetSAD ( SrcPtr+8,                SourceStride, RefPtr+8,               ReconStride, Error, HUGE_ERROR );    // top-right
+    Error = GetSAD ( SrcPtr+8*SourceStride,   SourceStride, RefPtr+8*ReconStride,   ReconStride, Error, HUGE_ERROR );    // bottom-left
+    Error = GetSAD ( SrcPtr+8*SourceStride+8, SourceStride, RefPtr+8*ReconStride+8, ReconStride, Error, HUGE_ERROR );    // bottom-right
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     GetHalfPixelSumAbsDiffs16
+ *
+ *  INPUTS        :  UINT8 *SrcPtr       : Pointer to current block.
+ *                   INT32 SourceStride  : Stride for SrcPtr block.
+ *                   UINT8 *RefPtr       : Pointer to first reference block.
+ *                   UINT8 *RefPtr2      : Pointer to second reference block.
+ *                   INT32 ReconStride   : Stride for RefPtr & RefPtr2.
+ *                   UINT32 ErrorSoFar   : Error for MB so far (NOT USED).
+ *                   UINT32 BestSoFar    : Best error found so far (NOT USED).
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: SAD at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      :  Calculates the sum of the absolute differences between
+ *                   the block pointed to by SrcPtr and the half pixel 
+ *                   interpolation block created from RefPtr & RefPtr2.
+ *
+ *  SPECIAL NOTES :  Chains GetSadHalfPixel over the four 8x8 quadrants, passing
+ *                   HUGE_ERROR (not BestSoFar) as the limit.
+ ****************************************************************************/
+UINT32 GetHalfPixelSumAbsDiffs16
+(
+    UINT8 *SrcPtr,
+    INT32 SourceStride,
+    UINT8 *RefPtr,
+    UINT8 *RefPtr2,
+    INT32 ReconStride,
+    UINT32 ErrorSoFar,
+    UINT32 BestSoFar
+)
+{
+    UINT32 Error = 0;
+
+    Error = GetSadHalfPixel ( SrcPtr, SourceStride, RefPtr, RefPtr2, ReconStride, Error, HUGE_ERROR );    // top-left
+
+    Error = GetSadHalfPixel ( SrcPtr+8,SourceStride, RefPtr+8, RefPtr2+8, ReconStride, Error, HUGE_ERROR );    // top-right
+
+    Error = GetSadHalfPixel ( SrcPtr+8*SourceStride, SourceStride, RefPtr+8*ReconStride ,
+        RefPtr2+8*ReconStride, ReconStride, Error, HUGE_ERROR );    // bottom-left
+
+    Error = GetSadHalfPixel( SrcPtr+8*SourceStride+8, SourceStride,
+        RefPtr+8*ReconStride+8, RefPtr2+8*ReconStride+8,
+        ReconStride, Error, HUGE_ERROR );    // bottom-right
+
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :  GetMBIntraError
+ *
+ *  INPUTS        :  CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       :  None.
+ *
+ *  RETURNS       :  UINT32: Sum of the GetIntraError scores of blocks 0..3.
+ *
+ *  FUNCTION      :  Calculates the intra-frame variance for the MB.
+ *
+ *  SPECIAL NOTES :  Only considers the four Y blocks in the MB (chroma
+ *                   ignored).
+ *
+ ****************************************************************************/
+UINT32 GetMBIntraError ( CP_INSTANCE *cpi )
+{
+    UINT32  i;
+    UINT32 IntraError = 0;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    // Add together the intra errors for the four Y blocks in the MB (each with its own source offset and stride)
+    for ( i=0; i<4; i++ )
+        IntraError += GetIntraError( &cpi->yuv1ptr[pbi->mbi.blockDxInfo[i].Source], pbi->mbi.blockDxInfo[i].CurrentSourceStride );
+    return IntraError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :  GetMBInterError
+ *
+ *  INPUTS        :  CP_INSTANCE *cpi   : Pointer to encoder instance.
+ *                   UINT8 *SrcPtr      : Base pointer indexed by blockDxInfo[i].Source.
+ *                   UINT8 *RefPtr      : Base pointer indexed by blockDxInfo[i].thisRecon.
+ *                   MOTION_VECTOR *MV  : Motion vector to be used.
+ *
+ *  OUTPUTS       :  UINT32 *BlockError : Array (4 entries) to hold individual block variances.
+ *
+ *  RETURNS       :  UINT32: Inter-frame variance for the MB (scaled by 2^12).
+ *
+ *  FUNCTION      :  Calculates the variance of the difference between
+ *                   the MB pointed to by SrcPtr & the MB found by
+ *                   applying MV to RefPtr.
+ *
+ *  SPECIAL NOTES :  Variance is scaled by 2^12 (4096). Chroma is ignored
+ *                   when computing the variance.
+ *
+ ****************************************************************************/
+UINT32 GetMBInterError
+(
+    CP_INSTANCE *cpi,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 *BlockError
+)
+{
+    int i;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+    for ( i=0; i<4; i++ )
+    {
+        // Select either GetInterError2 which does not do loop filtering
+		// or GetInterError2_slow which does based on speed and profile
+		// constraints. The test does not depend on i.
+		if( (cpi->pb.UseLoopFilter == NO_LOOP_FILTER) || 
+			(cpi->Speed > 8) || 
+			(cpi->pb.VpProfile == SIMPLE_PROFILE) )
+		{
+	        BlockError[i] = GetInterError2 ( pbi,
+		            &SrcPtr[pbi->mbi.blockDxInfo[i].Source],
+				    &RefPtr[pbi->mbi.blockDxInfo[i].thisRecon],
+				    MV );
+		}
+		else
+		{
+	        BlockError[i] = GetInterError2_slow ( pbi,
+		            &SrcPtr[pbi->mbi.blockDxInfo[i].Source],
+				    &RefPtr[pbi->mbi.blockDxInfo[i].thisRecon],
+				    MV );
+		}
+
+    }
+    return BlockError[0]+BlockError[1]+BlockError[2]+BlockError[3];
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FindMvVia3StepSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE	Mode     : Coding mode for the block.
+ *                  UINT8 *SrcPtr        : Pointer to source block.
+ *                  UINT8 *RefPtr        : Pointer to block position in reference frame.
+ *                  UINT32 BlockSize     : Size of the block (8 selects GetSAD, otherwise GetSAD16).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Best MV found for block in reference frame.
+ *                  UINT8 **BestBlockPtr : Pointer-to-pointer to best block in ref frame.
+ *
+ *  RETURNS       : UINT32: error of the best match (SAD plus MV cost bias unless 0,0 won).
+ *
+ *  FUNCTION      : Finds block in reference frame that best matches the SrcPtr 
+ *                  block using a hierarchical search.
+ *
+ *  SPECIAL NOTES : The actual number of steps in the search varies depending
+ *                  on the maximum possible MV size. Motion vectors are
+ *                  stored in 1/4 pixel units. Also updates TotError and ErrCount.
+ *
+ ****************************************************************************/
+UINT32 FindMvVia3StepSearch
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    INT32  i;
+    INT32  step;
+	UINT32 EstMvBits;			// Actually bits * 64
+    INT32  SourceStride;
+    INT32  ReconStride;
+	INT32  FirstStepOffset;
+	MOTION_VECTOR DifferentialVector;
+    INT32  x=0, y=0;
+    INT32  SearchSite=0;
+    UINT32 Error = 0;
+    UINT32 MinError = HUGE_ERROR;
+	INT32  MvOffsetX = 0;
+	INT32  MvOffsetY = 0;
+    UINT8  *CandidateBlockPtr = NULL;
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*GetSad)( UINT8 * SrcPtr, INT32 SourceStride, UINT8  * RefPtr, INT32 ReconStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+
+	// Work out if we will code the vector relative to 0,0 or nearest
+	if ( Mode == CODE_INTER_PLUS_MV )
+	{
+		if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestInterMVect.x;
+			MvOffsetY = pbi->mbi.NearestInterMVect.y;
+		}
+	}
+	else	// Golden frame (taken for every mode other than CODE_INTER_PLUS_MV)
+	{
+		if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+			MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+		}
+	}
+
+    if (BlockSize == 8)
+    {
+        GetSad = GetSAD;
+//sjlhack -- always assuming y plane (strides are read from blockDxInfo[0])
+        SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        ReconStride  = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+    else
+    {
+        // get sad 16 function works for a whole macroblock interlaced only if pixels per line
+        // works frame wise
+        GetSad = GetSAD16;
+        ReconStride  = pbi->Configuration.YStride;
+        SourceStride = pbi->Configuration.VideoFrameWidth;
+    }
+    
+    // Check the 0,0 candidate (no MV cost bias is added to this one).
+    Error = GetSad( SrcPtr, SourceStride, RefPtr, ReconStride, 0, HUGE_ERROR );
+
+    MinError = Error;
+    *BestBlockPtr = RefPtr;
+    x = 0;
+    y = 0;
+    MV->x = 0;
+    MV->y = 0;
+
+	// Set up control of how many steps to take and size of first step
+    // For larger images use a longer initial step and hence more search steps
+    if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+    {
+		BOOL LongVectorsAllowed= TRUE; 
+
+		if ( LongVectorsAllowed  &&
+		     ( (MvOffsetX >= 48) || (MvOffsetX <= -48) || (MvOffsetY >= 48) || (MvOffsetY <= -48) )  )
+		{
+			FirstStepOffset = 0;
+		}
+		else if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else 
+			FirstStepOffset = 2;
+	}			
+    else if ( cpi->pb.Configuration.VideoFrameWidth >= 320 )	// currently identical to the final else branch
+	{
+		if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else 
+			FirstStepOffset = 2;
+	}
+    else
+	{
+		if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else
+			FirstStepOffset = 2;
+	}
+	SearchSite = FirstStepOffset * 8;	// 8 search sites per step: skip the sites of the steps not taken
+
+    // Proceed through the appropriate number of steps.
+    for (  step=FirstStepOffset; step<cpi->MVSearchSteps; step++ )
+    {
+        // Search the 8-neighbours at distance pertinent to current step.
+        for ( i=0; i<8; i++ )
+        {
+            // Set pointer to next candidate matching block.
+            CandidateBlockPtr = RefPtr + cpi->MVPixelOffsetY[SearchSite];
+
+            // Get the block error score (MinError is handed over as the BestSoFar limit).
+            Error = GetSad( SrcPtr, SourceStride, CandidateBlockPtr,ReconStride,0, MinError );
+
+			// Calculate differential vector in Qpel units (candidate position minus the nearest-MV offset)
+			DifferentialVector.x = (4 * (MV->x + cpi->MVOffsetX[SearchSite])) -	MvOffsetX;
+			DifferentialVector.y = (4 * (MV->y + cpi->MVOffsetY[SearchSite])) - MvOffsetY;
+
+			EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]	// index may be negative
+				      + cpi->EstMvCostPtrY[DifferentialVector.y];
+
+			Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;	// fixed bias from the estimated MV cost
+			Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;	// further bias scaled by the error accumulated so far
+
+            if ( Error < MinError )
+            {
+                // Remember best match.
+                MinError = Error;
+                *BestBlockPtr = CandidateBlockPtr;
+
+                // Where is it.
+                x = MV->x + cpi->MVOffsetX[SearchSite];
+                y = MV->y + cpi->MVOffsetY[SearchSite];
+            }
+
+            // Move to next search location.
+            SearchSite += 1;
+        }
+
+        // Move to best location this step.
+        RefPtr = *BestBlockPtr;
+        MV->x = x;
+        MV->y = y;
+    }
+
+    // Factor vectors to 1/4 pixel resolution.
+    MV->x = (MV->x * 4);
+    MV->y = (MV->y * 4);
+
+	TotError += MinError;	// TotError and ErrCount are declared outside this routine
+	ErrCount++;
+
+    return MinError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FindMvViaExhaustSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE	Mode     : Coding mode for the block.
+ *                  UINT8 *SrcPtr        : Pointer to source block.
+ *                  UINT8 *RefPtr        : Pointer to block position in reference frame.
+ *                  UINT32 BlockSize     : Size of the block.
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Best MV found for block in reference frame.
+ *                  UINT8 **BestBlockPtr : Pointer-to-pointer to best block in ref frame.
+ *
+ *  RETURNS       : UINT32: SAD of the best matching block plus its MV cost bias.
+ *
+ *  FUNCTION      : Finds block in reference frame that best matches the SrcPtr 
+ *                  block using an exhaustive search.
+ *
+ *  SPECIAL NOTES : Motion vectors are stored in 1/4 pixel units.
+ *                  Only whole-pixel positions are tested here.
+ ****************************************************************************/
+UINT32 FindMvViaExhaustSearch
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    INT32  i,j;
+    UINT32 Error;
+	UINT32 EstMvBits;			  // Actually bits * 64
+	INT32  MvMaxExtent; 		  // Number of search sites per row and per column
+	INT32  HalfMvMaxExtent; 	  // Search window reaches this many whole pixels either side of RefPtr
+    INT32  SourceStride;
+    INT32  ReconStride;
+	MOTION_VECTOR ThisMv;
+	MOTION_VECTOR DifferentialVector;
+	INT32  MvOffsetX = 0;
+	INT32  MvOffsetY = 0;
+    UINT32 MinError = HUGE_ERROR;
+    UINT8  *CandidateBlockPtr=NULL;
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*GetSad)( UINT8 * SrcPtr, INT32 SourceStride, UINT8  * RefPtr, INT32 ReconStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+
+	// Work out if we will code the vector relative to 0,0 or nearest
+	if ( Mode == CODE_INTER_PLUS_MV )
+	{
+		if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestInterMVect.x;
+			MvOffsetY = pbi->mbi.NearestInterMVect.y;
+		}
+	}
+	else	// Golden frame
+	{
+		if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+			MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+		}
+	}
+
+    // For larger images allow a longer search
+	// NOTE: 
+    // MvOffsetX and MvOffsetY are in 1/4 pel units.
+    if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+	{
+	    MvMaxExtent =  63;				
+	}
+    else if ( cpi->pb.Configuration.VideoFrameWidth >= 320 )
+	{
+		// Same extent as the small-image case below
+        MvMaxExtent =  31;				
+	}
+    else
+        MvMaxExtent =  31;				
+
+	HalfMvMaxExtent =  MvMaxExtent/2;
+
+    if (BlockSize == 8)
+    {
+        GetSad = GetSAD;
+//sjlhack -- always assuming y plane
+        SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        ReconStride  = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+    else
+    {
+        // get sad 16 function works for a whole macroblock interlaced only if pixels per line
+        // works frame wise
+        GetSad = GetSAD16;
+        ReconStride  = pbi->Configuration.YStride;
+        SourceStride = pbi->Configuration.VideoFrameWidth;
+    }
+    // Back up to the first search site: HalfMvMaxExtent rows and columns before RefPtr.
+    RefPtr = RefPtr - (HalfMvMaxExtent * pbi->Configuration.YStride) - HalfMvMaxExtent;
+
+    // Search each pixel aligned site
+    for ( i=0; i<(INT32)MvMaxExtent; i++ )
+    {
+        // Starting position in row
+        CandidateBlockPtr = RefPtr;
+
+        for ( j=0; j<(INT32)MvMaxExtent; j++ )
+        {
+			// *4 converts to 1/4 pixel resolution
+			ThisMv.x = 4 * (j - HalfMvMaxExtent);
+			ThisMv.y = 4 * (i - HalfMvMaxExtent);
+
+			// Get the block error score.
+            Error = GetSad( SrcPtr, SourceStride, CandidateBlockPtr, ReconStride,0, HUGE_ERROR );
+
+			// Should we code relative to 0,0 or nearest
+		    DifferentialVector.x = ThisMv.x - MvOffsetX;
+			DifferentialVector.y = ThisMv.y - MvOffsetY;
+
+			EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]
+				      + cpi->EstMvCostPtrY[DifferentialVector.y];
+			// Bias the SAD by the estimated cost of coding this vector
+			Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+			Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;
+
+            // Was this the best so far
+            if ( Error < MinError )
+            {
+                MinError = Error;
+                *BestBlockPtr = CandidateBlockPtr;
+                MV->x = ThisMv.x;
+                MV->y = ThisMv.y;
+            }
+
+            // Move to the next site
+            CandidateBlockPtr++;
+        }
+
+        // Move on to the next row.
+        RefPtr += pbi->Configuration.YStride;
+    }
+    return MinError;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FindBestFractionalPixelStep
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE	Mode     : Coding mode for the block.
+ *                  UINT8 *SrcPtr        : Pointer to source block.
+ *                  UINT8 *RefPtr        : Pointer to block position in reference frame.
+ *                  UINT32 BlockSize     : Size of the block.
+ *                  UINT32 *MinError     : Pointer to best error found to date (updated on improvement).
+ *                  UINT8 BitShift       : Number of bits to shift the MV components
+ *                                         by (depending whether 1/2 or 1/4 pel search)
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Best MV found for block in reference frame.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Finds the best fractional (1/2 or 1/4) pixel MV that
+ *                  gives the best matching block in the reference frame.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void FindBestFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+	UINT8  BitShift
+)
+{
+    UINT32 i, j;
+    UINT32 nBlocks;
+    INT32  ModX, ModY;
+	UINT32 EstMvBits;					// bits * 64
+    INT32  SourceStride;
+    INT32  ReconStride;
+    INT32  BlockOffset[4];
+    UINT8 *SourceBlock[4];
+    UINT8 *RefDataPtr1;
+    UINT8 *RefDataPtr2;
+	MOTION_VECTOR DifferentialVector;
+
+    UINT32 Error = 0;
+    UINT8  BestOffset = 0;				// stays 0 if no candidate improves on *MinError
+	INT32  MvOffsetX = 0;
+	INT32  MvOffsetY = 0;
+    MOTION_VECTOR TmpVector = {0, 0};
+    PB_INSTANCE *pbi = &cpi->pb;
+
+	// Work out if we will code the vector relative to 0,0 or nearest
+	if ( Mode == CODE_INTER_PLUS_MV )
+	{
+		if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestInterMVect.x;
+			MvOffsetY = pbi->mbi.NearestInterMVect.y;
+		}
+	}
+	else	// Golden frame
+	{
+		if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+			MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+		}
+	}
+
+//sjlhack -- always assuming y plane
+    SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+    ReconStride = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    if (BlockSize == 8)
+    {  
+        // Only 1 block to process
+        nBlocks = 1;
+        BlockOffset[0] = 0;
+        SourceBlock[0] = SrcPtr;
+    }
+    else
+    {
+        // 4 8x8s to process--may be interlaced!
+		nBlocks = 4;
+		if ( pbi->mbi.Interlaced == 1 )
+		{
+			SourceBlock[0] = SrcPtr;
+			SourceBlock[1] = SrcPtr + 8;
+			SourceBlock[2] = SrcPtr + pbi->Configuration.VideoFrameWidth;
+			SourceBlock[3] = SourceBlock[2] + 8;
+
+			BlockOffset[0] = 0;
+			BlockOffset[1] = 8;
+			BlockOffset[2] = pbi->Configuration.YStride - 8;
+			BlockOffset[3] = 8;
+		}
+		else
+		{
+			SourceBlock[0] = SrcPtr;
+			SourceBlock[1] = SrcPtr + 8;
+			SourceBlock[2] = SrcPtr + (8*pbi->Configuration.VideoFrameWidth);
+			SourceBlock[3] = SourceBlock[2] + 8;
+
+			BlockOffset[0] = 0;
+			BlockOffset[1] = 8;
+			BlockOffset[2] = (8 * pbi->Configuration.YStride) - 8;
+			BlockOffset[3] = 8;
+		}
+    }
+    // BlockOffset[] holds increments from the previous block; they are accumulated in the loop below.
+    // Examine eight positions around a central position
+    for ( i = 1; i < 9; i++ )
+	{
+        // MV holds best mv in 1/4 pixel units
+		TmpVector.x = MV->x + (cpi->SubPixelXOffset[i] << BitShift);
+		TmpVector.y = MV->y + (cpi->SubPixelYOffset[i] << BitShift);
+
+		// Get the two reference pointers for the motion vector
+		GetReconReferencePoints( pbi, RefPtr, &RefDataPtr1, &RefDataPtr2, &TmpVector );
+        
+        // Filter number is based on 1/8th pixel positions
+		ModX = (TmpVector.x & Y_MVMODMASK) << 1;
+		ModY = (TmpVector.y & Y_MVMODMASK) << 1;
+
+        // Accumulate the filtered SAD over all blocks for this candidate
+        Error = 0;
+
+        for ( j=0; j<nBlocks; j++ )
+        {
+            // Step both reference pointers on to block j
+            RefDataPtr1 += BlockOffset[j];
+            RefDataPtr2 += BlockOffset[j];
+
+            Error += FiltBlockBilGetSad(SourceBlock[j], SourceStride, RefDataPtr1, RefDataPtr2, ReconStride, ModX, ModY,HUGE_ERROR);
+        }
+		
+        // Should we code relative to 0,0 or nearest
+		DifferentialVector.x = TmpVector.x - MvOffsetX;
+		DifferentialVector.y = TmpVector.y - MvOffsetY;
+
+		EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]
+				  + cpi->EstMvCostPtrY[DifferentialVector.y];
+		// Bias the SAD by the estimated cost of coding this vector
+		Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+		Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;
+
+		if ( Error < *MinError )
+		{
+			BestOffset = (UINT8)i;
+			*MinError = Error;
+		}
+	}
+    // NOTE(review): with BestOffset == 0 this presumably adds (0,0) -- confirm SubPixelX/YOffset[0] are zero.
+    // Set the returned vector
+    MV->x += (cpi->SubPixelXOffset[BestOffset] << BitShift);
+    MV->y += (cpi->SubPixelYOffset[BestOffset] << BitShift);
+
+    return;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetMBMVInterError
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE	Mode     : Coding mode for the block.
+ *                  UINT8 *RefFramePtr   : Pointer to reference frame (indexed via blockDxInfo[0].thisRecon).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Best MV found for block in reference frame.
+ *                  UINT32 *TempErrors   : Array to hold variances of individual Y-blocks.
+ *
+ *  RETURNS       : UINT32: Prediction error variance for best matching block.
+ *
+ *  FUNCTION      : Calculates a MB MV using the search installed in cpi->FindMvViaSearch.
+ *
+ *  SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetMBMVInterError
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *RefFramePtr,
+    MOTION_VECTOR *MV,
+    UINT32 *TempErrors
+)
+{
+    UINT32  MinError;
+    UINT32  InterMVError = 0;
+
+    PB_INSTANCE *pbi=&cpi->pb;
+//sjlhack -- always assuming y plane
+    UINT8   *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT8   *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[0].thisRecon];
+
+    UINT8   *BestBlockPtr=NULL;	// filled in by the search; not read afterwards in this function
+    
+    MinError = cpi->FindMvViaSearch ( cpi, Mode, SrcPtr,RefPtr,MV, &BestBlockPtr,16);
+    // Each fractional pel refinement is attempted only while the error still exceeds HP_THRESH.
+ 	if ( MinError > HP_THRESH )
+	   cpi->FindBestHalfPixelMv ( cpi, Mode, SrcPtr, RefPtr,  MV,  16, &MinError, 1 );
+    
+    if ( MinError > HP_THRESH )
+	   cpi->FindBestQuarterPixelMv( cpi, Mode, SrcPtr, RefPtr,  MV,  16, &MinError, 0 );
+    
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    // Get the error score for the chosen motion vector as a variance.
+    InterMVError = GetMBInterError( cpi, cpi->yuv1ptr, RefFramePtr, MV, TempErrors );
+
+    // Return score of best matching block.
+    return InterMVError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetMBMVExhaustiveSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE	Mode     : Coding mode for the block.
+ *                  UINT8 *RefFramePtr   : Pointer to reference frame (indexed via blockDxInfo[0].thisRecon).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Best MV found for block in reference frame.
+ *                  UINT32 *TempErrors   : Array to hold variances of individual Y-blocks.
+ *
+ *  RETURNS       : UINT32: Prediction error variance for best matching block.
+ *
+ *  FUNCTION      : Calculates a MB MV using an exhaustive search.
+ *
+ *  SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetMBMVExhaustiveSearch
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *RefFramePtr,
+    MOTION_VECTOR *MV,    
+    UINT32 *TempErrors
+)
+{
+    UINT32  MinError;
+    UINT32  InterMVError = 0;
+
+    PB_INSTANCE *pbi=&cpi->pb;
+//sjlhack -- always assuming y plane
+    UINT8   *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[0].Source];
+    UINT8   *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[0].thisRecon];
+    UINT8   *BestBlockPtr=NULL;	// filled in by the search; not read afterwards in this function
+    // Whole-pixel search over the 16x16 macroblock.
+	MinError = FindMvViaExhaustSearch( cpi, Mode, SrcPtr,RefPtr,MV,  &BestBlockPtr,16);
+    // Each fractional pel refinement is attempted only while the error still exceeds HP_THRESH.
+    if ( MinError > HP_THRESH )
+		cpi->FindBestHalfPixelMv ( cpi, Mode, SrcPtr, RefPtr,  MV,  16, &MinError, 1 );
+    
+	if ( MinError > HP_THRESH )
+	    cpi->FindBestQuarterPixelMv( cpi, Mode, SrcPtr, RefPtr,  MV,  16, &MinError, 0 );
+    
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    // Get the error score for the chosen motion vector as a variance.
+    InterMVError = GetMBInterError( cpi, cpi->yuv1ptr, RefFramePtr, MV, TempErrors );
+
+    // Return score of best matching block.
+    return InterMVError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetBMVExhaustiveSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi   : Pointer to encoder instance.
+ *                  UINT8 *RefFramePtr : Pointer to reference frame (indexed via blockDxInfo[bp].thisRecon).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV : Best MV found for block in reference frame.
+ *
+ *  RETURNS       : UINT32: Prediction error variance for best matching block.
+ *
+ *  FUNCTION      : Calculates a MV for an 8x8 Y block using an exhaustive search.
+ *
+ *  SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetBMVExhaustiveSearch ( CP_INSTANCE *cpi, UINT8 *RefFramePtr, MOTION_VECTOR *MV, UINT32 bp )
+{
+    UINT32  MinError;
+    UINT32  InterMVError = 0;
+
+    PB_INSTANCE *pbi = &cpi->pb;
+    // bp indexes pbi->mbi.blockDxInfo[] to select the block being searched.
+    UINT8   *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[bp].Source];
+    UINT8   *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[bp].thisRecon];
+    UINT8   *BestBlockPtr = NULL;	// filled in by the search; not read afterwards in this function
+
+	MinError = FindMvViaExhaustSearch( cpi, CODE_INTER_PLUS_MV, SrcPtr,RefPtr,MV, &BestBlockPtr,8);
+    // Each fractional pel refinement is attempted only while the error still exceeds HP_THRESH.
+	if ( MinError > HP_THRESH )
+		cpi->FindBestHalfPixelMv ( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr,  MV, 8, &MinError, 1 );
+    
+	if ( MinError > HP_THRESH )
+		cpi->FindBestQuarterPixelMv( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr,  MV, 8, &MinError, 0 );
+    
+    InterMVError = GetInterError2( pbi, SrcPtr, RefPtr, MV );
+
+    // Return score of best matching block.
+    return InterMVError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : GetBMVSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi   : Pointer to encoder instance.
+ *                  UINT8 *RefFramePtr : Pointer to reference frame (indexed via blockDxInfo[bp].thisRecon).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV  : Best MV found for block in reference frame.
+ *
+ *  RETURNS       : UINT32: Prediction error variance for best matching block.
+ *
+ *  FUNCTION      : Calculates a MV for an 8x8 Y block using the search in cpi->FindMvViaSearch.
+ *
+ *  SPECIAL NOTES : Returned variance is scaled by 2^12 (4096).
+ *
+ ****************************************************************************/
+UINT32 GetBMVSearch ( CP_INSTANCE *cpi, UINT8 *RefFramePtr, MOTION_VECTOR *MV, UINT32 bp )
+{
+    UINT32  MinError;
+    UINT32  InterMVError = 0;
+
+    PB_INSTANCE *pbi=&cpi->pb;
+    // bp indexes pbi->mbi.blockDxInfo[] to select the block being searched.
+    UINT8   *SrcPtr = &cpi->yuv1ptr[pbi->mbi.blockDxInfo[bp].Source];
+    UINT8   *RefPtr = &RefFramePtr[pbi->mbi.blockDxInfo[bp].thisRecon];
+    UINT8   *BestBlockPtr=NULL;	// filled in by the search; not read afterwards in this function
+
+
+    MinError = cpi->FindMvViaSearch( cpi, CODE_INTER_PLUS_MV, SrcPtr,RefPtr, MV, &BestBlockPtr, 8);
+    
+    // Each fractional pel refinement is attempted only while the error still exceeds HP_THRESH.
+	if ( MinError > HP_THRESH )
+		cpi->FindBestHalfPixelMv ( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr,  MV, 8, &MinError, 1 );
+
+    
+	if ( MinError > HP_THRESH )
+		cpi->FindBestQuarterPixelMv( cpi, CODE_INTER_PLUS_MV, SrcPtr, RefPtr,  MV, 8, &MinError, 0 );
+    
+
+    InterMVError = GetInterError2( pbi, SrcPtr, RefPtr, MV );
+
+
+    // Return score of best matching block.
+    return InterMVError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FindMvViaDiamondSearch
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance.
+ *                  CODING_MODE Mode     : Coding mode for the block.
+ *                  UINT8 *SrcPtr        : Pointer to block in source image.
+ *                  UINT8 *RefPtr        : Pointer to block in reference image.
+ *                  UINT32 BlockSize     : Size of block.
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Motion vector of best block found.
+ *                  UINT8 **BestBlockPtr : Pointer-to-pointer of best block found.
+ *
+ *  RETURNS       : UINT32: Error score (SAD, MV cost biased unless 0,0 wins) of best block.
+ *
+ *  FUNCTION      : Calculates a MV using a diamond search.
+ *
+ *  SPECIAL NOTES : MV cost is estimated from the DSMVOffsetX/Y tables so it matches the site tested.
+ *
+ ****************************************************************************/
+UINT32 FindMvViaDiamondSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+)
+{
+    INT32  i;
+    INT32  step;
+	UINT32 EstMvBits;			// Actually bits * 64
+    INT32  SourceStride;
+    INT32  ReconStride;
+	INT32  FirstStepOffset;
+    MOTION_VECTOR DifferentialVector;
+
+    INT32  x=0, y=0;
+    UINT32 Error = 0;
+    UINT32 MinError = HUGE_ERROR;
+	INT32  MvOffsetX = 0;
+	INT32  MvOffsetY = 0;
+    INT32  SearchSite = 0;
+    UINT8  *CandidateBlockPtr = NULL;
+    PB_INSTANCE *pbi = &cpi->pb;
+    UINT32 (*GetSad)( UINT8 * SrcPtr, INT32 SourceStride, UINT8  * RefPtr, INT32 ReconStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+
+	// Work out if we will code the vector relative to 0,0 or nearest
+	if ( Mode == CODE_INTER_PLUS_MV )
+	{
+		if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestInterMVect.x;
+			MvOffsetY = pbi->mbi.NearestInterMVect.y;
+		}
+	}
+	else	// Golden frame
+	{
+		if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+			MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+		}
+	}
+
+
+    if ( BlockSize == 8 )
+    {
+        GetSad = GetSAD;
+//sjlhack -- always assuming y plane
+        SourceStride = pbi->mbi.blockDxInfo[0].CurrentSourceStride;
+        ReconStride  = pbi->mbi.blockDxInfo[0].CurrentReconStride;
+    }
+    else
+    {
+        // get sad 16 function works for a whole macroblock interlaced only if pixels per line
+        // works frame wise
+        GetSad = GetSAD16;
+        ReconStride  = pbi->Configuration.YStride;
+        SourceStride = pbi->Configuration.VideoFrameWidth;
+    }
+    
+    // Check the 0,0 candidate.
+    Error = GetSad( SrcPtr, SourceStride, RefPtr, ReconStride, 0, HUGE_ERROR );
+
+    MinError = Error;
+    *BestBlockPtr = RefPtr;
+    x = 0;
+    y = 0;
+    MV->x = 0;
+    MV->y = 0;
+
+	// Set up control of how many steps to take and size of first step
+    // For larger images use a longer initial step and hence more search steps
+    if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+    {
+		BOOL LongVectorsAllowed = TRUE; 
+
+		if ( LongVectorsAllowed  &&
+		     ( (MvOffsetX >= 48) || (MvOffsetX <= -48) || (MvOffsetY >= 48) || (MvOffsetY <= -48) )  )
+		{
+			FirstStepOffset = 0;
+		}
+		else if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else 
+			FirstStepOffset = 2;
+	}			
+    else if ( cpi->pb.Configuration.VideoFrameWidth >= 320 )
+	{
+		if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else 
+			FirstStepOffset = 2;
+	}
+    else
+	{
+		if ( (MvOffsetX >= 16) || (MvOffsetX <= -16) || (MvOffsetY >= 16) || (MvOffsetY <= -16) )
+			FirstStepOffset = 1;
+		else
+			FirstStepOffset = 2;
+	}
+	// The diamond tables hold 4 sites per step, so skip 4 entries for each step omitted.
+	SearchSite = FirstStepOffset * 4;
+
+    // Proceed through N-steps.
+    for (  step=FirstStepOffset; step<cpi->DSMVSearchSteps; step++ )
+    {
+        // Search the 4-neighbours at distance pertinent to current step.
+        for ( i=0; i<4; i++ )
+        {
+            // Set pointer to next candidate matching block.
+            CandidateBlockPtr = RefPtr + cpi->DSMVPixelOffsetY[SearchSite];
+
+            // Get the block error score.
+            Error = GetSad( SrcPtr, SourceStride, CandidateBlockPtr,ReconStride,0, MinError );
+
+			// Calculate differential vector in Qpel units, using the same diamond tables as the candidate
+			DifferentialVector.x = (4 * (MV->x + cpi->DSMVOffsetX[SearchSite])) -	MvOffsetX;
+			DifferentialVector.y = (4 * (MV->y + cpi->DSMVOffsetY[SearchSite])) - MvOffsetY;
+
+			EstMvBits = cpi->EstMvCostPtrX[DifferentialVector.x]
+				      + cpi->EstMvCostPtrY[DifferentialVector.y];
+			// Bias the SAD by the estimated cost of coding this vector
+			Error += (EstMvBits * MVEPBSAD_MULT)>>MVEPBSAD_RSHIFT;
+			Error += (EstMvBits * Error)>>MVEPBSAD_RSHIFT2;
+
+            if ( Error < MinError )
+            {
+                // Remember best match.
+                MinError = Error;
+                *BestBlockPtr = CandidateBlockPtr;
+
+                // Where is it.
+                x = MV->x + cpi->DSMVOffsetX[SearchSite];
+                y = MV->y + cpi->DSMVOffsetY[SearchSite];
+            }
+
+            // Move to next search location.
+            SearchSite += 1;
+        }
+
+        // Move to best location this step.
+        RefPtr = *BestBlockPtr;
+        MV->x = x;
+        MV->y = y;
+    }
+    // Factor vectors to 1/4 pixel resolution.
+    MV->x = (MV->x * 4);
+    MV->y = (MV->y * 4);
+
+	TotError += MinError;
+	ErrCount++;
+
+    return MinError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : SkipFractionalPixelStep
+ *
+ *  INPUTS        : CP_INSTANCE *cpi     : Pointer to encoder instance (NOT USED).
+ *                  CODING_MODE	Mode     : Coding mode for the block (NOT USED).
+ *                  UINT8 *SrcPtr        : Pointer to source block (NOT USED).
+ *                  UINT8 *RefPtr        : Pointer to block position in reference frame (NOT USED).
+ *                  UINT32 BlockSize     : Size of the block (NOT USED).
+ *                  UINT32 *MinError     : Pointer to best error found to date (NOT USED).
+ *                  UINT8 BitShift       : Number of bits to shift the MV components 
+ *                                         by (depending whether 1/2 or 1/4 pel search)(NOT USED).
+ *
+ *  OUTPUTS       : MOTION_VECTOR *MV    : Not modified by this stub (NOT USED).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Stub function to avoid fractional pixel MV search.
+ *
+ *  SPECIAL NOTES : Signature matches FindBestFractionalPixelStep.
+ *
+ ****************************************************************************/
+void SkipFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+	UINT8  BitShift
+)
+{
+    // stub function: deliberately leaves MV and *MinError untouched
+    return;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FiltBlockBilGetSad_C
+ *
+ *  INPUTS        : UINT8 *SrcPtr       : Pointer to source block.
+ *                  INT32 SrcStride     : Stride of source image.
+ *                  UINT8 *ReconPtr1    : Pointer to first block position in reference frame.
+ *                  UINT8 *ReconPtr2    : Pointer to second block position in reference frame.
+ *                  INT32 PixelsPerLine : Pixels in line of frame containing ReconPtr1/2.
+ *                  INT32 ModX          : Fractional part of MV x-component.
+ *                  INT32 ModY          : Fractional part of MV y-component.
+ *                  UINT32 BestSoFar    : Best error found to date.
+ *
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD of the filtered block prediction error.
+ *
+ *  FUNCTION      : Produces a filtered fractional pel prediction block
+ *  				using bi-linear filters and calculates the SAD of
+ *                  the prediction error.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_C
+(
+    UINT8 *SrcPtr,
+    INT32 SrcStride,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT32 PixelsPerLine,
+    INT32 ModX, 
+    INT32 ModY,
+    UINT32 BestSoFar
+)
+{
+    // AWG This array name masks array of same name at file scope!!! BEWARE!!!
+    UINT8 FilteredBlock[256];
+    // Filter into the local buffer, then take its SAD against the source using a stride of 8.
+    FilterBlockBil_8 ( ReconPtr1, ReconPtr2, FilteredBlock, PixelsPerLine, ModX, ModY );    
+    return GetSAD (  SrcPtr, SrcStride, FilteredBlock, 8, 0, BestSoFar );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h
new file mode 100644
index 00000000..74b7be7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/mcomp.h
@@ -0,0 +1,84 @@
+/****************************************************************************
+*
+*   Module Title :     MComp.h
+*
+*   Description  :     Video CODEC: motion compensation module header.
+*
+****************************************************************************/
+#ifndef __INC_MCOMP_H
+#define __INC_MCOMP_H
+
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "type_aliases.h"
+#include "codec_common.h"
+#include "compdll.h"
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern INT32 *AbsX_LUT;
+
+/****************************************************************************
+*  Functions
+****************************************************************************/
+extern void InitMotionCompensation ( CP_INSTANCE *cpi);
+extern UINT32 GetIntraErrorC ( UINT8 * DataPtr, INT32 SourceStride );
+extern UINT32 GetMBIntraError ( CP_INSTANCE *cpi );
+extern UINT32 GetMBInterError ( CP_INSTANCE *cpi, UINT8 * SrcPtr, UINT8 * RefPtr, MOTION_VECTOR *MV, UINT32 * );
+extern UINT32 GetMBMVInterError ( CP_INSTANCE *cpi, CODING_MODE	Mode, UINT8 * RefFramePtr, MOTION_VECTOR *MV, UINT32 *TempErrors );
+extern UINT32 GetMBMVExhaustiveSearch ( CP_INSTANCE *cpi, CODING_MODE Mode, UINT8 * RefFramePtr, MOTION_VECTOR *MV, UINT32 * /* TempErrors */ );
+
+extern UINT32 GetBMVExhaustiveSearch ( CP_INSTANCE* cpi, UINT8* RefFramePtr, MOTION_VECTOR* MV, UINT32 /* bp */);
+extern UINT32 GetBMVSearch ( CP_INSTANCE* cpi, UINT8* RefFramePtr, MOTION_VECTOR* MV, UINT32 /* bp */ );
+
+extern UINT32 GetMBFrameVerticalVariance ( CP_INSTANCE* cpi );
+extern UINT32 GetMBFieldVerticalVariance ( CP_INSTANCE* cpi );
+extern UINT32 FindMvViaDiamondSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+);
+extern UINT32 FindMvVia3StepSearch
+(
+    CP_INSTANCE *cpi,
+    CODING_MODE Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT8 **BestBlockPtr,
+    UINT32 BlockSize
+);
+/* The two fractional pel step routines below share one signature. */
+extern void FindBestFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+	UINT8  BitShift
+);
+extern void SkipFractionalPixelStep
+(
+    CP_INSTANCE *cpi,
+	CODING_MODE	Mode,
+    UINT8 *SrcPtr,
+    UINT8 *RefPtr,
+    MOTION_VECTOR *MV,
+    UINT32 BlockSize,
+    UINT32 *MinError,
+	UINT8  BitShift
+);
+
+#endif /* __INC_MCOMP_H */
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c
new file mode 100644
index 00000000..4420e248
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/misc_common.c
@@ -0,0 +1,482 @@
+/****************************************************************************
+*
+*   Module Title :     MiscCommon.c
+*
+*   Description  :     Miscellaneous common routines
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#define KF_WORST_Q_INDEX	20
+
+/****************************************************************************
+*  Module Static
+****************************************************************************/        
+// Provisional data for interpolated positions (x.xx00)     
+// One line equation per Q index, used for non-key frames. GetEstimatedBpb()
+// evaluates entry [Q] as ((M * InterError) + C) / UnitFragments.
+// NOTE(review): the initialisers assume LINE_EQ2 declares its members in the
+// order { M, C } - confirm against the LINE_EQ2 definition.
+static const LINE_EQ2 InterBpBEquations[Q_TABLE_SIZE] = 
+{
+	{ 0.00115,   445.98890}, { 0.00132,   406.83041}, { 0.00148,    400.18762}, { 0.00160,    363.68569},
+	{ 0.00174,   378.33470}, { 0.00199,   377.42412}, { 0.00237,    300.00652}, { 0.00262,    266.74763},
+	{ 0.00280,   252.69107}, { 0.00312,   205.72084}, { 0.00351,    183.14721}, { 0.00386,    155.88815},
+	{ 0.00432,    95.74501}, { 0.00447,    91.53841}, { 0.00469,     69.65309}, { 0.00481,     80.08054},
+	{ 0.00496,    63.44023}, { 0.00520,   110.00485}, { 0.00542,    108.04172}, { 0.00558,    165.23727},
+	{ 0.00585,   154.10530}, { 0.00600,   176.84087}, { 0.00621,    169.06892}, { 0.00641,    157.49036},
+	{ 0.00664,   148.93471}, { 0.00713,   199.24375}, { 0.00752,    210.01239}, { 0.00816,    195.86514},
+	{ 0.00883,   352.16439}, { 0.00920,   354.57230}, { 0.00958,    393.60319}, { 0.00999,    420.30206},
+	{ 0.01063,   529.24195}, { 0.01118,   538.52879}, { 0.01170,    651.23813}, { 0.01218,    713.79800},
+	{ 0.01263,   788.52303}, { 0.01321,   871.46329}, { 0.01393,   1078.68114}, { 0.01459,   1180.46989},
+	{ 0.01529,  1309.93961}, { 0.01597,  1366.39052}, { 0.01677,   1627.17452}, { 0.01762,   1826.38865},
+	{ 0.01859,  2010.00287}, { 0.01963,  2388.91757}, { 0.02070,   2683.36530}, { 0.02178,   2875.49060},
+	{ 0.02260,  3178.16923}, { 0.02418,  3572.88801}, { 0.02531,   4062.37227}, { 0.02709,   4921.59728},
+	{ 0.02918,  5592.29649}, { 0.03107,  6186.93245}, { 0.03372,   7376.13311}, { 0.03768,   9534.78915},
+	{ 0.04197, 11906.09757}, { 0.04691, 15241.79652}, { 0.05157,  18904.29545}, { 0.05953,  27091.47553},
+	{ 0.07025, 41522.27709}, { 0.08343, 67789.86180}, { 0.11547, 124265.97640}, { 0.13380, 210301.81305},
+};
+
+// One line equation per Q index, used for key (BASE_FRAME) frames.
+// GetEstimatedBpb() evaluates entry [Q] as ((M * InterError) + C) / UnitFragments.
+// NOTE(review): the initialisers assume LINE_EQ2 declares its members in the
+// order { M, C } - confirm against the LINE_EQ2 definition.
+static const LINE_EQ2 IntraBpBEquations[Q_TABLE_SIZE] = 
+{
+	{ 0.00106,  2288.83435}, { 0.00111,  2381.24321}, { 0.00116,   2484.21594}, { 0.00120,   2536.01662},
+	{ 0.00127,  2674.68182}, { 0.00136,  2835.12286}, { 0.00146,   2946.60819}, { 0.00154,   3034.48115},
+	{ 0.00163,  3117.20084}, { 0.00172,  3233.89966}, { 0.00184,   3407.24634}, { 0.00195,   3543.03650},
+	{ 0.00210,  3699.64900}, { 0.00215,  3793.02049}, { 0.00220,   3854.74475}, { 0.00224,   3915.99566},
+	{ 0.00227,  3959.82316}, { 0.00233,  4204.84699}, { 0.00237,   4276.08365}, { 0.00242,   4387.12774},
+	{ 0.00246,  4452.87571}, { 0.00251,  4578.78112}, { 0.00256,   4642.65467}, { 0.00261,   4710.56167},
+	{ 0.00267,  4780.30368}, { 0.00279,  5030.71570}, { 0.00288,   5170.75293}, { 0.00303,   5374.83851},
+	{ 0.00315,  5872.91562}, { 0.00324,  6002.40178}, { 0.00331,   6163.13111}, { 0.00341,   6330.88665},
+	{ 0.00356,  6638.13056}, { 0.00367,  6813.20389}, { 0.00378,   7073.27347}, { 0.00391,   7264.41977},
+	{ 0.00401,  7464.35187}, { 0.00414,  7686.68885}, { 0.00427,   8222.38307}, { 0.00442,   8469.27069},
+	{ 0.00459,  8750.44432}, { 0.00472,  8961.97754}, { 0.00492,   9406.63273}, { 0.00513,   9784.70928},
+	{ 0.00531, 10199.58953}, { 0.00556, 10786.82064}, { 0.00582,  11271.52430}, { 0.00606,  11694.10222},
+	{ 0.00631, 12147.95242}, { 0.00664, 12808.92178}, { 0.00695,  13528.07213}, { 0.00732,  14860.00245},
+	{ 0.00779, 15815.03822}, { 0.00822, 16685.69714}, { 0.00884,  18214.89132}, { 0.00972,  20431.29266},
+	{ 0.01063, 22995.09970}, { 0.01169, 26309.59450}, { 0.01275,  29857.49766}, { 0.01436,  37027.81351},
+	{ 0.01637, 49621.40625}, { 0.01873, 72068.47846}, { 0.02150, 123873.67566}, { 0.02488, 208511.43171},
+};   
+
+/****************************************************************************
+*  Exports
+****************************************************************************/   
+
+// For FixedQ: helps choose an appropriate key frame quality.
+// ClampAndUpdateQ() indexes this table with cpi->FixedQ and adds the entry
+// (or half of it when LastKeyFrame >= ForceKeyFrameEvery) to FixedQ to obtain
+// the key frame Q index.
+const UINT8 FixedQKfBoostTable[64] = 
+{
+	22, 24, 26, 26, 26, 26, 27, 28,
+	28, 27, 27, 26, 26, 25, 25, 24,
+	24, 23, 23, 22, 22, 21, 21, 20,
+	20, 19, 18, 18, 17, 16, 16, 15,
+	15, 14, 14, 13, 13, 13, 12, 12,
+	12, 12, 12, 11, 11, 11, 10,  9,
+	 8,  7,  7,  6,  5,  4,  3,  2,
+	 1,  1,  0,  0,  0,  0,  0,  0
+};
+
+// For FixedQ: Q index boost applied to golden frame refreshes.
+// ClampAndUpdateQ() indexes this table with cpi->FixedQ and adds the entry to
+// FixedQ for an inter frame on which it sets RefreshGoldenFrame.
+const UINT8 GfFixedQKfBoostTable[64] = 
+{
+	20, 22, 23, 23, 23, 24, 25, 26,
+	27, 27, 28, 28, 29, 29, 28, 28,
+	28, 27, 27, 27, 26, 26, 26, 26,
+	25, 25, 25, 25, 24, 24, 23, 23,
+	22, 21, 21, 20, 20, 19, 18, 17,
+	16, 15, 14, 13, 12, 11, 10,  9,
+	 8,  7,  6,  6,  5,  5,  4,  4,
+	 4,  3,  3,  2,  1,  0,  0,  0
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     GetEstimatedBpb
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi    : Pointer to encoder instance.
+ *						UINT32 TargetQIndex : Q Index to estimate for. Used
+ *						                      directly as an index into the
+ *						                      line equation tables.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     double: The estimate for the number of bits per block
+ *                      at the specified Q index.
+ *
+ *  FUNCTION      :     Evaluates the line equation for TargetQIndex against
+ *                      cpi->InterError, normalises by the fragment count and
+ *                      scales by the correction factor(s) that apply to the
+ *                      current frame type.
+ *
+ *  SPECIAL NOTES :     Inter and Intra error are the same for key frames.
+ *                      It may prove necessary to clip the complexity value.
+ *
+ ****************************************************************************/
+double GetEstimatedBpb ( CP_INSTANCE *cpi, UINT32 TargetQIndex )
+{
+	const LINE_EQ2 *Equation;
+	double Estimate;
+	double Scale;
+	double Complexity = (double)cpi->InterError;
+
+	if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+	{
+		// Key frames have their own equations and correction factor.
+		Equation = &IntraBpBEquations[TargetQIndex];
+		Scale    = cpi->KeyFrameBpbCorrectionFactor;
+	}
+	else
+	{
+		Equation = &InterBpBEquations[TargetQIndex];
+
+		// The correction factor tracks recently observed overshoot and undershoot.
+		// A golden frame update is expected to overshoot because it jumps to a
+		// higher quality from a lower one (the tables were calculated using fixed Q),
+		// hence the additional factor in that case.
+		if ( cpi->pb.RefreshGoldenFrame )
+			Scale = cpi->BpbCorrectionFactor * cpi->GfuBpbCorrectionFactor;
+		else
+			Scale = cpi->BpbCorrectionFactor;
+	}
+
+	// Primary prediction, then the correction.
+	Estimate = ((Equation->M * Complexity) + Equation->C) / (double)cpi->pb.UnitFragments;
+	Estimate = Estimate * Scale;
+
+	return Estimate;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UpdateBpbCorrectionFactor2
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      UINT32 FrameSize : Size of coded frame.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Scales the Bits Per Block correction factor for the
+ *                      current frame type by (3 + 2 * FrameSize / Target) / 5.
+ *
+ *  SPECIAL NOTES :     The factor is left unchanged when ThisFrameTarget is
+ *                      not positive, matching UpdateBpbCorrectionFactor().
+ *                      Unlike that routine, no MIN/MAX limits are applied.
+ *
+ ****************************************************************************/
+void UpdateBpbCorrectionFactor2 ( CP_INSTANCE *cpi, UINT32 FrameSize )
+{
+	double BpbCorrectionFactor;
+
+#if defined(_MSC_VER)
+	// NOTE: This function uses floating point
+	ClearSysState();
+#endif
+
+	// Pick the factor that applies to this frame type.
+	if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+		BpbCorrectionFactor = cpi->KeyFrameBpbCorrectionFactor;
+	else
+	{
+		if ( cpi->pb.RefreshGoldenFrame )
+			BpbCorrectionFactor = cpi->GfuBpbCorrectionFactor;
+		else
+			BpbCorrectionFactor = cpi->BpbCorrectionFactor;
+	}
+
+	// Work out a size correction factor.
+	// A zero target would turn the ratio into inf/NaN, which would then be
+	// stored back and poison every later estimate, so skip the update.
+	if ( cpi->ThisFrameTarget > 0 )
+		BpbCorrectionFactor *= (3+(2.0 * FrameSize) / cpi->ThisFrameTarget) /5;
+
+	// Store the result back into the slot it was read from.
+	if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+		cpi->KeyFrameBpbCorrectionFactor = BpbCorrectionFactor;
+	else
+	{
+		if ( cpi->pb.RefreshGoldenFrame )
+			cpi->GfuBpbCorrectionFactor = BpbCorrectionFactor;
+		else
+			cpi->BpbCorrectionFactor = BpbCorrectionFactor;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UpdateBpbCorrectionFactor
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                      UINT32 FrameSize : Size of coded frame.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Nudges the Bits Per Block correction factor for the
+ *                      current frame type towards the observed ratio of
+ *                      FrameSize to ThisFrameTarget. Only a quarter of the
+ *                      error is applied, capped at +25% / -20% per call, and
+ *                      the result is kept within MIN/MAX_BPB_FACTOR.
+ *
+ *  SPECIAL NOTES :     No adjustment is made when ThisFrameTarget is not
+ *                      positive or when the ratio lies within 99..101%.
+ *
+ ****************************************************************************/
+void UpdateBpbCorrectionFactor ( CP_INSTANCE *cpi, UINT32 FrameSize )
+{
+	INT32  SizeRatio = 100;     // Coded size as a percentage of the target
+	double Factor;
+
+#if defined(_MSC_VER)
+	// NOTE: This function uses floating point
+	ClearSysState();
+#endif
+
+	// Fetch the factor that applies to this frame type.
+	if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+		Factor = cpi->KeyFrameBpbCorrectionFactor;
+	else if ( cpi->pb.RefreshGoldenFrame )
+		Factor = cpi->GfuBpbCorrectionFactor;
+	else
+		Factor = cpi->BpbCorrectionFactor;
+
+	// Work out a size correction factor.
+	if ( cpi->ThisFrameTarget > 0 )
+		SizeRatio = (100 * FrameSize) / cpi->ThisFrameTarget;
+
+	if ( (SizeRatio > 101) &&
+		 (cpi->pb.quantizer->FrameQIndex > cpi->Configuration.ActiveWorstQuality ) )
+	{
+		// Overshoot, and we are not already at the worst allowable quality.
+		SizeRatio = 100 + ((SizeRatio - 100)/4);
+
+		// Damp the adjustment
+		if ( SizeRatio > 125 )
+			SizeRatio = 125;
+
+		Factor = (Factor * SizeRatio) / 100;
+
+		// Keep the factor within limits
+		if ( Factor > MAX_BPB_FACTOR )
+			Factor = MAX_BPB_FACTOR;
+	}
+	else if ( (SizeRatio < 99) &&
+			  (cpi->pb.quantizer->FrameQIndex < cpi->Configuration.ActiveBestQuality ) )
+	{
+		// Undershoot, and we are not already at the best allowable quality.
+		SizeRatio = 100 - ((100 - SizeRatio)/4);
+
+		// Damp the adjustment
+		if ( SizeRatio < 80 )
+			SizeRatio = 80;
+
+		Factor = (Factor * SizeRatio) / 100;
+
+		// Keep the factor within limits
+		if ( Factor < MIN_BPB_FACTOR )
+			Factor = MIN_BPB_FACTOR;
+	}
+
+	// Write the factor back to the slot it came from.
+	if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+		cpi->KeyFrameBpbCorrectionFactor = Factor;
+	else if ( cpi->pb.RefreshGoldenFrame )
+		cpi->GfuBpbCorrectionFactor = Factor;
+	else
+		cpi->BpbCorrectionFactor = Factor;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClampAndUpdateQ 
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *						UINT32 QIndex    : Current Q Index.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Ensures that the specified Q index is within current
+ *						active range and applies other constraints, stores
+ *						the result in cpi->pb.quantizer->FrameQIndex and
+ *						calls VP6_UpdateQC().
+ *
+ *  SPECIAL NOTES :     When cpi->FixedQ >= 0 the QIndex argument is ignored
+ *						and FrameQIndex is derived from FixedQ instead.
+ *						On key frames the golden frame update counters are
+ *						reset. In fixed Q mode an inter frame may be flagged
+ *						as a golden frame refresh (cpi->pb.RefreshGoldenFrame).
+ *
+ ****************************************************************************/
+void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex ) 
+{   
+	// Apply limits to the value of QIndex
+	// NOTE: Bigger QIndex ==> Higher Quality (Lower Quantizer)!!!!
+	if ( QIndex > cpi->Configuration.ActiveBestQuality )
+		QIndex = cpi->Configuration.ActiveBestQuality;
+	else if ( QIndex < cpi->Configuration.ActiveWorstQuality )
+		QIndex = cpi->Configuration.ActiveWorstQuality;
+
+    // Apply range restrictions for key frames.
+    if ( VP6_GetFrameType(&cpi->pb) == BASE_FRAME )
+    {
+		// Fixed Q Stuff for key frames
+		if ( cpi->FixedQ >= 0 )
+		{
+			UINT8 Q;
+
+			// Set an appropriate Key frame Q to match the recent ambient quality
+			// (only half the boost is applied once LastKeyFrame has reached ForceKeyFrameEvery)
+			if ( (cpi->LastKeyFrame >= cpi->ForceKeyFrameEvery) )
+				Q = cpi->FixedQ + (FixedQKfBoostTable[cpi->FixedQ]/2);
+			else
+				Q = cpi->FixedQ + FixedQKfBoostTable[cpi->FixedQ];
+
+			cpi->pb.quantizer->FrameQIndex = Q;
+		}   
+		else
+		{
+			// Additional QIndex limits for Key frames
+			// (not applied on the second pass of a two pass encode)
+            if( cpi->pass != 2) 
+            {
+			    if ( QIndex < KF_WORST_Q_INDEX )
+				    QIndex = KF_WORST_Q_INDEX;
+			    else if ( QIndex > 60 )
+				    QIndex = 60;
+            }
+
+			cpi->pb.quantizer->FrameQIndex = QIndex;
+		}
+
+		// We are going to update GF this frame so reset counter till next update due.
+        if(cpi->pass < 2)
+    		cpi->GfUpdateInterval = DEFAULT_GF_UPDATE_INTERVAL;
+        else
+            cpi->GfUpdateInterval = DEFAULT_2PASS_GF_UPDATE_INTERVAL;
+
+		cpi->FramesTillGfUpdateDue = cpi->GfUpdateInterval;
+		
+		// Guard against a zero interval before dividing by it
+		if ( cpi->GfUpdateInterval )
+			cpi->GfuMotionSpeed = GF_UPDATE_MOTION_INTERVAL / cpi->GfUpdateInterval;
+		else
+			cpi->GfuMotionSpeed = 0;
+
+		cpi->GfuMotionComplexity = GF_DEFAULT_MOTION_CMPLX;
+		cpi->GfuBoost = 0;
+	}
+	else 
+	{
+		if(cpi->FixedQ >= 0) 
+		{
+			// We want KFs to count as GF updates
+			// Default for inter frames in fixed Q mode; this is the value that
+			// stands when cpi->DisableGolden is set.
+			cpi->pb.quantizer->FrameQIndex = cpi->FixedQ;
+
+            if(!cpi->DisableGolden)
+            {
+				if ( cpi->FramesTillGfUpdateDue == 0 )
+                {
+					UINT32 Sum = 0;
+					UINT32 Sum2 = 0;
+					UINT32 Sum3 = 0;
+					UINT32 i;
+					UINT32 VarianceX = 0;
+					UINT32 VarianceY = 0;
+					UINT32 MaxVariance = 0;
+
+					// Check the level of MV reuse as a measure of how valuable a GF update is likely to be.
+					for ( i = 0; i < MAX_MODES; i++ )
+						Sum += cpi->ModeDist[i];
+
+					if ( Sum )
+					{
+						// Sum2: everything except intra, inter-plus-MV and four-MV modes.
+						// Sum3: Sum2 with the inter-no-MV mode removed as well.
+						Sum2 = Sum - (cpi->ModeDist[CODE_INTRA] + cpi->ModeDist[CODE_INTER_PLUS_MV] + cpi->ModeDist[CODE_INTER_FOURMV]);
+						Sum3 = Sum2 - cpi->ModeDist[CODE_INTER_NO_MV];			
+
+						// Convert Sum2 and Sum3 to %
+						Sum2 = (Sum2 * 100 / Sum);						
+						Sum3 = (Sum3 * 100 / Sum);							
+					}
+
+					// Calculate various motion metrics
+					if ( cpi->FrameMvStats.NumMvs )
+					{
+						// Speed is the larger of the mean absolute X and Y components;
+						// each variance is computed as (N*SumSq - Sum*Sum) / (N*N).
+						cpi->GfuMotionSpeed = (cpi->FrameMvStats.SumAbsX > cpi->FrameMvStats.SumAbsY) ? (cpi->FrameMvStats.SumAbsX/cpi->FrameMvStats.NumMvs) : (cpi->FrameMvStats.SumAbsY/cpi->FrameMvStats.NumMvs);
+						VarianceX = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumXSq) - (cpi->FrameMvStats.SumX*cpi->FrameMvStats.SumX)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+						VarianceY = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumYSq) - (cpi->FrameMvStats.SumY*cpi->FrameMvStats.SumY)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+						MaxVariance = (VarianceX > VarianceY) ? VarianceX : VarianceY;
+						cpi->GfuMotionComplexity = cpi->GfuMotionSpeed + ((VarianceX)/4) + ((VarianceY)/4);
+						// Complexity is capped at 31
+						if ( cpi->GfuMotionComplexity > 31 )
+							cpi->GfuMotionComplexity = 31;
+					}	
+					else
+					{
+						cpi->GfuMotionSpeed = 0;
+						cpi->GfuMotionComplexity = 0;
+					}
+
+					// Should we even consider a GF update or is there no point
+					if ( (Sum2 > GF_MODE_DIST_THRESH1) && (Sum3 > GF_MODE_DIST_THRESH2) &&
+						 (cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION) && 
+						 (MaxVariance <= GF_MAX_VAR_THRESH) )
+					{
+						// Golden frame refresh: boost the Q index above FixedQ
+						cpi->pb.quantizer->FrameQIndex = cpi->FixedQ + GfFixedQKfBoostTable[cpi->FixedQ];
+
+						cpi->pb.RefreshGoldenFrame = TRUE;
+					}
+					else
+					{
+						cpi->pb.quantizer->FrameQIndex = cpi->FixedQ;
+					}
+	            }
+                else
+                {
+
+                    cpi->pb.quantizer->FrameQIndex = cpi->FixedQ;
+                }
+            }
+		}
+		else
+		{
+			// Not fixed Q: use the clamped QIndex as is
+			cpi->pb.quantizer->FrameQIndex = QIndex;
+		}
+	}
+    
+    // If necessary re-initialise the quantiser
+    VP6_UpdateQC( cpi->pb.quantizer, cpi->pb.Vp3VersionNo );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RegulateQ
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *						INT32 TargetBits : Target number of bits for frame.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     This function tries to regulate quantizer level
+ *                      to produce the specified target number of bits.
+ *                      It scans Q indices upwards, stops at the first one
+ *                      whose estimate exceeds the target, keeps whichever of
+ *                      that index and its predecessor is closer to the
+ *                      target, and passes the choice to ClampAndUpdateQ().
+ *
+ *  SPECIAL NOTES :     If no index exceeds the target the highest index
+ *                      (Q_TABLE_SIZE - 1) is used. If even index 0 exceeds
+ *                      the target, index 0 is used.
+ *
+ ****************************************************************************/
+void RegulateQ ( CP_INSTANCE *cpi, INT32 TargetBits ) 
+{   
+    UINT32 i;
+    double Predbpb;
+
+    UINT32 QIndex = Q_TABLE_SIZE - 1;
+    double Targetbpb = (double)TargetBits / (double)cpi->pb.UnitFragments;
+    double LastBitError = 10000.0;       // Infeasibly high number to initialize
+
+    // Search for the best Q for the target bitrate.
+	for ( i=0; i<Q_TABLE_SIZE; i++ )
+	{
+        Predbpb = GetEstimatedBpb( cpi, i );
+        if ( Predbpb > Targetbpb )
+        {
+            // There is no index below 0 to fall back to. Without the i == 0
+            // test an overshoot larger than the initial LastBitError would
+            // compute 0 - 1 on an unsigned value and wrap to the maximum
+            // index, which ClampAndUpdateQ() would clamp to the BEST quality:
+            // the opposite of what an over-budget frame needs.
+            if ( (i == 0) || ((Predbpb - Targetbpb) <= LastBitError) )
+                QIndex = i;
+            else
+                QIndex = i - 1;
+            break;
+        }
+        else
+            LastBitError = Targetbpb - Predbpb;
+    }
+
+    ClampAndUpdateQ ( cpi, QIndex );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ConfigureQuality
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi    : Pointer to encoder instance.
+ *						UINT32 QualityValue : Quality value. Values above 63
+ *						                      are treated as 63.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Sets the worst case operating Q index
+ *                      (63 - QualityValue) for the specified quality level
+ *                      and makes it the active worst quality.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+void ConfigureQuality ( CP_INSTANCE *cpi, UINT32 QualityValue )
+{
+    // QualityValue is unsigned, so anything above 63 would make the
+    // subtraction below wrap to a huge value instead of going negative.
+    if ( QualityValue > 63 )
+        QualityValue = 63;
+
+    // Set the worst case quality value.
+    // Note that the actual quality is determined by lookup into the quantiser table QThreshTable[]
+    cpi->Configuration.WorstQuality = 63 - QualityValue;
+
+    // Set the default Active WorstQuality.
+    cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h
new file mode 100644
index 00000000..06c51167
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/resource.h
@@ -0,0 +1,97 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by compdll.rc
+//
+#define IDD_SYNC_SENTINEL               109
+#define IDD_STATS_DIALOG                113
+#define IDM_SHOW_STATS                  115
+#define ID_OPTIONS_LIVEVIDEO            116
+#define IDM_STOP                        117
+#define IDM_SHOW_DIFFERENCES            130
+#define ID_OPTIONS_SETUPGRABBER         131
+#define IDD_DIALOG1                     209
+#define IDC_CK_ENABLE                   1023
+#define IDC_ED_HSEARCH                  1024
+#define IDC_ED_HINSERT                  1028
+#define IDC_ED_VSEARCH                  1029
+#define IDC_ED_VINSERT                  1030
+#define IDC_MFILTER                     1039
+#define IDC_STATS_FRAME_NO_EDIT         1040
+#define IDC_BAR_ENHANCE_EDIT            1041
+#define IDC_STATS_LAST_FRAME_EDIT       1041
+#define IDC_STATS_AV_EDIT               1042
+#define IDC_STATS_PEAK_EDIT             1043
+#define IDC_STATS_DROPPED_FRAMES_EDIT   1044
+#define IDC_STATS_AVFPS_EDIT            1045
+#define IDC_STATS_IFPS_EDIT             1046
+#define IDC_STATS_TIME_EDIT             1047
+#define IDC_STATS_CURR_EDIT             1048
+#define IDC_STATS_CFA_EDIT              1049
+#define IDC_STATS_CFAAV_EDIT            1050
+#define IDC_EDIT_CAT_A                  2000
+#define IDC_EDIT_SRC_FR                 2001
+#define IDC_EDIT_PIX_DIFF_THRESH        2002
+#define IDC_EDIT_LOCALS_LOSSY           2003
+#define IDC_EDIT_CAT_C                  2004
+#define IDC_EDIT_CAT_D                  2005
+#define IDC_EDIT_CAT_B                  2006
+#define IDC_EDIT_CAT_A_FR               2007
+#define IDC_EDIT_CAT_C_FR               2008
+#define IDC_EDIT_CAT_D_FR               2009
+#define IDC_EDIT_CAT_B_FR               2010
+#define IDC_EDIT_NUM_FRAMES             2011
+#define IDC_EDIT_NOISE_SUP              2012
+#define IDC_EDIT_NOISE_SUP2             2013
+#define IDC_PIXEL_LOSSY_CHECK           2014
+#define IDC_SCORE_LOSSY                 2015
+#define IDC_LOCALS_LOSSY                2016
+#define IDC_SING_LOSSY                  2017
+#define IDC_EDIT_SCORE_LOSSY            2018
+#define IDC_EDIT_OUT_FRAME_RATE         2019
+#define IDC_EDIT_TARGET_DATA_RATE       2020
+#define IDC_EDIT_PIX_GREY_THRESH        2021
+#define IDC_DCT_THRESH                  2023
+#define IDC_GREY_DCT_VARIABLE           2024
+#define IDC_DCT_THRESH_TOP              2025
+#define IDC_CONS_SEMI                   2026
+#define IDC_CONS_NORM_FRAMES            2027
+#define IDC_NUM_CONS_SEMI               2028
+#define IDC_NUM_CONS_NORM               2029
+#define IDC_NUM_CONS_NORM_MAX           2030
+#define IDC_CONS_NORM_FRAMES_MAX        2031
+#define IDC_DCT_GREY                    2033
+#define IDC_GREY                        2034
+#define IDC_COLOUR                      2035
+#define IDC_EDIT_MAX_DATA_RATE          2036
+#define IDC_CWASH_CHECK                 2037
+#define IDC_PWASH_CHECK                 2038
+#define IDC_FF_DCT_EDIT                 2039
+#define IDM_RUN                         40001
+#define IDM_COMPRESS                    40002
+#define IDM_STEP                        40003
+#define IDM_REPLAY                      40004
+#define IDM_PARAMS                      40005
+#define IDM_CAPTURE_RAW                 40006
+#define IDM_SHOW_ZERO_DIFFERENCES       40007
+#define IDM_SHOW_EDGES                  40008
+#define ID_OPTIONS_DISKSTATS            40008
+#define IDM_SHOW_NORMAL                 40009
+#define IDM_SHOW_SCORE                  40010
+#define ID_OPTIONS_OUTPUTFILTEREDBITMAPS 40011
+#define ID_OPTIONS_DISABLEENCODER       40012
+#define RGM_SLOW_FDCT                   40013
+#define RGM_FAST_IDCT                   40014
+#define RGM_FAST_FDCT                   40015
+#define RGM_SLOW_IDCT                   40016
+#define RGM_DEBLOCK                     40017
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        104
+#define _APS_NEXT_COMMAND_VALUE         40018
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c
new file mode 100644
index 00000000..218c9cea
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.c
@@ -0,0 +1,810 @@
+/****************************************************************************
+*
+*   Module Title :     twopass.c
+*
+*   Description  :     First pass statistics handling and second pass rate control.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include "compdll.h"
+#include "twopass.h"
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern const UINT32 GfuDataRateBoost[64];
+extern const UINT32 GfuMotionCorrection[32];
+extern const UINT32 GfUsageCorrection[64];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ZeroStats
+ *
+ *  INPUTS        : 
+ *                  FIRSTPASS_STATS *section  Stats accumulator to empty
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Resets the frame count, the key/golden tallies and every
+ *                  accumulated measurement to zero.
+ *
+ *  SPECIAL NOTES : The frame member is not touched.
+ *
+ ****************************************************************************/
+void ZeroStats( FIRSTPASS_STATS *section)
+{
+    // Counters
+    section->count = 0;
+    section->isKey = 0;
+    section->isGolden = 0;
+
+    // Size, error and quality measurements
+    section->BitsPerMacroblock = 0;
+    section->SqBitsPerMacroblock = 0;
+    section->MeanInterError = 0;
+    section->MeanIntraError = 0;
+    section->PSNR = 0;
+
+    // Motion measurements
+    section->MotionSpeed = 0;
+    section->VarianceX = 0;
+    section->VarianceY = 0;
+
+    // Mode usage percentages
+    section->PercentMotion = 0;
+    section->PercentMotionY = 0;
+    section->PercentNewMotion = 0;
+    section->PercentGolden = 0;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : AccumulateStats
+ *
+ *  INPUTS        : FIRSTPASS_STATS *section  Accumulator to add into
+ *                  FIRSTPASS_STATS *frame    Stats to add to the accumulated values
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Accumulates firstpass statistics: bumps the accumulator's
+ *                  count by one and adds each measurement of frame to the
+ *                  matching member of section.
+ *
+ *  SPECIAL NOTES : The frame member is not accumulated.
+ *
+ ****************************************************************************/
+void AccumulateStats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
+{
+    // One more frame in this section
+    section->count = section->count + 1;
+    section->isKey = section->isKey + frame->isKey;
+    section->isGolden = section->isGolden + frame->isGolden;
+
+    // Size, error and quality measurements
+    section->BitsPerMacroblock = section->BitsPerMacroblock + frame->BitsPerMacroblock;
+    section->SqBitsPerMacroblock = section->SqBitsPerMacroblock + frame->SqBitsPerMacroblock;
+    section->MeanInterError = section->MeanInterError + frame->MeanInterError;
+    section->MeanIntraError = section->MeanIntraError + frame->MeanIntraError;
+    section->PSNR = section->PSNR + frame->PSNR;
+
+    // Motion measurements
+    section->MotionSpeed = section->MotionSpeed + frame->MotionSpeed;
+    section->VarianceX = section->VarianceX + frame->VarianceX;
+    section->VarianceY = section->VarianceY + frame->VarianceY;
+
+    // Mode usage percentages
+    section->PercentMotion = section->PercentMotion + frame->PercentMotion;
+    section->PercentMotionY = section->PercentMotionY + frame->PercentMotionY;
+    section->PercentNewMotion = section->PercentNewMotion + frame->PercentNewMotion;
+    section->PercentGolden = section->PercentGolden + frame->PercentGolden;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : AvgStats
+ *
+ *  INPUTS        : 
+ *                  FIRSTPASS_STATS *section  Stats to convert to averages using count
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Divides each accumulated measurement by section->count.
+ *
+ *  SPECIAL NOTES : Does nothing when count is zero. count, isKey, isGolden
+ *                  and frame are left as they are.
+ *
+ ****************************************************************************/
+void AvgStats ( FIRSTPASS_STATS *section)
+{
+    if ( section->count )
+    {
+        // Size, error and quality measurements
+        section->BitsPerMacroblock = section->BitsPerMacroblock / section->count;
+        section->SqBitsPerMacroblock = section->SqBitsPerMacroblock / section->count;
+        section->MeanInterError = section->MeanInterError / section->count;
+        section->MeanIntraError = section->MeanIntraError / section->count;
+        section->PSNR = section->PSNR / section->count;
+
+        // Motion measurements
+        section->MotionSpeed = section->MotionSpeed / section->count;
+        section->VarianceX = section->VarianceX / section->count;
+        section->VarianceY = section->VarianceY / section->count;
+
+        // Mode usage percentages
+        section->PercentMotion = section->PercentMotion / section->count;
+        section->PercentMotionY = section->PercentMotionY / section->count;
+        section->PercentNewMotion = section->PercentNewMotion / section->count;
+        section->PercentGolden = section->PercentGolden / section->count;
+    }
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : OutputStats
+ *
+ *  INPUTS        : FILE *f                 File to output the stats to
+ *                  FIRSTPASS_STATS *stats  Stats to write out
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Writes one text line of 14 space separated fields:
+ *                  frame, count, isKey, isGolden, then ten measurements
+ *                  printed to four decimal places.
+ *
+ *  SPECIAL NOTES : PercentMotionY and PSNR are not written.
+ *                  The field order matches what InputStats() reads.
+ *
+ ****************************************************************************/
+void OutputStats( FILE *f, FIRSTPASS_STATS *stats)
+{
+    fprintf(f,
+        "%8d %8d %8d %8d %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f %12.04f \n",
+        stats->frame,
+        stats->count,
+        stats->isKey,
+        stats->isGolden,
+        stats->BitsPerMacroblock,
+        stats->SqBitsPerMacroblock,
+        stats->MeanInterError,
+        stats->MeanIntraError,
+        stats->MotionSpeed,
+        stats->VarianceX,
+        stats->VarianceY,
+        stats->PercentMotion,
+        stats->PercentNewMotion,
+        stats->PercentGolden);
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : InputStats
+ *
+ *  INPUTS        : FILE *f                 File to read the stats in
+ *                  FIRSTPASS_STATS *stats  Stats to fill in
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads 14 whitespace separated fields, in the order that
+ *                  OutputStats() writes them, into stats.
+ *
+ *  SPECIAL NOTES : PercentMotionY and PSNR are not read.
+ *                  The fscanf() result is not checked, so at end of file or
+ *                  on malformed input the unread members keep their
+ *                  previous contents and the caller is not told.
+ *                  NOTE(review): the conversions assume frame, count, isKey
+ *                  and isGolden are int sized and the remaining members are
+ *                  double - confirm against FIRSTPASS_STATS.
+ *
+ ****************************************************************************/
+void InputStats( FILE *f, FIRSTPASS_STATS *stats)
+{
+    fscanf(f,
+        "%d %d %d %d %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg \n",
+        &stats->frame,
+        &stats->count,
+        &stats->isKey,
+        &stats->isGolden,
+        &stats->BitsPerMacroblock,
+        &stats->SqBitsPerMacroblock,
+        &stats->MeanInterError,
+        &stats->MeanIntraError,
+        &stats->MotionSpeed,
+        &stats->VarianceX,
+        &stats->VarianceY,
+        &stats->PercentMotion,
+        &stats->PercentNewMotion,
+        &stats->PercentGolden);
+}
+// Builds "<Base>.sst" in Dest. Returns 1 on success, or 0 when Base is NULL
+// or the result (including its terminator) does not fit in DestSize bytes.
+static int BuildSectionStatsName ( char *Dest, size_t DestSize, const char *Base )
+{
+    static const char Suffix[] = ".sst";
+    size_t BaseLen;
+
+    if ( Base == NULL )
+        return 0;
+
+    BaseLen = strlen(Base);
+
+    // sizeof(Suffix) counts the terminating NUL as well
+    if ( BaseLen + sizeof(Suffix) > DestSize )
+        return 0;
+
+    memcpy(Dest, Base, BaseLen);
+    memcpy(Dest + BaseLen, Suffix, sizeof(Suffix));
+    return 1;
+}
+
+// Writes the column heading line of a stats file. Does nothing when f is NULL.
+static void WriteStatsHeader ( FILE *f )
+{
+    if ( f == NULL )
+        return;
+
+    fprintf(f,
+        "%8s %8s %8s %8s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s \n",
+        "","#","key","golden","bits/mb","sq bits/mb","Inter","Intra","Motion","VarX","VarY",
+        "%Motion","%NewMotion","%Golden");
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass2Initialize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Initialize 1st or 2nd pass of the compressor.
+ *                  Pass 1: clears the section stats and creates the first
+ *                  pass file and its ".sst" companion, each starting with a
+ *                  heading line.
+ *                  Pass 2: opens both files for reading, skips the heading
+ *                  lines, reads the section stats and derives the worst
+ *                  quality limit and the clip bit budget from them.
+ *
+ *  SPECIAL NOTES : A file that cannot be opened (or a name too long to take
+ *                  the ".sst" suffix) leaves the matching handle NULL. In
+ *                  pass 2 the routine then returns without changing the
+ *                  configuration.
+ *
+ ****************************************************************************/
+void CCONV Pass2Initialize ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    if ( cpi->pass == 2 )
+    {
+        int    actualMBS =(cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+        double fpBitRate;                        // first pass bitrate
+        double target;                           // target bitrate
+        double NewQ;
+        double RoomForVariation;
+        char dummy[1024];
+
+        ClearSysState();
+
+        cpi->fs = fopen(CompConfig->FirstPassFile,"r");
+
+        if ( BuildSectionStatsName(dummy, sizeof(dummy), CompConfig->FirstPassFile) )
+            cpi->ss = fopen(dummy,"r");
+        else
+            cpi->ss = NULL;
+
+        // Nothing can be derived without both stats files, and reading from
+        // a NULL stream would crash.
+        if ( (cpi->fs == NULL) || (cpi->ss == NULL) )
+            return;
+
+        // Skip the heading line of each file
+        fgets(dummy,1024,cpi->fs);
+        fgets(dummy,1024,cpi->ss);
+
+        InputStats(cpi->ss,&cpi->fpmss);
+
+        // Earlier tunings derived this from the standard deviation of the
+        // bits per macroblock ((Sigma+2)/3.5, later (Sigma+2)/15); it is
+        // currently a constant.
+        RoomForVariation = 1;
+
+        fpBitRate = cpi->fpmss.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+        target = (double) cpi->Configuration.TargetBandwidth;
+
+        NewQ = (INT32)  FIRSTPASS_Q  -  ( RoomForVariation + .5 + log(fpBitRate/target) / log(1.04));
+        if(NewQ < cpi->Configuration.WorstQuality )
+            NewQ = cpi->Configuration.WorstQuality;
+
+        if(NewQ > cpi->Configuration.ActiveBestQuality)
+            NewQ = cpi->Configuration.ActiveBestQuality;
+
+        if(NewQ > 50)
+            NewQ = 50;
+
+        cpi->PassedInWorstQ = cpi->Configuration.WorstQuality;
+        cpi->Configuration.WorstQuality = (INT32) NewQ;
+        cpi->CalculatedWorstQ = (INT32) NewQ;
+
+        cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+
+        cpi->TotalBitsLeftInClip = 1.0 * cpi->ActualTargetBitRate * cpi->fpmss.count / cpi->Configuration.OutputFrameRate;
+        cpi->FramesYetToEncode = cpi->fpmss.count;
+        //cpi->TotalBitsPerMB = cpi->fpmss.BitsPerMacroblock * cpi->fpmss.count;
+        cpi->TotalBitsPerMB = cpi->fpmss.MeanInterError * cpi->fpmss.count;
+    }
+    else if (cpi->pass == 1)
+    {
+        char dummy[1024];
+
+        ZeroStats( &cpi->fpmss);
+
+        cpi->fs = fopen(CompConfig->FirstPassFile,"w");
+        WriteStatsHeader(cpi->fs);
+
+        if ( BuildSectionStatsName(dummy, sizeof(dummy), CompConfig->FirstPassFile) )
+            cpi->ss = fopen(dummy,"w");
+        else
+            cpi->ss = NULL;
+
+        WriteStatsHeader(cpi->ss);
+    }
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass2Control
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (all results are written into *cpi).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Determines Section info, and does datarate control
+ *                  that is only possible in 2nd pass
+ *
+ *  SPECIAL NOTES : Consumes exactly one record of the first pass stats file
+ *                  (cpi->fs) per call. Every look-ahead read is undone by
+ *                  restoring the stream to the position saved right after
+ *                  that record.
+ *
+ *                  cpi->FramesToKey == 0           : keyframe; scan ahead to
+ *                      the next keyframe, set KFBoost and pick the section's
+ *                      target bandwidth (and, without VBR, its worst Q).
+ *                  cpi->FramesTillGfUpdateDue == 0 : consider a golden frame
+ *                      update and its boost.
+ *                  otherwise                       : bookkeeping only.
+ *
+ ****************************************************************************/
+void CCONV Pass2Control( CP_INSTANCE *cpi)
+{
+    INT32 i;
+    FIRSTPASS_STATS sectionStats;
+    FIRSTPASS_STATS thisFrame;
+    FIRSTPASS_STATS nextFrame;
+    FIRSTPASS_STATS lastFrame;
+    double NewBitsPerMB;
+
+    double total = 0;
+    double avg = 0;
+    int    actualMBS;
+
+    fpos_t pos1;                    // stream position just after this frame's record
+
+    InputStats(cpi->fs,&thisFrame);
+    fgetpos(cpi->fs,&pos1);
+
+    // Running total of the error still to be encoded (see Pass2Initialize);
+    // computed now because thisFrame is overwritten by the look-ahead below.
+    NewBitsPerMB = cpi->TotalBitsPerMB  -  thisFrame.MeanInterError;
+
+    // keyframe and section processing !
+    if( cpi->FramesToKey == 0 )
+    {
+        cpi->KFForced = cpi->NextKFForced;
+        cpi->NextKFForced = 0;
+
+        cpi->ThisIsKeyFrame = TRUE;
+
+        // The keyframe itself is not accumulated into the section stats.
+        ZeroStats( &sectionStats);
+
+        cpi->FramesToKey = 1;
+        InputStats(cpi->fs,&nextFrame);
+
+        // find the next keyframe
+        while(!feof(cpi->fs))
+        {
+            memcpy(&lastFrame,&thisFrame,sizeof(thisFrame));
+            memcpy(&thisFrame,&nextFrame,sizeof(thisFrame));
+
+            InputStats(cpi->fs,&nextFrame);
+
+            //  mark a key if first pass marked it a keyframe and either
+            //    - we are past the minimum keyframe distance and the frame differs
+            //      markedly from the previous one (or intra is nearly as cheap as inter), or
+            //    - the next frame gets a big benefit from it being a keyframe
+            if (   thisFrame.isKey
+                && (   (   cpi->FramesToKey > cpi->MinimumDistanceToKeyFrame
+                        && (   fabs(lastFrame.MeanInterError - thisFrame.MeanInterError) / thisFrame.MeanInterError > .40
+                            || fabs(lastFrame.MeanIntraError - thisFrame.MeanIntraError) / thisFrame.MeanIntraError > .40
+                            || thisFrame.MeanIntraError * 5 < thisFrame.MeanInterError * 6
+                           )
+                       )
+                    || nextFrame.MeanIntraError > nextFrame.MeanInterError  + 2000
+                   )
+               )
+            {
+                break;
+            }
+
+            cpi->FramesToKey ++;
+
+            // since we don't have a key frame within the next two forcekeyframeevery intervals
+            // set the next keyframe to be forcekeyframe every
+            if(cpi->FramesToKey > 2 * cpi->ForceKeyFrameEvery)
+            {
+                cpi->FramesToKey = cpi->ForceKeyFrameEvery;
+                cpi->NextKFForced = 1;
+                break;
+            }
+        }
+        if(feof(cpi->fs))
+            cpi->FramesToKey ++;
+
+        // distance to keyframe is not 2 times our max distance but it is greater than our max distance
+        // since we need a keyframe put it in the center between this key frame and the next
+        if( cpi->FramesToKey > cpi->ForceKeyFrameEvery )
+        {
+            cpi->FramesToKey /= 2;
+            cpi->NextKFForced = 1;
+        }
+
+        // Undo the look-ahead. fsetpos() also clears the end-of-file indicator
+        // and, unlike arithmetic on fpos_t values followed by a relative
+        // fseek(), is well defined for text streams.
+        fsetpos(cpi->fs,&pos1);
+
+        // determine how big to make this keyframe based on how well the subsequent frames use inter blocks
+        total = 1.0;
+        for(i = 0 ;i < 4 && i < cpi->FramesToKey ; i++)
+        {
+            InputStats(cpi->fs,&nextFrame);
+            total *= ( nextFrame.MeanIntraError - nextFrame.MeanInterError ) / nextFrame.MeanIntraError;
+            avg += total * ( nextFrame.MeanIntraError - nextFrame.MeanInterError ) ;
+
+            // this break out is to insure we handle the situation that is really different from
+            // our last frame but similar to our next frame doesn't get counted in our metric, which
+            // is trying to estimate the average amount of data retained from the keyframe.
+            if(total < .1 || nextFrame.MeanIntraError < 200)
+                break;
+        }
+
+        // Note: avg is truncated to INT32 first, then divided (integer division).
+        cpi->KFBoost = (INT32 ) avg / 180 ;
+
+        if(cpi->FramesToKey < 4 )
+            cpi->KFBoost = 0;
+
+        fsetpos(cpi->fs,&pos1);
+
+        // read first pass file up until next keyframe and generate avg section stats.
+        for(i = 0 ;i < cpi->FramesToKey-1  ; i++)
+        {
+            InputStats(cpi->fs,&thisFrame);
+            AccumulateStats(&sectionStats, &thisFrame);
+        }
+        AvgStats(&sectionStats);
+
+        fsetpos(cpi->fs,&pos1);
+
+        actualMBS = (cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+        if(cpi->TwoPassVBREnabled)
+        // determine bitrate to shoot for for this section
+        {
+            // Share of the clip's remaining bits given to this section, in
+            // proportion to its share of the remaining inter error.
+            double SectionErrorPerMB = sectionStats.MeanInterError * sectionStats.count;
+            double Pctg = SectionErrorPerMB / cpi->TotalBitsPerMB;
+            double DesiredSectionSize = cpi->TotalBitsLeftInClip * Pctg;
+            double DesiredSectionBitRate = cpi->Configuration.OutputFrameRate * DesiredSectionSize / sectionStats.count;
+
+            if(sectionStats.count < 2)
+                DesiredSectionBitRate = cpi->ActualTargetBitRate ;
+
+            // Blend with the flat target rate; TwoPassVBRBias is a percentage.
+            if(cpi->TwoPassVBRBias)
+            {
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * (100 - cpi->TwoPassVBRBias) / 100  + DesiredSectionBitRate * cpi->TwoPassVBRBias / 100  ;
+            }
+
+            if(DesiredSectionBitRate < cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection /100 ;
+
+            if(DesiredSectionBitRate > (double) cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100 ;
+
+            cpi->Configuration.TargetBandwidth = (INT32) DesiredSectionBitRate;
+            cpi->InterFrameTarget =  (INT32)((cpi->Configuration.TargetBandwidth -
+                ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+
+            cpi->PerFrameBandwidth = (cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate);
+
+            // Disabled debug trace.
+            if(0)
+            {
+                FILE *sectionstats = fopen("section.stt","a");
+                fprintf(sectionstats,"Frame : %8d Count :%4d sq bits/mb:%8.3f BitsPerMB:%8.3f  BitRate: %8d Q:%3d s:%8d buffer:%8d max:%8d \n ",
+                    - 1 + (INT32) cpi->CurrentFrame , sectionStats.count, sectionStats.SqBitsPerMacroblock, sectionStats.BitsPerMacroblock,
+                    cpi->Configuration.TargetBandwidth / 1024, cpi->Configuration.WorstQuality, cpi->SizeStep, cpi->BufferLevel ,cpi->MaxBufferLevel);
+                fclose(sectionstats);
+            }
+        }
+        else
+        // determine q to use for this section
+        {
+            double SectionErrorPerMB = sectionStats.MeanInterError * sectionStats.count;
+            double Pctg = SectionErrorPerMB / cpi->TotalBitsPerMB;
+            double DesiredSectionSize = cpi->TotalBitsLeftInClip * Pctg;
+            double DesiredSectionBitRate = cpi->Configuration.OutputFrameRate * DesiredSectionSize / sectionStats.count;
+            double target;                           // target bitrate
+            double NewQ;
+            double FirstPassBitRate = sectionStats.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+
+            if(sectionStats.count < 2)
+                DesiredSectionBitRate = cpi->ActualTargetBitRate ;
+
+            if(cpi->TwoPassVBRBias)
+            {
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * (100 - cpi->TwoPassVBRBias) / 100  + DesiredSectionBitRate * cpi->TwoPassVBRBias / 100  ;
+            }
+
+            if(DesiredSectionBitRate < cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMinSection /100 ;
+
+            if(DesiredSectionBitRate > (double) cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100 )
+                DesiredSectionBitRate = cpi->ActualTargetBitRate * cpi->TwoPassVBRMaxSection / 100 ;
+
+            // Clamp the Section Datarate between what will fill up the buffer and what will empty it to .25 of the optimal
+            {
+                double ActualPerFrameBandWidth = cpi->ActualTargetBitRate / cpi->Configuration.OutputFrameRate;
+                double UnusedSectionEndBufferLevel = cpi->BufferLevel + (((cpi->MaxAllowedDatarate * ActualPerFrameBandWidth) / 100) * sectionStats.count);
+                double QuarterOptimalBufferLevel = cpi->OptimalBufferLevel / 4.0;
+                double MaxBitRate = cpi->Configuration.OutputFrameRate * (UnusedSectionEndBufferLevel - QuarterOptimalBufferLevel) / (sectionStats.count + cpi->KFBoost / 16);
+                double MinBitRate = cpi->Configuration.OutputFrameRate * (UnusedSectionEndBufferLevel - cpi->MaxBufferLevel ) / (sectionStats.count + cpi->KFBoost / 16);
+
+                // Neither bound may drop below a third of the target rate.
+                if( MaxBitRate < cpi->ActualTargetBitRate / 3)
+                    MaxBitRate = cpi->ActualTargetBitRate / 3;
+                if( MinBitRate < cpi->ActualTargetBitRate / 3)
+                    MinBitRate = cpi->ActualTargetBitRate / 3;
+
+                if(DesiredSectionBitRate > MaxBitRate)
+                    DesiredSectionBitRate = MaxBitRate;
+
+                if(DesiredSectionBitRate < MinBitRate)
+                    DesiredSectionBitRate = MinBitRate;
+
+                cpi->Configuration.TargetBandwidth = (INT32) DesiredSectionBitRate;
+
+                cpi->InterFrameTarget =  (INT32)((cpi->Configuration.TargetBandwidth -
+                    ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+            }
+
+            target = (double) cpi->Configuration.TargetBandwidth;
+
+            // if q is worse than we estimated for the entire clip use it ( this must be a tough section )!!
+            //   otherwise use the one we estimated.
+            // (An earlier variant also subtracted a fixed RoomForVariation of 3 here.)
+            NewQ = (INT32)  FIRSTPASS_Q  -  ( .5 + log(FirstPassBitRate/target) / log(1.040));
+            if( NewQ < cpi->CalculatedWorstQ )
+            {
+                if(NewQ < cpi->PassedInWorstQ)
+                    NewQ = cpi->PassedInWorstQ;
+
+                cpi->Configuration.ActiveWorstQuality = (INT32) NewQ;
+                cpi->Configuration.WorstQuality = (INT32) NewQ;
+            }
+            else
+            {
+                cpi->Configuration.ActiveWorstQuality = cpi->CalculatedWorstQ;
+                cpi->Configuration.WorstQuality = cpi->CalculatedWorstQ;
+            }
+
+            // Disabled debug trace.
+            if(0)
+            {
+                FILE *sectionstats = fopen("section.stt","a");
+                fprintf(sectionstats,"Frame : %8d Count :%4d sq bits/mb:%8.3f BitsPerMB:%8.3f  BitRate: %8d Q:%3d s:%8d buffer:%8d max:%8d mdr %d %d \n ",
+                    - 1 + (INT32) cpi->CurrentFrame , sectionStats.count, sectionStats.SqBitsPerMacroblock, sectionStats.BitsPerMacroblock,
+                    cpi->Configuration.TargetBandwidth / 1024, cpi->Configuration.WorstQuality, cpi->SizeStep,cpi->BufferLevel ,
+                    cpi->MaxBufferLevel , cpi->MaxAllowedDatarate * cpi->ActualTargetBitRate / cpi->Configuration.OutputFrameRate / 100,
+                    cpi->ThisFrameTarget
+                    );
+                fclose(sectionstats);
+            }
+        }
+    }
+
+    // its not a keyframe check if its time to update our golden frame?
+    else if (cpi->FramesTillGfUpdateDue == 0 )
+    {
+        FIRSTPASS_STATS GfStats;
+        int IntraToInterRatio;
+        int GfUsage;
+
+        ZeroStats( &GfStats);
+
+        // ignore the next frame ( it will have this frame as reference no matter what)
+        InputStats(cpi->fs,&nextFrame);
+
+        // check next frames
+        for(i = 0 ;i < 4 ; i++)
+        {
+            InputStats(cpi->fs,&nextFrame);
+            AccumulateStats(&GfStats, &nextFrame);
+
+            if(nextFrame.isGolden)
+            {
+                // throwout the next frame after this one
+                InputStats(cpi->fs,&lastFrame);
+            }
+        }
+        AvgStats(&GfStats);
+
+        // Intra/inter error ratio (as a percentage), scaled by the percentage
+        // of new motion. No offset is applied to the denominator.
+        IntraToInterRatio = (int) (100 * GfStats.MeanIntraError / (GfStats.MeanInterError ));
+        IntraToInterRatio = (int) (IntraToInterRatio * GfStats.PercentNewMotion / 100);
+        GfUsage = (int) (GfStats.PercentGolden * 8);
+
+        cpi->GfuBoost = IntraToInterRatio;
+
+        // Correct boost to take account of recent observed level of GF usage
+        if ( (GfUsage >> 3) < 64)
+            cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[(GfUsage  >> 3)]) / 16;
+        else
+            cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[63]) / 16;
+
+        // Table entries are scaled by 1000.
+        cpi->GfuBoost = cpi->GfuBoost* GfuDataRateBoost[cpi->pb.AvgFrameQIndex] / 1000;
+
+        // Should we even consider a GF update or is there no point
+        if ( ( GfStats.PercentNewMotion > GF_MODE_DIST_THRESH2) &&
+             (GfStats.MotionSpeed <= MAX_GF_UPDATE_MOTION)
+           )
+        {
+            cpi->ThisFrameTarget = (cpi->InterFrameTarget * (100 * cpi->GfUpdateInterval)) /
+                                   ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+
+            cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);
+
+            if(cpi->FramesToKey > 3)
+            {
+                cpi->pb.RefreshGoldenFrame = TRUE;
+            }
+
+            // Select the interval before the next GF update
+            // To find the interval we find the max of AvX and AvY and work out how many frames
+            // it will take to move X pels (GF_UPDATE_MOTION_INTERVAL in 1/4 pel) assuming the motion
+            // level does not change. The value is then capped to the range MIN_GF_UPDATE_INTERVAL to MAX_GF_UPDATE_INTERVAL
+            if ( cpi->GfuMotionSpeed > 0 )
+            {
+                cpi->GfUpdateInterval = GF_UPDATE_MOTION_INTERVAL / cpi->GfuMotionSpeed;
+
+                if ( cpi->GfUpdateInterval < MIN_GF_UPDATE_INTERVAL )
+                    cpi->GfUpdateInterval = MIN_GF_UPDATE_INTERVAL;
+
+                else if ( cpi->GfUpdateInterval > MAX_GF_UPDATE_INTERVAL )
+                    cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+            }
+            else
+                cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+
+            // Disabled debug trace.
+            if(0)
+            {
+                FILE *gfstats= fopen("gf.stt","a");
+                fprintf(gfstats,"Frame : %8d boost: %d, speed:%d,baseq:%d, intra2inter: %d, newmotion:%d, GfUsage:%d \n",
+                    - 1 + (INT32) cpi->CurrentFrame ,
+                    cpi->GfuBoost,
+                    cpi->GfuMotionSpeed,
+                    GfuDataRateBoost[cpi->pb.AvgFrameQIndex],
+                    100 * GfStats.MeanIntraError / (GfStats.MeanInterError),
+                    GfStats.PercentNewMotion,
+                    GfUsage
+                    );
+                fclose(gfstats);
+            }
+        }
+
+        // Undo the look-ahead reads.
+        fsetpos(cpi->fs,&pos1);
+    }
+
+    // Ordinary inter frame: adjusting the frame based on its neighbors is
+    // not implemented, so nothing is done here.
+    else
+    {
+    }
+
+    cpi->FramesYetToEncode --;
+    cpi->FramesToKey --;
+    cpi->TotalBitsPerMB = NewBitsPerMB;
+
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : Pass1Output
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None (cpi->fps and cpi->fpmss are updated).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Fills cpi->fps with the first pass statistics of the
+ *                  frame just coded, adds them to the running totals in
+ *                  cpi->fpmss and writes the record to cpi->fs.
+ *
+ *  SPECIAL NOTES : Per-macroblock values are divided by the macroblock
+ *                  count with BORDER_MBS removed from each edge.
+ *
+ ****************************************************************************/
+void CCONV Pass1Output( CP_INSTANCE *cpi)
+{
+    PB_INSTANCE     *pb    = &cpi->pb;
+    FIRSTPASS_STATS *stats = &cpi->fps;
+    int codedMBs = (pb->MBRows - (BORDER_MBS*2)) * (pb->MBCols - (BORDER_MBS*2));
+
+    ClearSysState();
+
+    // Frame identity and type flags.
+    stats->frame    = (UINT32) (cpi->CurrentFrame-1);
+    stats->isKey    = pb->FrameType == BASE_FRAME;
+    stats->isGolden = pb->RefreshGoldenFrame;
+    stats->QValue   = pb->quantizer->FrameQIndex;
+    stats->PSNR     = 60;      // fixed value; no PSNR is measured here
+
+    // Per-macroblock error and size figures.
+    stats->MeanInterError      = 1.0 * cpi->InterErrorb / codedMBs;
+    stats->MeanIntraError      = 1.0 * cpi->IntraError / codedMBs;
+    stats->BitsPerMacroblock   = 1.0 * cpi->ThisFrameSize / codedMBs;
+    stats->SqBitsPerMacroblock = stats->BitsPerMacroblock * stats->BitsPerMacroblock;
+
+    AccumulateStats( &cpi->fpmss, stats);
+    OutputStats(cpi->fs, stats);
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h
new file mode 100644
index 00000000..4bcdbfea
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/twopass.h
@@ -0,0 +1,29 @@
+/****************************************************************************
+*        
+*   Module Title :     twopass.h
+*
+*   Description  :     Functions for handling twopass dataratecontrol
+*
+****************************************************************************/
+#ifndef __INC_TWOPASS_H
+#define __INC_TWOPASS_H
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking */
+#endif
+
+/****************************************************************************
+*  Exported functions
+*
+*  This header includes nothing itself: FILE, FIRSTPASS_STATS, CP_INSTANCE,
+*  COMP_CONFIG_VP6 and CCONV must already be declared by the including file.
+****************************************************************************/        
+
+
+// Statistics helpers. NOTE(review): their definitions are not visible from
+// this header; the summaries below are inferred from the names and from the
+// call sites in Pass2Initialize / Pass2Control / Pass1Output -- confirm.
+// Presumably resets every field of *section.
+extern void ZeroStats( FIRSTPASS_STATS *section);
+// Presumably adds *frame into the running totals held in *section.
+extern void AccumulateStats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame);
+// Presumably converts the totals in *section into averages.
+extern void AvgStats ( FIRSTPASS_STATS *section);
+// Presumably writes one stats record to f.
+extern void OutputStats( FILE *f, FIRSTPASS_STATS *stats);
+// Presumably reads one stats record from f into *stats.
+extern void InputStats( FILE *f, FIRSTPASS_STATS *stats);
+// Opens the first pass stats files for the current pass (cpi->pass 1 or 2)
+// and, for pass 2, derives the initial quality limits from them.
+extern void CCONV Pass2Initialize ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig );
+// Per-frame second pass control: keyframe/section and golden frame decisions.
+extern void CCONV Pass2Control( CP_INSTANCE *cpi);
+// Records the first pass statistics of the frame just coded.
+extern void CCONV Pass1Output( CP_INSTANCE *cpi);
+
+#endif
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c
new file mode 100644
index 00000000..965f8e26
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfw_comp_main.c
@@ -0,0 +1,87 @@
+/****************************************************************************
+*
+*   Module Title :     VFW_COMP_MAIN.c
+*
+*   Description  :     Main for video codec demo compression dll
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+#define INC_WIN_HEADER      1
+#include <windows.h>
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+ 
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+
+unsigned long cProcessesAttached = 0;         
+
+HINSTANCE hInstance;        /* Application instance handle. */
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Imports
+*****************************************************************************
+*/
+extern void VPEInitLibrary(void);
+extern void VPEDeInitLibrary(void);
+
+
+BOOL WINAPI DllMain(HANDLE hInst, DWORD fdwReason, LPVOID lpReserved)
+{
+    // DLL entry point. Keeps an attach count in cProcessesAttached so the
+    // library globals are initialised on the first process attach and torn
+    // down on the last process detach. Any other notification returns FALSE.
+    switch ( fdwReason )
+    {
+    case DLL_PROCESS_ATTACH:
+        hInstance = hInst;
+
+        // Only the first attach initialises the globals in the dll.
+        if ( cProcessesAttached++ == 0 )
+            VPEInitLibrary();
+
+        return TRUE;
+
+    case DLL_PROCESS_DETACH:
+        // Only the last detach releases them.
+        if ( --cProcessesAttached == 0 )
+            VPEDeInitLibrary();
+
+        return TRUE;
+
+    default:
+        return FALSE;
+    }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c
new file mode 100644
index 00000000..27099dd1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp.c
@@ -0,0 +1,1687 @@
+/****************************************************************************
+*
+*   Module Title :     vfwcomp.c
+*
+*   Description  :     Video for Windows Compressor interface definition.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <math.h>
+#include "compdll.h" 
+#include "misc_common.h"
+#include "decodemode.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define MAX_PSNR        60.0
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static const UINT8 EndpointLookup[SCAN_ORDER_BANDS] =   // NOTE(review): 16 initialisers - presumably one per scan order band; confirm SCAN_ORDER_BANDS is 16
+    { 1, 4, 10, 12, 15, 19, 21, 26, 28, 34, 36, 42, 48, 53, 57, 63 };
+
+static const UINT32 PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1, 2, 3, 4, 5 };    // Weights for the key frame history averaged in AdjustKeyFrameContext; later (newer) slots count for more
+
+static UINT32 TotDropFrameCount = 0;    // NOTE(review): presumably a running count of dropped frames - confirm at its users
+
+// % boost to data rate for GF update frames. 
+// This extra spend is recovered from the next few frames
+const UINT32 GfuDataRateBoost[64] =     // NOTE(review): 64 entries - presumably indexed by a Q index; confirm
+{
+	1150, 1150, 1150, 1150, 1200, 1200, 1200, 1200,
+	1250, 1250, 1250, 1250, 1350, 1350, 1350, 1350,
+	1250, 1250, 1250, 1250, 1100, 1100, 1050, 1050,
+	1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
+	 950,  950,  950,  950,  950,  950,  950,  950,
+	 900,  900,  900,  900,  900,  900,  850,  850,
+	 800,  800,  750,  600,  500,  400,  350,  300,
+	 250,  200,  150,  125,  100,   75,   50,    0
+};
+
+// Reduce GFU boost as motion level increases
+const UINT32 GfuMotionCorrection[32] = 
+{
+   100, 95, 90, 85, 80, 75, 70, 65, 
+	60, 55, 50, 45, 40, 35, 30, 25,
+	20, 15, 10,  5,  5,  4,  4,  3, 
+	 3,  2,  2,  1,  1,  0,  0,  0,
+};
+
+// Correction to boost value that depends on recent observed GF usage
+// These are 1% steps. > 15% gets max boost.
+// Boost is multiplied by table value then divided by 128.
+const UINT32 GfUsageCorrection2[16] = 
+{    
+	  8,  16,  32,  64,  80,  96, 112, 120, 
+	128, 128, 128, 128, 128, 128, 128, 128
+};
+
+const UINT32 GfUsageCorrection[64] =    // NOTE(review): undocumented; 64 entries rising from 12 to 80 - confirm index and scale at its users
+{    
+    12,12,12,12,12,12,12,12,
+    12,12,12,12,12,13,14,15,
+    16,17,18,19,20,21,22,23,
+    24,25,26,27,28,29,30,31,
+    32,33,34,35,36,37,38,39, 
+    40,41,42,43,44,45,46,47,
+    48,49,50,51,52,53,54,55,
+    56,57,58,59,60,61,62,80
+};
+
+// Threshold and alpha limits for bicubic filtering
+const UINT8 BicubicMaxAlpha[64] =       // Upper limit for alpha; rises from 3 to 11 across the 64 entries
+{
+	 3, 3, 3, 3, 3, 3, 3, 3,
+	 3, 3, 3, 3, 3, 3, 3, 3,
+	 4, 4, 4, 4, 4, 4, 4, 4,
+	 5, 5, 5, 5, 5, 5, 5, 5,
+	 6, 6, 6, 6, 7, 7, 7, 7,
+	 8, 8, 8, 8, 9, 9, 9, 9,
+	10,10,10,10,10,10,10,10, 
+	11,11,11,11,11,11,11,11, 
+};
+const UINT8 BicubicMinThresh[64] =      // Lower limit for the threshold; falls from 31 to 1 across the 64 entries
+{
+	 31,31,31,31,31,31,31,31,
+	 16,16,16,16,16,16,16,16,
+	 8, 8, 8, 8, 8, 8, 8, 8,
+	 4, 4, 4, 4, 4, 4, 4, 4,
+	 4, 4, 4, 4, 4, 4, 4, 4,
+	 2, 2, 2, 2, 2, 2, 2, 2,
+	 1, 1, 1, 1, 1, 1, 1, 1, 
+	 1, 1, 1, 1, 1, 1, 1, 1 
+};
+
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern UINT8 FixedQKfBoostTable[64];
+
+#if defined PSNR_ON
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CalcPSNR
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : Adds to the Tot*, and updates the Min* and Max*, PSNR
+ *                  fields of cpi (Y, U, V and combined) and TotalSqError.
+ *  RETURNS       : Combined Y+U+V PSNR for the frame in dB, at most MAX_PSNR.
+ *
+ *  FUNCTION      : Calculate frame PSNR for diagnostic and tuning purposes.
+ *
+ *  SPECIAL NOTES : Squared error sums are held in doubles, so they cannot
+ *                  overflow a 32 bit integer on large or very noisy frames.
+ ****************************************************************************/
+double CalcPSNR ( CP_INSTANCE *cpi )
+{
+    UINT32 i, j;
+    INT32  Diff;
+    UINT32 LineLength;
+    UINT32 PlaneHeight;
+    UINT32 FrameSize;
+    double FramePsnr;
+    UINT8 *RawDataPtr;
+    UINT8 *ReconPtr;
+    UINT8 *RawDataBuffer;
+    UINT8 *ReconBuffer;
+
+    // Sums of squared differences. Every term is at most 255*255, which a
+    // double accumulates exactly, whereas an INT32 sum can overflow.
+    double Total      = 0.0;    // plane currently being measured
+    double GrandTotal = 0.0;    // Y, U and V together
+
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    // Pick the raw buffer to compare against: yuv1ptr when the pre-processing
+    // filter level is zero, yuv0ptr otherwise.
+	if ( cpi->PreProcFilterLevel == 0 )
+		RawDataBuffer = cpi->yuv1ptr;
+	else
+		RawDataBuffer = cpi->yuv0ptr;
+
+    // Choose the reconstruction buffer according to whether or not post processing is on.
+    if ( cpi->pb.PostProcessingLevel )
+        ReconBuffer = cpi->pb.PostProcessBuffer;
+    else
+        ReconBuffer = cpi->pb.LastFrameRecon;
+
+    // Set up for Y plane measurement. The reconstruction pointer skips the UMV border.
+    LineLength  = cpi->pb.Configuration.VideoFrameWidth;
+    PlaneHeight = cpi->pb.Configuration.VideoFrameHeight;
+    RawDataPtr  = &RawDataBuffer[cpi->pb.YDataOffset];
+    ReconPtr    = &ReconBuffer[cpi->pb.ReconYDataOffset+(UMV_BORDER*cpi->pb.Configuration.YStride)+UMV_BORDER];
+
+    // Loop through the Y plane raw and reconstruction data summing (square differences)
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        for ( j=0; j<LineLength; j++ )
+        {
+            Diff        = (INT32)(RawDataPtr[j]) - (INT32)(ReconPtr[j]);
+            Total      += Diff*Diff;
+            GrandTotal += Diff*Diff;
+        }
+        RawDataPtr += LineLength;                       // raw rows are read as packed (stride == width)
+        ReconPtr   += cpi->pb.Configuration.YStride;
+    }
+
+    // Work out Y PSNR
+    FrameSize = cpi->pb.YPlaneSize;
+
+    if ( Total > 0.0 )
+        FramePsnr = 10.0 * log10((255.0 * 255.0 * FrameSize) / Total);
+    else
+        FramePsnr = MAX_PSNR;      // Limit to prevent / 0
+
+    // Limit max reported frame PSNR to limit the effect of any one frame on the average.
+    if ( FramePsnr > MAX_PSNR )
+        FramePsnr = MAX_PSNR;
+
+    cpi->TotYPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinYPsnr )
+        cpi->MinYPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxYPsnr )
+        cpi->MaxYPsnr = FramePsnr;
+
+    // Set up for U plane measurement.
+    // Chroma is half the luma width and height, so the border offset into the
+    // reconstruction buffer is halved too.
+    LineLength  = cpi->pb.Configuration.VideoFrameWidth/2;
+    PlaneHeight = cpi->pb.Configuration.VideoFrameHeight/2;
+    RawDataPtr  = &RawDataBuffer[cpi->pb.UDataOffset];
+    ReconPtr    = &ReconBuffer[cpi->pb.ReconUDataOffset+(UMV_BORDER>>1)*cpi->pb.Configuration.UVStride+(UMV_BORDER>>1)];
+
+    // Loop through the U plane raw and reconstruction data summing (square differences)
+    Total = 0.0;
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        for ( j=0; j<LineLength; j++ )
+        {
+            Diff        = (INT32)(RawDataPtr[j]) - (INT32)(ReconPtr[j]);
+            Total      += Diff*Diff;
+            GrandTotal += Diff*Diff;
+        }
+        RawDataPtr += LineLength;
+        ReconPtr   += cpi->pb.Configuration.UVStride;
+    }
+
+    // Work out U PSNR
+    FrameSize = cpi->pb.UVPlaneSize;
+
+    if ( Total > 0.0 )
+        FramePsnr =  10.0 * log10((255.0 * 255.0 * FrameSize) / Total);
+    else
+        FramePsnr =  MAX_PSNR;      // Limit to prevent / 0
+
+    // Limit max reported frame PSNR to limit the effect of any one frame on the average.
+    if ( FramePsnr > MAX_PSNR )
+        FramePsnr = MAX_PSNR;
+
+    cpi->TotUPsnr += FramePsnr;
+    if ( FramePsnr < cpi->MinUPsnr )
+        cpi->MinUPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxUPsnr )
+        cpi->MaxUPsnr = FramePsnr;
+
+    // Set up for V plane measurement.
+    // The geometry matches the U plane; only the offsets into the raw and
+    // reconstruction buffers differ.
+    LineLength  = cpi->pb.Configuration.VideoFrameWidth/2;
+    PlaneHeight = cpi->pb.Configuration.VideoFrameHeight/2;
+    RawDataPtr  = &RawDataBuffer[cpi->pb.VDataOffset];
+    ReconPtr    = &ReconBuffer[cpi->pb.ReconVDataOffset+(UMV_BORDER>>1)*cpi->pb.Configuration.UVStride+(UMV_BORDER>>1)];
+
+    // Loop through the V plane raw and reconstruction data summing (square differences)
+    Total = 0.0;
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        for ( j=0; j<LineLength; j++ )
+        {
+            Diff        = (INT32)(RawDataPtr[j]) - (INT32)(ReconPtr[j]);
+            Total      += Diff*Diff;
+            GrandTotal += Diff*Diff;
+        }
+        RawDataPtr += LineLength;
+        ReconPtr   += cpi->pb.Configuration.UVStride;
+    }
+
+    // Work out V PSNR
+    FrameSize = cpi->pb.UVPlaneSize;
+
+    if ( Total > 0.0 )
+        FramePsnr = 10.0 * log10((255.0 * 255.0 * FrameSize) / Total);
+    else
+        FramePsnr = MAX_PSNR;      // Limit to prevent / 0
+
+    // Limit max reported frame PSNR to limit the effect of any one frame on the average.
+    if ( FramePsnr > MAX_PSNR )
+        FramePsnr = MAX_PSNR;
+
+    cpi->TotVPsnr += FramePsnr;
+
+    if ( FramePsnr < cpi->MinVPsnr )
+        cpi->MinVPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxVPsnr )
+        cpi->MaxVPsnr = FramePsnr;
+
+    // Now work out the combined figure across Y, U and V. It is derived from
+    // the summed squared error of all three planes, not by averaging the
+    // three per-plane PSNR values.
+    FrameSize = cpi->pb.YPlaneSize + cpi->pb.UVPlaneSize + cpi->pb.UVPlaneSize;
+
+    if ( GrandTotal > 0.0 )
+        FramePsnr = 10.0 * log10((255.0 * 255.0 * FrameSize) / GrandTotal);
+    else
+        FramePsnr = MAX_PSNR;      // Limit to prevent / 0
+
+    cpi->TotalSqError += GrandTotal;    // NOTE(review): confirm TotalSqError is wide enough for this running sum
+
+    // Limit max reported frame PSNR to limit the effect of any one frame on the average.
+    if ( FramePsnr > MAX_PSNR )
+        FramePsnr = MAX_PSNR;
+
+    cpi->TotPsnr += FramePsnr;
+
+    if ( FramePsnr < cpi->MinPsnr )
+        cpi->MinPsnr = FramePsnr;
+    if ( FramePsnr > cpi->MaxPsnr )
+        cpi->MaxPsnr = FramePsnr;
+
+    return FramePsnr;
+}
+#endif
+/****************************************************************************
+ *
+ *  ROUTINE       : SetupKeyFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Marks the current frame as a key frame by passing
+ *                  BASE_FRAME to VP6_SetFrameType for cpi->pb.
+ *  SPECIAL NOTES : Could be replaced with cpi->pb.FrameType = BASE_FRAME;
+ *                  (review: only if VP6_SetFrameType does nothing more).
+ ****************************************************************************/
+void SetupKeyFrame ( CP_INSTANCE *cpi )
+{
+    VP6_SetFrameType ( &cpi->pb, BASE_FRAME );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : AdjustKeyFrameContext
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Records the size and distance of the current key frame in
+ *                  the key frame history and re-derives cpi->InterFrameTarget.
+ *
+ *  SPECIAL NOTES : cpi->KeyFrameCount is used as a divisor; must be non-zero.
+ *
+ ****************************************************************************/
+void AdjustKeyFrameContext ( CP_INSTANCE *cpi )
+{
+    UINT32 Slot;
+    UINT32 WeightSum = 0;
+    INT32  KfPerSecondX100;
+    INT32  FloorTarget;
+
+    // Seed both estimates with the whole-clip averages. They are taken before
+    // the size of the current frame is added to the running byte total.
+    UINT32 MeanKfInterval = (UINT32) (cpi->CurrentFrame / cpi->KeyFrameCount);
+    UINT32 MeanKfBytes    = (UINT32) (cpi->TotKeyFrameBytes / cpi->KeyFrameCount);
+
+    // Update the frame carry over
+    cpi->TotKeyFrameBytes += (cpi->ThisFrameSize/8);
+
+    // Age the key frame history by one slot ...
+    for ( Slot=0; Slot+1<KEY_FRAME_CONTEXT; Slot++ )
+    {
+        cpi->PriorKeyFrameSize[Slot]     = cpi->PriorKeyFrameSize[Slot+1];
+        cpi->PriorKeyFrameDistance[Slot] = cpi->PriorKeyFrameDistance[Slot+1];
+    }
+
+    // ... and put the current key frame in the newest (last) slot.
+    cpi->PriorKeyFrameSize[KEY_FRAME_CONTEXT - 1]     = cpi->ThisFrameSize;
+    cpi->PriorKeyFrameDistance[KEY_FRAME_CONTEXT - 1] = cpi->LastKeyFrame;
+
+    // Add the weighted history on top of the seeds, then normalise by the
+    // sum of the weights.
+    for ( Slot=0; Slot<KEY_FRAME_CONTEXT; Slot++ )
+    {
+        MeanKfBytes    += PriorKeyFrameWeight[Slot] * cpi->PriorKeyFrameSize[Slot] / 8;
+        MeanKfInterval += PriorKeyFrameWeight[Slot] * cpi->PriorKeyFrameDistance[Slot];
+        WeightSum      += PriorKeyFrameWeight[Slot];
+    }
+    MeanKfBytes    /= WeightSum;
+    MeanKfInterval /= WeightSum;
+
+    // Key frames per second, scaled by 100 to keep two decimal places.
+    KfPerSecondX100 = 100 * cpi->Configuration.OutputFrameRate / MeanKfInterval;
+
+    // Share what is left of the bandwidth after key frames between the
+    // remaining frames. The target stays at 1 when nothing is left or
+    // when the divisor would be zero.
+    cpi->InterFrameTarget = 1;
+    if ( (100 * cpi->Configuration.TargetBandwidth > MeanKfBytes * KfPerSecondX100) &&
+         (100 * cpi->Configuration.OutputFrameRate - KfPerSecondX100 ))
+    {
+        cpi->InterFrameTarget =
+            (INT32)(100* cpi->Configuration.TargetBandwidth - MeanKfBytes * KfPerSecondX100 )
+            / ( (100 * cpi->Configuration.OutputFrameRate - KfPerSecondX100 ) );
+    }
+
+    // Never go below a third of the per frame bandwidth.
+    FloorTarget = cpi->PerFrameBandwidth / 3;
+    if ( cpi->InterFrameTarget < FloorTarget )
+        cpi->InterFrameTarget = FloorTarget;
+
+    cpi->LastKeyFrame     = 1;
+    cpi->LastKeyFrameSize = cpi->ThisFrameSize;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ResizeFrameTo
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  int hscale       : Horizontal scale factor numerator.
+ *                  int hratio       : Horizontal scale factor denominator.
+ *                  int vscale       : Vertical scale factor numerator.
+ *                  int vratio       : Vertical scale factor denominator.
+ *   
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Changes the encoder frame size by the specified ratio.
+ *
+ *  SPECIAL NOTES : New size = input size * ratio / scale, rounded up to a
+ *                  multiple of 16. hscale and vscale must be non-zero.
+ ****************************************************************************/
+void ResizeFrameTo ( CP_INSTANCE *cpi, int hscale, int hratio, int vscale, int vratio )
+{
+    const int SrcW = cpi->YuvInputData.YWidth;
+    const int SrcH = cpi->YuvInputData.YHeight;
+    int NewW, NewH, HalfBandwidth;
+
+    // Store the ratios first: the sizes below are derived from the stored values.
+    cpi->pb.Configuration.HScale = hscale;
+    cpi->pb.Configuration.HRatio = hratio;
+    cpi->pb.Configuration.VScale = vscale;
+    cpi->pb.Configuration.VRatio = vratio;
+
+    // size * Ratio / Scale rounded up, then rounded up again to a multiple of 16.
+    NewW = (cpi->pb.Configuration.HScale - 1 + SrcW * cpi->pb.Configuration.HRatio) / cpi->pb.Configuration.HScale;
+    NewH = (cpi->pb.Configuration.VScale - 1 + SrcH * cpi->pb.Configuration.VRatio) / cpi->pb.Configuration.VScale;
+    NewW = (NewW + 15) / 16 * 16;
+    NewH = (NewH + 15) / 16 * 16;
+    // Packed input layout: strides equal the widths, chroma is half the luma size.
+    cpi->InputConfig.YWidth   = NewW;
+    cpi->InputConfig.YStride  = NewW;
+    cpi->InputConfig.YHeight  = NewH;
+    cpi->InputConfig.UVWidth  = NewW/2;
+    cpi->InputConfig.UVStride = NewW/2;
+    cpi->InputConfig.UVHeight = NewH/2;
+    ChangeEncoderSize ( cpi, NewW, NewH );
+    CopyOrResize ( cpi, TRUE );
+
+    // Scale the key frame target with the size change, capped at half the bandwidth.
+    cpi->KeyFrameDataTarget = (int)cpi->KeyFrameDataTargetOrig * (NewW + NewH) / (SrcW + SrcH);
+    HalfBandwidth = (int)cpi->Configuration.TargetBandwidth/2;
+    if ( cpi->KeyFrameDataTarget > HalfBandwidth )
+        cpi->KeyFrameDataTarget = HalfBandwidth;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ResizeFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Picks the scale/ratio pairs for cpi->SizeStep (or the
+ *                  forced values) and hands them to ResizeFrameTo.
+ *  SPECIAL NOTES : Interlaced material keeps full height until SizeStep 4.
+ *
+ ****************************************************************************/
+void ResizeFrame ( CP_INSTANCE *cpi )
+{
+    // Each dimension is resized by Ratio/Scale; 1/1 leaves it untouched and is
+    // what any SizeStep outside 1..5 (including 0) ends up with.
+    int HScale = 1, HRatio = 1;
+    int VScale = 1, VRatio = 1;
+
+    // A forced internal size overrides the stepped schedule altogether.
+    if ( cpi->ForceInternalSize )
+    {
+        ResizeFrameTo ( cpi,
+                        cpi->ForceHScale,
+                        cpi->ForceHRatio,
+                        cpi->ForceVScale,
+                        cpi->ForceVRatio );
+        return;
+    }
+
+    switch ( cpi->SizeStep )
+    {
+    case 1:
+        // Width to 4/5 for interlaced and non-interlaced material alike.
+        HScale = 5;
+        HRatio = 4;
+        break;
+
+    case 2:
+        if ( cpi->pb.Configuration.Interlaced )
+        {
+            // Width to 3/5, full height.
+            HScale = 5;
+            HRatio = 3;
+        }
+        else
+        {
+            // Width and height to 4/5.
+            HScale = 5;
+            HRatio = 4;
+            VScale = 5;
+            VRatio = 4;
+        }
+        break;
+
+    case 3:
+        if ( cpi->pb.Configuration.Interlaced )
+        {
+            // Width to 1/2, full height.
+            HScale = 2;
+            HRatio = 1;
+        }
+        else
+        {
+            // Width to 3/5, height to 4/5.
+            HScale = 5;
+            HRatio = 3;
+            VScale = 5;
+            VRatio = 4;
+        }
+        break;
+
+    case 4:
+        // Width to 3/5 either way; only the height treatment differs.
+        HScale = 5;
+        HRatio = 3;
+        if ( cpi->pb.Configuration.Interlaced )
+        {
+            VScale = 2;
+            VRatio = 1;
+        }
+        else
+        {
+            VScale = 5;
+            VRatio = 3;
+        }
+        break;
+
+    case 5:
+        // Width and height to 1/2 for both kinds of material.
+        HScale = 2;
+        HRatio = 1;
+        VScale = 2;
+        VRatio = 1;
+        break;
+    }
+
+    ResizeFrameTo ( cpi, HScale, HRatio, VScale, VRatio );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CompressFirstFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Compresses the first frame as a key frame and initialises
+ *                  the key frame history and rate control state in cpi.
+ *  SPECIAL NOTES : cpi->KeyFrameFrequency (always) and OutputFrameRate (when
+ *                  bandwidth is left over) are divisors; keep them non-zero.
+ ****************************************************************************/
+void CompressFirstFrame ( CP_INSTANCE *cpi )
+{
+    UINT32  i;
+
+	cpi->ErrorPerBit = 80;
+
+	// MV and mode counters used in assessing new MV frequency.
+	cpi->FrameNewMvCounter = 0;
+	cpi->FrameModeCounter  = 0;
+
+    // Without auto key framing, key frames are forced at the configured frequency.
+    if ( !cpi->AutoKeyFrameEnabled )
+        cpi->ForceKeyFrameEvery = cpi->KeyFrameFrequency;
+
+    /* set up context of key frame sizes and distances for more local datarate control */
+    for ( i=0; i<KEY_FRAME_CONTEXT; i++ )
+    {
+        cpi->PriorKeyFrameSize[i]     = cpi->KeyFrameDataTarget;
+        cpi->PriorKeyFrameDistance[i] = cpi->ForceKeyFrameEvery;
+    }
+
+    // Keep track of the total number of Key Frames Coded.
+    cpi->KeyFrameCount    = 1;
+    cpi->LastKeyFrame     = 1;
+    cpi->TotKeyFrameBytes = 0;
+
+    if ( cpi->AllowSpatialResampling && cpi->SizeStep != 0 || cpi->ForceInternalSize )    // && binds tighter than ||: (allowed and stepped) or forced
+        ResizeFrame ( cpi );
+	else
+	    CopyOrResize ( cpi, TRUE );
+
+    // Use scan order updates for larger images.
+	if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+		cpi->AllowScanOrderUpdates = TRUE;
+	else
+ 		cpi->AllowScanOrderUpdates = FALSE;
+
+    SetupKeyFrame ( cpi );
+
+    // Calculate a new target rate per frame allowing for average key frame frequency and size thus far.
+    if ( cpi->Configuration.TargetBandwidth > ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency) )
+    {
+        cpi->InterFrameTarget =  (INT32)((cpi->Configuration.TargetBandwidth -
+            ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+
+    }
+    else
+        cpi->InterFrameTarget = 1;      // nothing left over after key frames: use the smallest target
+
+    // Reset the drop frame flags
+    cpi->DropCount = 0;
+	cpi->MaxDropCount = 0;
+
+
+    // Select Intra mode for all MBs and calculate the total error score
+    cpi->IntraError = PickIntra ( cpi );
+    cpi->InterError = cpi->IntraError;
+
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+    if( 0) // Disabled branch, never taken. The condition used to be: cpi->pass == 2)
+    {
+        {
+            int Q,R;
+
+            cpi->ThisFrameTarget = cpi->InterFrameTarget;
+            RegulateQ ( cpi, cpi->ThisFrameTarget);
+            Q = cpi->pb.quantizer->FrameQIndex;
+            cpi->ThisFrameTarget = cpi->InterFrameTarget + ((cpi->InterFrameTarget * cpi->KFBoost) >> 4) ;
+            RegulateQ ( cpi, cpi->ThisFrameTarget);
+
+            R= cpi->pb.quantizer->FrameQIndex  - Q;
+            if(R>FixedQKfBoostTable[Q])
+                R=FixedQKfBoostTable[Q];
+            //S= (FixedQKfBoostTable[Q] + R) / 2;
+            ClampAndUpdateQ ( cpi, Q+R);
+
+        }
+        //ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + FixedQKfBoostTable[cpi->pb.AvgFrameQIndex]);
+    }
+	else
+	{
+
+	     // The first frame is sized from the baseline key frame data target alone
+		cpi->ThisFrameTarget = cpi->KeyFrameDataTarget;
+        RegulateQ ( cpi, cpi->ThisFrameTarget);
+	}
+
+    /* Compress and output the first frame */
+    UpdateFrame ( cpi );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CompressKeyFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Compresses a key frame: steps the spatial resampling size
+ *                  up or down (buffered mode), picks Q, then codes the frame.
+ *  SPECIAL NOTES : FixedQKfBoostTable has 64 entries and is indexed here by
+ *                  Q index values, which must therefore stay below 64.
+ ****************************************************************************/
+void CompressKeyFrame ( CP_INSTANCE *cpi )
+{
+
+	// Reset the active worst quality to the baseline value for key frames.
+	cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+
+	// Auto-spatial re-sampling only allowed for buffered mode (and not when cpi->pass is 2).
+	if ( cpi->BufferedMode && cpi->pass != 2)
+	{
+
+
+		// Decide whether we need to down sample or if we are able to return to a larger image size
+		// We downsample if the buffer fullness is below a given level and falling since the last key frame.
+		if( (cpi->BufferLevel < cpi->ResampleDownWaterMark) &&
+			(cpi->BufferLevel <= cpi->LastKeyFrameBufferLevel) )
+		{
+			if ( cpi->SizeStep < 5 )    // 5 is the largest step ResizeFrame has a size for
+				cpi->SizeStep++;
+		}
+		// We upsample (or undo down sample) if the buffer fullness is above a given level
+		// and is not falling or it is significantly above the optimal level.
+		else if ( (cpi->BufferLevel > ((cpi->OptimalBufferLevel * 110)/100) ) ||
+				  ((cpi->BufferLevel > cpi->ResampleUpWaterMark) &&
+				   (cpi->BufferLevel >= cpi->LastKeyFrameBufferLevel) ) )
+		{
+			if ( cpi->SizeStep > 0 )
+				cpi->SizeStep--;
+		}
+	}
+
+	// Implement any resize that has been chosen
+    if ( cpi->AllowSpatialResampling && cpi->SizeStep != cpi->LastSizeStep || cpi->ForceInternalSize )    // && binds tighter than ||: (allowed and step changed) or forced
+        ResizeFrame ( cpi );
+	else
+	    CopyOrResize ( cpi, TRUE );
+
+    // Use scan order updates for larger images.
+	if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+		cpi->AllowScanOrderUpdates = TRUE;
+	else
+ 		cpi->AllowScanOrderUpdates = FALSE;
+
+    // Keep track of the total number of Key Frames Coded
+    cpi->KeyFrameCount += 1;
+
+    // Reset the drop frame flags
+    cpi->DropCount = 0;
+	cpi->MaxDropCount = 0;
+
+    SetupKeyFrame ( cpi );
+
+	// Set the key frame size constraints
+	cpi->ThisFrameTarget = cpi->KeyFrameDataTarget;
+
+    // Select Intra mode for all MBs and calculate the total error score
+    cpi->IntraError = PickIntra ( cpi );
+    cpi->InterError = cpi->IntraError;
+
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+	// Reset the KeyFrameBpbCorrectionFactor to 1.0
+    cpi->KeyFrameBpbCorrectionFactor = 1;
+
+	// Set an appropriate Key frame Q to match the recent ambient quality
+    if( cpi->pass == 2) 
+    {
+	    if ( cpi->KFForced == TRUE)     // forced key frames get half the table boost
+		    ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + (FixedQKfBoostTable[cpi->pb.AvgFrameQIndex]/2) );
+        else
+        {
+            int Q,R;
+
+            // Q index chosen by RegulateQ for a plain inter frame target
+            cpi->ThisFrameTarget = cpi->InterFrameTarget;
+            RegulateQ ( cpi, cpi->ThisFrameTarget);
+            Q = cpi->pb.quantizer->FrameQIndex;
+            cpi->ThisFrameTarget = cpi->InterFrameTarget + ((cpi->InterFrameTarget * cpi->KFBoost) >> 4) ;     // target raised by KFBoost/16
+            RegulateQ ( cpi, cpi->ThisFrameTarget);
+
+            // Q index gain from the boosted target, limited to the table value for Q
+            R= cpi->pb.quantizer->FrameQIndex  - Q;
+            if(R>FixedQKfBoostTable[Q])
+                R=FixedQKfBoostTable[Q];
+            //S= (FixedQKfBoostTable[Q] + R) / 2;
+            ClampAndUpdateQ ( cpi, Q+R);
+
+        }
+        //ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + FixedQKfBoostTable[cpi->pb.AvgFrameQIndex]);
+    }
+    else
+    {
+	    if ( cpi->KFForced == TRUE)     // forced key frames get half the table boost
+		    ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + (FixedQKfBoostTable[cpi->pb.AvgFrameQIndex]/2) );
+	    else
+		    ClampAndUpdateQ ( cpi, cpi->pb.AvgFrameQIndex + FixedQKfBoostTable[cpi->pb.AvgFrameQIndex] );
+    }
+
+    /* Compress and output the key frame */
+    UpdateFrame ( cpi );
+    cpi->LastSizeStep = cpi->SizeStep;      // remembered so that the next key frame can detect a step change
+
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CompressFrame
+ *
+ *  INPUTS        : CP_INSTANCE *cpi   : Pointer to encoder instance.
+ *                  UINT32 FrameNumber : Frame number (NOT USED).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Compresses a frame.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void CompressFrame ( CP_INSTANCE *cpi, UINT32 FrameNumber )
+{
+    UINT32  i;
+    UINT32  KFIndicator;
+	BOOL 	DropedFrame = FALSE;
+    int     actualMBS;
+
+    cpi->KFForced=0;
+    CopyOrResize ( cpi, FALSE );
+
+	/* Clear down the macro block level mode and MV arrays. */
+    for ( i=0; i<cpi->pb.UnitFragments; i++ )
+    {
+        cpi->pb.FragInfo[i].FragCodingMode = CODE_INTER_NO_MV;     // Default coding mode
+        cpi->pb.FragInfo[i].MVectorX       = 0;
+        cpi->pb.FragInfo[i].MVectorY       = 0;
+    }
+
+    /* Default to normal frames. */ 
+    VP6_SetFrameType ( &cpi->pb, NORMAL_FRAME );
+
+    // Calculate the target bytes for this frame. */
+    cpi->ThisFrameTarget = cpi->InterFrameTarget;
+ 
+    /* */
+/*
+    cpi->pb.mbi.blockDxInfo[0].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[0]];
+    cpi->pb.mbi.blockDxInfo[1].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[1]];
+    cpi->pb.mbi.blockDxInfo[2].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[2]];
+    cpi->pb.mbi.blockDxInfo[3].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[3]];
+    cpi->pb.mbi.blockDxInfo[4].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[4]];
+    cpi->pb.mbi.blockDxInfo[5].dequantPtr = cpi->pb.quantizer->dequant_coeffs[VP6_QTableSelect[5]];
+
+	cpi->pb.mbi.blockDxInfo[0].MvShift =
+    cpi->pb.mbi.blockDxInfo[1].MvShift =
+    cpi->pb.mbi.blockDxInfo[2].MvShift =
+    cpi->pb.mbi.blockDxInfo[3].MvShift = Y_MVSHIFT;
+	cpi->pb.mbi.blockDxInfo[4].MvShift =
+	cpi->pb.mbi.blockDxInfo[5].MvShift = UV_MVSHIFT;
+
+	cpi->pb.mbi.blockDxInfo[0].MvModMask =
+    cpi->pb.mbi.blockDxInfo[1].MvModMask =
+    cpi->pb.mbi.blockDxInfo[2].MvModMask =
+    cpi->pb.mbi.blockDxInfo[3].MvModMask = Y_MVMODMASK;
+	cpi->pb.mbi.blockDxInfo[4].MvModMask =
+	cpi->pb.mbi.blockDxInfo[5].MvModMask = UV_MVMODMASK;
+
+	cpi->pb.mbi.blockDxInfo[0].Plane =
+    cpi->pb.mbi.blockDxInfo[1].Plane =
+    cpi->pb.mbi.blockDxInfo[2].Plane =
+    cpi->pb.mbi.blockDxInfo[3].Plane = 0;
+	cpi->pb.mbi.blockDxInfo[4].Plane =
+	cpi->pb.mbi.blockDxInfo[5].Plane = 1;
+
+    cpi->pb.mbi.blockDxInfo[0].LastDc = 
+    cpi->pb.mbi.blockDxInfo[1].LastDc = 
+    cpi->pb.mbi.blockDxInfo[2].LastDc = 
+    cpi->pb.mbi.blockDxInfo[3].LastDc = cpi->pb.fc.LastDcY;
+    cpi->pb.mbi.blockDxInfo[4].LastDc = cpi->pb.fc.LastDcU;
+    cpi->pb.mbi.blockDxInfo[5].LastDc = cpi->pb.fc.LastDcV;
+
+    cpi->pb.mbi.blockDxInfo[0].Left = &cpi->pb.fc.LeftY[0];
+    cpi->pb.mbi.blockDxInfo[1].Left = &cpi->pb.fc.LeftY[0];
+    cpi->pb.mbi.blockDxInfo[2].Left = &cpi->pb.fc.LeftY[1];
+    cpi->pb.mbi.blockDxInfo[3].Left = &cpi->pb.fc.LeftY[1];
+    cpi->pb.mbi.blockDxInfo[4].Left = &cpi->pb.fc.LeftU;
+    cpi->pb.mbi.blockDxInfo[5].Left = &cpi->pb.fc.LeftV;
+*/
+ 
+	// For Buffered mode make data rate and Q range adjustments based on buffer fullness.
+	if ( cpi->BufferedMode )
+	{
+		INT32 OnePercentBits = 1 + cpi->OptimalBufferLevel/100;
+
+		//if ( cpi->BufferLevel < cpi->OptimalBufferLevel || cpi->BytesOffTarget < 0 )
+		if ( ( cpi->BufferLevel < cpi->OptimalBufferLevel ) || 
+			 ( cpi->BytesOffTarget < cpi->OptimalBufferLevel ) )
+		{
+			INT32 PercentLow = 0;
+
+			// Decide whether or not we need to adjust the frame data rate target.
+			//
+			// If we are are below the optimal buffer fullness level and adherence  
+			// to buffering contraints is important to the end useage then adjust
+			// the per frame target.
+			if ( (cpi->EndUsage == USAGE_STREAM_FROM_SERVER) && ( cpi->BufferLevel < cpi->OptimalBufferLevel ) )
+            {
+				PercentLow = (cpi->OptimalBufferLevel - cpi->BufferLevel) / OnePercentBits;
+				if ( PercentLow > 100 )
+					 PercentLow = 100;		
+				else if ( PercentLow < 0 )
+					 PercentLow = 0;
+            }
+			// Are we overshooting the long term clip data rate...
+			else if ( cpi->BytesOffTarget < 0 )
+			{
+				// Adjust per frame data target downwards to compensate.
+			    PercentLow = (INT32) (100 * -cpi->BytesOffTarget / (cpi->TotalByteCount * 8));
+			    if ( PercentLow > 100 )
+				     PercentLow = 100;		
+			    else if ( PercentLow < 0 )
+				     PercentLow = 0;
+			}
+
+			// lower the target bandwidth for this frame.
+			cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 - (PercentLow/2)) )/100;
+
+			// Set a reduced data rate target for our initial Q calculation. 
+			// This should provide a slight upward pressure on  buffer fullness 
+			// during easier sections. 
+			if ( (cpi->UnderShootPct > 0) && (cpi->UnderShootPct <= 100) ) 
+			{
+				cpi->ThisFrameTarget = (cpi->ThisFrameTarget * cpi->UnderShootPct)/100;
+			}
+
+			// Are we using allowing control of ActiveWorstQuality according to buffer level.
+			if ( cpi->AutoWorstQ )
+			{
+				INT32 CriticalBufferLevel;
+
+				// For streaming applications the most important factor is cpi->BufferLevel as this takes
+				// into account the specified short term buffering constraints. However, hitting the long 
+				// term clip data rate target is also important.
+				if ( cpi->EndUsage == USAGE_STREAM_FROM_SERVER )
+				{
+					// Take the smaller of cpi->BufferLevel and cpi->BytesOffTarget
+					CriticalBufferLevel = (cpi->BufferLevel < cpi->BytesOffTarget) ? cpi->BufferLevel : cpi->BytesOffTarget;
+				}
+				// For local file playback short term buffering contraints are less of an issue
+				else
+				{
+					// Consider only how we are doing for the clip as a whole
+					CriticalBufferLevel = cpi->BytesOffTarget;
+				}
+
+				// Set the active worst quality based upon the selected buffer fullness number.
+				if ( CriticalBufferLevel < cpi->OptimalBufferLevel ) 
+				{
+					if ( CriticalBufferLevel > (cpi->OptimalBufferLevel/4) )
+					{
+						UINT32 QAdjustmentRange = cpi->NiAvQi - cpi->Configuration.WorstQuality;
+						UINT32 AboveBase = (CriticalBufferLevel - (cpi->OptimalBufferLevel/4));
+
+						// Step active worst quality down from cpi->NiAvQi when (CriticalBufferLevel == cpi->OptimalBufferLevel)
+						// to cpi->Configuration.WorstQuality when (CriticalBufferLevel == cpi->OptimalBufferLevel/4) 
+						cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality + 
+																( (QAdjustmentRange * AboveBase) / (cpi->OptimalBufferLevel*3/4) );
+
+						//cpi->Configuration.ActiveWorstQuality = (cpi->NiAvQi * CriticalBufferLevel) / cpi->OptimalBufferLevel;
+						if ( cpi->Configuration.ActiveWorstQuality < cpi->Configuration.WorstQuality )
+							cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+					}
+					else 
+					{
+						cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+					}
+				}
+				else
+				{
+					cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+				}
+
+/*				// Problems with this for local file mode because
+				// cpi->NiAvQi set to lower of average and last frame so as soon as cpi->BytesOffTarget
+				// goes negative we tend to race down to worst quality so this does not behave as one might expect.
+				else
+				{
+					if ( cpi->BytesOffTarget < 0 )
+					{
+						INT32 PercentOvershoot;
+						
+						// Work out the overshoot as a percentage of the total file size
+						// Base cpi->Configuration.ActiveWorstQuality on this amount.
+						PercentOvershoot = (100 * -cpi->BytesOffTarget / (cpi->TotalByteCount * 8));
+
+						if ( PercentOvershoot > (cpi->NiAvQi - cpi->Configuration.WorstQuality) )
+							cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+						else
+							cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi - PercentOvershoot;
+					}
+					else
+						cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+				}
+*/
+			}
+			else
+			{						
+				cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+			}
+		}
+		else 
+		{
+			INT32 PercentHigh;
+
+            if(cpi->BytesOffTarget > cpi->OptimalBufferLevel)
+            {
+			    PercentHigh = (INT32) (100 * (cpi->BytesOffTarget - cpi->OptimalBufferLevel) / (cpi->TotalByteCount * 8));
+			    if ( PercentHigh > 100 )
+				     PercentHigh = 100;		
+			    else if ( PercentHigh < 0 )
+				     PercentHigh = 0;
+                cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 + (PercentHigh/2)) )/100;
+            }
+
+			// Are we allowing control of ActiveWorstQuality according to buffer level?
+			if ( cpi->AutoWorstQ )
+			{
+				// When using the relaxed buffer model stick to the user specified value
+				cpi->Configuration.ActiveWorstQuality = cpi->NiAvQi;
+			}
+			else
+			{
+				cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+			}
+
+		}
+
+		// Set ActiveBestQuality to prevent quality rising too high
+   		cpi->Configuration.ActiveBestQuality = Q_TABLE_SIZE - cpi->BestAllowedQ;
+
+        // Worst quality obviously must not be better than best quality
+		if ( cpi->Configuration.ActiveWorstQuality > cpi->Configuration.ActiveBestQuality )
+			cpi->Configuration.ActiveWorstQuality = cpi->Configuration.ActiveBestQuality - 1;
+		
+	}
+	// Unbuffered mode (eg. video conferencing)
+	else
+	{
+		// Set the active worst quality
+		cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+	}
+
+	// The auto-drop frame code is only used in buffered mode.
+	// In unbuffered mode (e.g. video conferencing) the decision to
+	// code or drop a frame is made outside the codec in response to real
+	// world comms or buffer considerations.
+	if ( cpi->DropFramesAllowed && cpi->BufferedMode ) 
+	{
+		// Check for a buffer underrun-crisis in which case we have to drop a frame
+		if ( cpi->BufferLevel < cpi->PerFrameBandwidth )
+			cpi->DropFrame = TRUE;
+		// Check for drop frame crtieria
+		else if ( cpi->BufferLevel < cpi->DropFramesWaterMark )
+		{
+			if ( cpi->DropCount < cpi->MaxDropCount )
+				cpi->DropFrame = TRUE;
+		}
+	}
+
+    if ( !cpi->DropFrame )
+    {
+        // pick all the macroblock modes and motion vectors
+        UINT32 InterError;
+        UINT32 IntraError;
+
+        /*********************** Q PREDICTION STAGE 1  *****************************/
+
+        /* Select modes and motion vectors for each of the blocks : return an error score for inter and intra */
+
+        // Test for auto key frame.
+        if ( cpi->AutoKeyFrameEnabled && (cpi->LastKeyFrame >= cpi->ForceKeyFrameEvery) ) 
+		{
+            cpi->KFForced=1;
+			CompressKeyFrame(cpi);  // Code a key frame
+			return;
+		}
+        
+
+#if defined(_MSC_VER)
+	    ClearSysState();
+#endif
+
+		// Update data rate to allow for GF updates.
+		// Note that we come in here even for fixed. In order to set the next update interval.
+		// Also note that we do not make a correction for the frames between a kf and the first GF update after a KF.
+		if ( (!cpi->DisableGolden) && cpi->BufferedMode && (cpi->pb.quantizer->FrameQIndex < 60) && (cpi->LastKeyFrame >= cpi->GfUpdateInterval))
+		{
+
+			UINT32 MaxVariance = 0;
+			UINT32 Sum2 = 0;
+			UINT32 Sum3 = 0;
+            int    Sum =                       // number of macroblocks
+                  (cpi->pb.MBRows - (BORDER_MBS*2)) 
+                * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+			if ( Sum )
+			{
+				Sum2 = Sum - (cpi->ModeDist[CODE_INTRA] + cpi->ModeDist[CODE_INTER_PLUS_MV] + cpi->ModeDist[CODE_INTER_FOURMV]);
+				Sum3 = Sum2 - cpi->ModeDist[CODE_INTER_NO_MV] - cpi->ModeDist[CODE_USING_GOLDEN];			
+
+				// Convert Sum2 and Sum3 to %
+				Sum2 = (Sum2 * 100 / Sum);			
+				Sum3 = (Sum3 * 100 / Sum);		
+
+                cpi->fps.PercentMotion = Sum2;
+                cpi->fps.PercentNewMotion = Sum3;
+			}
+				    
+			// Calculate various motion metrics
+			if ( cpi->FrameMvStats.NumMvs )
+			{
+				cpi->GfuMotionSpeed = (cpi->FrameMvStats.SumAbsX > cpi->FrameMvStats.SumAbsY) ? (cpi->FrameMvStats.SumAbsX/cpi->FrameMvStats.NumMvs) : (cpi->FrameMvStats.SumAbsY/cpi->FrameMvStats.NumMvs);
+                cpi->fps.MotionSpeed = cpi->GfuMotionSpeed;
+				cpi->fps.VarianceX = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumXSq) - (cpi->FrameMvStats.SumX*cpi->FrameMvStats.SumX)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+				cpi->fps.VarianceY = ((cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.SumYSq) - (cpi->FrameMvStats.SumY*cpi->FrameMvStats.SumY)) / (cpi->FrameMvStats.NumMvs * cpi->FrameMvStats.NumMvs);
+				MaxVariance = (UINT32) ((cpi->fps.VarianceX > cpi->fps.VarianceY) ? cpi->fps.VarianceX : cpi->fps.VarianceY);
+				cpi->GfuMotionComplexity = (UINT32) (cpi->GfuMotionSpeed + ((cpi->fps.VarianceX)/4) + ((cpi->fps.VarianceY)/4));
+				if ( cpi->GfuMotionComplexity > 31 )
+					cpi->GfuMotionComplexity = 31;
+			}	
+			else
+			{
+				cpi->GfuMotionSpeed = 0; 
+				cpi->GfuMotionComplexity = 0;
+			}
+
+
+            if( cpi->pass == 2 )
+            {
+                if(cpi->pb.RefreshGoldenFrame == TRUE) 
+                {
+    				cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+	    								   ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+    
+	    			cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);	
+                }
+                else if ( cpi->FramesTillGfUpdateDue > 0 )
+                {
+				    // Non GFU frames reduced in bandwidth to account for 
+				    // + GfuBoost % on GFU frames
+				    cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+						                   ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+                }
+            }
+            else
+            {
+			    if ( cpi->FramesTillGfUpdateDue > 0 )
+			    {
+				    // Non GFU frames reduced in bandwidth to account for 
+				    // + GfuBoost % on GFU frames
+				    cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+						                   ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+			    }
+                else if (cpi->DisableGolden == 0) 
+                {
+			        int IntraToInterRatio;
+#define NEWWAY        
+#ifdef NEWWAY
+					IntraToInterRatio = 100 * cpi->IntraError / (cpi->InterError );
+					IntraToInterRatio = IntraToInterRatio * Sum3 / 100;
+
+					cpi->GfuBoost = IntraToInterRatio;
+
+					// Correct boost to take account of recent observed level of GF usage
+					if ( (cpi->GfUsage >> 3) < 64)
+						cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[(cpi->GfUsage  >> 3)]) / 16;
+					else
+						cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection[63]) / 16;
+
+					cpi->GfuBoost = cpi->GfuBoost* GfuDataRateBoost[cpi->pb.AvgFrameQIndex] / 1000;
+
+
+					// Should we even consider a GF update or is there no point
+					if ( ( Sum3 > GF_MODE_DIST_THRESH2) &&
+						 ( cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION)  
+					   ) 
+				    {
+
+
+#else
+
+				    // Calculate the %extra for GFU frames
+				    cpi->GfuBoost = (GfuDataRateBoost[cpi->pb.quantizer->FrameQIndex] * GfuMotionCorrection[cpi->GfuMotionComplexity]) / 100;
+
+				    // Correct boost to take account of recent observed level of GF usage
+				    if ( (cpi->GfUsage >> 3) <= 15 )
+					    cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection2[(cpi->GfUsage >> 3)]) / 128;
+				    else
+					    cpi->GfuBoost = (cpi->GfuBoost * GfUsageCorrection2[15]) / 128;
+
+                    // Should we even consider a GF update or is there no point
+				    if ( (Sum2 > GF_MODE_DIST_THRESH1) && (Sum3 > GF_MODE_DIST_THRESH2) &&
+					     (cpi->GfuMotionSpeed <= MAX_GF_UPDATE_MOTION) && 
+						 (cpi->GfuBoost >= 80)  &&
+					     (MaxVariance <= GF_MAX_VAR_THRESH) ) 
+					{
+
+
+#endif 
+
+						cpi->ThisFrameTarget = (cpi->ThisFrameTarget * (100 * cpi->GfUpdateInterval)) /
+										       ((100 * cpi->GfUpdateInterval) + cpi->GfuBoost);
+
+					    cpi->ThisFrameTarget = cpi->ThisFrameTarget + ((cpi->ThisFrameTarget * cpi->GfuBoost) / 100);	
+
+					    cpi->pb.RefreshGoldenFrame = TRUE;
+
+						if(0)
+						{
+							FILE *gfstats= fopen("gf.stt","a");
+							fprintf(gfstats,"Frame : %8d boost:%d, sp:%d,base:%d,ratio:%d,motion:%d,Gf:%d \n",
+								- 1 + (INT32) cpi->CurrentFrame , 
+								cpi->GfuBoost,
+								cpi->GfuMotionSpeed,
+								GfuDataRateBoost[cpi->pb.AvgFrameQIndex],
+								100 * cpi->IntraError / (cpi->InterError),
+								Sum3,
+								cpi->GfUsage
+								);
+							fclose(gfstats);
+						}
+
+
+
+					    // Select the interval before the next GF update
+					    // To find the interval we find the max of AvX and AvY and work out how many frames
+					    // it will take to move X pels (GF_UPDATE_MOTION_INTERVAL in 1/4 pel) assuming the motion 
+					    // level does not change. The value is then capped to the range MIN_GF_UPDATE_INTERVAL to MAX_GF_UPDATE_INTERVAL
+					    if ( cpi->GfuMotionSpeed > 0 )
+					    {
+						    cpi->GfUpdateInterval = GF_UPDATE_MOTION_INTERVAL / cpi->GfuMotionSpeed;
+
+						    if ( cpi->GfUpdateInterval < MIN_GF_UPDATE_INTERVAL )
+							    cpi->GfUpdateInterval = MIN_GF_UPDATE_INTERVAL;
+
+						    else if ( cpi->GfUpdateInterval > MAX_GF_UPDATE_INTERVAL )
+							    cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+
+					    }
+					    else
+						    cpi->GfUpdateInterval = MAX_GF_UPDATE_INTERVAL;
+
+
+
+				    }
+			    }
+            }
+		}
+
+		// If we have a mode where RD opt is to be used re-do pickmodes with rdopt enabled
+		if (cpi->QuickCompress == 0)
+			cpi->RdOpt = 2;
+		else if  (cpi->QuickCompress == 3)
+			cpi->RdOpt = 2;
+			//cpi->RdOpt = 1;
+
+		// Get a cost estimate for the sake of RD opt.
+		// As we have not yet done pick modes for this frame this is by necessity 
+		// based upon stats from the last frame.
+		//if ( cpi->RdOpt )
+		{
+			RegulateQ ( cpi, (cpi->ThisFrameTarget - (cpi->ModeMvCostEstimate/64)) );
+		}
+
+		// Select the optimal modes
+		PickModes ( cpi, &InterError, &IntraError ); 
+
+        // Normalize the key frame indicator to the range 0-100
+		actualMBS   = (cpi->pb.MBRows - (BORDER_MBS*2)) * (cpi->pb.MBCols - (BORDER_MBS*2));
+	    KFIndicator = (cpi->MotionScore * 100)/((actualMBS * 2)/3);
+
+        cpi->InterErrorb = InterError;
+        cpi->InterError = InterError;
+        cpi->IntraError = IntraError;
+
+        // Test for auto key frame.
+        if( cpi->AutoKeyFrameEnabled )
+        {
+
+            if(    cpi->pass < 2  
+                && KFIndicator > (UINT32) cpi->AutoKeyFrameThreshold
+                && cpi->LastKeyFrame > cpi->MinimumDistanceToKeyFrame
+                && (   cpi->IntraError < 2 * cpi->InterError 
+                    && cpi->IntraError < cpi->InterError + 2000  * actualMBS  
+                   )
+                && (   100 * abs(cpi->InterError - cpi->LastInterError ) / cpi->LastInterError > 40
+                    || 100 * abs(cpi->LastIntraError - cpi->IntraError) / cpi->LastIntraError > 40
+					|| cpi->IntraError * 5 < cpi->InterError * 6
+                   )
+              )
+            {
+
+                CompressKeyFrame(cpi);  // Code a key frame
+                return;
+            }
+
+        }
+
+        // Increment the frames since last key frame count
+        cpi->LastKeyFrame++;
+
+#if defined(_MSC_VER)
+	    ClearSysState();
+#endif
+
+		// Maintain a record of GF usage over the last few frames
+		// Each frame reduce value by 1/8 then add in usage (0-100) for the current frame
+		{
+
+			UINT32 ThisFrameGolden;
+
+			ThisFrameGolden = cpi->ModeDist[CODE_USING_GOLDEN] + cpi->ModeDist[CODE_GOLDEN_MV] + 
+				              cpi->ModeDist[CODE_GOLD_NEAREST_MV] + cpi->ModeDist[CODE_GOLD_NEAR_MV]; 
+
+            ThisFrameGolden = (ThisFrameGolden * 100) / ((cpi->pb.MBRows-2*BORDER_MBS )*(cpi->pb.MBCols-2*BORDER_MBS));
+            cpi->fps.PercentGolden = ThisFrameGolden;
+
+			cpi->GfUsage = ((cpi->GfUsage * 7) + 4) / 8;
+			cpi->GfUsage += ThisFrameGolden;
+		}
+
+        // Get an estimate of the Q that we should code at.
+        RegulateQ ( cpi, (cpi->ThisFrameTarget - (cpi->ModeMvCostEstimate/64)) );
+
+        cpi->DropCount = 0;
+
+
+		// This code is experimental and needs further refinement.
+		if ( cpi->pb.Vp3VersionNo > 7 )
+		{
+			INT32 IIRatio;
+			UINT8 MaxAplha;
+			UINT8 MinThresh;
+
+			// Calucalte an intra inter ratio for blocks that use motion prediction.
+			if ( cpi->MotionInterErr > 0 )
+				IIRatio = (cpi->MotionIntraErr * 10)/cpi->MotionInterErr;
+			else
+				IIRatio = 10;
+
+			// Set Bicubic alpha and apply Q related limits
+			cpi->pb.PredictionFilterAlpha			= cpi->BaselineAlpha;
+
+			// If a golden frame was thrown recently use its q for deciding alpha and thresholdd limits else the current frame Q.
+			if ( cpi->FramesTillGfUpdateDue > 0 )
+			{
+				MaxAplha = BicubicMaxAlpha[cpi->LastGfOrKFrameQ];
+				MinThresh = BicubicMinThresh[cpi->LastGfOrKFrameQ];
+			}
+			else
+			{
+				MaxAplha = BicubicMaxAlpha[cpi->pb.quantizer->FrameQIndex];
+				MinThresh = BicubicMinThresh[cpi->pb.quantizer->FrameQIndex];
+			}
+
+			cpi->pb.PredictionFilterMode             = AUTO_SELECT_PM;	
+
+			// Select the filtering parameters based upon the inter intra ratio
+			if ( IIRatio < 15 )
+			{
+				cpi->pb.PredictionFilterVarThresh	 = 31;
+			}
+			else if ( IIRatio < 20 )
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh + 16;
+			}
+			else if ( IIRatio < 40 )
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh + 8;
+			}
+			else if ( IIRatio < 60 )
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh + 4;
+			}
+			else if ( IIRatio < 80 )
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh + 2;
+			}
+			else if ( IIRatio < 100 )
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh + 1;
+				cpi->pb.PredictionFilterAlpha        += 1;
+			}
+			else 
+			{
+				cpi->pb.PredictionFilterVarThresh    = cpi->BaselineBicThresh;
+				cpi->pb.PredictionFilterAlpha        += 1;
+			}
+
+			// Limit check alpha
+			if ( cpi->pb.PredictionFilterAlpha > MaxAplha )
+				cpi->pb.PredictionFilterAlpha = MaxAplha;
+
+			// Limit check variance threshold
+			if ( cpi->pb.PredictionFilterVarThresh > 31 )
+				cpi->pb.PredictionFilterVarThresh = 31;
+			else if ( cpi->pb.PredictionFilterVarThresh < MinThresh )
+				cpi->pb.PredictionFilterVarThresh = MinThresh;		
+		}
+
+        /* Proceed with the frame update. */
+        UpdateFrame ( cpi );
+    }
+	else
+	{
+		// Update the buffer level variable.
+		cpi->BytesOffTarget += cpi->PerFrameBandwidth;
+
+		// Are we are using the secondary buffer limit constraints
+		if ( cpi->MaxAllowedDatarate )
+		{
+			cpi->BufferLevel += ((cpi->MaxAllowedDatarate * cpi->PerFrameBandwidth) / 100);           
+			if ( cpi->BufferLevel > cpi->MaxBufferLevel )
+				cpi->BufferLevel = cpi->MaxBufferLevel;
+		}
+		// else update the secondary buffer level in line with the current buffer level
+		else
+		{
+			cpi->BufferLevel = cpi->BytesOffTarget;
+		}
+
+		// Update the drop frame flag etc.
+		cpi->DropFrame = FALSE;
+		cpi->DropCount++;
+		TotDropFrameCount++;
+		DropedFrame = TRUE;
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PredictScanOrder
+ *
+ *  INPUTS        : CP_INSTANCE *cpi   : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : cpi->NewScanOrderBands[] : band number per coef index (entry 0 is not written).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Work out an optimal DCT coefficient scan order based
+ *                  upon stats gathered from previous frame.
+ *
+ *  SPECIAL NOTES : Reads cpi->FrameNzCount[1..BLOCK_SIZE-1]. NOTE(review): assumes EndpointLookup[]
+ *                  is ascending and every entry is < BLOCK_SIZE; nothing here checks it - confirm.
+ ****************************************************************************/
+void PredictScanOrder ( CP_INSTANCE *cpi )
+{
+	UINT32 i, j, k;
+	UINT32 Sum;
+	UINT32 tmp2[2];                 // scratch pair used when swapping two rows of tmp
+	UINT32 tmp[BLOCK_SIZE][2];      // [n][0] = ratio (0-255), [n][1] = original coef index; row 0 is never touched
+	UINT32 GroupStartPoint, GroupEndPoint;
+
+	// Convert the frame counts to a 0-255 ratio: FrameNzCount[i][1] as a fraction of ([i][0] + [i][1])
+	for ( i=1; i<BLOCK_SIZE; i++ )
+	{
+		Sum = cpi->FrameNzCount[i][0] + cpi->FrameNzCount[i][1];
+		if ( Sum )
+			tmp[i][0] = (cpi->FrameNzCount[i][1]*255)/Sum;
+		else
+			tmp[i][0] = 0;			// no samples for this position: avoid dividing by zero
+		tmp[i][1] = i;              // remember the original position so it survives the sorts below
+	}
+
+	// Sort rows 1..BLOCK_SIZE-1 into descending order of ratio (insertion sort; strict '>' leaves equal ratios in place).
+	for ( i=1; i<BLOCK_SIZE-1; i++ )
+	{
+		for ( j=i+1; j>1; j-- )
+		{
+			if ( tmp[j][0] > tmp[j-1][0] )
+			{
+				// Swap them over
+				tmp2[0] = tmp[j-1][0];
+				tmp2[1] = tmp[j-1][1];
+
+				tmp[j-1][0] = tmp[j][0];
+				tmp[j-1][1] = tmp[j][1];
+
+				tmp[j][0] = tmp2[0];
+				tmp[j][1] = tmp2[1];
+			}
+		}
+	}
+
+	// Split the coeffs into value range groups then re-sort within each group 
+	// into ascending order based upon zig zag scan position
+	GroupEndPoint = 0;
+	for ( k=0; k<SCAN_ORDER_BANDS; k++ )
+	{
+		GroupStartPoint = GroupEndPoint+1;      // first band starts at row 1; later bands start after the previous endpoint
+		GroupEndPoint = EndpointLookup[k];      // inclusive last row of band k
+
+		for ( i=GroupStartPoint; i<GroupEndPoint; i++ )
+		{
+			for ( j=i+1; j>GroupStartPoint; j-- )
+			{
+				if ( tmp[j][1] < tmp[j-1][1] )      // order by original coef index within the band
+				{
+					// Swap them over
+					tmp2[0] = tmp[j-1][0];
+					tmp2[1] = tmp[j-1][1];
+
+					tmp[j-1][0] = tmp[j][0];
+					tmp[j-1][1] = tmp[j][1];
+
+					tmp[j][0] = tmp2[0];
+					tmp[j][1] = tmp2[1];
+				}
+			}
+		}
+
+		// For each coef index mark its band number
+		for ( i=GroupStartPoint; i<=GroupEndPoint; i++ )
+		{
+			// Note the scan band number for each coef.
+			// tmp[i][1] is the position of the coef in the traditional zig-zag scan order, 
+			// i is the position in the new scan order and k is the band number.
+			cpi->NewScanOrderBands[tmp[i][1]] = k;	
+		}
+	} 
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       :     UpdateFrame
+ *
+ *  INPUTS        :     CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *
+ *  OUTPUTS       :     Updates scan order, drop frame and buffer level fields of *cpi.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Chooses the coefficient scan order bands, encodes the frame
+ *                      via EncodeData() and updates the rate control bookkeeping.
+ *
+ *  SPECIAL NOTES :     FramePSNR is only assigned when PSNR_ON is defined and is
+ *                      never read inside this function.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void UpdateFrame ( CP_INSTANCE *cpi )
+{
+    double FramePSNR = 0.0;
+    PB_INSTANCE *pbi = &cpi->pb;
+
+	// Key frames cannot have backwards dependency so set up defaults for pbi->ScanBands.
+	if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+	{
+		// Set starting point for key frames... These cannot rely on what went before
+		if ( pbi->Configuration.Interlaced )
+			memcpy ( pbi->ScanBands, DefaultInterlacedScanBands, sizeof(pbi->ScanBands) );
+		else
+			memcpy ( pbi->ScanBands, DefaultNonInterlacedScanBands, sizeof(pbi->ScanBands) );
+	}
+
+	// Based upon the previous coded frame work out a predicted best 
+	// scan order banding for coding this frame
+	if ( (cpi->CurrentFrame > 1) && (!cpi->ErrorResilliantMode) &&
+		 ((pbi->Configuration.Interlaced) || (cpi->AllowScanOrderUpdates)) )
+	{
+		PredictScanOrder( cpi );
+	}
+	else
+	{
+		// Choose between default interlaced and non-interlaced sets.
+		if ( pbi->Configuration.Interlaced )
+			memcpy ( cpi->NewScanOrderBands, DefaultInterlacedScanBands, sizeof(cpi->NewScanOrderBands) );
+		else
+			memcpy ( cpi->NewScanOrderBands, DefaultNonInterlacedScanBands, sizeof(cpi->NewScanOrderBands) );
+	}
+
+	// Build the scan order
+	BuildScanOrder ( pbi, cpi->NewScanOrderBands );
+
+    // Encode the frame.
+	EncodeData ( cpi );
+
+    /* Update the BpbCorrectionFactor variable according to whether or not we were
+    *  close enough with our selection of DCT quantiser. Not done for key frames.
+    */
+    if ( VP6_GetFrameType( pbi ) != BASE_FRAME )
+
+        UpdateBpbCorrectionFactor ( cpi, cpi->ThisFrameSize );   // this call is the body of the 'if' above (the blank line is cosmetic)
+
+    // Adjust carry over and or key frame context.
+    if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+        AdjustKeyFrameContext ( cpi );
+
+    cpi->TotalByteCount += (cpi->ThisFrameSize/8);   // NOTE(review): the /8 suggests ThisFrameSize is in bits - confirm
+
+	// The auto-drop frame code is only used in buffered mode.
+	// In unbuffered mode (e.g. video conferencing) the decision to
+	// code or drop a frame is made outside the codec in response to real
+	// world comms or buffer considerations.
+	if ( cpi->BufferedMode )
+	{
+		// If the frame was massively oversize and we are below optimal buffer level drop next frame
+		if ( (cpi->DropFramesAllowed) &&
+			 (cpi->BufferLevel < cpi->OptimalBufferLevel) && 
+			 ((int)cpi->ThisFrameSize > (4 * cpi->ThisFrameTarget))  )
+		{
+			cpi->DropFrame = TRUE;
+		}
+
+		// Set the count for maximum consecutive dropped frames based upon ratio of 
+		// this frame size to target size for this frame.
+		// (MaxDropCount keeps its previous value when the target is not positive.)
+		if(cpi->ThisFrameTarget > 0) 
+		{
+			cpi->MaxDropCount = (cpi->ThisFrameSize / cpi->ThisFrameTarget);
+			if ( cpi->MaxDropCount > cpi->MaxConsecDroppedFrames )
+				cpi->MaxDropCount = cpi->MaxConsecDroppedFrames;
+		}
+	}
+
+    // PSNR_ON builds: optionally post-process the reconstruction, then call the frame PSNR function
+#if defined PSNR_ON
+    if ( !cpi->AllowSpatialResampling )
+    {
+		if ( cpi->pb.quantizer->FrameQIndex < PPROC_QTHRESH )
+		{
+			cpi->pb.PostProcessingLevel = 4;
+
+			PostProcess ( cpi->pb.postproc,
+				          cpi->pb.Vp3VersionNo,
+				          cpi->pb.FrameType,
+				          cpi->pb.PostProcessingLevel,
+				          cpi->pb.quantizer->FrameQIndex,
+				          cpi->pb.LastFrameRecon,
+				          cpi->pb.PostProcessBuffer,
+				          (unsigned char *) cpi->pb.FragInfo,
+				          sizeof(FRAG_INFO),
+				          0x0001 );
+		}
+		else
+			cpi->pb.PostProcessingLevel = 0;
+
+        FramePSNR = CalcPSNR ( cpi );
+    }
+#endif
+
+    // FILE_PSNR builds: same post-processing step as above, but CalcPSNR is not called here
+#if defined FILE_PSNR 
+    if ( !cpi->AllowSpatialResampling )
+    {
+		if ( cpi->pb.quantizer->FrameQIndex < PPROC_QTHRESH )
+		{
+			cpi->pb.PostProcessingLevel=4;
+
+			PostProcess
+				(
+				cpi->pb.postproc,
+				cpi->pb.Vp3VersionNo,
+				cpi->pb.FrameType,
+				cpi->pb.PostProcessingLevel,
+				cpi->pb.quantizer->FrameQIndex,
+				cpi->pb.LastFrameRecon,
+				cpi->pb.PostProcessBuffer,
+				(unsigned char *) cpi->pb.FragInfo,
+				sizeof(FRAG_INFO),
+				0x0001
+				);
+		}
+		else
+			cpi->pb.PostProcessingLevel=0;
+    }
+#endif
+
+
+	// Update the buffer level variable: add this frame's bandwidth allowance, subtract what was actually spent.
+	cpi->BytesOffTarget += (cpi->PerFrameBandwidth - cpi->ThisFrameSize);
+
+	// Are we using the secondary buffer limit constraints
+	if ( cpi->MaxAllowedDatarate  )
+	{
+		cpi->BufferLevel += (((cpi->MaxAllowedDatarate * cpi->PerFrameBandwidth) / 100) - cpi->ThisFrameSize);   // MaxAllowedDatarate is applied as a percentage
+		if ( cpi->BufferLevel > cpi->MaxBufferLevel )
+			cpi->BufferLevel = cpi->MaxBufferLevel;
+	}
+	// else update the secondary buffer level in line with the current buffer level
+	else
+	{
+		cpi->BufferLevel = cpi->BytesOffTarget;
+	}
+
+	// If appropriate update the "last key frame buffer level" value.
+	if ( VP6_GetFrameType( pbi ) == BASE_FRAME )
+		cpi->LastKeyFrameBufferLevel = cpi->BufferLevel;
+
+	// Keep a record of ambient average Q: reset on key frames, else a rounded 3:1 blend of old average and this frame's Q.
+	if ( pbi->FrameType == BASE_FRAME )
+		pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+	else
+		pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c
new file mode 100644
index 00000000..804d1d6b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcomp_if.c
@@ -0,0 +1,1564 @@
+/****************************************************************************
+*
+*   Module Title :     vfwcomp_if.c
+*
+*   Description  :     Compressor interface definition.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+#include "mcomp.h"
+#include "misc_common.h"
+#include "vp60eversion.h"
+#include "twopass.h"
+#include <math.h>
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define CommentString "\nON2.COM VERSION VP60E " VP60EVERSION "\n"
+
+#ifdef _MSC_VER
+#pragma comment(exestr,CommentString)
+#endif
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/
+
+typedef struct _COMPRESSOR_STATE   // NOTE(review): tag starts with '_' + uppercase, a form the C standard reserves; kept as-is for compatibility
+{
+    UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];      // one slot per remembered key frame (KEY_FRAME_CONTEXT entries)
+    UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];  // parallel array to PriorKeyFrameSize
+    INT64  CurrentFrame;
+    UINT32 LastFrameSize;
+    INT32  DropCount;
+    INT64  KeyFrameCount;
+    INT64  TotKeyFrameBytes;
+    UINT32 LastKeyFrameSize;
+    UINT32 LastKeyFrame;
+    INT64  TotalByteCount;
+    UINT32 ActiveMaxQ;
+    double BpbCorrectionFactor;
+} COMPRESSOR_STATE;   // NOTE(review): several field names match CP_INSTANCE members (CurrentFrame, DropCount, LastKeyFrame, TotalByteCount); presumably a snapshot of rate-control state - confirm at the use sites
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static const char vp60eVersion[] = VP60EVERSION;   // string handed out by VP60E_GetVersionNumber()
+static INT32 ClipBytes;                            // zero-initialised file static; its users are outside this part of the file
+
+//#define TIMING   (uncomment to compile in the timing statics below)
+#ifdef TIMING
+#include "mmsystem.h"
+static long ITotalTime=0;
+static long ITime1, ITime2;
+#endif
+
+#if defined MEASURE_SECTION_COSTS
+UINT32 ClipSectionBits[10] = {0,0,0,0,0,0,0,0,0,0};   // non-static: has external linkage when MEASURE_SECTION_COSTS is defined
+#endif
+
+static const UINT8 BicThreshTable[11] = { 31, 31, 31, 16,  8,  4,  3,  2,  1,  1,   1};   // 11-entry lookup; NOTE(review): index meaning not visible here - confirm at use site
+static const UINT8 BicAlphaTable[11]  = {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10};   // 11-entry lookup paired with BicThreshTable; entry n holds the value n
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern UINT32  scanupdates[64][2];
+
+extern void ScaleFrame (
+  YUV_BUFFER_CONFIG *src,
+  YUV_BUFFER_CONFIG *dst,
+  unsigned char *tempArea,
+  unsigned char tempHeight,
+  unsigned int hscale,
+  unsigned int hratio,
+  unsigned int vscale,
+  unsigned int vratio,
+  unsigned int interlaced
+  );
+
+extern void CompressFirstFrame ( CP_INSTANCE *cpi );
+extern void CompressKeyFrame ( CP_INSTANCE *cpi );
+extern void CompressFrame ( CP_INSTANCE *cpi, UINT32 FrameNumber );
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VP60E_GetVersionNumber
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : const char *CCONV: Pointer to version string.
+ *
+ *  FUNCTION      : Returns a pointer to the version string.
+ *
+ *  SPECIAL NOTES : The string is the file-static array vp60eVersion (VP60EVERSION);
+ *                  it is const and must not be modified or freed by the caller.
+ ****************************************************************************/
+const char *CCONV VP60E_GetVersionNumber ( void )
+{
+    return vp60eVersion;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeEncoderSize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 Width     : New frame Width.
+ *                  UINT32 Height    : New frame Height.
+ *
+ *  OUTPUTS       : cpi->pb.Configuration.VideoFrameWidth/Height (rounded up to a multiple of 16).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Updates the encoder frame size.
+ *
+ *  SPECIAL NOTES : Returns early, with no error indication to the caller, if any
+ *                  of the initialisation / allocation steps reports failure.
+ ****************************************************************************/
+void CCONV ChangeEncoderSize ( CP_INSTANCE *cpi, UINT32 Width, UINT32 Height )
+{
+    // Frame size __MUST__ be multiple of 16 pels in each dimension (add 15, then clear the low 4 bits)
+    cpi->pb.Configuration.VideoFrameHeight = ((Height+15)&0xFFFFFFF0); 
+    cpi->pb.Configuration.VideoFrameWidth  = ((Width +15)&0xFFFFFFF0);
+    cpi->pb.YPlaneSize = 0xFFF;   // NOTE(review): purpose of this value is not visible here; presumably a placeholder that VP6_InitFrameDetails replaces - confirm
+
+    // Initialise image format details
+    if ( !VP6_InitFrameDetails( &cpi->pb ) )
+        return;   // nothing is released on this path
+
+    if ( !EAllocateFragmentInfo ( cpi ) )
+    {
+        VP6_DeleteFragmentInfo ( &cpi->pb );
+        VP6_DeleteFrameInfo ( &cpi->pb );
+        return;
+    }
+
+    if ( !EAllocateFrameInfo ( cpi ) )
+    {
+        VP6_DeleteFragmentInfo ( &cpi->pb );
+        VP6_DeleteFrameInfo ( &cpi->pb );
+        EDeleteFragmentInfo ( cpi );   // also undo the encoder-side fragment allocation made above
+        return;
+    }
+
+    // Initialise Motion compensation
+    InitMotionCompensation ( cpi );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : pickSizeStep
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : cpi->SizeStep, FrameRateInput, FrameRateDropFrames, FrameRateDropCount, Configuration.OutputFrameRate.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Starts & initializes encoder's size stepping mechanism.
+ *
+ *  SPECIAL NOTES : Side effect: writes 30 into CompConfig->FrameRate when it is 0.
+ *
+ ****************************************************************************/
+void pickSizeStep ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    double bitsPerPixel;
+
+    int Width  = ((CompConfig->FrameSize & 0xFFFF0000) >> 16);   // width is packed in the high 16 bits of FrameSize
+    int Height = CompConfig->FrameSize & 0x0000FFFF;              // height is packed in the low 16 bits
+
+	if ( CompConfig->FrameRate == 0 )
+		CompConfig->FrameRate = 30;   // default; note this modifies the caller's configuration
+
+	if ( Width==0 )
+		Width = 320;                  // default (local only)
+
+	if ( Height== 0 )
+		Height = 240;                 // default (local only)
+
+    bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                   (CompConfig->FrameRate * Width * Height);
+
+    // drop size to 4/5 before dropping frame rate to 1/2 or 1/3
+    if ( bitsPerPixel < 0.03 )         // VP4 was 0.043
+    {
+        cpi->SizeStep = 2;   // NOTE(review): always overwritten by the if/else chain at the end of this function
+        bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                       (CompConfig->FrameRate * Width * Height * 4/5 * 4/5);   // NOTE(review): evaluated left to right; integer division if FrameRate is an integer type - confirm
+    }
+
+    cpi->FrameRateInput = CompConfig->FrameRate;
+    cpi->FrameRateDropFrames = 0;
+
+    if ( cpi->DropFramesAllowed )
+    {
+        // figure out output frame rate
+        if ( bitsPerPixel > 0.025 )         
+            cpi->FrameRateDropFrames = 0;
+        else if ( bitsPerPixel > 0.015 )
+            cpi->FrameRateDropFrames = 1;
+        else
+            cpi->FrameRateDropFrames = 2;
+    }
+
+    cpi->FrameRateDropCount = 0;
+    cpi->Configuration.OutputFrameRate = CompConfig->FrameRate / (cpi->FrameRateDropFrames+1);   // input rate divided by 1, 2 or 3
+    
+    bitsPerPixel = (CompConfig->TargetBitRate * 1024.0) /
+                   (cpi->Configuration.OutputFrameRate * Width * Height);   // recomputed at full size using the output frame rate
+    
+    // categorize the cpi->SizeStep of the clip by the number of
+    // bits we are allowing per pixel!
+    if( bitsPerPixel > 0.090 )
+        cpi->SizeStep = 0;
+    else if( bitsPerPixel > 0.060 )     // VP4 was 0.09
+        cpi->SizeStep = 1;
+    else if ( bitsPerPixel > .040 )     // VP4 was 0.070
+        cpi->SizeStep = 2;
+    else if ( bitsPerPixel > .030 )     // VP4 was 0.06
+        cpi->SizeStep = 3;
+    else if ( bitsPerPixel > .015 )     // VP4 was 0.043
+        cpi->SizeStep = 4;
+    else
+        cpi->SizeStep = 5;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeEncoderConfig
+ *
+ *  INPUTS        : CP_INSTANCE *cpi            : Pointer to encoder instance.
+ *                  COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Updates encoder with new configuration.
+ *
+ *                  Processing order (later steps override earlier ones):
+ *                    1. Copy the raw CompConfig fields into cpi.
+ *                    2. Apply per-Mode defaults and overrides.
+ *                    3. Apply EndUsage overrides (buffer levels, VBMode,
+ *                       TwoPassVBREnabled, MaxAllowedDatarate).
+ *                    4. Set output frame rate, key frame targets, buffer
+ *                       state and the scaling configuration.
+ *                    5. Call ConfigureQuality, call ChangeEncoderSize when
+ *                       FrameSize differs from the current input size, and
+ *                       call pickSizeStep when in buffered mode.
+ *                    6. Derive the per-frame bit targets and, when cpi->Pass
+ *                       is non-zero, call Pass2Initialize.
+ *
+ *  SPECIAL NOTES : CompConfig->Sharpness indexes BicAlphaTable and
+ *                  BicThreshTable without a range check, and
+ *                  Configuration.OutputFrameRate and KeyFrameFrequency are
+ *                  used as divisors without a zero check, so the caller
+ *                  has to supply sane values.
+ *                  NOTE(review): pickSizeStep is called after the output
+ *                  frame rate is set here; if it rewrites
+ *                  Configuration.OutputFrameRate the per-frame targets use
+ *                  its value rather than this one -- confirm.
+ *
+ ****************************************************************************/
+void CCONV ChangeEncoderConfig ( CP_INSTANCE *cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    INT32 Quality = CompConfig->Quality;  // handed to ConfigureQuality() further down
+    cpi->BufferedMode                  = (CompConfig->OptimalBufferLevel > 0) ? TRUE : FALSE;  // buffered whenever an optimal buffer level is supplied
+    cpi->AutoKeyFrameEnabled           = CompConfig->AutoKeyFrameEnabled;
+    cpi->MinimumDistanceToKeyFrame     = CompConfig->MinimumDistanceToKeyFrame;
+    cpi->ForceKeyFrameEvery            = CompConfig->ForceKeyFrameEvery;
+    cpi->PreProcFilterLevel            = CompConfig->NoiseSensitivity;
+    cpi->AllowSpatialResampling        = CompConfig->AllowSpatialResampling && cpi->BufferedMode;  // NOTE: disallow if mode is unbuffered.
+    cpi->AutoKeyFrameThreshold         = CompConfig->AutoKeyFrameThreshold;
+    cpi->CPUUsed                       = CompConfig->Speed;
+    cpi->Configuration.TargetBandwidth = CompConfig->TargetBitRate * 1024;  // NOTE(review): presumably kbit/s -> bit/s; confirm units
+    cpi->ActualTargetBitRate           = cpi->Configuration.TargetBandwidth;
+
+    cpi->OptimalBufferLevel            = CompConfig->OptimalBufferLevel * cpi->Configuration.TargetBandwidth;  // buffer levels are config multiples of the target bandwidth
+    cpi->StartingBufferLevel           = CompConfig->StartingBufferLevel * cpi->Configuration.TargetBandwidth;
+    cpi->MaxBufferLevel				   = CompConfig->MaximumBufferSize * cpi->Configuration.TargetBandwidth;
+	
+	cpi->DropFramesWaterMark           = (cpi->OptimalBufferLevel * CompConfig->DropFramesWaterMark) / 100;  // water marks are percentages of the optimal buffer level
+    cpi->ResampleDownWaterMark         = (cpi->OptimalBufferLevel * CompConfig->ResampleDownWaterMark) / 100;
+    cpi->ResampleUpWaterMark           = (cpi->OptimalBufferLevel * CompConfig->ResampleUpWaterMark) / 100;
+
+    cpi->DisableGolden                 = CompConfig->DisableGolden       ;         
+    cpi->VBMode                        = CompConfig->VBMode              ; // may be overridden by the EndUsage block below
+    cpi->BestAllowedQ                  = CompConfig->BestAllowedQ        ;          
+    cpi->UnderShootPct                 = CompConfig->UnderShootPct       ;         
+
+    cpi->MaxAllowedDatarate            = CompConfig->MaxAllowedDatarate  ;    // may be overridden by the EndUsage block below
+    cpi->MaximumBufferSize             = CompConfig->MaximumBufferSize   ;     
+
+    cpi->TwoPassVBREnabled             = CompConfig->TwoPassVBREnabled   ;     // may be overridden by the EndUsage block below
+    cpi->TwoPassVBRBias                = CompConfig->TwoPassVBRBias      ;        
+    cpi->TwoPassVBRMaxSection          = CompConfig->TwoPassVBRMaxSection;  
+    cpi->TwoPassVBRMinSection          = CompConfig->TwoPassVBRMinSection;  
+    cpi->Pass                          = CompConfig->Pass                ;                  // DEFAULT_VALUE is replaced per Mode below
+    cpi->ErrorResilliantMode           = CompConfig->ErrorResilientMode; 
+
+	if(cpi->ErrorResilliantMode) 
+		cpi->DisableGolden             =1;  // error resilient mode always switches golden frames off
+
+    cpi->DropFramesAllowed             = CompConfig->AllowDF && cpi->BufferedMode; // NOTE: disallow if mode is unbuffered.
+	cpi->MaxConsecDroppedFrames		   = 4;	// TBD
+    cpi->QuickCompress                 = CompConfig->QuickCompress;  // DEFAULT_VALUE is replaced per Mode below
+
+	cpi->BaselineAlpha				   = BicAlphaTable[CompConfig->Sharpness];  // NOTE: Sharpness is not range checked before indexing
+	cpi->BaselineBicThresh			   = BicThreshTable[CompConfig->Sharpness];
+
+    if(CompConfig->TwoPassVBRMaxSection == DEFAULT_VALUE)
+        cpi->TwoPassVBRMaxSection = CompConfig->MaxAllowedDatarate;  // default the max section rate to the max allowed data rate
+
+    if( CompConfig->FixedQ > 0 )
+        cpi->FixedQ = 63 - CompConfig->Quality;  // NOTE: value is derived from Quality; CompConfig->FixedQ only acts as the enable test here
+	else
+		cpi->FixedQ = -1;  // NOTE(review): -1 presumably means "no fixed Q" -- confirm against users of FixedQ
+
+    // Mode dependent settings: QuickCompress / Pass left at DEFAULT_VALUE by the caller are filled in per compression mode.
+    switch(CompConfig->Mode)
+    {
+    case MODE_REALTIME: 
+    	cpi->Speed = 4;  // only the real time mode assigns cpi->Speed
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 2;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;                            
+        break;
+    case MODE_GOODQUALITY:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;                            
+        break;
+    case MODE_BESTQUALITY:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 0;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 0;                            
+
+        break;
+    case MODE_FIRSTPASS:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 1;                            
+        cpi->PreProcFilterLevel = 0;  // the first pass unconditionally overrides the caller's values for the fields below
+        cpi->FixedQ = FIRSTPASS_Q;
+        cpi->ForceKeyFrameEvery = 99999;
+        cpi->AutoKeyFrameThreshold = 50;
+        cpi->MinimumDistanceToKeyFrame = 0;
+        cpi->AllowSpatialResampling = 0;
+        cpi->DropFramesAllowed = 0;
+        break;
+
+    case MODE_SECONDPASS:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 1;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 2;                            
+        break;
+    case MODE_SECONDPASS_BEST:
+        if(CompConfig->QuickCompress == DEFAULT_VALUE)
+            cpi->QuickCompress = 0;
+        if(CompConfig->Pass == DEFAULT_VALUE)
+            cpi->Pass = 2;                            
+        break;
+
+    } 
+
+	// Are we planning local file playback or streamed
+	cpi->EndUsage = CompConfig->EndUsage;
+
+	// We auto-adjust worst quality for 1 pass modes only and 
+	// disable when coding real time.
+	if ( (CompConfig->Mode < MODE_SECONDPASS) && (cpi->QuickCompress != 2) )  // relies on the MODE_* constants ordering the second pass modes last
+		cpi->AutoWorstQ = TRUE;
+	else
+		cpi->AutoWorstQ = FALSE;
+	
+    // End usage dependent settings: these replace the buffer levels and rate control flags copied from CompConfig above.
+	// 1 pass + local file playback
+    if(CompConfig->EndUsage == USAGE_LOCAL_FILE_PLAYBACK && CompConfig->Mode < MODE_SECONDPASS)
+    {
+
+        cpi->MaxAllowedDatarate = 200;
+        cpi->StartingBufferLevel = 4 * cpi->Configuration.TargetBandwidth;
+        cpi->OptimalBufferLevel = 4 * cpi->Configuration.TargetBandwidth;
+        cpi->MaxBufferLevel = 5 * cpi->Configuration.TargetBandwidth;
+        cpi->VBMode = 1;                          
+        cpi->TwoPassVBREnabled = 0;   
+    }
+	// 2 pass local file playback
+    else if(CompConfig->EndUsage == USAGE_LOCAL_FILE_PLAYBACK && CompConfig->Mode >= MODE_SECONDPASS)
+    {
+        cpi->MaxAllowedDatarate = 400;
+        cpi->StartingBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->OptimalBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->MaxBufferLevel = 10 * cpi->Configuration.TargetBandwidth;
+        cpi->VBMode = 1;                          
+        cpi->TwoPassVBREnabled = 1;               
+    }
+	// 1 or 2 pass streaming playback
+    else
+    {
+        cpi->VBMode = 0;                          // buffer levels copied from CompConfig are kept for streaming
+        cpi->TwoPassVBREnabled = 0;               
+    }
+
+
+    //if(cpi->QuickCompress == 0)
+        //cpi->QuickCompress = 3;
+    //if(cpi->QuickCompress == 3)
+    //    cpi->QuickCompress = 0;
+
+
+    // Set the output frame rate.
+    cpi->Configuration.OutputFrameRate = CompConfig->FrameRate;
+    if ( cpi->Configuration.OutputFrameRate < 1 )
+        cpi->Configuration.OutputFrameRate = CompConfig->OutputFrameRate;  // fall back to the configured output rate; NOTE: that value is not validated either
+    else if ( cpi->Configuration.OutputFrameRate > 1000 )
+        cpi->Configuration.OutputFrameRate = 1000;  // cap at 1000
+
+    // Set key frame data rate target and frequency
+    cpi->KeyFrameDataTargetOrig = (CompConfig->KeyFrameDataTarget * 1024);
+    cpi->KeyFrameDataTarget     = cpi->KeyFrameDataTargetOrig;
+    if(cpi->KeyFrameDataTarget > (int) cpi->Configuration.TargetBandwidth / 2)  // clamp the key frame target to half the target bandwidth
+        cpi->KeyFrameDataTarget = (int) cpi->Configuration.TargetBandwidth / 2;
+
+    cpi->KeyFrameFrequency = CompConfig->KeyFrameFrequency;  // used as a divisor below without a zero check
+
+    cpi->BytesOffTarget = cpi->StartingBufferLevel;				// Set the current buffer level
+    cpi->BufferLevel = cpi->StartingBufferLevel;				// Set the current buffer level
+
+    cpi->LastKeyFrameBufferLevel = cpi->StartingBufferLevel;	// Used to monitor changes in buffer level when considering re-sampling.
+
+    cpi->pb.Configuration.Interlaced  = CompConfig->Interlaced;
+    cpi->pb.Configuration.HScale      = CompConfig->HScale;
+    cpi->pb.Configuration.HRatio      = CompConfig->HRatio;
+    cpi->pb.Configuration.VScale      = CompConfig->VScale;
+    cpi->pb.Configuration.VRatio      = CompConfig->VRatio;
+    cpi->pb.Configuration.ScalingMode = CompConfig->ScalingMode;
+
+    // Set the quality settings.
+    ConfigureQuality ( cpi, Quality );
+
+    /* Set the video frame size. FrameSize packs the width in the upper 16 bits and the height in the lower 16 bits. */
+    if ( CompConfig->FrameSize != 
+        (unsigned int) ((cpi->YuvInputData.YWidth << 16) | cpi->YuvInputData.YHeight) )
+    {
+        ChangeEncoderSize ( cpi, ((CompConfig->FrameSize & 0xFFFF0000) >> 16),
+            CompConfig->FrameSize & 0x0000FFFF);
+
+        cpi->InputConfig.YWidth   = ((CompConfig->FrameSize & 0xFFFF0000) >> 16);
+        cpi->InputConfig.YHeight  = CompConfig->FrameSize & 0x0000FFFF;
+        cpi->InputConfig.YStride  = cpi->InputConfig.YWidth;
+        cpi->InputConfig.UVWidth  = cpi->InputConfig.YWidth /2 ;  // chroma planes are half the luma size in each dimension
+        cpi->InputConfig.UVHeight = (CompConfig->FrameSize & 0x0000FFFF) /2;
+        cpi->InputConfig.UVStride = cpi->InputConfig.YWidth/2;
+
+        cpi->SizeStep = 0;  // reset the size step after a resize; pickSizeStep below may set it again
+    }
+
+    if(cpi->BufferedMode )
+        pickSizeStep ( cpi, CompConfig );
+
+    cpi->InterFrameTarget  =  cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate;  // provisional: both branches of the if/else below overwrite it
+    cpi->PerFrameBandwidth = (cpi->Configuration.TargetBandwidth / cpi->Configuration.OutputFrameRate);
+    // Calculate a new target bytes per frame allowing for predicted key frame frequency and size.
+    if ( (INT32)cpi->Configuration.TargetBandwidth > ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency) )
+        cpi->InterFrameTarget =  (INT32)((cpi->Configuration.TargetBandwidth - ((cpi->KeyFrameDataTarget * cpi->Configuration.OutputFrameRate)/cpi->KeyFrameFrequency)) / cpi->Configuration.OutputFrameRate);
+    else
+        cpi->InterFrameTarget = 1;  // key frames would use the whole budget: keep a minimal non-zero inter frame target
+
+
+    cpi->pass = cpi->Pass;  // cpi->pass mirrors cpi->Pass; EncodeFrameYuv tests the lower-case field
+    if(cpi->pass)
+        Pass2Initialize(cpi,CompConfig);  // called for any non-zero pass value, i.e. for pass 1 as well as pass 2
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : StartEncoder
+ *
+ *  INPUTS        : COMP_CONFIG_VP6 *CompConfig : Encoder configuration.
+ *
+ *  OUTPUTS       : CP_INSTANCE **cpi           : Pointer to pointer to encoder instance.
+ *                                                Receives the result of CreateCPInstance();
+ *                                                it is null when FALSE is returned.
+ *
+ *  RETURNS       : BOOL: TRUE=success, FALSE=failure (the encoder instance
+ *                  could not be created).
+ *
+ *  FUNCTION      : Creates a new encoder instance & initializes it:
+ *                  creates the post-processor and quantizer, selects the
+ *                  profile / bitstream version, applies CompConfig through
+ *                  ChangeEncoderConfig and resets the per-clip rate control
+ *                  state.
+ *
+ *  SPECIAL NOTES : CompConfig->Profile == 4 is mapped to profile 3 with
+ *                  bitstream version 8; every other profile uses version 6.
+ *                  NOTE(review): the results of CreatePostProcInstance and
+ *                  VP6_CreateQuantizer are not checked here; whether they
+ *                  can fail is not visible from this routine.
+ *
+ ****************************************************************************/
+BOOL CCONV StartEncoder ( CP_INSTANCE **cpi, COMP_CONFIG_VP6 *CompConfig )
+{
+    CP_INSTANCE *enc;
+
+#ifdef TIMING
+    // DEBUG CODE: log the start time. Skip the log rather than crash if the
+    // file cannot be opened.
+    ITime1=timeGetTime();
+    {
+        FILE *fp = fopen( "d:\\Times.txt", "at" );
+        if ( fp )
+        {
+            fprintf(fp, "StartEncoder: %d\n",ITime1);
+            fclose(fp);
+        }
+    }
+#endif
+
+    // Create an instance of the encoder. A null instance cannot be
+    // initialised, so report failure instead of dereferencing it.
+    enc  = CreateCPInstance();
+    *cpi = enc;
+    if ( !enc )
+        return FALSE;
+
+    // Initialisation default config.
+    enc->pb.Configuration.HFragPixels = 8;
+    enc->pb.Configuration.VFragPixels = 8;
+    enc->pb.postproc  = CreatePostProcInstance ( &(enc->pb.Configuration) );
+    enc->pb.quantizer = VP6_CreateQuantizer();
+
+    // Set the profile and encoder version number.
+    // Profile 4 is actually encode version 8.
+    if ( CompConfig->Profile == 4 )
+    {
+        enc->pb.VpProfile    = 3;
+        enc->pb.Vp3VersionNo = 8;
+    }
+    else
+    {
+        enc->pb.VpProfile    = CompConfig->Profile;
+        enc->pb.Vp3VersionNo = 6;
+    }
+
+    ChangeEncoderConfig ( enc, CompConfig );
+
+    /* Initialise the compression process. */
+    enc->CurrentFrame                = 1;
+    enc->BpbCorrectionFactor         = 1.0;
+    enc->KeyFrameBpbCorrectionFactor = 0.4;
+    enc->GfuBpbCorrectionFactor      = 2.0;
+    enc->TotalByteCount              = 0;
+    enc->TotalMotionScore            = 0;
+
+    // Running average of Q over normal inter frames starts at the worst quality.
+    enc->NiTotQi  = 0;
+    enc->NiFrames = 0;
+    enc->NiAvQi   = enc->Configuration.WorstQuality;
+
+    // This makes sure encoder version specific tables are initialised
+    VP6_InitQTables ( enc->pb.quantizer, enc->pb.Vp3VersionNo );
+
+    // Indicate that the next frame to be compressed is the first in the current clip.
+    enc->ThisIsFirstFrame = TRUE;
+
+    // Initialize the drop frame flags
+    enc->DropFrame = FALSE;
+    enc->MaxConsecDroppedFrames = 4;
+
+#if defined PSNR_ON
+    // DEBUG: Clear down PSNR variables
+    enc->TotalSqError = 0.0;
+    enc->TotPsnr  = 0.0;
+    enc->TotYPsnr = 0.0;
+    enc->TotUPsnr = 0.0;
+    enc->TotVPsnr = 0.0;
+    enc->MinPsnr  = 999.00;
+    enc->MinYPsnr = 999.00;
+    enc->MinUPsnr = 999.00;
+    enc->MinVPsnr = 999.00;
+    enc->MaxPsnr  = 0.0;
+    enc->MaxYPsnr = 0.0;
+    enc->MaxUPsnr = 0.0;
+    enc->MaxVPsnr = 0.0;
+#endif
+
+#ifdef MAPCA    
+    InitMERefDs();
+#endif
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : ChangeCompressorSetting
+ *
+ *  INPUTS        : CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                  C_SETTING Setting : Compressor setting to change.
+ *                  int Value         : Value to set setting to.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets the specified compressor setting to the 
+ *                  specified value.
+ *
+ *  SPECIAL NOTES : For C_SET_REFERENCEFRAME and C_SET_INTERNAL_SIZE the int
+ *                  Value is reinterpreted as a pointer (to a
+ *                  YUV_BUFFER_CONFIG and to a "HRatio HScale VRatio VScale"
+ *                  string respectively). NOTE(review): this only works
+ *                  where a pointer fits in an int.
+ *
+ *                  C_SET_INTERNAL_SIZE is ignored unless all four numbers
+ *                  parse.
+ *
+ *                  Settings in the experimental range are stored in
+ *                  cpi->Experimental[] and indices 0-5 and 250-255 are also
+ *                  mapped onto encoder fields. Index 255 selects the pass
+ *                  and opens "firstpass.fst" / "firstpass.sst" in the
+ *                  current directory; if either file cannot be opened the
+ *                  encoder falls back to pass 0 instead of using a null
+ *                  stream.
+ *
+ ****************************************************************************/
+void CCONV ChangeCompressorSetting ( CP_INSTANCE *cpi, C_SETTING Setting, int Value )
+{
+    switch ( Setting )
+    {
+    case C_SET_RECOVERY_FRAME:
+        cpi->GfRecoveryFrame = TRUE;
+        break;
+
+    case C_SET_GOLDENFRAME:
+        cpi->pb.RefreshGoldenFrame = TRUE;
+        break;
+
+    case C_SET_REFERENCEFRAME:
+        // The supplied frame replaces both the last frame and the golden frame.
+        CopyFrame ( cpi->pb.postproc, (YUV_BUFFER_CONFIG *) Value, cpi->pb.LastFrameRecon );
+        CopyFrame ( cpi->pb.postproc, (YUV_BUFFER_CONFIG *) Value, cpi->pb.GoldenFrame );
+        break;
+
+    case C_SET_INTERNAL_SIZE:
+        {
+            const char *spec = (const char *) Value;
+            int hRatio, hScale, vRatio, vScale;
+
+            // Parse into locals first so a malformed string cannot leave the
+            // forced scaling half updated; only commit a complete set.
+            if ( spec && sscanf ( spec, "%d %d %d %d", &hRatio, &hScale, &vRatio, &vScale ) == 4 )
+            {
+                cpi->ForceHRatio = hRatio;
+                cpi->ForceHScale = hScale;
+                cpi->ForceVRatio = vRatio;
+                cpi->ForceVScale = vScale;
+                cpi->ForceInternalSize = 1;
+                cpi->ThisIsKeyFrame = TRUE;     // the size change takes effect on a key frame
+            }
+        }
+        break;
+
+    case C_SET_KEY_FRAME:
+        cpi->ThisIsKeyFrame = TRUE;
+        break;
+
+    case C_SET_FIXED_Q:
+        // Out of range values are ignored.
+        if ( (Value >= 0) && (Value < 64) )
+            cpi->FixedQ = 63 - Value;
+        break;
+
+    case C_SET_FIRSTPASS_FILE:
+        break;
+
+    case C_SET_TESTMODE:
+        cpi->pb.testMode = Value;
+        break;
+
+    default:
+        if ( (Setting >= C_SET_EXPERIMENTAL_MIN) && (Setting <= C_SET_EXPERIMENTAL_MAX) )
+        {
+            INT32 nExperimental = Setting - C_SET_EXPERIMENTAL_MIN;
+
+            // Track the highest experimental index that has been set.
+            if (nExperimental >= (INT32)cpi->nExperimentals)
+                cpi->nExperimentals = nExperimental + 1;
+
+            cpi->Experimental[nExperimental] = Value;
+
+            switch(nExperimental)
+            {
+            case 0:
+                cpi->DisableGolden = Value;
+                break;
+            case 1:
+                cpi->VBMode = Value;
+                break;
+            case 2:
+                cpi->BestAllowedQ = Value;
+                break;
+            case 3:
+                cpi->UnderShootPct = Value;
+                break;
+            case 4:
+                cpi->MaxAllowedDatarate = Value;
+                break;
+            case 5:
+                cpi->MaximumBufferSize = Value;
+                cpi->MaxBufferLevel    = cpi->OptimalBufferLevel + ((cpi->MaximumBufferSize * cpi->Configuration.TargetBandwidth) / 100);
+                break;
+            case 250:
+                cpi->TwoPassVBREnabled = Value;
+                break;
+            case 251:
+                cpi->TwoPassVBRBias = Value;
+                break;
+            case 252:
+                cpi->TwoPassVBRMaxSection = Value;
+                break;
+            case 253:
+                cpi->TwoPassVBRMinSection = Value; 
+                break;
+            case 255:
+                cpi->Pass = Value;
+                cpi->pass = Value;
+                if(cpi->pass == 2)
+                {
+                    char dummy[1024];
+
+                    // Second pass: read back the statistics written by pass 1.
+                    cpi->fs = fopen("firstpass.fst","r");
+                    cpi->ss = fopen("firstpass.sst","r");
+
+                    if ( !cpi->fs || !cpi->ss )
+                    {
+                        // Without both stats files a second pass is impossible.
+                        // Release whatever did open and fall back to pass 0 so
+                        // that nothing reads from a null stream.
+                        if ( cpi->fs )
+                            fclose ( cpi->fs );
+                        if ( cpi->ss )
+                            fclose ( cpi->ss );
+                        cpi->fs   = NULL;
+                        cpi->ss   = NULL;
+                        cpi->Pass = 0;
+                        cpi->pass = 0;
+                        break;
+                    }
+
+                    // Skip the column header line of each file.
+                    fgets(dummy,1024,cpi->fs);
+                    fgets(dummy,1024,cpi->ss);
+
+                    {   // calculate a q value to use 
+                        int    actualMBS =                       // number of macroblocks
+                              (cpi->pb.MBRows - (BORDER_MBS*2)) 
+                            * (cpi->pb.MBCols - (BORDER_MBS*2));
+
+                        double fpBitRate;                        // first pass bitrate
+                        double target;                           // target bitrate
+                        double NewQ;
+
+                        const double RoomForVariation = 5;       // 5 q steps above
+
+                        const double FirstPassQ = 32;            // Q assumed for the first pass
+
+                        InputStats(cpi->ss,&cpi->fpmss);
+
+                        fpBitRate = cpi->fpmss.BitsPerMacroblock * actualMBS * cpi->Configuration.OutputFrameRate;
+                        target = (double) cpi->Configuration.TargetBandwidth;
+
+                        // Estimate a Q from the ratio of first pass bitrate to
+                        // target, using a step of 1.05 per Q index.
+                        NewQ = (INT32)  63 -  ( RoomForVariation + FirstPassQ + .5 + log(fpBitRate/target) / log(1.05));
+
+                        // Clamp to [WorstQuality, ActiveBestQuality].
+                        if(NewQ < cpi->Configuration.WorstQuality )
+                            NewQ = cpi->Configuration.WorstQuality;
+
+                        if(NewQ > cpi->Configuration.ActiveBestQuality)
+                            NewQ = cpi->Configuration.ActiveBestQuality;
+
+                        cpi->Configuration.WorstQuality = (INT32) NewQ;
+                        cpi->Configuration.ActiveWorstQuality = cpi->Configuration.WorstQuality;
+                    }
+                }
+                else if (cpi->pass == 1)
+                {
+                    // First pass: create the stats files and write their column headers.
+                    cpi->fs = fopen("firstpass.fst","w");
+                    cpi->ss = fopen("firstpass.sst","w");
+
+                    if ( !cpi->fs || !cpi->ss )
+                    {
+                        // Stats cannot be recorded: release whatever did open
+                        // and fall back to pass 0 so that nothing writes to a
+                        // null stream.
+                        if ( cpi->fs )
+                            fclose ( cpi->fs );
+                        if ( cpi->ss )
+                            fclose ( cpi->ss );
+                        cpi->fs   = NULL;
+                        cpi->ss   = NULL;
+                        cpi->Pass = 0;
+                        cpi->pass = 0;
+                        break;
+                    }
+
+                    fprintf(cpi->fs,
+                        "%8s %8s %8s %8s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s \n",
+                        "","#","key","golden","bits/mb","sq bits/mb","Inter","Intra","Motion","VarX","VarY",
+                        "%Motion","%NewMotion","%Golden");
+
+                    fprintf(cpi->ss,
+                        "%8s %8s %8s %8s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s \n",
+                        "","#","key","golden","bits/mb","sq bits/mb","Inter","Intra","Motion","VarX","VarY",
+                        "%Motion","%NewMotion","%Golden");
+                }
+                break;
+            }
+        }
+
+        break;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CopyPlaneToPacked
+ *
+ *  INPUTS        : unsigned char *Dst       : Destination, rows stored back to back.
+ *                  const unsigned char *Src : First row of the source plane.
+ *                  INT32 Rows               : Number of rows to copy.
+ *                  INT32 RowBytes           : Bytes copied per row.
+ *                  INT32 SrcStride          : Distance between source rows.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies one image plane row by row, dropping the source
+ *                  stride so that the destination rows are contiguous.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+static void CopyPlaneToPacked ( unsigned char *Dst, const unsigned char *Src, INT32 Rows, INT32 RowBytes, INT32 SrcStride )
+{
+    INT32 Row;
+
+    for ( Row = 0; Row < Rows; Row++ )
+    {
+        memcpy ( Dst, Src, RowBytes );
+        Dst += RowBytes;
+        Src += SrcStride;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CopyOrResize
+ *
+ *  INPUTS        : CP_INSTANCE *cpi  : Pointer to encoder instance.
+ *                  BOOL ResetPreproc : Should the preprocessor be reset (e.g for a key frame)
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Brings the frame described by cpi->YuvInputData into
+ *                  cpi->yuv1ptr, either by a straight plane copy when the
+ *                  sizes match cpi->InputConfig or through ScaleFrame when
+ *                  they do not, then runs the temporal pre-processing
+ *                  filter over it if cpi->PreProcFilterLevel is non-zero.
+ *
+ *  SPECIAL NOTES : cpi->pb.ThisFrameRecon is used as temporary workspace
+ *                  for the scaler.
+ *
+ ****************************************************************************/
+void CopyOrResize ( CP_INSTANCE *cpi, BOOL ResetPreproc )
+{
+    // Snapshot of the input configuration taken before any buffer pointers
+    // are changed below; only used to address the frame for tempFilter.
+    YUV_BUFFER_CONFIG filterCfg = cpi->InputConfig;
+
+    if ( cpi->InputConfig.YWidth  == cpi->YuvInputData.YWidth &&
+         cpi->InputConfig.YHeight == cpi->YuvInputData.YHeight )
+    {
+        // Same size: copy the Y, U and V planes one after another into yuv1ptr.
+        CopyPlaneToPacked ( cpi->yuv1ptr,
+                            (unsigned char *)cpi->YuvInputData.YBuffer,
+                            cpi->YuvInputData.YHeight,
+                            cpi->YuvInputData.YWidth,
+                            cpi->YuvInputData.YStride );
+
+        CopyPlaneToPacked ( &cpi->yuv1ptr[(cpi->YuvInputData.YHeight * cpi->YuvInputData.YWidth)],
+                            (unsigned char *)cpi->YuvInputData.UBuffer,
+                            cpi->YuvInputData.UVHeight,
+                            cpi->YuvInputData.UVWidth,
+                            cpi->YuvInputData.UVStride );
+
+        CopyPlaneToPacked ( &cpi->yuv1ptr[((cpi->YuvInputData.YHeight * cpi->YuvInputData.YWidth) * 5) / 4],
+                            (unsigned char *)cpi->YuvInputData.VBuffer,
+                            cpi->YuvInputData.UVHeight,
+                            cpi->YuvInputData.UVWidth,
+                            cpi->YuvInputData.UVStride );
+    }
+    else
+    {
+        // Different size: scale the input into yuv1ptr. The fourth ScaleFrame
+        // argument is 9 when the vertical size is exactly halved, 11 otherwise.
+        UINT8 scaleArg = ( cpi->InputConfig.YHeight*2 == cpi->YuvInputData.YHeight ) ? 9 : 11;
+
+        cpi->InputConfig.YBuffer = (char *) cpi->yuv1ptr;
+        cpi->InputConfig.UBuffer = (char *) &cpi->yuv1ptr[(cpi->pb.Configuration.VideoFrameHeight*cpi->pb.Configuration.VideoFrameWidth)];
+        cpi->InputConfig.VBuffer = (char *) &cpi->yuv1ptr[((cpi->pb.Configuration.VideoFrameHeight*cpi->pb.Configuration.VideoFrameWidth)*5)/4];
+
+        ScaleFrame ( &cpi->YuvInputData, &cpi->InputConfig, cpi->pb.ThisFrameRecon, scaleArg,
+                      cpi->pb.Configuration.HScale, cpi->pb.Configuration.HRatio,
+                      cpi->pb.Configuration.VScale, cpi->pb.Configuration.VRatio,
+                      cpi->pb.Configuration.Interlaced ); 
+    }
+
+    // Nothing more to do when pre-processing is switched off.
+    if ( cpi->PreProcFilterLevel == 0 )
+        return;
+
+    // Keep a copy of the frame as it was before pre-processing.
+#if defined FILE_PSNR 
+    memcpy(cpi->yuv0ptr, cpi->yuv1ptr, (cpi->pb.YPlaneSize + (2 * cpi->pb.UVPlaneSize))); 
+#endif
+
+#if defined PSNR_ON 
+    memcpy(cpi->yuv0ptr, cpi->yuv1ptr, (cpi->pb.YPlaneSize + (2 * cpi->pb.UVPlaneSize))); 
+#endif
+
+    // Restart the pre-processor frame counter when asked to.
+    if ( ResetPreproc )
+        cpi->preproc.frame = 0;
+
+    if ( filterCfg.YStride >= 0 )
+    {
+        // Top-down layout: planes start at the beginning of yuv1ptr.
+        filterCfg.YBuffer = cpi->yuv1ptr;
+        filterCfg.UBuffer = &cpi->yuv1ptr[filterCfg.YHeight * filterCfg.YWidth];
+        filterCfg.VBuffer = &cpi->yuv1ptr[filterCfg.YHeight * filterCfg.YWidth * 5 / 4];
+        tempFilter ( &cpi->preproc, filterCfg.YBuffer, filterCfg.YBuffer,
+            filterCfg.YHeight * filterCfg.YWidth * 3 / 2, cpi->PreProcFilterLevel );
+    }
+    else
+    {
+        // Negative stride: the plane pointers address the last row of each
+        // plane, and the filter is handed the pointer stepped back by
+        // (height - 1) strides. The filter runs in place (source == destination).
+        filterCfg.YBuffer = &cpi->yuv1ptr[(filterCfg.YHeight - 1) * filterCfg.YWidth];
+        filterCfg.UBuffer = &cpi->yuv1ptr[filterCfg.YHeight * filterCfg.YWidth * 5 / 4 - filterCfg.YWidth / 2];
+        filterCfg.VBuffer = &cpi->yuv1ptr[filterCfg.YHeight * filterCfg.YWidth * 3 / 2 - filterCfg.YWidth / 2];
+        tempFilter ( &cpi->preproc,
+            filterCfg.YBuffer + (filterCfg.YHeight - 1) * filterCfg.YStride ,
+            filterCfg.YBuffer + (filterCfg.YHeight - 1) * filterCfg.YStride ,
+            filterCfg.YHeight * filterCfg.YWidth * 3 / 2 , cpi->PreProcFilterLevel);
+    }
+
+    return;
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : EncodeFrameYuv
+ *
+ *  INPUTS        : CP_INSTANCE *cpi                      : Pointer to encoder instance.
+ *                  YUV_INPUT_BUFFER_CONFIG *YuvInputData : Pointer to input frame (YUV).
+ *                  unsigned char *OutPutPtr              : Output buffer.
+ *                 
+ *  OUTPUTS       : unsigned int *is_key                  : Flag whether frame coded
+ *                                                          as intra-frame or not.
+ *                                                          Written on every exit path;
+ *                                                          0 when no frame was coded.
+ *
+ *  RETURNS       : UINT32: Number of bytes written to output buffer.
+ *                  0 when the frame is skipped by frame rate decimation
+ *                  or when the pre-processor could not be initialised.
+ *
+ *  FUNCTION      : Encodes the specified frame creating an output buffer
+ *                  containing the compressed bitstream for the frame.
+ *
+ *  SPECIAL NOTES : The format of the input image is planar YUV 4:2:0.
+ *
+ ****************************************************************************/
+UINT32 CCONV EncodeFrameYuv ( CP_INSTANCE *cpi, YUV_INPUT_BUFFER_CONFIG *YuvInputData, unsigned char *OutPutPtr, unsigned int *is_key )
+{
+    UINT8 iskey;
+    UINT32 ret_val;
+
+    // Give the caller a defined flag even on the early exits below, which
+    // produce no frame at all.
+    *is_key = 0;
+
+    // Frame rate decimation: skip this input frame while the drop counter runs.
+    if ( cpi->FrameRateDropCount )
+    {
+        --cpi->FrameRateDropCount;
+        return 0;
+    }
+    
+    cpi->FrameRateDropCount = cpi->FrameRateDropFrames;
+    cpi->pb.Configuration.ExpandedFrameWidth  = YuvInputData->YWidth;
+    cpi->pb.Configuration.ExpandedFrameHeight = YuvInputData->YHeight;
+    cpi->pb.OutputWidth  = YuvInputData->YWidth;
+    cpi->pb.OutputHeight = YuvInputData->YHeight;
+
+    // Re-initialise the pre-processor if the input size differs from that of
+    // the previously remembered input frame.
+    if ( cpi->PreProcFilterLevel )
+    {
+        int OldFrameSize = cpi->YuvInputData.YHeight *  cpi->YuvInputData.YWidth * 3/2;
+        int FrameSize = YuvInputData->YHeight * YuvInputData->YWidth * 3/2;
+
+        if ( OldFrameSize != FrameSize )
+        {
+            if ( !InitPreProc ( &cpi->preproc, FrameSize ) )
+            {
+                EDeleteFrameInfo ( cpi );
+                return 0;       // no bytes written
+            }
+        }
+    }
+
+    // remember our input buffer (incase we want to do something to it later!)
+    memcpy ( &cpi->YuvInputData, YuvInputData, sizeof(YUV_INPUT_BUFFER_CONFIG) );
+
+    cpi->ThisFrameSize = 0;         // Reset the frame size monitor variable
+
+    cpi->DataOutputBuffer = OutPutPtr;
+    cpi->pb.DataOutputInPtr = cpi->DataOutputBuffer;
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+
+    // Decide whether to allow selective bicubic filtered prediction
+    if ( cpi->pb.VpProfile == SIMPLE_PROFILE )
+    {
+        // NOTE: Use huffman only allowed if using multiple data streams
+        cpi->pb.MultiStream          = TRUE;
+        cpi->pb.UseHuffman           = TRUE;    
+        cpi->pb.UseLoopFilter        = NO_LOOP_FILTER;
+        cpi->pb.PredictionFilterMode = BILINEAR_ONLY_PM;    
+    }
+    else
+    {
+        // NOTE: Use huffman only allowed if using multiple data streams
+        cpi->pb.MultiStream                  = FALSE;
+        cpi->pb.UseHuffman                   = FALSE;
+        cpi->pb.UseLoopFilter                = LOOP_FILTER_BASIC;
+        cpi->pb.PredictionFilterMode         = AUTO_SELECT_PM;  
+
+        // Vp6.2 and later specific
+        if ( cpi->pb.Vp3VersionNo > 7 )
+        {
+            cpi->pb.PredictionFilterVarThresh    = 31;                          // Default bicubic variance threshold
+            cpi->pb.PredictionFilterAlpha        = cpi->BaselineAlpha;          // Default Alpha Index for bicubic filter.
+        }
+        else
+        {
+            cpi->pb.PredictionFilterVarThresh    = (2 << 5);    // Variance threshold for using bicubic (range 0 to 32) << 5. (note however 0 = no threshold)
+            cpi->pb.PredictionFilterAlpha        = 16;          // Filter Alpha index for backwards compatibility with VP61. NOTE(review): the original comment said index 32 but the code uses 16 -- confirm.
+        }
+
+        // Size of frame influences default limit on motion length for use of bicubic.
+        if ( cpi->pb.Configuration.VideoFrameWidth >= 480 )
+            cpi->pb.PredictionFilterMvSizeThresh = 4;           // Restrict bicubic to mvs of < +/- (1 << (X-1)) pels. 0 Indicates unrestricted.
+        else
+            cpi->pb.PredictionFilterMvSizeThresh = 3;           // Restrict bicubic to mvs of < +/- (1 << (X-1)) pels. 0 Indicates unrestricted.
+
+        // NOTE(review): these two assignments replace the LOOP_FILTER_BASIC /
+        // AUTO_SELECT_PM values set at the top of this branch, so those never
+        // take effect. Kept as is because changing it alters the bitstream --
+        // confirm which pair is intended.
+        cpi->pb.UseLoopFilter        = NO_LOOP_FILTER;
+        cpi->pb.PredictionFilterMode = BICUBIC_ONLY_PM; 
+
+    }
+
+    // Variables used to track inter vs intra prediction error for mbs that use motion
+    cpi->MotionIntraErr = 0;
+    cpi->MotionInterErr = 0;
+    
+    // Set default KF boost
+    cpi->KFBoost = 4;
+
+    // 2nd pass datarate control
+    if(cpi->pass == 2)
+    {
+        Pass2Control(cpi);
+    }
+
+    // Special case for first frame
+    if ( cpi->ThisIsFirstFrame )
+    {
+        cpi->pb.RefreshGoldenFrame = TRUE;                      // KF is also GF update
+
+        // Stats and other first frame initialisation
+        ClipBytes = 0;
+        cpi->NiAvQi = cpi->Configuration.WorstQuality;
+
+        // Now code the first frame
+        CompressFirstFrame ( cpi );
+        cpi->ThisIsFirstFrame = FALSE;
+        cpi->ThisIsKeyFrame   = FALSE;
+    }
+    // A key frame explicitly requested by the calling application
+    else if ( cpi->ThisIsKeyFrame )
+    {
+        cpi->pb.RefreshGoldenFrame = TRUE;                      // KF is also GF update
+        CompressKeyFrame ( cpi );
+        cpi->ThisIsKeyFrame = FALSE;
+    }
+    else
+    {
+        /* Compress the frame. */
+        CompressFrame ( cpi, (unsigned int) cpi->CurrentFrame );
+    }
+
+
+    // Keep a record from which we can calculate the average Q excluding GF updates and key frames
+    if ( (cpi->pb.FrameType != BASE_FRAME) && !cpi->pb.RefreshGoldenFrame )
+    {
+        cpi->NiFrames++;
+
+        // Calculate the average Q for normal inter frames (not key or GFU frames)
+        // This is used as a basis for setting active worst quality.
+        if ( cpi->NiFrames > 150 )
+        {
+            cpi->NiTotQi += cpi->pb.quantizer->FrameQIndex;
+            cpi->NiAvQi = (cpi->NiTotQi/cpi->NiFrames);
+        }
+        // Early in the clip ... average the current frame Q value with the default
+        // entered by the user as a dampening measure (often there are very easy intro credits).
+        else
+        {
+            cpi->NiTotQi += ((cpi->Configuration.WorstQuality + cpi->pb.quantizer->FrameQIndex + 1) / 2);
+            cpi->NiAvQi = (cpi->NiTotQi/cpi->NiFrames);
+        }
+
+        // If the average is higher than what was used in the last frame 
+        // (after going through the recode loop to keep the frame size within range)
+        // then use the last frame value + 1.
+        // The +1 is designed to stop Q and hence the data rate, from progressively 
+        // falling away during difficult sections.
+        if ( cpi->pb.quantizer->FrameQIndex < cpi->NiAvQi )
+            cpi->NiAvQi = cpi->pb.quantizer->FrameQIndex + 1;
+    }
+
+    // Clip size stats (ThisFrameSize is in bits, ClipBytes in bytes)
+    ClipBytes += (cpi->ThisFrameSize >> 3);
+  
+    // Update stats variables. 
+    cpi->LastFrameSize = (UINT32)cpi->ThisFrameSize;
+    cpi->CurrentFrame++;
+
+    // If we have had a GF update then reset the counter till next one due.
+    if ( cpi->pb.RefreshGoldenFrame )
+    {
+        cpi->FramesTillGfUpdateDue = cpi->GfUpdateInterval;
+        cpi->LastGfOrKFrameQ = cpi->pb.quantizer->FrameQIndex;
+        cpi->pb.RefreshGoldenFrame = FALSE;
+    }
+
+    // Decrement count till next GF update due
+    if ( cpi->FramesTillGfUpdateDue > 0 )
+        cpi->FramesTillGfUpdateDue--;
+
+    // Return whether or not we are a key frame: a frame type of 0 is
+    // reported to the caller as a key frame.
+    iskey = VP6_GetFrameType ( &cpi->pb );
+    *is_key = ( iskey == 0 ) ? 1 : 0;
+
+#if defined(_MSC_VER)
+    ClearSysState();
+#endif
+    if(cpi->pass==1)
+    {
+        Pass1Output(cpi);
+    }
+#if defined(_MSC_VER)
+    if ( cpi->pb.testMode )
+        vp6_appendframe ( &cpi->pb );
+#endif
+    cpi->GfRecoveryFrame = FALSE;
+    cpi->TotalBitsLeftInClip -= cpi->ThisFrameSize ;
+    // Set the output bytes buffered count and reset the  buffer input pointer. 
+    cpi->pb.DataOutputInPtr = cpi->DataOutputBuffer;
+    ret_val = (cpi->ThisFrameSize >> 3);        
+
+    cpi->LastInterError = cpi->InterError;
+    cpi->LastIntraError = cpi->IntraError;
+
+    // TEMP STATS / DEBUG code: compiled in but disabled by the constant condition.
+    if ( FALSE )
+    {
+        FILE  *StatsFilePtr;
+
+        // Open stats file and write out data
+        StatsFilePtr = fopen( "buffers.stt", "a" );
+        if ( StatsFilePtr )
+        {
+            fprintf( StatsFilePtr, "%12ld ", (UINT32)cpi->CurrentFrame );
+            fprintf( StatsFilePtr, "%12ld ", (cpi->BufferLevel * 100)/cpi->OptimalBufferLevel );
+            fprintf( StatsFilePtr, "%12ld ", (100 * cpi->BytesOffTarget / (cpi->TotalByteCount * 8)));
+            fprintf( StatsFilePtr, "%12ld ", cpi->NiAvQi );
+            fprintf( StatsFilePtr, "%12ld ", cpi->Configuration.ActiveWorstQuality );
+            fprintf( StatsFilePtr, "%12ld\n", ((cpi->ThisFrameSize * 100)/cpi->ThisFrameTarget) );
+            fclose ( StatsFilePtr );
+        }
+    }
+
+#if defined MEASURE_SECTION_COSTS
+    {
+        UINT32 i;
+
+        // Temps Stats for section data rate analysis
+        for ( i = 0; i < 10; i++ )
+        {
+            ClipSectionBits[i] += (Sectionbits[i] / 256);
+            Sectionbits[i] = 0;
+        }
+    }
+#endif
+
+    return ret_val;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : StopEncoder
+ *
+ *  INPUTS        : CP_INSTANCE **cpi : Pointer to pointer to encoder instance.
+ *
+ *  OUTPUTS       : None directly; cpi is handed on to DeleteCPInstance().
+ *
+ *  RETURNS       : BOOL: Always TRUE.
+ *
+ *  FUNCTION      : Outputs the averaged stats (if any were gathered), closes
+ *                  the instance's open files and de-allocates the encoder
+ *                  data structures. All of that is skipped if *cpi is NULL.
+ *
+ *  SPECIAL NOTES : Conditionally compiled sections (TIMING, PSNR_ON, FILE_PSNR,
+ *                  MEASURE_SECTION_COSTS) append run statistics to files.
+ ****************************************************************************/
+BOOL CCONV StopEncoder ( CP_INSTANCE **cpi )
+{
+#ifdef TIMING
+    ITime2 = timeGetTime();
+    ITotalTime = ITime2-ITime1;
+    {
+        FILE *fp = fopen( "d:\\Times.txt", "at" );
+        if ( fp )   // Timing log is best effort: skip it if the file cannot be opened.
+        {
+            fprintf ( fp, "StopEncoder: %d\n", ITime2 );
+            fprintf ( fp, "The total time spent is %d\n", ITotalTime );
+            fprintf ( fp, "------------------------------------\n" );
+            fclose ( fp );
+        }
+    }
+#endif
+
+#if defined MEASURE_SECTION_COSTS
+    // DEBUG Code
+    if ( TRUE && *cpi )
+    {
+		UINT32 i;
+		UINT32 Sum = 0;
+        FILE  *StatsFilePtr;
+		
+		for ( i = 0; i < 6; i++ )
+		{
+			Sum += ClipSectionBits[i];
+		}
+
+		if ( Sum )
+		{
+			// Open stats file and write out data
+			StatsFilePtr = fopen( "Section_bits.stt", "a" );
+			if ( StatsFilePtr )
+			{
+				fprintf( StatsFilePtr, "Header %4ld  ", ((ClipSectionBits[0]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "Mode %4ld  ", ((ClipSectionBits[1]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "Mv %4ld  ", ((ClipSectionBits[2]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "Context %4ld  ", ((ClipSectionBits[3]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "DC %4ld  ", ((ClipSectionBits[4]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "AC %4ld  ", ((ClipSectionBits[5]+(Sum/200)) * 100)/Sum );
+				fprintf( StatsFilePtr, "\n" );
+				fclose ( StatsFilePtr );
+			}
+		}
+	}
+#endif
+
+#if defined PSNR_ON
+	if ( *cpi )
+    {
+        // TEST Code
+        if ( (*cpi)->CurrentFrame && !(*cpi)->AllowSpatialResampling )
+        {
+            FILE *StatsFilePtr;
+            UINT32 FrameCount = ((UINT32)(*cpi)->CurrentFrame) -1;
+            double FrameSize = 1.5 * (*cpi)->pb.YPlaneSize;
+            double OverallPSNR = 10.0 * log10((255.0 * 255.0 * FrameSize * (*cpi)->CurrentFrame) / (*cpi)->TotalSqError);
+
+            // Open stats file and write out data
+            StatsFilePtr = fopen( "psnr.stt", "a" );
+            if ( StatsFilePtr )
+            {
+				// Fudge to deal with 29.97 fps material
+				if ( (*cpi)->Configuration.OutputFrameRate == 30 )
+				{
+					fprintf( StatsFilePtr, "%6.3f %10.2f %6.3f\n",
+							 (*cpi)->TotPsnr / (double)(FrameCount),
+							 (((double)ClipBytes/1024) * 8 * 29.97) / ((UINT32)(*cpi)->CurrentFrame - 1) ,
+                             OverallPSNR);
+				}
+				else
+				{
+					fprintf( StatsFilePtr, "%6.3f %10.2f %6.3f\n",
+							 (*cpi)->TotPsnr / (double)(FrameCount),
+							 (((double)ClipBytes/1024) * 8 * (*cpi)->Configuration.OutputFrameRate) / ((UINT32)(*cpi)->CurrentFrame - 1),
+                             OverallPSNR);
+				}
+
+				fclose( StatsFilePtr );
+            }
+
+        }
+	}
+#endif
+
+#if 0
+    // DEBUG Code
+    if ( FALSE )
+    {
+		UINT32 i;
+        FILE  *StatsFilePtr;
+	
+        // Open stats file and write out data
+        StatsFilePtr = fopen( "tmp.stt", "a" );
+        if ( StatsFilePtr )
+        {
+			fprintf( StatsFilePtr, "%12ld %12ld\n", BcCount, TotTokens );
+            fclose ( StatsFilePtr );
+		}
+
+        StatsFilePtr = fopen( "tmp2.stt", "a" );
+        if ( StatsFilePtr  && NzCount[1][0] )
+        {
+			memcpy ( (*cpi)->FrameNzCount, NzCount, sizeof((*cpi)->FrameNzCount) );
+			PredictScanOrder( (*cpi) );
+
+			for ( i=0; i<64; i++ )
+			{
+				fprintf ( StatsFilePtr, "%2ld,", (*cpi)->NewScanOrderBands[i] );
+				if ( (i%8) == 7 )
+					fprintf ( StatsFilePtr, "\n" );
+			}
+			fprintf ( StatsFilePtr, "\n" );
+            fclose ( StatsFilePtr );
+		}
+
+		if ( scanupdates[1][0] > 0 )
+		{
+			FILE *StatsFilePtr;
+			UINT32 i, Sum, Sum2, Prob;
+
+			StatsFilePtr = fopen( "scanupdates.stt", "a" );
+			if ( StatsFilePtr  )
+			{
+				for ( i=0; i<64; i++ )
+				{
+					Sum = scanupdates[i][0] + scanupdates[i][1];
+					Sum2 = scanupdates[i][0];
+
+					if ( Sum > 0 )
+					{
+						Prob = (Sum2 * 255)/Sum;
+						if ( Prob == 0 )
+							Prob = 1;
+						fprintf( StatsFilePtr, "%3ld, ", Prob );
+					}
+					else
+						fprintf( StatsFilePtr, "%3ld, ", 255 );
+
+					if ( (i % 8) == 7 )
+						fprintf( StatsFilePtr, "\n");
+				}
+				fprintf ( StatsFilePtr, "\n" );
+	            fclose ( StatsFilePtr );
+			}
+		}
+	}
+#endif
+
+    if ( *cpi )
+    {
+#if defined FILE_PSNR 
+        // TEST Code
+        if ( (*cpi)->CurrentFrame && !(*cpi)->AllowSpatialResampling )
+        {
+            FILE *StatsFilePtr;
+            UINT32 FrameCount = ((UINT32)(*cpi)->CurrentFrame) -1;
+            double PSNR = (*cpi)->TotPsnr / (double)(FrameCount);
+            double KBS = ((double)ClipBytes * 8 * (*cpi)->Configuration.OutputFrameRate ) / ((double) FrameCount);
+            double LGKBS = log10(KBS);
+            // NOTE(review): Total is not declared in this function - presumably a file scope squared error sum; confirm.
+            double FrameSize = 1.5 * (*cpi)->pb.YPlaneSize;
+            double OverallPSNR = 10.0 * log10((255.0 * 255.0 * FrameSize * (*cpi)->CurrentFrame) / (double)Total);
+
+            // Open stats file and write out data
+            StatsFilePtr = fopen( "psnr.stt", "a" );
+            if ( StatsFilePtr )
+            {
+				// Fudge to deal with 29.97 fps material
+				if ( (*cpi)->Configuration.OutputFrameRate == 30 )
+				{
+					fprintf( StatsFilePtr, "%6.3f %10.2f %10.6f\n",
+							 PSNR,
+							 (((double)ClipBytes/1024) * 8 * 29.97) / (FrameCount),
+
+							 PSNR/
+							 log10((((double)ClipBytes/1024) * 8 * 29.97) / (FrameCount))
+                             );
+				}
+				else
+				{
+					fprintf( StatsFilePtr, "%6.3f %10.2f %10.6f\n",
+							 PSNR,
+							 KBS/1024,
+                             PSNR / LGKBS );
+				}
+
+				fclose( StatsFilePtr );
+            }
+
+        }
+#endif
+
+        AvgStats ( &(*cpi)->fpmss);
+        if((*cpi)->fpmss.count)
+            OutputStats((*cpi)->ss,&(*cpi)->fpmss);
+
+        if((*cpi)->fs)
+            fclose((*cpi)->fs);
+
+        if((*cpi)->ss)
+            fclose((*cpi)->ss);
+
+        VP6_DeleteFragmentInfo ( &(*cpi)->pb );
+        VP6_DeleteFrameInfo ( &(*cpi)->pb );
+        EDeleteFragmentInfo ( (*cpi) );
+        EDeleteFrameInfo ( (*cpi) );
+        VP6_DeleteQuantizer ( &(*cpi)->pb.quantizer );
+        DeletePostProcInstance ( &(*cpi)->pb.postproc );
+        DeleteCPInstance ( cpi );
+    }
+
+ // test output code for filter taps
+	if(0)
+	{
+		UINT32 i,j,k;
+        FILE  *StatsFilePtr;
+		double dval;
+		double aval = -0.05;
+		int y1,y2,y3,y4;
+		double d2, d3;
+		int sum;
+
+        // Open stats file and write out data
+        StatsFilePtr = fopen( "filters.stt", "a" );
+        if ( StatsFilePtr )
+        {
+			fprintf( StatsFilePtr, " **** \n" );
+			for ( i = 0; i < 32; i++ )
+			{
+
+				fprintf( StatsFilePtr, "    {\n" );
+				dval = 0.0;
+				for ( j = 0; j < 8; j++ )
+				{
+					d2 = dval * dval;
+					d3 = dval * dval * dval;
+
+					y1 = (int)floor(0.5 + (		((aval*dval)  -		(2.0*aval*d2)       +	(aval*d3)) * 128));
+					y2 = (int)floor(0.5 + (		(1.0		  -		((aval+3.0)*d2)     +	((aval+2.0)*d3)) * 128));
+					y3 = (int)floor(0.5 + (		(-(aval*dval) +		((2.0*aval+3.0)*d2) -	((aval+2.0)*d3)) * 128));
+					y4 = (int)floor(0.5 + (		(					(aval*d2)          -	(aval*d3)) * 128));
+
+					sum = y1 + y2 + y3 + y4;
+					if ( sum < 128 )
+					{
+						if ( sum < 127 )
+						{
+							y2++;
+							y3++;
+						}
+						else
+						{
+							if ( y2 >= y3 )
+								y2++;
+							else
+								y3++;
+						}
+					}
+					else if ( sum > 128 )
+					{
+						if ( sum > 129 )
+						{
+							y2--;
+							y3--;
+						}
+						else
+						{
+							if ( y2 >= y3 )
+								y2--;
+							else
+								y3--;
+						}
+					}
+					// The taps are plain ints, so they are printed with %d.
+					fprintf( StatsFilePtr, "        { ");
+					for(k=0;k<8;k++)
+						fprintf(StatsFilePtr,"%3d,",y1);
+					fprintf( StatsFilePtr, "  ");
+					for(k=0;k<8;k++)
+						fprintf(StatsFilePtr,"%3d,",y2);
+					fprintf( StatsFilePtr, "  ");
+					for(k=0;k<8;k++)
+						fprintf(StatsFilePtr,"%3d,",y3);
+					fprintf( StatsFilePtr, "  ");
+					for(k=0;k<8;k++)
+						fprintf(StatsFilePtr,"%3d,",y4);
+					fprintf( StatsFilePtr, " }");
+
+					if (y1 + y2 + y3 + y4 != 128)
+					{
+						fprintf( StatsFilePtr, " **** %d %d", (y1 + y2 + y3 + y4), sum );
+					}
+
+					fprintf( StatsFilePtr, "\n" );
+
+					dval += 0.125;
+				}
+				aval -= 0.05;
+				fprintf( StatsFilePtr, "    },\n" );
+				fprintf( StatsFilePtr, "\n" );
+			}
+
+
+			fprintf( StatsFilePtr, "%ld\n", i );
+			fclose( StatsFilePtr );
+        }
+	}
+
+    return TRUE;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VPGetState
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : void *ret        : Caller buffer that receives a
+ *                                     COMPRESSOR_STATE snapshot; may be NULL.
+ *
+ *  RETURNS       : UINT32: sizeof(COMPRESSOR_STATE), in every case.
+ *
+ *  FUNCTION      : Copies the key frame history arrays and the scalar state
+ *                  fields from the encoder instance into the caller's buffer.
+ *
+ *  SPECIAL NOTES : With ret == NULL nothing is copied, so the call can be
+ *                  used to find out how large a buffer to supply.
+ *
+ ****************************************************************************/
+UINT32 CCONV VPGetState ( CP_INSTANCE *cpi, void *ret )
+{
+    COMPRESSOR_STATE *state = (COMPRESSOR_STATE *) ret;
+    INT32 slot;
+
+    if ( !state )       // No buffer supplied: report the required size only.
+        return sizeof ( COMPRESSOR_STATE );
+
+    state->CurrentFrame        = cpi->CurrentFrame;
+    state->LastFrameSize       = cpi->LastFrameSize;
+    state->DropCount           = cpi->DropCount;
+    state->KeyFrameCount       = cpi->KeyFrameCount;
+    state->TotKeyFrameBytes    = cpi->TotKeyFrameBytes;
+    state->LastKeyFrameSize    = cpi->LastKeyFrameSize;
+    state->LastKeyFrame        = cpi->LastKeyFrame;
+    state->TotalByteCount      = cpi->TotalByteCount;
+    state->ActiveMaxQ          = cpi->Configuration.ActiveWorstQuality;
+    state->BpbCorrectionFactor = cpi->BpbCorrectionFactor;
+
+    for ( slot = 0; slot < KEY_FRAME_CONTEXT; slot++ )   // Key frame history.
+    {
+        state->PriorKeyFrameSize[slot]     = cpi->PriorKeyFrameSize[slot];
+        state->PriorKeyFrameDistance[slot] = cpi->PriorKeyFrameDistance[slot];
+    }
+
+    return sizeof ( COMPRESSOR_STATE );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VPSetState
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  void *arg        : Pointer to the COMPRESSOR_STATE object
+ *                                     to restore from (only read).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Overwrites the instance's key frame history and scalar
+ *                  state fields with the values held in the supplied object.
+ *
+ *  SPECIAL NOTES : Copies the same fields as VPGetState, in the opposite
+ *                  direction. arg is dereferenced without a NULL check.
+ *
+ ****************************************************************************/
+void CCONV VPSetState ( CP_INSTANCE *cpi, void *arg )
+{
+    const COMPRESSOR_STATE *saved = (const COMPRESSOR_STATE *) arg;
+    INT32 slot;
+
+    // Scalar fields: each copy is independent of the others.
+    cpi->CurrentFrame        = saved->CurrentFrame;
+    cpi->LastFrameSize       = saved->LastFrameSize;
+    cpi->DropCount           = saved->DropCount;
+    cpi->KeyFrameCount       = saved->KeyFrameCount;
+    cpi->TotKeyFrameBytes    = saved->TotKeyFrameBytes;
+    cpi->LastKeyFrameSize    = saved->LastKeyFrameSize;
+    cpi->LastKeyFrame        = saved->LastKeyFrame;
+    cpi->TotalByteCount      = saved->TotalByteCount;
+    cpi->BpbCorrectionFactor = saved->BpbCorrectionFactor;
+    cpi->Configuration.ActiveWorstQuality = saved->ActiveMaxQ;
+
+    for ( slot = 0; slot < KEY_FRAME_CONTEXT; slot++ )   // Key frame history.
+    {
+        cpi->PriorKeyFrameSize[slot]     = saved->PriorKeyFrameSize[slot];
+        cpi->PriorKeyFrameDistance[slot] = saved->PriorKeyFrameDistance[slot];
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : VPGetPB
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : int: Address of the instance's pb member, cast to int.
+ *
+ *  FUNCTION      : Exposes the pb member embedded in the encoder instance.
+ *
+ *  SPECIAL NOTES : The cast loses bits wherever a pointer is wider than int.
+ *
+ ****************************************************************************/
+int CCONV VPGetPB ( CP_INSTANCE *cpi )
+{
+    void *pbAddress = &cpi->pb;
+    return (int) pbAddress;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def
new file mode 100644
index 00000000..dc006b38
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Generic/vfwcompdll.def
@@ -0,0 +1,20 @@
+LIBRARY   vp31e
+
+EXPORTS
+	StartEncoder
+	ChangeCompressorSetting
+	ChangeEncoderConfig
+	EncodeFrame
+	EncodeFrameYuv
+	StopEncoder
+	StartDecoder
+	SetPbParam
+	GetYUVConfig
+	DecodeFrame
+	DecodeFrameToYUV
+	DrawFrame
+	StopDecoder
+	wilkDXrefCreate
+	wilkDXrefDestroy
+	wilkDXrefKeyFrame
+	wilkDXrefInterFrame
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c
new file mode 100644
index 00000000..ec98776d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/COptFunctions.c
@@ -0,0 +1,1967 @@
+/****************************************************************************
+*
+*   Module Title :     OptFunctions.c
+*
+*   Description  :     Encoder system dependent functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+#include "math.h"
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning(disable:4799)   // MSVC C4799: function has no EMMS instruction
+
+#define FILTER_WEIGHT 128       // Equals 1 << FILTER_SHIFT
+#define FILTER_SHIFT  7
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static __declspec(align(16)) short rd[] = { 64, 64, 64, 64, 64, 64, 64, 64 };   // Eight copies of FILTER_WEIGHT / 2; presumably a rounding term - TODO confirm
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern INT16  BilinearFilters_mmx[8][16];   // Defined in another module
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetSAD
+ *
+ *  INPUTS        : UINT8 *NewDataPtr       : Pointer to first input data array.
+ *                  INT32  PixelsPerLine    : Length of line for NewDataPtr.
+ *                  UINT8 *RefDataPtr       : Pointer to second input data array.
+ *                  INT32  RefPixelsPerLine : Length of line for RefDataPtr.
+ *                  INT32  ErrorSoFar       : Error accumulated before this call.
+ *                  INT32  BestSoFar        : (NOT USED).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32: ErrorSoFar plus the SAD for the two blocks.
+ *
+ *  FUNCTION      : Calculates the sum of the absolute differences over
+ *                  8 rows of 8 bytes from each block, using MMX.
+ *
+ *  SPECIAL NOTES : Does not clear the MMX state (emms is commented out).
+ *
+ ****************************************************************************/
+INT32 MmxGetSAD
+( 
+    UINT8 *NewDataPtr, 
+    INT32 PixelsPerLine,
+    UINT8 *RefDataPtr,
+    INT32 RefPixelsPerLine,
+    INT32 ErrorSoFar, 
+    INT32 BestSoFar 
+)
+{
+    INT32   DiffVal = ErrorSoFar;
+    INT16   DiffAcc[4] = { 0, 0, 0, 0};     // Receives mm7: four word sums, each at most 16*255 = 4080.
+
+    // MMX code for SAD: mm6 stays zero for unpacking, mm7 accumulates four word sums.
+__asm
+    {
+        pxor        mm6, mm6                    ; Blank mmx6
+        pxor        mm7, mm7                    ; Blank mmx7
+
+        mov         eax,dword ptr [NewDataPtr]  ; Load base addresses
+        mov         ebx,dword ptr [RefDataPtr]
+        mov         ecx,dword ptr [PixelsPerLine]
+        mov         edx,dword ptr [RefPixelsPerLine]
+
+        // Row 1
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B with unsigned saturation: 0 wherever B > A
+        psubusb     mm1, mm2                    ; B-A with unsigned saturation: 0 wherever A > B
+        por         mm0, mm1                    ; One of the two is 0 in each byte, so OR gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 2
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 3
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 4
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 5
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 6
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 7
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        add         eax,ecx                     ; Inc pointer into the new data
+        paddw       mm7, mm1                    ; accumulate difference...
+        add         ebx,edx                     ; Inc pointer into ref data
+
+        // Row 8 (last row, so the pointers are not advanced afterwards)
+        movq        mm0, [eax]                  ; Copy eight bytes to mm0
+        movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+        movq        mm2, mm0                    ; Take copy of MM0
+
+        psubusb     mm0, mm1                    ; A-B to MM0
+        psubusb     mm1, mm2                    ; B-A to MM1
+        por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+        movq        mm1, mm0                    ; keep a copy
+        punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+        paddw       mm7, mm0                    ; accumulate difference...
+        punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+        paddw       mm7, mm1                    ; accumulate difference...
+
+        movq        DWORD PTR [DiffAcc], mm7    ; copy the four accumulated word sums back into DiffAcc
+//      emms                                    ; (disabled) so this routine does not clear the MMX state.
+    }
+
+    //  Add the 4 resulting word values on top of the error passed in by the caller.
+    DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+
+    return DiffVal;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetHalfPixelSAD
+ *
+ *  INPUTS        : UINT8 *SrcData          : Pointer to first input data array.
+ *                  INT32  PixelsPerLine    : Length of line for SrcData.
+ *                  UINT8 *RefDataPtr1      : Pointer to first reference data array.
+ *                  UINT8 *RefDataPtr2      : Pointer to second reference data array.
+ *                  INT32  RefPixelsPerLine : Length of line for RefDataPtr1/2.
+ *                  INT32  ErrorSoFar       : Error accumulated before this call.
+ *                  INT32  BestSoFar        : (NOT USED).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32: SAD at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      : Calculates the sum of the absolute differences against
+ *                  half pixel interpolated references.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+INT32 MmxGetHalfPixelSAD
+( 
+    UINT8 *SrcData, 
+    INT32 PixelsPerLine,
+    UINT8 *RefDataPtr1, 
+    UINT8 *RefDataPtr2, 
+    INT32 RefPixelsPerLine,
+    INT32 ErrorSoFar, 
+    INT32 BestSoFar 
+)
+{
+    INT32   DiffVal = ErrorSoFar;
+    INT32   RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
+    INT16   DiffAcc[4] = { 0, 0, 0, 0 };     // MMX accumulator.
+
+    if ( RefOffset == 0 )
+    {
+        // Simple case as for non 0.5 pixel
+        DiffVal += MmxGetSAD ( SrcData, PixelsPerLine, RefDataPtr1, RefPixelsPerLine, ErrorSoFar, BestSoFar );
+    }
+    else
+    {
+__asm
+        // MMX code for SAD.
+        {
+            pxor        mm6, mm6                    ; Blank mmx6
+            pxor        mm7, mm7                    ; Blank mmx7
+
+            mov         eax,dword ptr [SrcData]     ; Load base addresses and line increment
+            mov         ebx,dword ptr [RefDataPtr1]
+            mov         ecx,dword ptr [RefDataPtr2]
+            mov         edx,dword ptr [PixelsPerLine]
+            mov         esi,dword ptr [RefPixelsPerLine]
+
+            // Row 1
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 2
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 3
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 4
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 5
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 6
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 7
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            add         eax,edx                     ; Inc pointer into the src data
+            paddw       mm7, mm1                    ; accumulate difference...
+            add         ebx,esi                     ; Inc pointer into ref1
+            add         ecx,esi                     ; Inc pointer into ref2
+
+            // Row 8
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm1, mm2                    ; Add word values together.
+            punpckhbw   mm4, mm6
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            paddw       mm3, mm4                    ; Add word values together.
+            movq        mm0, [eax]                  ; Copy eight of src data to mm0
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+            movq        mm2, mm0                    ; Take copy of MM0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data for SAD
+            psubusb     mm0, mm1                    ; A-B to MM0
+            psubusb     mm1, mm2                    ; B-A to MM1
+            por         mm0, mm1                    ; OR MM0 and MM1 gives abs differences in MM0
+
+            movq        mm1, mm0                    ; keep a copy
+            punpcklbw   mm0, mm6                    ; unpack to higher precision for accumulation
+            paddw       mm7, mm0                    ; accumulate difference...
+            punpckhbw   mm1, mm6                    ; unpack high four bytes to higher precision
+            paddw       mm7, mm1                    ; accumulate difference...
+
+            movq        DWORD PTR [DiffAcc], mm7    ; copy back accumulated results into normal memory
+        }
+
+        //  Accumulate the 4 word values in DiffAcc
+        DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+    }
+    return DiffVal;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetInterErr
+ *
+ *  INPUTS        : UINT8 *NewDataPtr       : Pointer to first input data array.
+ *                  INT32  PixelsPerLine    : Length of line for NewDataPtr.
+ *                  UINT8 *RefDataPtr1      : Pointer to first reference data array.
+ *                  UINT8 *RefDataPtr2      : Pointer to second reference data array.
+ *                  INT32  RefPixelsPerLine : Length of line for RefDataPtr1/2.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error variance.
+ *
+ *  FUNCTION      : Calculates a difference error score for two blocks.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 MmxGetInterErr
+( 
+    UINT8 *NewDataPtr, 
+    INT32 PixelsPerLine, 
+    UINT8 *RefDataPtr1, 
+    UINT8 *RefDataPtr2, 
+    INT32 RefPixelsPerLine 
+)
+{
+    UINT32  XSum  = 0;
+    UINT32  XXSum = 0;
+    INT16   MmxXSum[4]  = { 0, 0, 0, 0 };      // XSum accumulators
+    INT32   MmxXXSum[2] = { 0, 0 };            // XXSum accumulators
+
+    INT32   AbsRefOffset = abs( (int)(RefDataPtr1 - RefDataPtr2) );
+
+    // Mode of interpolation is chosen based on the offset of the second reference pointer from the first
+    if ( AbsRefOffset == 0 )
+    {
+        __asm
+        {
+            pxor        mm5, mm5                    ; Blank mmx6
+            pxor        mm6, mm6                    ; Blank mmx7
+            pxor        mm7, mm7                    ; Blank mmx7
+
+            mov         eax,dword ptr [NewDataPtr]  ; Load base addresses
+            mov         ebx,dword ptr [RefDataPtr1]
+            mov         ecx,dword ptr [PixelsPerLine]
+            mov         edx,dword ptr [RefPixelsPerLine]
+
+            // Row 1
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 2
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 3
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 4
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 5
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 6
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 7
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            movq        mm1, [ebx]                  ; Copy eight bytes to mm1
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 8
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            punpcklbw   mm1, mm6
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         ebx,edx                     ; Inc pointer into ref data
+            add         eax,ecx                     ; Inc pointer into the new data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+        }
+
+        // Now accumulate the final results.
+        XSum  = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+        XXSum = MmxXXSum[0] + MmxXXSum[1];
+    }
+    // Simple half pixel reference data
+    else
+    {
+__asm
+        {
+            pxor        mm5, mm5                    ; Blank mmx6
+            pxor        mm6, mm6                    ; Blank mmx7
+            pxor        mm7, mm7                    ; Blank mmx7
+
+            mov         eax,dword ptr [NewDataPtr]  ; Load base addresses
+            mov         ebx,dword ptr [RefDataPtr1]
+            mov         ecx,dword ptr [RefDataPtr2]
+            mov         edx,dword ptr [PixelsPerLine]
+            mov         esi,dword ptr [RefPixelsPerLine]
+
+            // Row 1
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 2
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 3
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 4
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 5
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 6
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 7
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+
+            add         eax,edx                     ; Inc pointer into the new data
+            add         ebx,esi                     ; Inc pointer into ref data
+            add         ecx,esi                     ; Inc pointer into ref2 data
+
+            // Row 8
+            movq        mm1, [ebx]                  ; Copy eight bytes from each of ref 1 and ref 2.
+            movq        mm2, [ecx]
+            movq        mm3, mm1                    ; Take copies.
+            movq        mm4, mm2
+
+            punpcklbw   mm1, mm6                    ; unpack low four bytes to higher precision
+            punpcklbw   mm2, mm6
+            paddw       mm1, mm2                    ; Add word values together.
+            psrlw       mm1, 1                      ; Devide by two (shift right 1)
+            punpckhbw   mm3, mm6                    ; unpack high four bytes to higher precision
+            punpckhbw   mm4, mm6
+            paddw       mm3, mm4                    ; Add word values together.
+            psrlw       mm3, 1                      ; Devide by two (shift right 1)
+
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            packuswb    mm1, mm3                    ; Repack to give 1/2 pixel averaged reference data
+            movq        mm2, mm0                    ; Take copies
+            movq        mm3, mm1                    ; Take copies
+
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpcklbw   mm1, mm6
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            punpckhbw   mm3, mm6
+            psubsw      mm0, mm1                    ; A-B (low order) to MM0
+            psubsw      mm2, mm3                    ; A-B (high order) to MM2
+
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        DWORD PTR [MmxXSum], mm5    ; copy back accumulated results into normal memory
+            movq        DWORD PTR [MmxXXSum], mm7   ; copy back accumulated results into normal memory
+        }
+
+        // Now accumulate the final results.
+        XSum  = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+        XXSum = MmxXXSum[0] + MmxXXSum[1];
+    }
+
+    // Compute and return population variance as mis-match metric.
+    return ( ((XXSum << 6) - XSum*XSum ) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetIntraError
+ *
+ *  INPUTS        : UINT8 *DataPtr       : Pointer to the first byte of the
+ *                                         input block. Eight rows of eight
+ *                                         bytes each are read from it.
+ *                  INT32  PixelsPerLine : Byte offset added to the data
+ *                                         pointer to step from one row of
+ *                                         the block to the next.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: 64*Sum(x*x) - Sum(x)*Sum(x) taken over the 64
+ *                  bytes read, i.e. the population variance of the block
+ *                  scaled by 64*64.
+ *
+ *  FUNCTION      : Calculates a variance score for the block. Each row is
+ *                  loaded as eight bytes, widened to 16-bit words, summed
+ *                  into one accumulator and squared-and-summed into a
+ *                  second; the partial sums are then folded together and
+ *                  combined in C.
+ *
+ *  SPECIAL NOTES : Uses eax, ecx and MMX registers mm0, mm2, mm4, mm5,
+ *                  mm6 and mm7. No emms instruction is issued in this
+ *                  routine, so the MMX state is left in use on return
+ *                  (presumably cleared elsewhere -- to be confirmed).
+ *                  mm5 accumulates the byte values as four word sums,
+ *                  mm6 is held at zero for unpacking and mm7 accumulates
+ *                  the squares as two dword sums. The inline comments on
+ *                  the three pxor instructions name the wrong registers,
+ *                  and the "accumulate differences" comments refer to the
+ *                  raw byte values: no difference is formed here. The
+ *                  pointer increment after row 8 is not used afterwards.
+ *
+ ****************************************************************************/
+UINT32 MmxGetIntraError ( UINT8 *DataPtr, INT32 PixelsPerLine )
+{
+    UINT8   *DiffPtr;                               // Local copy of DataPtr, loaded into eax by the asm block
+    UINT32  XSum  = 0;                              // Sum of the 64 byte values (stored by the asm block)
+    UINT32  XXSum = 0;                              // Sum of the 64 squared byte values (stored by the asm block)
+
+    // The eight rows are handled by straight-line (unrolled) code for speed.
+    DiffPtr = DataPtr;
+
+    __asm
+    {
+            pxor        mm5, mm5                    ; Blank mmx6
+            pxor        mm6, mm6                    ; Blank mmx7
+            pxor        mm7, mm7                    ; Blank mmx7
+
+            mov         eax,dword ptr [DiffPtr] ; Load base addresses
+            mov         ecx,dword ptr [PixelsPerLine]   // ecx = offset from one row to the next
+
+            // Row 1
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into the new data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+
+            // Row 2
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 3
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+
+            // Row 4
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 5
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 6
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 7
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            // Row 8
+            movq        mm0, [eax]                  ; Copy eight bytes to mm0
+            movq        mm2, mm0                    ; Take copies
+            punpcklbw   mm0, mm6                    ; unpack to higher precision
+            punpckhbw   mm2, mm6                    ; unpack to higher precision
+            paddw       mm5, mm0                    ; accumulate differences in mm5
+            paddw       mm5, mm2                    ; accumulate differences in mm5
+            pmaddwd     mm0, mm0                    ; square and accumulate
+            pmaddwd     mm2, mm2                    ; square and accumulate
+            add         eax,ecx                     ; Inc pointer into ref data
+            paddd       mm7, mm0                    ; accumulate in mm7
+            paddd       mm7, mm2                    ; accumulate in mm7
+
+            movq        mm4, mm5                    ;
+            punpcklwd   mm5, mm6                    // mm5 = word sums 0 and 1 widened to dwords
+            punpckhwd   mm4, mm6                    // mm4 = word sums 2 and 3 widened to dwords
+            movq        mm0, mm7                    // copy the two dword sums of squares
+            paddw       mm5, mm4                    // mm5 = (sum0 + sum2, sum1 + sum3)
+
+            punpckhdq   mm0, mm6                    // mm0 = high dword of the squares accumulator
+            punpckldq   mm7, mm6                    // mm7 = low dword of the squares accumulator
+            movq        mm4, mm5                    // copy the two partial byte sums
+            paddd       mm0, mm7                    // mm0 low dword = total sum of squares
+            punpckhdq   mm4, mm6                    // mm4 = sum1 + sum3
+            punpckldq   mm5, mm6                    // mm5 = sum0 + sum2
+            movd        DWORD PTR [XXSum], mm0      // store total sum of squares
+            paddw       mm4, mm5                    // mm4 low dword = total sum of byte values
+            movd        DWORD ptr [XSum], mm4       // store total sum of byte values
+    }
+
+    // Return 64*XXSum - XSum*XSum: the population variance scaled by 64*64, used as the mis-match metric.
+    return ( (XXSum<<6) - XSum*XSum );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetMBFrameVertVar
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance. Reads
+ *                                     cpi->pb.Configuration.VideoFrameWidth
+ *                                     as the row stride and starts at
+ *                                     cpi->yuv1ptr offset by
+ *                                     cpi->pb.mbi.blockDxInfo[0].Source.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Vertical variance for frame, computed as the
+ *                  sum of squared differences between vertically adjacent
+ *                  rows over 16 columns.
+ *
+ *  FUNCTION      : Computes the vertical variance for a macroblock based
+ *                  upon the sum of the local 2 pixel variances within
+ *                  the entire frame. The loop runs 7 times; each pass
+ *                  squares (row n - row n+1) and (row n+2 - row n+1) for
+ *                  columns 0-7 and then columns 8-15, and then advances
+ *                  the row pointer by two rows.
+ *
+ *  SPECIAL NOTES : Row pairs (0,1) through (13,14) are covered, so for a
+ *                  16-row macroblock the difference between the last two
+ *                  rows is not accounted for!
+ *                  Uses eax, ecx, edx and mm0-mm7; no emms instruction is
+ *                  issued in this routine (presumably done elsewhere --
+ *                  to be confirmed).
+ *                  The inline row labels are swapped: mm1 holds the row
+ *                  at [eax] and mm0 the row at [eax+ecx], and the lines
+ *                  commented "between 0, 2" subtract the middle row from
+ *                  the third row. The "skip one line" step moves the
+ *                  pointer on by two rows.
+ *
+ ****************************************************************************/
+UINT32 MmxGetMBFrameVertVar ( CP_INSTANCE *cpi )
+{
+    UINT32 FrameError;                              // Result, stored by the asm block
+    INT32 Stride  = cpi->pb.Configuration.VideoFrameWidth;   // Offset between successive rows
+//    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+    __asm
+    {
+        mov         ecx,    [Stride]                // ecx = row stride
+        mov         eax,    DWORD PTR [SrcPtr]      // eax = pointer to the current top row
+        
+        pxor        mm7,    mm7                     ; clear mm7
+        pxor        mm6,    mm6                     ; clear mm6
+        
+        mov         edx,    7                       ; 
+
+MmxGetMBFrameVertVarLoop:                           // one pass = two row pairs across 16 columns
+
+        movq        mm1,    [eax]                   ; 00 01 02 03 04 05 06 07
+        movq        mm0,    [eax+ecx]               ; 10 11 12 13 14 15 16 17
+        
+        movq        mm3,    mm0                     ; copy of 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        punpckhbw   mm3,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+        movq        mm2,    [eax+ecx*2]             ; 20 21 22 23 24 25 26 27
+
+        movq        mm4,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        
+        punpckhbw   mm4,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+        movq        mm5,    mm2                     ; 20 21 22 23 24 25 26 27
+
+        punpcklbw   mm2,    mm7                     ; xx 20 xx 21 xx 22 xx 23
+        psubw       mm1,    mm0                     ; difference between 0, 1  low four
+        
+        pmaddwd     mm1,    mm1                     ; SD between 0, 1  low four
+        psubw       mm4,    mm3                     ; difference bwtween 0, 1   high four
+        
+        pmaddwd     mm4,    mm4                     ; SD between 0, 1  high foru
+        punpckhbw   mm5,    mm7                     ; xx 24 xx 25 xx 26 xx 27
+
+        psubw       mm2,    mm0                     ; difference between 0, 2   low four
+        pmaddwd     mm2,    mm2                     ; sd between 0, 2   low four
+        
+        psubw       mm5,    mm3                     ; difference between 0, 2   high four
+        pmaddwd     mm5,    mm5                     ; sd between 0, 2   high four
+
+        paddd       mm1,    mm4                     ;
+        paddd       mm2,    mm5                     ; 
+
+        paddd       mm6,    mm1                     ;
+        paddd       mm6,    mm2                     ; accumlated in mm6
+
+        // Done with columns 0-7; the same sequence follows for columns 8-15.
+
+        movq        mm1,    8[eax]                   ; 00 01 02 03 04 05 06 07
+        movq        mm0,    8[eax+ecx]               ; 10 11 12 13 14 15 16 17
+        
+        movq        mm3,    mm0                     ; copy of 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        punpckhbw   mm3,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+        movq        mm2,    8[eax+ecx*2]             ; 20 21 22 23 24 25 26 27
+
+        movq        mm4,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        
+        punpckhbw   mm4,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+        movq        mm5,    mm2                     ; 20 21 22 23 24 25 26 27
+
+        punpcklbw   mm2,    mm7                     ; xx 20 xx 21 xx 22 xx 23
+        psubw       mm1,    mm0                     ; difference between 0, 1  low four
+        
+        pmaddwd     mm1,    mm1                     ; SD between 0, 1  low four
+        psubw       mm4,    mm3                     ; difference bwtween 0, 1   high four
+        
+        pmaddwd     mm4,    mm4                     ; SD between 0, 1  high foru
+        punpckhbw   mm5,    mm7                     ; xx 24 xx 25 xx 26 xx 27
+
+        psubw       mm2,    mm0                     ; difference between 0, 2   low four
+        pmaddwd     mm2,    mm2                     ; sd between 0, 2   low four
+        
+        psubw       mm5,    mm3                     ; difference between 0, 2   high four
+        pmaddwd     mm5,    mm5                     ; sd between 0, 2   high four
+
+        paddd       mm1,    mm4                     ;
+        paddd       mm2,    mm5                     ; 
+
+        paddd       mm6,    mm1                     ;
+        paddd       mm6,    mm2                     ; accumlated in mm6
+
+        lea         eax,    [eax + ecx *2]          ; skip one line
+        sub         edx,    1                       // one fewer pass remaining
+
+        jnz         MmxGetMBFrameVertVarLoop        // repeat until edx reaches zero (7 passes in total)
+       
+        movq        mm0,    mm6                     // copy the two dword partial sums
+        psrlq       mm0,    32                      // bring the high dword down to the low dword
+
+        paddd       mm0,    mm6                     // low dword = high + low partial sums
+        movd        [FrameError], mm0               // store the total
+    }
+
+    return FrameError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : MmxGetMBFieldVertVar
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Vertical variance for fields within a frame.
+ *
+ *  FUNCTION      : Computes the vertical variance for a macroblock based
+ *                  upon the sum of the local 2 pixel variances within
+ *                  the two fields of a frame.
+ *
+ *  SPECIAL NOTES : The difference between the last two rows in a MB
+ *                  is not accounted for!
+ *
+ ****************************************************************************/
+UINT32 MmxGetMBFieldVertVar ( CP_INSTANCE *cpi )
+{
+    UINT32 FieldError;                                          // filled in by the final movd below
+    INT32 Stride  = cpi->pb.Configuration.VideoFrameWidth;      // byte distance between successive rows
+//    Earlier source lookup, kept for reference: &cpi->yuv1ptr[cpi->pb.mbi.Source]
+// sjlhack: the source offset is now taken from the first blockDxInfo entry.
+    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+    __asm
+    {
+        mov         ecx,    [Stride]                // ecx = row pitch
+        mov         eax,    DWORD PTR [SrcPtr]      // eax = pointer to the current row
+        
+        pxor        mm7,    mm7                     ; clear mm7     // stays zero: used to widen bytes to words
+        pxor        mm6,    mm6                     ; clear mm6     // running sum of squared differences (two dwords)
+        
+        mov         edx,    7                       ;               // 7 passes, two row pairs compared per pass
+
+MmxGetMBFieldVertVarLoop:
+
+        movq        mm1,    [eax]                   ; 00 01 02 03 04 05 06 07      // mm1 = row r, bytes 0..7
+        movq        mm0,    [eax+ecx*2]             ; 10 11 12 13 14 15 16 17      // mm0 = row r+2, bytes 0..7
+
+        movq        mm2,    mm0                     ; 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        movq        mm3,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpckhbw   mm2,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        punpckhbw   mm3,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+
+        psubw       mm0,    mm1                     ; diff between 0    1 low four
+        pmaddwd     mm0,    mm0                     ; SD   between 0    1 low four     // squares each word diff, sums adjacent pairs
+        
+        psubw       mm2,    mm3                     ; diff between 0    1 high four
+        pmaddwd     mm2,    mm2                     ; SD between   0    1 high four
+
+        paddd       mm0,    mm2
+        paddd       mm6,    mm0                     // add this group's squared differences to the total
+
+        movq        mm1,    8[eax]                   ; 00 01 02 03 04 05 06 07     // same comparison for bytes 8..15 of rows r and r+2
+        movq        mm0,    8[eax+ecx*2]             ; 10 11 12 13 14 15 16 17
+
+        movq        mm2,    mm0                     ; 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        movq        mm3,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpckhbw   mm2,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        punpckhbw   mm3,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+
+        psubw       mm0,    mm1                     ; diff between 0    1 low four
+        pmaddwd     mm0,    mm0                     ; SD   between 0    1 low four
+        
+        psubw       mm2,    mm3                     ; diff between 0    1 high four
+        pmaddwd     mm2,    mm2                     ; SD between   0    1 high four
+
+        paddd       mm0,    mm2
+        paddd       mm6,    mm0
+
+        lea         eax,    [eax+ecx]               // step down one row; the next two groups compare rows r+1 and r+3
+
+        movq        mm1,    [eax]                   ; 00 01 02 03 04 05 06 07
+        movq        mm0,    [eax+ecx*2]             ; 10 11 12 13 14 15 16 17
+
+        movq        mm2,    mm0                     ; 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        movq        mm3,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpckhbw   mm2,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        punpckhbw   mm3,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+
+        psubw       mm0,    mm1                     ; diff between 0    1 low four
+        pmaddwd     mm0,    mm0                     ; SD   between 0    1 low four
+        
+        psubw       mm2,    mm3                     ; diff between 0    1 high four
+        pmaddwd     mm2,    mm2                     ; SD between   0    1 high four
+
+        paddd       mm0,    mm2
+        paddd       mm6,    mm0
+
+        movq        mm1,    8[eax]                   ; 00 01 02 03 04 05 06 07
+        movq        mm0,    8[eax+ecx*2]             ; 10 11 12 13 14 15 16 17
+
+        movq        mm2,    mm0                     ; 00 01 02 03 04 05 06 07
+        punpcklbw   mm0,    mm7                     ; xx 00 xx 01 xx 02 xx 03
+
+        movq        mm3,    mm1                     ; 10 11 12 13 14 15 16 17
+        punpckhbw   mm2,    mm7                     ; xx 04 xx 05 xx 06 xx 07
+
+        punpcklbw   mm1,    mm7                     ; xx 10 xx 11 xx 12 xx 13
+        punpckhbw   mm3,    mm7                     ; xx 14 xx 15 xx 16 xx 17
+
+        psubw       mm0,    mm1                     ; diff between 0    1 low four
+        pmaddwd     mm0,    mm0                     ; SD   between 0    1 low four
+        
+        psubw       mm2,    mm3                     ; diff between 0    1 high four
+        pmaddwd     mm2,    mm2                     ; SD between   0    1 high four
+
+        paddd       mm0,    mm2
+        paddd       mm6,    mm0
+
+        lea         eax,    [eax + ecx ]            ; skip one line     // second one-row step: two rows advanced per pass
+        sub         edx,    1
+
+        jnz         MmxGetMBFieldVertVarLoop
+       
+        movq        mm0,    mm6
+        psrlq       mm0,    32                      // move the high dword partial sum into the low dword
+
+        paddd       mm0,    mm6                     // low dword now holds the total
+        movd        [FieldError], mm0
+    }   // NOTE(review): no EMMS is executed here; presumably the caller resets the MMX/x87 state - confirm
+
+    return FieldError;
+
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock2dBil_SAD_mmx
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to input block.
+ *                  INT32 SrcStride         : Stride for input block.
+ *                  UINT8 *RefPtr           : Pointer to reference block.
+ *                  UINT32 SrcPixelsPerLine : Stride for reference block.
+ *                  INT16 *HFilter          : Pointer to horizontal filter taps.
+ *                  INT16 *VFilter          : Pointer to vertical filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD error.
+ *
+ *  FUNCTION      : Produces a filtered fractional block in 2-D
+ *                  using bilinear filters and calculates the SAD.
+ *
+ *  SPECIAL NOTES : Reads 9 rows of 9 bytes from RefPtr: one extra row and
+ *                  one extra column feed the second filter taps.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock2dBil_SAD_mmx 
+(
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 SrcPixelsPerLine, 
+    INT16 *HFilter, 
+    INT16 *VFilter 
+)
+{
+
+    UINT32 Error=0;                                 // SAD accumulator; the loop reads and rewrites it every row
+    __asm
+    {
+        mov         eax,        HFilter             ; 
+        mov         edi,        SrcPtr              ;       // edi walks the source block
+
+        mov         esi,        RefPtr              ;       // esi walks the reference rows
+        mov         ecx,        8            ;              // 8 output rows
+
+        mov         edx,        SrcPixelsPerLine    ;       // edx = reference row pitch
+               
+        movq        mm1,        [eax]               ;       // mm1 = HFilter[0..3], weights applied to pixel x
+        movq        mm2,        [eax+16]            ;       // mm2 = HFilter[8..11] (16 bytes on), weights applied to pixel x+1
+        
+        mov         eax,        VFilter             ;       // eax now addresses the vertical taps
+        pxor         mm0,        mm0                ;       // zero, for byte->word unpacking
+
+        // Horizontally filter reference row 0; it seeds the vertical pass in the loop ;
+        movq        mm3,        [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movq        mm4,        mm3                 ; make a copy of current line
+        
+        punpcklbw   mm3,        mm0                 ; xx 00 01 02 03 04 05 06
+        punpckhbw   mm4,        mm0                 ;
+
+        pmullw      mm3,        mm1                 ;
+        pmullw      mm4,        mm1                 ;
+
+        movq        mm5,        [esi+1]             ;       // same row, one pixel to the right
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0                 ;
+
+        pmullw      mm5,        mm2                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+        
+        movq        mm7,        mm3                 ;
+        packuswb    mm7,        mm4                 ;       // mm7 = filtered row 0 as 8 bytes
+
+
+        add         esi,        edx                 ; next line
+NextRow:
+        movq        mm3,        [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movq        mm4,        mm3                 ; make a copy of current line
+        
+        punpcklbw   mm3,        mm0                 ; xx 00 01 02 03 04 05 06
+        punpckhbw   mm4,        mm0                 ;
+
+        pmullw      mm3,        mm1                 ;
+        pmullw      mm4,        mm1                 ;
+
+        movq        mm5,        [esi+1]             ;
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0                 ;
+
+        pmullw      mm5,        mm2                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        
+        movq        mm5,        mm7                 ;       // mm5/mm6 = previous filtered row
+        movq        mm6,        mm7                 ;                
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0
+
+        pmullw      mm5,        [eax]               ;       // previous row * VFilter[0..3]
+        pmullw      mm6,        [eax]               ;
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+        
+        movq        mm7,        mm3                 ;
+        packuswb    mm7,        mm4                 ;       // keep this filtered row (as bytes) for the next iteration
+        
+
+        pmullw      mm3,        [eax+16]            ;       // current row * VFilter[8..11]
+        pmullw      mm4,        [eax+16]            ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+               
+        packuswb    mm3,        mm4                         // mm3 = 8 predicted bytes for this output row
+        movq        mm4,        [edi]               ;
+        
+        psadbw      mm3,        mm4                 ;       // row SAD; psadbw on MMX registers arrived with SSE, not base MMX
+        movd        mm4,        Error               ;
+
+        paddd       mm3,        mm4                 ;
+        movd        Error,      mm3                 ;        
+        
+        add         esi,        edx                 ; next line
+        add         edi,        SrcStride           ;                   ; 
+
+        dec         ecx                             ;
+        jne         NextRow                         
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock1d_vb8_SAD_mmx
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to input block.
+ *                  INT32 SrcStride         : Stride for input block.
+ *                  UINT8 *RefPtr           : Pointer to reference block.
+ *                  UINT32 PixelsPerLine    : Stride for reference block.
+ *                  UINT32 PixelStep        : Offset to move to next pixel in input (not referenced by the routine).
+ *                  INT16 *Filter           : Pointer to filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD error.
+ *
+ *  FUNCTION      : Applies 1-D vertical bi-linear filter to input block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_vb8_SAD_mmx
+(
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 PixelsPerLine, 
+    UINT32 PixelStep,           // not referenced in this routine
+    INT16 *Filter 
+)
+{
+    UINT32 Error;               // assigned by the movd after the loop
+    __asm
+    {
+        mov         edi,        Filter
+        movq        mm1,       [edi]                ; mm3 *= kernel 0 modifiers.    // mm1 = Filter[0..3], weight for the upper row
+        movq        mm2,       [edi + 16]            ; mm3 *= kernel 0 modifiers.   // mm2 = Filter[8..11], weight for the row below
+
+        mov         edi,        SrcPtr
+		mov			esi,        RefPtr
+        
+        mov         ecx,        8                   ;       // 8 rows
+
+        mov         edx,        SrcStride
+        mov         eax,        PixelsPerLine;
+        
+        pxor        mm7,        mm7                 // mm7 = SAD accumulator
+		pxor		mm0,       mm0                  ; mm0 = 00000000
+
+nextrow:
+        movq		mm3,        [esi]               ; mm3 = p0..p7
+        movq        mm4,        mm3                 ; mm4 = p0..p7
+        
+        punpcklbw   mm3,        mm0                 ; mm3 = p0..p3
+        punpckhbw   mm4,        mm0                 ; mm4 = p4..p7
+        
+        pmullw      mm3,        mm1                 ; mm3 *= kernel 0 modifiers.
+        pmullw      mm4,        mm1                 ; mm4 *= kernel 0 modifiers.
+
+        movq        mm5,        [esi + eax]         ;       // same columns, one reference row down
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0                 ;
+
+        pmullw      mm5,        mm2                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+
+
+        packuswb    mm3,        mm4                 ; pack and unpack to saturate
+        movq        mm5,        [edi]               ;
+        
+        psadbw      mm3,        mm5                 ;
+        paddd       mm7,        mm3
+        // Each iteration re-reads the reference row that was the lower tap of the
+        // previous iteration.  Since the recon block should be in cache this
+        // shouldn't cost much, but it is avoidable.
+        add         esi,        eax
+        add         edi,        edx 
+
+        dec         ecx                             ; decrement count
+        jnz         nextrow                         ; next row
+
+        movd        Error,      mm7       
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock1d_hb8_SAD_mmx
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to input block.
+ *                  INT32 SrcStride         : Stride for input block.
+ *                  UINT8 *RefPtr           : Pointer to reference block.
+ *                  UINT32 SrcPixelsPerLine : Stride for reference block.
+ *                  UINT32 PixelStep        : Offset to move to next pixel in input (not referenced by the routine).
+ *                  INT16 *Filter           : Pointer to filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD error.
+ *
+ *  FUNCTION      : Applies 1-D horizontal bi-linear filter to input block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_hb8_SAD_mmx
+(
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep,           // not referenced in this routine
+    INT16 *Filter 
+)
+{
+    UINT32 Error = 0;
+
+    __asm
+    {
+
+        mov         edi,        Filter
+        movq        mm1,        [edi]               ; xmm3 *= kernel 0 modifiers.   // mm1 = Filter[0..3], weight for pixel x
+        movq        mm2,        [edi + 16]          ; xmm3 *= kernel 0 modifiers.   // mm2 = Filter[8..11], weight for pixel x+1
+
+        mov         edi,        SrcPtr
+		mov			esi,        RefPtr
+		
+        pxor		mm0,        mm0                 ; mm0 = 00000000
+        pxor        mm7,        mm7                 ; mm7 = 0       // SAD accumulator
+        
+        mov         ecx,        8                   ;               // 8 rows
+
+        mov         edx,        SrcStride
+        mov         eax,        SrcPixelsPerLine;
+
+nextrow:
+        movq		mm3,        [esi]               ; mm3 = p-1..p6
+        movq        mm4,        mm3                 ; make a copy
+            
+        punpcklbw   mm3,        mm0                 ;                      
+        pmullw      mm3,        mm1                 ;
+
+        movq        mm5,        [esi+1]             ;mm5 = p0 ..... p7      // the same row shifted right by one pixel
+        punpckhbw   mm4,        mm0            
+
+        pmullw      mm4,        mm1                 ;        
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ; mm5 = p0..p7
+        pmullw      mm5,        mm2                 ;
+
+        punpckhbw   mm6,        mm0                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;            
+        paddw       mm4,        mm6                 ;
+
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+        
+        packuswb    mm3,        mm4                 ; pack and unpack to saturate
+        
+        movq        mm5,        [edi]               ; read src  
+        psadbw      mm3,        mm5                 ;
+        paddd       mm7,        mm3                 // accumulate this row's SAD
+
+        add         esi,        eax                 ; next line
+        add         edi,        edx                 ; 
+
+        dec         ecx                             ; decrement count
+        jnz         nextrow                         ; next row
+
+        movd        Error,        mm7;        
+
+    }
+    return Error;
+}
+                         
+/****************************************************************************
+ *
+ *  ROUTINE       : FiltBlockBilGetSad_mmx
+ *
+ *  INPUTS        : UINT8 *SrcPtr        : Pointer to input block.
+ *                  INT32 SrcStride      : Stride for input block.
+ *                  UINT8 *ReconPtr1     : Pointer to first reference block.
+ *                  UINT8 *ReconPtr2     : Pointer to second reference block.
+ *                  INT32 PixelsPerLine  : Stride for reference block.
+ *                  INT32 ModX           : Fractional part of x-component of MV.
+ *                  INT32 ModY           : Fractional part of y-component of MV.
+ *                  UINT32 BestSoFar     : Best error found so far (not referenced).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD error.
+ *
+ *  FUNCTION      : Applies 2-D bi-linear filter to get prediction block
+ *                  and computes SAD for prediction error.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_mmx
+(
+    UINT8 *SrcPtr,
+    INT32 SrcStride,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT32 PixelsPerLine,
+    INT32 ModX, 
+    INT32 ModY,
+    UINT32 BestSoFar
+)
+{
+    INT32  diff;
+    UINT32 Error = 0;   // defined result if no case below matches (it used to be returned uninitialized) - NOTE(review): confirm 0 suits callers
+
+    // Byte distance between the two reference pointers; its value selects the filter to apply.
+	diff = (INT32)(ReconPtr2-ReconPtr1);
+	
+    // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+	// This works out to be what we want... despite the pointer swapping that goes on below.
+	// For example... if the X component of the vector is +ve, ModX = X%8.
+	//                if the X component of the vector is -ve, ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+	if ( diff<0 ) 
+	{								// swap pointers so ReconPtr1 is the smaller (above, left, above-right or above-left)
+		UINT8 *temp = ReconPtr1;
+		ReconPtr1   = ReconPtr2;
+		ReconPtr2   = temp;
+		diff = (INT32)(ReconPtr2-ReconPtr1);
+	}
+
+	if ( diff==1 )										// Fractional pixel in horizontal only
+		Error = FilterBlock1d_hb8_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, 1, BilinearFilters_mmx[ModX] );
+	else if ( diff == PixelsPerLine )					// Fractional pixel in vertical only
+		Error = FilterBlock1d_vb8_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, PixelsPerLine, BilinearFilters_mmx[ModY] );
+	else if ( diff == PixelsPerLine - 1 )				// ReconPtr1 is Top right: filter starts one pixel to its left
+        Error = FilterBlock2dBil_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1-1, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );        
+	else if ( diff == PixelsPerLine + 1 )				// ReconPtr1 is Top left
+        Error = FilterBlock2dBil_SAD_mmx ( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );		
+    return Error;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c
new file mode 100644
index 00000000..8064c503
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/CWmtFunctions.c
@@ -0,0 +1,1728 @@
+/****************************************************************************
+*
+*   Module Title :     CWmtFunctions.c
+*
+*   Description  :     Encoder system dependent functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>
+#include "compdll.h"
+#include <assert.h>
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning(disable:4799)
+
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT    7
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern int XMMGetSAD ( UINT8 *NewDataPtr,      // returns int, now spelled out (C99 and later have no implicit int)
+                       INT32 PixelsPerLine,
+                       UINT8 *RefDataPtr,
+                       INT32 RefPixelsPerLine,
+                       UINT32 ErrorSoFar,
+                       UINT32 BestSoFar );
+
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8  * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern INT16  BilinearFilters_wmt[8][16];
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static __declspec(align(16)) short rd[] = { 64, 64, 64, 64, 64, 64, 64, 64 };   // eight 16-bit lanes of 64 (= FILTER_WEIGHT/2); presumably the rounding term added before shifting by FILTER_SHIFT - confirm at its uses
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtGetSumAbsDiffs16
+ *
+ *  INPUTS        : UINT8 *SrcPtr       : Pointer to input 16x16 block.
+ *                  INT32  SourceStride : Stride of input block.
+ *                  UINT8 *RefPtr       : Pointer to reference 16x16 block.
+ *                  INT32  ReconStride  : Stride of reference block.
+ *                  UINT32 ErrorSoFar   : Accumulated error to date.
+ *                  UINT32 BestSoFar    : Best error found so far.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Calculate the Sum of Absolute difference on 16x16 pixels.
+ *
+ *  SPECIAL NOTES : The function assumes SrcPtr is aligned on 16 bytes;
+ *                  RefPtr can be aligned on any byte boundary.
+ *
+ ****************************************************************************/
+UINT32 WmtGetSumAbsDiffs16 
+(
+    UINT8 *SrcPtr,
+    INT32  SourceStride,
+    UINT8 *RefPtr,
+    INT32  ReconStride,
+    UINT32 ErrorSoFar,      // not referenced in this routine
+    UINT32 BestSoFar        // not referenced in this routine
+)
+{
+
+    UINT32 Error = 0;
+    __asm
+    {
+        mov         esi,    SrcPtr                      ;
+        mov         edi,    RefPtr                      ; 
+
+        mov         ecx,    SourceStride                ;
+        mov         edx,    ReconStride                 ;
+        
+        movdqu      xmm0,   [edi]                       ;   Row0 Ref        // unaligned load: RefPtr may sit on any byte boundary
+        lea         eax,    [esi + 2 * ecx ]            ;   Calculate Row3 Source address
+
+        psadbw      xmm0,   [esi]                       ;   Row0 SAD        // memory operand: source rows must be 16-byte aligned
+        lea         ebx,    [edi + 2 * edx ]            ;   Calculate Row3 Ref address
+
+        movdqu      xmm1,   [edi + edx]                 ;   Row1 Ref
+        add         eax,    ecx                         ;   Calculate Row3 Source address    
+
+        psadbw      xmm1,   [esi + ecx]                 ;   Row1 SAD
+        add         ebx,    edx                         ;   Calculate Row3 Ref address
+
+        movdqu      xmm2,   [edi + 2 * edx ]            ;   Row2 Ref
+        paddw       xmm0,   xmm1                        ;   Row0 sad + Row1 sad     // word add suffices: each psadbw partial sum is at most 8*255
+
+        psadbw      xmm2,   [esi + 2 * ecx ]            ;   Row2 Sad
+        lea         esi,    [eax + 2 * ecx ]            ;   Calculate Row6 Source address    
+
+        lea         edi,    [ebx + 2 * edx ]            ;   Calculate Row6 Ref address
+        movdqu      xmm3,   [ebx]                       ;   Row3 Ref
+        
+        add         esi,    ecx                         ;   Calculate Row6 Source address    
+        psadbw      xmm3,   [eax]                       ;   Row3 SAD
+        
+        add         edi,    edx                         ;   Calculate Row6 Ref address
+        movdqu      xmm4,   [ebx + edx]                 ;   Row4 Ref
+
+        paddw       xmm2,   xmm3                        ;   Row2 Sad + Row3 Sad
+        psadbw      xmm4,   [eax + ecx]                 ;   Row4 Sad
+
+        movdqu      xmm5,   [ebx + 2 * edx]             ;   Row5 Ref
+        paddd       xmm0,   xmm2                        ;   Row0 + Row1 + Row2 + Row3 SAD
+
+        psadbw      xmm5,   [eax + 2 * ecx]             ;   Row5 SAD
+        movdqu      xmm6,   [edi]                       ;   Row6 Ref
+
+        paddw       xmm4,   xmm5                        ;   Row4 + Row5 SAD
+        psadbw      xmm6,   [esi]                       ;   Row6 SAD
+
+        movdqu      xmm7,   [edi + edx ]                ;   Row7 Ref
+        paddd       xmm0,   xmm4                        ;   Row0 1 2 3 4 5 
+
+        psadbw      xmm7,   [esi + ecx]                 ;   Row7 Sad
+        
+        lea         esi,    [esi + 2* ecx]              ;   calculate Row8 source address
+        paddw       xmm7,   xmm6                        ;   Row7 + Row6 Sad
+        
+        lea         edi,    [edi + 2* edx]              ;   calculate Row8 source address        // edi is the reference pointer
+        paddd       xmm7,   xmm0                        ;       // xmm7 = SAD of rows 0..7 (two 64-bit partial sums)
+
+        // Rows 8..15: same schedule as the first eight rows
+        movdqu      xmm0,   [edi]                       ;   Row0 Ref
+        lea         eax,    [esi + 2 * ecx ]            ;   Calculate Row3 Source address
+
+        psadbw      xmm0,   [esi]                       ;   Row0 SAD
+        lea         ebx,    [edi + 2 * edx ]            ;   Calculate Row3 Ref address
+
+        movdqu      xmm1,   [edi + edx]                 ;   Row1 Ref
+        add         eax,    ecx                         ;   Calculate Row3 Source address    
+
+        psadbw      xmm1,   [esi + ecx]                 ;   Row1 SAD
+        add         ebx,    edx                         ;   Calculate Row3 Ref address
+
+        movdqu      xmm2,   [edi + 2 * edx ]            ;   Row2 Ref
+        paddw       xmm0,   xmm1                        ;   Row0 sad + Row1 sad
+
+        psadbw      xmm2,   [esi + 2 * ecx ]            ;   Row2 Sad
+        lea         esi,    [eax + 2 * ecx ]            ;   Calculate Row6 Source address    
+
+        lea         edi,    [ebx + 2 * edx ]            ;   Calculate Row6 Ref address
+        movdqu      xmm3,   [ebx]                       ;   Row3 Ref
+        
+        add         esi,    ecx                         ;   Calculate Row6 Source address    
+        psadbw      xmm3,   [eax]                       ;   Row3 SAD
+        
+        add         edi,    edx                         ;   Calculate Row6 Ref address
+        movdqu      xmm4,   [ebx + edx]                 ;   Row4 Ref
+
+        paddw       xmm2,   xmm3                        ;   Row2 Sad + Row3 Sad
+        psadbw      xmm4,   [eax + ecx]                 ;   Row4 Sad
+
+        movdqu      xmm5,   [ebx + 2 * edx]             ;   Row5 Ref
+        paddd       xmm0,   xmm2                        ;   Row0 + Row1 + Row2 + Row3 SAD
+
+        psadbw      xmm5,   [eax + 2 * ecx]             ;   Row5 SAD
+        movdqu      xmm6,   [edi]                       ;   Row6 Ref
+
+        paddw       xmm4,   xmm5                        ;   Row4 + Row5 SAD
+        psadbw      xmm6,   [esi]                       ;   Row6 SAD
+
+        paddd       xmm0,   xmm4                        ;   Row0 1 2 3 4 5 
+
+        movdqu      xmm3,   [edi + edx ]                ;   Row7 Ref
+        psadbw      xmm3,   [esi + ecx ]                ;   Row7 Sad
+
+        paddw       xmm3,   xmm6                        ;
+        paddd       xmm0,   xmm3                        ;   Sum of 16 row sad
+        
+        paddd       xmm7,   xmm0;                       ;       // add the two 8-row totals
+
+        movdq2q     mm0,    xmm7                        ;   lower q
+        psrldq      xmm7,   8                           ;  
+
+        movdq2q     mm1,    xmm7                        ;    High Q
+        paddd       mm0,    mm1                         ;       // low + high qword sums = SAD of the whole block
+
+        movd        Error, mm0               
+
+    }   // no EMMS after the MMX register use; warning 4799 is disabled by the pragma at the top of this file
+    return Error;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtGetHalfPixelSumAbsDiffs16
+ *
+ *  INPUTS        : UINT8 *SrcPtr       : Pointer to input 16x16 block.
+ *                  INT32  SourceStride : Stride of input block.
+ *                  UINT8 *RefPtr       : Pointer to first reference 16x16 block.
+ *                  UINT8 *RefPtr2      : Pointer to second reference 16x16 block.
+ *                  INT32  ReconStride  : Stride of reference blocks.
+ *                  UINT32 ErrorSoFar   : Accumulated error to date.
+ *                  UINT32 BestSoFar    : Best error found so far.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Calculates the Sum of Absolute differences between a 16x16 
+ *                  pixel MB and the average of two 16x16 pixel references.
+ *
+ *  SPECIAL NOTES : The function assumes SrcPtr is aligned on 16 bytes;
+ *                  RefPtr & RefPtr2 can be aligned on any byte boundary.
+ *
+ ****************************************************************************/
+UINT32 WmtGetHalfPixelSumAbsDiffs16
+(
+    UINT8 *SrcPtr,
+    INT32  SourceStride,
+    UINT8 *RefPtr,
+    UINT8 *RefPtr2,
+    INT32  ReconStride,
+    UINT32 ErrorSoFar,      // not referenced in this routine
+    UINT32 BestSoFar        // not referenced in this routine
+)
+{
+    UINT32 Error = 0;
+    
+    if ( RefPtr == RefPtr2 )
+    {
+        Error = GetSumAbsDiffs16 ( SrcPtr, SourceStride, RefPtr, ReconStride, 0, 0 );    // identical references: plain SAD, zeros passed for ErrorSoFar/BestSoFar
+    }
+    else
+    {
+        __asm
+        {
+            mov         esi,        SrcPtr;
+            mov         edi,        RefPtr;
+
+            mov         eax,        RefPtr2;
+            mov         ecx,        SourceStride;
+
+            mov         edx,        ReconStride;
+            pxor        xmm7,       xmm7;                   // zero, for byte->word unpacking
+
+            mov         ebx,         16;                    // 16 rows
+            pxor        xmm6,       xmm6;                   // SAD accumulator
+
+LoopWmtHalfSad:
+
+            movdqu      xmm0,       [edi]                   ;   Read 16 bytes from Ref
+            movdqu      xmm1,       [eax]                   ;   Read 16 bytes from Ref2
+                        
+            movdqa      xmm2,       xmm0                    ;   copy 
+            punpcklbw   xmm0,       xmm7                    ;   Low 8 bytes from Ref
+
+            movdqa      xmm3,       xmm1                    ;   copy
+            punpcklbw   xmm1,       xmm7                    ;   Low 8 bytes from Ref2
+
+            paddw       xmm0,       xmm1                    ;   Add low 8 bytes
+            punpckhbw   xmm2,       xmm7                    ;   High 8 bytes from Ref
+
+            psraw       xmm0,       1                       ;   average of Low 8 bytes Ref and Ref2     // (a+b)>>1: truncating, no rounding term
+            punpckhbw   xmm3,       xmm7                    ;   High 8 bytes from Ref2
+
+            add         eax,        edx                     ;   Next line of Ref1       // eax holds RefPtr2
+            paddw       xmm2,       xmm3                    ;   Add high 8 bytes
+
+            add         edi,        edx                     ;   Next line of Ref2       // edi holds RefPtr
+            psraw       xmm2,       1                       ;   Average of high 8 bytes
+
+            packuswb    xmm0,       xmm2                    ;   pack the average back into bytes
+            psadbw      xmm0,       [esi]                   ;   sad         // memory operand: source rows must be 16-byte aligned
+
+            add         esi,        ecx                     ;   next line of source
+            dec         ebx                                 ;
+            
+            paddd       xmm6,       xmm0                    ;   accumulate the sad
+            jnz         LoopWmtHalfSad                      // tests the flags set by dec ebx; paddd leaves EFLAGS untouched
+
+            movdq2q     mm0,        xmm6                    ;   
+            psrldq      xmm6,       8                       ;
+
+            movdq2q     mm1,        xmm6                    ;
+            paddd       mm0,        mm1                     ;       // low + high qword sums = total SAD
+
+            movd        Error,      mm0                     ;
+
+        }                
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtGetHalfPixelSAD
+ *
+ *  INPUTS        : UINT8 *SrcData          : Pointer to input block (see SPECIAL NOTES for size).
+ *                  INT32  PixelsPerLine    : Stride of input block.
+ *                  UINT8 *RefDataPtr1      : Pointer to first reference block.
+ *                  UINT8 *RefDataPtr2      : Pointer to second reference block.
+ *                  INT32  RefPixelsPerLine : Stride of reference blocks.
+ *                  INT32  ErrorSoFar       : Accumulated error to date.
+ *                  INT32  BestSoFar        : Best error found so far.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32: ErrorSoFar plus the SAD at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      : Calculates the sum of the absolute differences against
+ *                  half pixel interpolated references.
+ *
+ *  SPECIAL NOTES : When RefDataPtr1 and RefDataPtr2 are equal no interpolation
+ *                  is needed and the work is handed to XMMGetSAD.
+ *
+ *                  Otherwise the inline assembly handles four pairs of rows.
+ *                  For each row it reads eight bytes from the source and from
+ *                  both references, forms the truncating average
+ *                  (Ref1 + Ref2) >> 1, and accumulates the absolute
+ *                  difference against the source in the 16-bit lanes of xmm7.
+ *
+ *                  BestSoFar is only forwarded to XMMGetSAD; the interpolated
+ *                  path does not read it.
+ *
+ *                  NOTE(review): this header used to describe the blocks as
+ *                  16x16, but the interpolated path reads 8 bytes from each of
+ *                  8 rows - confirm the intended block size with the callers.
+ *
+ *                  NOTE(review): DiffVal starts at ErrorSoFar and ErrorSoFar is
+ *                  also passed to XMMGetSAD. If XMMGetSAD already includes
+ *                  ErrorSoFar in its result it is counted twice on that path -
+ *                  verify against XMMGetSAD.
+ *
+ ****************************************************************************/
+INT32 WmtGetHalfPixelSAD
+( 
+    UINT8 *SrcData, 
+    INT32 PixelsPerLine,
+    UINT8 *RefDataPtr1,
+    UINT8 *RefDataPtr2,
+    INT32 RefPixelsPerLine,
+    INT32 ErrorSoFar, 
+    INT32 BestSoFar 
+)
+{
+    INT32   DiffVal = ErrorSoFar;                       // Running total, seeded with the error accumulated so far.
+    INT16   DiffAcc[4] = { 0, 0, 0, 0 };     // Receives the four 16-bit partial sums stored from xmm0.
+    INT32   RefOffset = (int)(RefDataPtr1 - RefDataPtr2);   // Zero when both reference pointers are the same.
+
+    if ( RefOffset == 0 )
+    {
+        // Both references are the same block, so no interpolation is needed: use the plain SAD routine.
+        DiffVal += XMMGetSAD ( SrcData, PixelsPerLine, RefDataPtr1, RefPixelsPerLine, ErrorSoFar, BestSoFar );
+    }
+    else
+    {
+        // SSE2 code for the interpolated case: eight rows, handled two at a time.
+        __asm
+        {
+            mov         eax,        dword ptr [SrcData]         // eax = source pointer
+            pxor        xmm6,       xmm6                        // xmm6 = 0, used to widen bytes to words
+
+            mov         ebx,        dword ptr [RefDataPtr1]     // ebx = first reference pointer
+            pxor        xmm7,       xmm7                        // xmm7 = 0, accumulates abs differences (8 words)
+
+            mov         edx,        dword ptr [PixelsPerLine]   // source stride (moved to esi below)
+            mov         ecx,        dword ptr [RefDataPtr2]     // ecx = second reference pointer
+
+            mov         esi,        edx                         // esi = source stride
+            mov         edx,        dword ptr [RefPixelsPerLine]   // edx = reference stride
+
+            // Row 1 and 2
+            movq        xmm1,       QWORD ptr [ebx]             // eight bytes from ref 1, first row of pair
+            movq        xmm2,       QWORD ptr [ecx]             // eight bytes from ref 2, first row of pair
+
+            punpcklbw   xmm1,       xmm6                        // widen ref 1 bytes to words
+            movq        xmm3,       QWORD ptr [ebx+edx]         // eight bytes from ref 1, second row of pair
+
+            punpcklbw   xmm2,       xmm6                        // widen ref 2 bytes to words
+            movq        xmm4,       QWORD ptr [ecx+edx]         // eight bytes from ref 2, second row of pair
+
+            punpcklbw   xmm3,       xmm6                        // widen to words
+            paddw       xmm1,       xmm2                        // ref 1 + ref 2, first row
+
+            punpcklbw   xmm4,       xmm6                        // widen to words
+            psrlw       xmm1,       1                           // divide by two (truncating average), first row
+
+            paddw       xmm3,       xmm4                        // ref 1 + ref 2, second row
+            movq        xmm0,       QWORD PTR [eax]             // eight source bytes, first row
+
+            psrlw       xmm3,       1                           // divide by two (truncating average), second row
+            punpcklbw   xmm0,       xmm6                        // widen source to words
+
+            movq        xmm5,       QWORD PTR [eax+esi]         // eight source bytes, second row
+            movdqa      xmm2,       xmm0                        // keep a copy of the first source row
+
+            punpcklbw   xmm5,       xmm6                        // widen source to words
+            psubusw     xmm0,       xmm1                        // saturating src - avg (zero where avg > src)
+
+            movdqa      xmm4,       xmm5                        // keep a copy of the second source row
+            psubusw     xmm1,       xmm2                        // saturating avg - src (zero where src > avg)
+
+            psubusw     xmm5,       xmm3                        // saturating src - avg, second row
+            psubusw     xmm3,       xmm4                        // saturating avg - src, second row
+
+            por         xmm0,       xmm1                        // |src - avg|, first row
+            por         xmm5,       xmm3                        // |src - avg|, second row
+
+            paddw       xmm7,       xmm0                        // accumulate first row
+            paddw       xmm7,       xmm5                        // accumulate second row
+
+            lea         ebx,        [ebx+edx*2]                 // advance ref 1 by two rows
+            lea         ecx,        [ecx+edx*2]                 // advance ref 2 by two rows
+
+            lea         eax,        [eax+esi*2]                 // advance source by two rows
+
+            // Row 3 and 4
+            movq        xmm1,       QWORD PTR [ebx]             // eight bytes from ref 1, first row of pair
+            movq        xmm2,       QWORD PTR [ecx]             // eight bytes from ref 2, first row of pair
+
+            punpcklbw   xmm1,       xmm6                        // widen ref 1 bytes to words
+            movq        xmm3,       QWORD PTR [ebx+edx]         // eight bytes from ref 1, second row of pair
+
+            punpcklbw   xmm2,       xmm6                        // widen ref 2 bytes to words
+            movq        xmm4,       QWORD PTR [ecx+edx]         // eight bytes from ref 2, second row of pair
+
+            punpcklbw   xmm3,       xmm6                        // widen to words
+            paddw       xmm1,       xmm2                        // ref 1 + ref 2, first row
+
+            punpcklbw   xmm4,       xmm6                        // widen to words
+            psrlw       xmm1,       1                           // divide by two (truncating average), first row
+
+            paddw       xmm3,       xmm4                        // ref 1 + ref 2, second row
+            movq        xmm0,       QWORD PTR [eax]             // eight source bytes, first row
+
+            psrlw       xmm3,       1                           // divide by two (truncating average), second row
+            punpcklbw   xmm0,       xmm6                        // widen source to words
+
+            movq        xmm5,       QWORD PTR [eax+esi]         // eight source bytes, second row
+            movdqa      xmm2,       xmm0                        // keep a copy of the first source row
+
+            punpcklbw   xmm5,       xmm6                        // widen source to words
+            psubusw     xmm0,       xmm1                        // saturating src - avg (zero where avg > src)
+
+            movdqa      xmm4,       xmm5                        // keep a copy of the second source row
+            psubusw     xmm1,       xmm2                        // saturating avg - src (zero where src > avg)
+
+            psubusw     xmm5,       xmm3                        // saturating src - avg, second row
+            psubusw     xmm3,       xmm4                        // saturating avg - src, second row
+
+            por         xmm0,       xmm1                        // |src - avg|, first row
+            por         xmm5,       xmm3                        // |src - avg|, second row
+
+            paddw       xmm7,       xmm0                        // accumulate first row
+            paddw       xmm7,       xmm5                        // accumulate second row
+
+            lea         ebx,        [ebx+edx*2]                 // advance ref 1 by two rows
+            lea         ecx,        [ecx+edx*2]                 // advance ref 2 by two rows
+
+            lea         eax,        [eax+esi*2]                 // advance source by two rows
+
+            // Row 5 and 6
+            movq        xmm1,       QWORD PTR [ebx]             // eight bytes from ref 1, first row of pair
+            movq        xmm2,       QWORD PTR [ecx]             // eight bytes from ref 2, first row of pair
+
+            punpcklbw   xmm1,       xmm6                        // widen ref 1 bytes to words
+            movq        xmm3,       QWORD PTR [ebx+edx]         // eight bytes from ref 1, second row of pair
+
+            punpcklbw   xmm2,       xmm6                        // widen ref 2 bytes to words
+            movq        xmm4,       QWORD PTR [ecx+edx]         // eight bytes from ref 2, second row of pair
+
+            punpcklbw   xmm3,       xmm6                        // widen to words
+            paddw       xmm1,       xmm2                        // ref 1 + ref 2, first row
+
+            punpcklbw   xmm4,       xmm6                        // widen to words
+            psrlw       xmm1,       1                           // divide by two (truncating average), first row
+
+            paddw       xmm3,       xmm4                        // ref 1 + ref 2, second row
+            movq        xmm0,       QWORD PTR [eax]             // eight source bytes, first row
+
+            psrlw       xmm3,       1                           // divide by two (truncating average), second row
+            punpcklbw   xmm0,       xmm6                        // widen source to words
+
+            movq        xmm5,       QWORD PTR [eax+esi]         // eight source bytes, second row
+            movdqa      xmm2,       xmm0                        // keep a copy of the first source row
+
+            punpcklbw   xmm5,       xmm6                        // widen source to words
+            psubusw     xmm0,       xmm1                        // saturating src - avg (zero where avg > src)
+
+            movdqa      xmm4,       xmm5                        // keep a copy of the second source row
+            psubusw     xmm1,       xmm2                        // saturating avg - src (zero where src > avg)
+
+            psubusw     xmm5,       xmm3                        // saturating src - avg, second row
+            psubusw     xmm3,       xmm4                        // saturating avg - src, second row
+
+            por         xmm0,       xmm1                        // |src - avg|, first row
+            por         xmm5,       xmm3                        // |src - avg|, second row
+
+            paddw       xmm7,       xmm0                        // accumulate first row
+            paddw       xmm7,       xmm5                        // accumulate second row
+
+            lea         ebx,        [ebx+edx*2]                 // advance ref 1 by two rows
+            lea         ecx,        [ecx+edx*2]                 // advance ref 2 by two rows
+
+
+            lea         eax,        [eax+esi*2]                 // advance source by two rows
+
+            // Row 7 and 8
+            movq        xmm1,       QWORD PTR [ebx]             // eight bytes from ref 1, first row of pair
+            movq        xmm2,       QWORD PTR [ecx]             // eight bytes from ref 2, first row of pair
+
+            punpcklbw   xmm1,       xmm6                        // widen ref 1 bytes to words
+            movq        xmm3,       QWORD PTR [ebx+edx]         // eight bytes from ref 1, second row of pair
+
+            punpcklbw   xmm2,       xmm6                        // widen ref 2 bytes to words
+            movq        xmm4,       QWORD PTR [ecx+edx]         // eight bytes from ref 2, second row of pair
+
+            punpcklbw   xmm3,       xmm6                        // widen to words
+            paddw       xmm1,       xmm2                        // ref 1 + ref 2, first row
+
+            punpcklbw   xmm4,       xmm6                        // widen to words
+            psrlw       xmm1,       1                           // divide by two (truncating average), first row
+
+            paddw       xmm3,       xmm4                        // ref 1 + ref 2, second row
+            movq        xmm0,       QWORD PTR [eax]                     // eight source bytes, first row
+
+            psrlw       xmm3,       1                           // divide by two (truncating average), second row
+            punpcklbw   xmm0,       xmm6                        // widen source to words
+
+            movq        xmm5,       QWORD PTR [eax+esi]         // eight source bytes, second row
+            movdqa      xmm2,       xmm0                        // keep a copy of the first source row
+
+            punpcklbw   xmm5,       xmm6                        // widen source to words
+            psubusw     xmm0,       xmm1                        // saturating src - avg (zero where avg > src)
+
+            movdqa      xmm4,       xmm5                        // keep a copy of the second source row
+            psubusw     xmm1,       xmm2                        // saturating avg - src (zero where src > avg)
+
+            psubusw     xmm5,       xmm3                        // saturating src - avg, second row
+            psubusw     xmm3,       xmm4                        // saturating avg - src, second row
+
+            por         xmm0,       xmm1                        // |src - avg|, first row
+            por         xmm5,       xmm3                        // |src - avg|, second row
+
+            paddw       xmm7,       xmm0                        // accumulate first row
+            paddw       xmm7,       xmm5                        // accumulate second row
+
+            // Fold the eight word lanes of xmm7 down to four and store them.
+            movdqa      xmm0,       xmm7                        // copy all eight word sums
+            psrldq      xmm7,       8                           // shift right 64 bits: high four words move down
+
+            paddw       xmm0,       xmm7                        // low four words now hold the pairwise sums
+            movq        QWORD PTR [DiffAcc], xmm0   ; copy back accumulated results into normal memory
+
+        }
+
+        // Add the four 16-bit partial sums in DiffAcc to the running total.
+        DiffVal += DiffAcc[0] + DiffAcc[1] + DiffAcc[2] + DiffAcc[3];
+    }
+
+    return DiffVal;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtGetIntraError
+ *
+ *  INPUTS        : UINT8 *DataPtr       : Pointer to input block.
+ *                  INT32  PixelsPerLine : Stride of input block.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: (XXSum << 6) - XSum * XSum, where XSum is the sum
+ *                  of the pixel values read and XXSum is the sum of their
+ *                  squares.
+ *
+ *  FUNCTION      : Calculates a variance measure of the block. The result is
+ *                  not normalised: with 64 pixels it equals the population
+ *                  variance multiplied by 64 * 64.
+ *
+ *  SPECIAL NOTES : The assembly reads eight bytes from each of eight
+ *                  consecutive rows (rows are PixelsPerLine bytes apart).
+ *
+ *                  Register roles: xmm5 holds eight 16-bit running pixel sums,
+ *                  xmm6 stays zero and is used to widen bytes to words, xmm7
+ *                  holds four 32-bit running sums of squares.
+ *
+ *                  Some of the ';' comments inside the assembly carry stale
+ *                  register names ("mmx6", "mmx7", "xmm0" on every row); trust
+ *                  the instruction operands rather than those labels.
+ *
+ ****************************************************************************/
+UINT32 WmtGetIntraError ( UINT8 *DataPtr, INT32 PixelsPerLine )
+{
+    UINT32  XSum;               // Sum of the pixel values, written by the assembly.
+    UINT32  XXSum;              // Sum of the squared pixel values, written by the assembly.
+    UINT8   *DiffPtr;           // Local copy of DataPtr that the assembly loads into eax.
+
+    // The eight rows are unrolled by hand rather than looped.
+    DiffPtr = DataPtr;
+
+    __asm
+    {
+            pxor        xmm5,   xmm5                    ; Blank mmx6
+            pxor        xmm6,   xmm6                    ; Blank mmx7
+            pxor        xmm7,   xmm7                    ; Blank mmx7
+
+            mov         eax,    dword ptr [DiffPtr]     ; Load base addresses
+            mov         ecx,    dword ptr [PixelsPerLine]   // ecx = row stride
+
+            // Row 1
+            movq        xmm0,   QWORD ptr [eax]         ; Copy eight bytes to xmm0;
+            punpcklbw   xmm0,   xmm6                    // widen the eight bytes to words
+            paddw       xmm5,   xmm0                    // add to the running pixel sums
+            pmaddwd     xmm0,   xmm0                    // square each word and add adjacent pairs -> four dwords
+            paddd       xmm7,   xmm0                    // add to the running sums of squares
+            
+            // Row 2
+            movq        xmm1,   QWORD ptr [eax+ecx]     ; Copy eight bytes to xmm0;
+            punpcklbw   xmm1,   xmm6                    // widen to words
+            paddw       xmm5,   xmm1                    // accumulate pixel sums
+            pmaddwd     xmm1,   xmm1                    // squares, pairwise added
+            paddd       xmm7,   xmm1                    // accumulate sums of squares
+
+            // Row 3    
+            movq        xmm2,   QWORD ptr [eax+ecx * 2]     ; Copy eight bytes to xmm0;
+            add         eax,    ecx                     // eax += one row (row 3 is already loaded)
+            punpcklbw   xmm2,   xmm6                    // widen to words
+            paddw       xmm5,   xmm2                    // accumulate pixel sums
+            pmaddwd     xmm2,   xmm2                    // squares, pairwise added
+            paddd       xmm7,   xmm2                    // accumulate sums of squares
+            lea         eax,    [eax+ecx*2]             // eax += two more rows: now points at row 4
+            
+            // Row 4
+            movq        xmm0,   QWORD ptr [eax]         ; Copy eight bytes to xmm0;
+            punpcklbw   xmm0,   xmm6                    // widen to words
+            paddw       xmm5,   xmm0                    // accumulate pixel sums
+            pmaddwd     xmm0,   xmm0                    // squares, pairwise added
+            paddd       xmm7,   xmm0                    // accumulate sums of squares
+
+            // Row 5
+            movq        xmm1,   QWORD ptr [eax+ecx]     ; Copy eight bytes to xmm0;
+            punpcklbw   xmm1,   xmm6                    // widen to words
+            paddw       xmm5,   xmm1                    // accumulate pixel sums
+            pmaddwd     xmm1,   xmm1                    // squares, pairwise added
+            paddd       xmm7,   xmm1                    // accumulate sums of squares
+
+            // Row 6    
+            movq        xmm2,   QWORD ptr [eax+ecx * 2]     ; Copy eight bytes to xmm0;
+            add         eax,    ecx                     // eax += one row (row 6 is already loaded)
+            punpcklbw   xmm2,   xmm6                    // widen to words
+            paddw       xmm5,   xmm2                    // accumulate pixel sums
+            pmaddwd     xmm2,   xmm2                    // squares, pairwise added
+            paddd       xmm7,   xmm2                    // accumulate sums of squares
+            lea         eax,    [eax+ecx*2]             // eax += two more rows: now points at row 7
+
+            // Row 7
+            movq        xmm0,   QWORD ptr [eax]         ; Copy eight bytes to xmm0;
+            punpcklbw   xmm0,   xmm6                    // widen to words
+            paddw       xmm5,   xmm0                    // accumulate pixel sums
+            pmaddwd     xmm0,   xmm0                    // squares, pairwise added
+            paddd       xmm7,   xmm0                    // accumulate sums of squares
+
+            // Row 8
+            movq        xmm1,   QWORD ptr [eax+ecx]     ; Copy eight bytes to xmm0;
+            punpcklbw   xmm1,   xmm6                    // widen to words
+            paddw       xmm5,   xmm1                    // accumulate pixel sums
+            pmaddwd     xmm1,   xmm1                    // squares, pairwise added
+            paddd       xmm7,   xmm1                    // accumulate sums of squares
+        
+            movdqa      xmm4,   xmm5                    // copy the eight word pixel sums
+            punpcklwd   xmm5,   xmm6                    // low four word sums -> dwords
+
+            punpckhwd   xmm4,   xmm6                    // high four word sums -> dwords
+            movdqa      xmm0,   xmm7                    // copy the four dword sums of squares
+
+            paddw       xmm5,   xmm4                    // four partial pixel sums, one per dword
+            punpckldq   xmm7,   xmm6                    // low two sums of squares -> qwords
+
+            punpckhdq   xmm0,   xmm6                    // high two sums of squares -> qwords
+            movdqa      xmm4,   xmm5                    // copy the four partial pixel sums
+
+            paddd       xmm0,   xmm7                    // two partial sums of squares, one per qword
+            punpckldq   xmm4,   xmm6                    // low two pixel sums -> qwords
+            punpckhdq   xmm5,   xmm6                    // high two pixel sums -> qwords
+            paddw       xmm4,   xmm5                    // two partial pixel sums, one per qword
+
+            movdqa      xmm5,   xmm4                    // copy so the upper half can be shifted down
+            movdqa      xmm7,   xmm0                    // copy so the upper half can be shifted down
+
+            psrldq      xmm5,   8;                      // upper partial pixel sum moved to the low qword
+            psrldq      xmm7,   8;                      // upper partial sum of squares moved to the low qword
+            
+            paddw       xmm4,   xmm5                    // total pixel sum in the low dword
+            paddd       xmm0,   xmm7                    // total sum of squares in the low dword
+            
+            movd        DWORD PTR [XXSum], xmm0         // store the sum of squares
+            movd        DWORD ptr [XSum], xmm4          // store the pixel sum
+    }
+    // 64 * XXSum - XSum * XSum: the population variance of the 64 pixels scaled by 64 * 64.
+    return ( ((XXSum<<6) - XSum*XSum) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtGetInterErr
+ *
+ *  INPUTS        : UINT8 *NewDataPtr       : Pointer to input block.
+ *                  INT32  PixelsPerLine    : Stride of input block.
+ *                  UINT8 *RefDataPtr1      : Pointer to first reference block.
+ *                  UINT8 *RefDataPtr2      : Pointer to second reference block.
+ *                  INT32  RefPixelsPerLine : Stride of reference blocks.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD at 1/2 pixel accuracy.
+ *
+ *  FUNCTION      : Calculates the variance of the difference between a block
+ *                  and the half-pixel interpolated average of two reference blocks.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+UINT32 WmtGetInterErr
+( 
+    UINT8 *NewDataPtr, 
+    INT32 PixelsPerLine, 
+    UINT8 *RefDataPtr1, 
+    UINT8 *RefDataPtr2, 
+    INT32 RefPixelsPerLine 
+)
+{
+    UINT32  XSum; 
+    UINT32  XXSum;
+    INT16   MmxXSum[4];  
+    INT32   MmxXXSum[2]; 
+
+    // Mode of interpolation chosen based upon on the offset of the second reference pointer
+    if ( RefDataPtr1 == RefDataPtr2 )
+    {
+        __asm
+        {
+            mov         eax,        NewDataPtr                  // Load base addresses
+            pxor        xmm5,       xmm5                        // Clear Xmm5
+
+            mov         ebx,        RefDataPtr1                 // Ref1
+            pxor        xmm6,       xmm6                        // Clear Xmm6
+
+
+            mov         ecx,        PixelsPerLine               // Get Width
+            pxor        xmm7,       xmm7                        // Clear Xmm7
+
+            mov         edx,        RefPixelsPerLine            // Get Pitch
+
+            // Row 1 and Row 2
+            movq        xmm0,       QWORD PTR [eax]             // Copy eight bytes to xmm0
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes to xmm1
+
+            punpcklbw   xmm0,       xmm6                        // unpack to higher precision
+            movq        xmm3,       QWORD Ptr [eax+ecx]         // Copy eight Bytes to xmm3
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD ptr [ebx+edx]         // Copy eight Bytes to xmm4
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            psubsw      xmm0,       xmm1                        // A-B to xmm0
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            paddw       xmm5,       xmm0                        // accumulate differences in xmm5
+
+            psubsw      xmm3,       xmm4                        // A-B to xmm3
+            paddw       xmm5,       xmm3                        // accumulate the differences
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm3,       xmm3                        // square and accumulate
+
+            lea         ebx,        [ebx+edx*2]                 // mov forward two lines
+            lea         eax,        [eax+ecx*2]                 // mov forward two lines
+
+            paddd       xmm7,       xmm0                        // accumulate in xmm7
+            paddd       xmm7,       xmm3                        // accumulate in xmm7
+
+            // Row 3 and Row 4
+            movq        xmm0,       QWORD PTR [eax]             // Copy eight bytes to xmm0
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes to xmm1
+
+            punpcklbw   xmm0,       xmm6                        // unpack to higher precision
+            movq        xmm3,       QWORD Ptr [eax+ecx]         // Copy eight Bytes to xmm3
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD ptr [ebx+edx]         // Copy eight Bytes to xmm4
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            psubsw      xmm0,       xmm1                        // A-B to xmm0
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            paddw       xmm5,       xmm0                        // accumulate differences in xmm5
+
+            psubsw      xmm3,       xmm4                        // A-B to xmm3
+            paddw       xmm5,       xmm3                        // accumulate the differences
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm3,       xmm3                        // square and accumulate
+
+            lea         ebx,        [ebx+edx*2]                 // mov forward two lines
+            lea         eax,        [eax+ecx*2]                 // mov forward two lines
+
+            paddd       xmm7,       xmm0                        // accumulate in xmm7
+            paddd       xmm7,       xmm3                        // accumulate in xmm7
+
+            // Row 5 and Row6
+            movq        xmm0,       QWORD PTR [eax]             // Copy eight bytes to xmm0
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes to xmm1
+
+            punpcklbw   xmm0,       xmm6                        // unpack to higher precision
+            movq        xmm3,       QWORD Ptr [eax+ecx]         // Copy eight Bytes to xmm3
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD ptr [ebx+edx]         // Copy eight Bytes to xmm4
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            psubsw      xmm0,       xmm1                        // A-B to xmm0
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            paddw       xmm5,       xmm0                        // accumulate differences in xmm5
+
+            psubsw      xmm3,       xmm4                        // A-B to xmm3
+            paddw       xmm5,       xmm3                        // accumulate the differences
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm3,       xmm3                        // square and accumulate
+
+            lea         ebx,        [ebx+edx*2]                 // mov forward two lines
+            lea         eax,        [eax+ecx*2]                 // mov forward two lines
+
+            paddd       xmm7,       xmm0                        // accumulate in xmm7
+            paddd       xmm7,       xmm3                        // accumulate in xmm7
+
+            // Row 7 and Row 8
+            movq        xmm0,       QWORD PTR [eax]             // Copy eight bytes to xmm0
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes to xmm1
+
+            punpcklbw   xmm0,       xmm6                        // unpack to higher precision
+            movq        xmm3,       QWORD Ptr [eax+ecx]         // Copy eight Bytes to xmm3
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD ptr [ebx+edx]         // Copy eight Bytes to xmm4
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            psubsw      xmm0,       xmm1                        // A-B to xmm0
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            paddw       xmm5,       xmm0                        // accumulate differences in xmm5
+
+            psubsw      xmm3,       xmm4                        // A-B to xmm3
+            paddw       xmm5,       xmm3                        // accumulate the differences
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm3,       xmm3                        // square and accumulate
+
+            paddd       xmm7,       xmm0                        // accumulate in xmm7
+            paddd       xmm7,       xmm3                        // accumulate in xmm7
+
+
+            movdqa      xmm0,       xmm5
+            movdqa      xmm1,       xmm7
+
+            psrldq      xmm5,       8
+            psrldq      xmm7,       8
+
+            paddw       xmm0,       xmm5
+            paddd       xmm1,       xmm7
+
+
+            movq        QWORD PTR [MmxXSum], xmm0   ; copy back accumulated results into normal memory
+            movq        QWORD PTR [MmxXXSum], xmm1  ; copy back accumulated results into normal memory
+
+        }
+
+        // Now accumulate the final results.
+        XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+        XXSum = MmxXXSum[0] + MmxXXSum[1];
+    }
+    // Simple half pixel reference data
+    else
+    {
+        __asm
+        {
+
+            mov         eax,        NewDataPtr                  // Load base addresses
+            pxor        xmm5,       xmm5                        // Clear Xmm5
+
+            mov         ebx,        RefDataPtr1                 // Ref1
+            pxor        xmm6,       xmm6                        // Clear Xmm6
+
+
+            mov         ecx,        PixelsPerLine               // Get Width
+            pxor        xmm7,       xmm7                        // Clear Xmm7
+
+            mov         esi,        RefDataPtr2                 // Ref 2
+            mov         edx,        RefPixelsPerLine            // Get Pitch
+
+
+            // Row 1 and Row 2
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes from each of ref 1
+            movq        xmm2,       QWORD PTR [esi]             // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm3,       QWORD PTR [ebx+edx]         // Copy eight bytes from each of ref 1
+
+            punpcklbw   xmm2,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD PTR [esi+edx]         // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            paddw       xmm1,       xmm2                        // Add word values together.
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            psrlw       xmm1,       1                           // Devide by two (shift right 1)
+
+            paddw       xmm3,       xmm4                        // add word values together
+            movq        xmm0,       QWORD PTR [eax]             // copy eight source bytes to xmm2
+
+            psrlw       xmm3,       1                           // divided by two
+            movq        xmm2,       QWORD PTR [eax+ecx]         // copy eight source bytes to xmm2
+
+            punpcklbw   xmm0,       xmm6                        // unpack to words
+            punpcklbw   xmm2,       xmm6                        // unpack to words
+
+            psubsw      xmm0,       xmm1                        // the difference
+            psubsw      xmm2,       xmm3                        // the difference
+
+            paddw       xmm5,       xmm0                        // accumulate the difference
+            paddw       xmm5,       xmm2                        // accumulate the difference
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm2,       xmm2                        // square and accumulate
+
+            lea         eax,        [eax+ecx*2]
+            lea         ebx,        [ebx+edx*2]
+
+            lea         esi,        [esi+edx*2]
+            paddd       xmm7,       xmm0                        // accumulate in mm7
+
+            paddd       xmm7,       xmm2                        // accumulate in mm7
+
+
+            // Row 3 and Row 4
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes from each of ref 1
+            movq        xmm2,       QWORD PTR [esi]             // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm3,       QWORD PTR [ebx+edx]         // Copy eight bytes from each of ref 1
+
+            punpcklbw   xmm2,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD PTR [esi+edx]         // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            paddw       xmm1,       xmm2                        // Add word values together.
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            psrlw       xmm1,       1                           // Devide by two (shift right 1)
+
+            paddw       xmm3,       xmm4                        // add word values together
+            movq        xmm0,       QWORD PTR [eax]             // copy eight source bytes to xmm2
+
+            psrlw       xmm3,       1                           // divided by two
+            movq        xmm2,       QWORD PTR [eax+ecx]         // copy eight source bytes to xmm2
+
+            punpcklbw   xmm0,       xmm6                        // unpack to words
+            punpcklbw   xmm2,       xmm6                        // unpack to words
+
+            psubsw      xmm0,       xmm1                        // the difference
+            psubsw      xmm2,       xmm3                        // the difference
+
+            paddw       xmm5,       xmm0                        // accumulate the difference
+            paddw       xmm5,       xmm2                        // accumulate the difference
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm2,       xmm2                        // square and accumulate
+
+            lea         eax,        [eax+ecx*2]
+            lea         ebx,        [ebx+edx*2]
+
+            lea         esi,        [esi+edx*2]
+            paddd       xmm7,       xmm0                        // accumulate in mm7
+
+            paddd       xmm7,       xmm2                        // accumulate in mm7
+
+
+            // Row 5 and Row 6
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes from each of ref 1
+            movq        xmm2,       QWORD PTR [esi]             // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm3,       QWORD PTR [ebx+edx]         // Copy eight bytes from each of ref 1
+
+            punpcklbw   xmm2,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD PTR [esi+edx]         // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            paddw       xmm1,       xmm2                        // Add word values together.
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            psrlw       xmm1,       1                           // Devide by two (shift right 1)
+
+            paddw       xmm3,       xmm4                        // add word values together
+            movq        xmm0,       QWORD PTR [eax]             // copy eight source bytes to xmm2
+
+            psrlw       xmm3,       1                           // divided by two
+            movq        xmm2,       QWORD PTR [eax+ecx]         // copy eight source bytes to xmm2
+
+            punpcklbw   xmm0,       xmm6                        // unpack to words
+            punpcklbw   xmm2,       xmm6                        // unpack to words
+
+            psubsw      xmm0,       xmm1                        // the difference
+            psubsw      xmm2,       xmm3                        // the difference
+
+            paddw       xmm5,       xmm0                        // accumulate the difference
+            paddw       xmm5,       xmm2                        // accumulate the difference
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm2,       xmm2                        // square and accumulate
+
+            lea         eax,        [eax+ecx*2]
+            lea         ebx,        [ebx+edx*2]
+
+            lea         esi,        [esi+edx*2]
+            paddd       xmm7,       xmm0                        // accumulate in mm7
+
+            paddd       xmm7,       xmm2                        // accumulate in mm7
+
+
+            // Row 7 and Row 8
+            movq        xmm1,       QWORD PTR [ebx]             // Copy eight bytes from each of ref 1
+            movq        xmm2,       QWORD PTR [esi]             // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm1,       xmm6                        // unpack to shorts
+            movq        xmm3,       QWORD PTR [ebx+edx]         // Copy eight bytes from each of ref 1
+
+            punpcklbw   xmm2,       xmm6                        // unpack to shorts
+            movq        xmm4,       QWORD PTR [esi+edx]         // Copy eight bytes from each of ref 2
+
+            punpcklbw   xmm3,       xmm6                        // unpack to shorts
+            paddw       xmm1,       xmm2                        // Add word values together.
+
+            punpcklbw   xmm4,       xmm6                        // unpack to shorts
+            psrlw       xmm1,       1                           // Devide by two (shift right 1)
+
+            paddw       xmm3,       xmm4                        // add word values together
+            movq        xmm0,       QWORD PTR [eax]             // copy eight source bytes to xmm2
+
+            psrlw       xmm3,       1                           // divided by two
+            movq        xmm2,       QWORD PTR [eax+ecx]         // copy eight source bytes to xmm2
+
+            punpcklbw   xmm0,       xmm6                        // unpack to words
+            punpcklbw   xmm2,       xmm6                        // unpack to words
+
+            psubsw      xmm0,       xmm1                        // the difference
+            psubsw      xmm2,       xmm3                        // the difference
+
+            paddw       xmm5,       xmm0                        // accumulate the difference
+            paddw       xmm5,       xmm2                        // accumulate the difference
+
+            pmaddwd     xmm0,       xmm0                        // square and accumulate
+            pmaddwd     xmm2,       xmm2                        // square and accumulate
+
+            paddd       xmm7,       xmm0                        // accumulate in mm7
+            paddd       xmm7,       xmm2                        // accumulate in mm7
+
+            movdqa      xmm0,       xmm5
+            movdqa      xmm1,       xmm7
+
+            psrldq      xmm5,       8
+            psrldq      xmm7,       8
+
+            paddw       xmm0,       xmm5
+            paddd       xmm1,       xmm7
+
+
+            movq        QWORD Ptr [MmxXSum],    xmm0            // copy back accumulated results into normal memory
+            movq        QWORD Ptr [MmxXXSum],   xmm1            // copy back accumulated results into normal memory
+
+        }
+
+        // Now accumulate the final results.
+        XSum = MmxXSum[0] + MmxXSum[1] + MmxXSum[2] + MmxXSum[3];
+        XXSum = MmxXXSum[0] + MmxXXSum[1];
+    }
+
+    // Compute and return population variance as mis-match metric.
+    return ( ((XXSum << 6) - XSum*XSum ) );
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtGetMBFrameVertVar
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Frame-order vertical variance measure for the macro-block.
+ *
+ *  FUNCTION      : Sums the squared differences between vertically adjacent
+ *                  pixels (row r versus row r+1) across the 16 columns of
+ *                  the current source macro-block.
+ *
+ *  SPECIAL NOTE  : The difference between the last two rows (14 and 15) of
+ *                  the macro-block is not accounted for!
+ *
+ ****************************************************************************/
+UINT32 WmtGetMBFrameVertVar ( CP_INSTANCE *cpi )
+{
+    UINT32 FrameError;                                          // receives the 32-bit total stored by the asm block
+    INT32 Stride  = cpi->pb.Configuration.VideoFrameWidth;      // used as the byte offset between successive rows
+//    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+    __asm
+    {
+        mov         ecx,    DWORD PTR [Stride]
+        mov         eax,    DWORD PTR [SrcPtr]
+        
+        pxor        xmm7,   xmm7                ; xmm7 = 0, used to zero-extend bytes to words
+        pxor        xmm6,   xmm6                ; xmm6 = four 32-bit partial sums, start at 0
+
+        mov         edx,    7                   ; 7 passes, two row pairs per pass (rows 0..14)
+
+WmtGetMBFrameVertVarLoop:
+
+        movdqa      xmm1,   [eax]               ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f  (row 0, aligned load)
+        movdqa      xmm0,   [eax+ecx]           ; 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f  (row 1, aligned load)
+            
+        movdqa      xmm3,   xmm0                ; 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f  (copy of row 1)
+        punpcklbw   xmm0,   xmm7                ; xx 10 xx 11 xx 12 xx 13 xx 14 xx 15 xx 16 xx 17
+        
+        movdqa      xmm4,   xmm1                ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f  (copy of row 0)
+        punpckhbw   xmm3,   xmm7                ; xx 18 xx 19 xx 1a xx 1b xx 1c xx 1d xx 1e xx 1f
+
+        movdqa      xmm2,   [eax+ecx*2]         ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f  (row 2, aligned load)
+        punpcklbw   xmm1,   xmm7                ; xx 00 xx 01 xx 02 xx 03 xx 04 xx 05 xx 06 xx 07
+
+        movdqa      xmm5,   xmm2                ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f  (copy of row 2)
+        punpckhbw   xmm4,   xmm7                ; xx 08 xx 09 xx 0a xx 0b xx 0c xx 0d xx 0e xx 0f
+
+        psubw       xmm1,   xmm0                ; difference between 0 1    low eight
+        pmaddwd     xmm1,   xmm1                ; SD         between 0 1    low eight (adjacent squares summed into dwords)
+
+        punpcklbw   xmm2,   xmm7                ; xx 20 xx 21 xx 22 xx 23 xx 24 xx 25 xx 26 xx 27
+        psubw       xmm4,   xmm3                ; difference between 0 1    high eight
+
+        pmaddwd     xmm4,   xmm4                ; SD         between 0 1    high eight
+        punpckhbw   xmm5,   xmm7                ; xx 28 xx 29 xx 2a xx 2b xx 2c xx 2d xx 2e xx 2f
+
+        psubw       xmm2,   xmm0                ; difference between 1 2    low eight
+        pmaddwd     xmm2,   xmm2                ; SD         between 1 2    low eight
+
+        psubw       xmm5,   xmm3                ; difference between 1 2    high eight
+        pmaddwd     xmm5,   xmm5                ; SD         between 1 2    high eight
+
+        paddd       xmm1,   xmm4                ; rows 0/1: low + high halves
+        paddd       xmm2,   xmm5                ; rows 1/2: low + high halves
+
+        paddd       xmm6,   xmm1                ; accumulated in xmm6
+        paddd       xmm6,   xmm2                ; s0 s1 s2 s3 (four dword partial sums, low lane first)
+
+        lea         eax,    [eax+ecx*2]         ; advance two rows; old row 2 becomes the next row 0
+        sub         edx,    1
+
+        jnz         WmtGetMBFrameVertVarLoop
+
+        movdqa      xmm0,   xmm6                ; s0 s1 s2 s3
+        punpckldq   xmm6,   xmm7                ; s0 00 s1 00  (two low dwords, each zero-extended to a qword)
+
+        punpckhdq   xmm0,   xmm7                ; s2 00 s3 00  (two high dwords, each zero-extended to a qword)
+        paddd       xmm0,   xmm6                ; s0+s2 00 s1+s3 00
+
+        movdqa      xmm6,   xmm0                ; s0+s2 00 s1+s3 00
+        psrldq      xmm0,   8;                  ; s1+s3 00 00 00   (high qword shifted down)
+
+        paddd       xmm0,   xmm6                ; low dword = s0+s1+s2+s3
+        movd        [FrameError], xmm0          ; store the low dword only
+    }
+
+    return FrameError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : WmtGetMBFieldVertVar
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Field-order vertical variance measure for the macro-block.
+ *
+ *  FUNCTION      : Sums the squared differences between vertically adjacent
+ *                  pixels of the same field (row r versus row r+2) across
+ *                  the 16 columns of the current source macro-block.
+ *
+ *  SPECIAL NOTE  : Fourteen row pairs are summed (r = 0..13), the same
+ *                  number of row pairs that WmtGetMBFrameVertVar sums.
+ *
+ ****************************************************************************/
+UINT32 WmtGetMBFieldVertVar( CP_INSTANCE *cpi )
+{
+    UINT32 FieldError;                                          // receives the 32-bit total stored by the asm block
+    INT32 Stride = cpi->pb.Configuration.VideoFrameWidth;       // used as the byte offset between successive rows
+//    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.Source];
+//sjlhack
+    UINT8 *SrcPtr = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[0].Source];
+
+    __asm
+    {
+        mov         ecx,    DWORD PTR [Stride]
+        mov         eax,    DWORD PTR [SrcPtr]
+        
+        pxor        xmm7,   xmm7                ; xmm7 = 0, used to zero-extend bytes to words
+        pxor        xmm6,   xmm6                ; xmm6 = four 32-bit partial sums, start at 0
+
+        mov         edx,    7                   ; 7 passes, two row pairs per pass (r/r+2 and r+1/r+3)
+
+WmtGetMBFieldVertVarLoop:
+
+        movdqa      xmm1,   [eax]               ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f  (row 0, aligned load)
+        movdqa      xmm0,   [eax+ecx*2]         ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f  (row 2, aligned load)
+            
+        movdqa      xmm2,   xmm0                ; 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f  (copy of row 2)
+        punpcklbw   xmm0,   xmm7                ; xx 20 xx 21 xx 22 xx 23 xx 24 xx 25 xx 26 xx 27
+        
+        movdqa      xmm3,   xmm1                ; 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f  (copy of row 0)
+        punpckhbw   xmm2,   xmm7                ; xx 28 xx 29 xx 2a xx 2b xx 2c xx 2d xx 2e xx 2f
+
+        punpcklbw   xmm1,   xmm7                ; xx 00 xx 01 xx 02 xx 03 xx 04 xx 05 xx 06 xx 07
+        punpckhbw   xmm3,   xmm7                ; xx 08 xx 09 xx 0a xx 0b xx 0c xx 0d xx 0e xx 0f
+
+
+        psubw       xmm0,   xmm1;               difference between 0 2    low eight
+        pmaddwd     xmm0,   xmm0;               SD         between 0 2    low eight
+        
+        psubw       xmm2,   xmm3;               difference between 0 2    high eight
+        pmaddwd     xmm2,   xmm2                ; SD         between 0 2    high eight
+
+        paddd       xmm0,   xmm2;               rows 0/2: low + high halves
+        lea         eax,    [eax + ecx]         ; step down one row
+
+        movdqa      xmm2,   [eax]               ; 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f  (row 1, aligned load)
+        movdqa      xmm3,   [eax + ecx*2]       ; 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f  (row 3, aligned load)
+
+        movdqa      xmm4,   xmm2    ;           copy of row 1
+        punpcklbw   xmm2,   xmm7                ; xx 10 xx 11 xx 12 xx 13 xx 14 xx 15 xx 16 xx 17
+
+        movdqa      xmm5,   xmm3                ; copy of row 3
+        punpckhbw   xmm4,   xmm7                ; xx 18 xx 19 xx 1a xx 1b xx 1c xx 1d xx 1e xx 1f
+
+        punpcklbw   xmm3,   xmm7                ; xx 30 xx 31 xx 32 xx 33 xx 34 xx 35 xx 36 xx 37
+        punpckhbw   xmm5,   xmm7                ; xx 38 xx 39 xx 3a xx 3b xx 3c xx 3d xx 3e xx 3f
+
+        psubw       xmm2,   xmm3                ; difference between 1 3    low eight
+        pmaddwd     xmm2,   xmm2                ; SD         between 1 3    low eight
+
+        psubw       xmm4,   xmm5                ; difference between 1 3    high eight
+        pmaddwd     xmm4,   xmm4                ; SD         between 1 3    high eight
+
+        paddd       xmm2,   xmm4                ; rows 1/3: low + high halves
+        paddd       xmm0,   xmm2                ; both row pairs of this pass
+
+        paddd       xmm6,   xmm0                ; accumulated in xmm6
+
+        lea         eax,    [eax+ecx]           ; second one-row step: two rows advanced per pass
+        sub         edx,    1
+
+        jnz         WmtGetMBFieldVertVarLoop
+
+        movdqa      xmm0,   xmm6                ; s0 s1 s2 s3 (four dword partial sums, low lane first)
+        punpckldq   xmm6,   xmm7                ; s0 00 s1 00  (two low dwords, each zero-extended to a qword)
+
+        punpckhdq   xmm0,   xmm7                ; s2 00 s3 00  (two high dwords, each zero-extended to a qword)
+        paddd       xmm0,   xmm6                ; s0+s2 00 s1+s3 00
+
+        movdqa      xmm6,   xmm0                ; s0+s2 00 s1+s3 00
+        psrldq      xmm0,   8;                  ; s1+s3 00 00 00   (high qword shifted down)
+
+        paddd       xmm0,   xmm6                ; low dword = s0+s1+s2+s3
+        movd        [FieldError], xmm0          ; store the low dword only
+    }
+
+    return FieldError;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock2dBil_SAD_wmt
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  INT32 SrcStride         : Stride of source block.
+ *                  UINT8 *RefPtr           : Pointer to reference block.
+ *                  UINT32 SrcPixelsPerLine : Stride of reference block.
+ *                  INT16 *HFilter          : Pointer to array of horizontal filter taps.
+ *                  INT16 *VFilter          : Pointer to array of vertical filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Produces a filtered fractional block prediction in 2-D
+ *                  using bi-linear filters and calculates the SAD.
+ *
+ *  SPECIAL NOTE  : Works on an 8x8 block. Each filter array is read as two
+ *                  8-word vectors with aligned loads; no EMMS is issued.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock2dBil_SAD_wmt
+( 
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 SrcPixelsPerLine, 
+    INT16 *HFilter, 
+    INT16 *VFilter 
+)
+{
+    UINT32 Error;                                   // receives the accumulated SAD from mm7
+
+    __asm
+    {
+        mov         eax,        HFilter             ; eax -> horizontal taps
+        mov         edi,        SrcPtr              ; edi -> source rows
+
+        mov         esi,        RefPtr              ; esi -> reference rows
+        mov         ecx,        8            ;        8 output rows
+
+        mov         edx,        SrcPixelsPerLine    ; edx = byte step between reference rows
+               
+        movdqa      xmm1,       [eax]               ; xmm1 = first 8 words of HFilter  (aligned load)
+        movdqa      xmm2,       [eax+16]            ; xmm2 = second 8 words of HFilter (aligned load)
+        
+        mov         eax,        VFilter             ; eax -> vertical taps from here on
+        pxor        xmm0,       xmm0                ; xmm0 = 0 for byte -> word unpacking
+
+        // get the first horizontal line done       ;
+        movdqu      xmm3,       [esi]               ; p0 p1 p2 ... p15 (16 bytes read, 9 used)
+        movdqa      xmm4,       xmm3                ; make a copy of current line
+        
+        punpcklbw   xmm3,       xmm0                ; words p0 .. p7
+        psrldq      xmm4,       1                   ; p1 p2 ... p15 00 (shifted down one byte)
+        
+        pmullw      xmm3,       xmm1                ; p[i]   * first horizontal tap vector
+        punpcklbw   xmm4,       xmm0                ; words p1 .. p8
+
+        pmullw      xmm4,       xmm2                ; p[i+1] * second horizontal tap vector
+        paddw       xmm3,       xmm4                ; sum of the two products
+
+        paddw       xmm3,       rd                  ; add rounding constant (rd is defined outside this function)
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        
+        movdqa      xmm5,       xmm3                ; xmm5 = horizontally filtered row kept for the vertical pass
+        pxor        mm7,        mm7                 ; mm7 = SAD accumulator
+
+        add         esi,        edx                 ; next line
+NextRow:
+        pmullw      xmm5,       [eax]               ; previous filtered row * first 8 words of VFilter
+        movdqu      xmm3,       [esi]               ; p0 p1 p2 ... p15 of the current reference row
+
+        movdqa      xmm4,       xmm3                ; make a copy of current line        
+        punpcklbw   xmm3,       xmm0                ; words p0 .. p7
+
+        psrldq      xmm4,       1                   ; p1 p2 ... p15 00 (shifted down one byte)
+        pmullw      xmm3,       xmm1                ; p[i]   * first horizontal tap vector
+        punpcklbw   xmm4,       xmm0                ; words p1 .. p8
+
+        movdqa      xmm6,       xmm5                ; xmm6 = weighted previous row
+        pmullw      xmm4,       xmm2                ; p[i+1] * second horizontal tap vector
+
+        paddw       xmm3,       xmm4                ; sum of the two products
+        paddw       xmm3,       rd                  ; add rounding constant
+
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        movdqa      xmm5,       xmm3                ; make a copy for the next row
+        
+        pmullw      xmm3,       [eax+16]            ; current filtered row * second 8 words of VFilter
+        paddw       xmm6,       xmm3                ; vertical sum of the two weighted rows
+        
+
+        paddw       xmm6,       rd                  ; xmm6 += round value
+        psraw       xmm6,       FILTER_SHIFT        ; xmm6 >>= FILTER_SHIFT (i.e. /128 only if FILTER_SHIFT is 7 - defined elsewhere)
+
+        packuswb    xmm6,       xmm0                ; pack words to bytes with unsigned saturation
+        movdq2q     mm0,        xmm6                ; low 8 predicted bytes -> mm0
+
+        movq        mm1,        [edi]               ; 8 source bytes
+        psadbw      mm0,        mm1                 ; SAD of this row
+        
+        paddd       mm7,        mm0                 ; accumulate row SAD
+        
+        add         esi,        edx                 ; next line
+        add         edi,        SrcStride           ;                   ; 
+
+        dec         ecx                             ; one row done
+        jne         NextRow                         
+        
+        movd        Error,      mm7;                NOTE(review): MMX state is still live here (no emms) - confirm the caller clears it
+
+    }
+    return  Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock1d_vb8_SAD_wmt
+ *
+ *  INPUTS        : UINT8 *SrcPtr        : Pointer to source block.
+ *                  INT32 SrcStride      : Stride of source block.
+ *                  UINT8 *RefPtr        : Pointer to reference block.
+ *                  UINT32 PixelsPerLine : Stride of reference block.
+ *                  UINT32 PixelStep     : Not referenced by this routine.
+ *                  INT16 *Filter        : Pointer to array of filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Produces a filtered fractional block vertically 
+ *                  using bi-linear filters and calculates the SAD.
+ *
+ *  SPECIAL NOTE  : Works on an 8x8 block. The filter array is read as two
+ *                  8-word vectors with aligned loads; no EMMS is issued.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_vb8_SAD_wmt
+( 
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 PixelsPerLine, 
+    UINT32 PixelStep, 
+    INT16 *Filter 
+)
+{
+    UINT32 Error;                                   // receives the accumulated SAD from mm7
+    __asm
+
+    {
+
+        mov         edi,        Filter
+        movdqa      xmm1,       [edi]               ; xmm1 = first 8 words of Filter  (aligned load)
+        movdqa      xmm2,       [edi + 16]          ; xmm2 = second 8 words of Filter (aligned load)
+
+        mov         edi,        SrcPtr              ; edi -> source rows
+        mov         esi,        RefPtr              ; esi -> reference rows
+        
+        mov         ecx,        8                   ; 8 output rows
+
+        mov         edx,        SrcStride           ; edx = byte step between source rows
+        mov         eax,        PixelsPerLine;      eax = byte step between reference rows
+        
+        pxor        mm7,        mm7                 ; mm7 = SAD accumulator
+        pxor        xmm0,       xmm0                ; xmm0 = 00000000
+
+nextrow:
+        movq        xmm3,       QWORD PTR [esi]     ; xmm3 = p0..p7 of this row (8 bytes only - no over-read)
+        punpcklbw   xmm3,       xmm0                ; xmm3 = p0..p7 as words
+        pmullw      xmm3,       xmm1                ; xmm3 *= kernel 0 modifiers.
+
+        movq        xmm4,       QWORD PTR [esi + eax] ; xmm4 = p0..p7 of the row below (8 bytes only)
+        punpcklbw   xmm4,       xmm0                ; xmm4 = p0..p7 as words
+        pmullw      xmm4,       xmm2                ; xmm4 *= kernel 1 modifiers.
+        paddw       xmm3,       xmm4                ; xmm3 += xmm4
+
+        paddw       xmm3,       rd                  ; xmm3 += round value (rd is defined outside this function)
+        psraw       xmm3,       FILTER_SHIFT        ; xmm3 >>= FILTER_SHIFT
+        packuswb    xmm3,       xmm0                ; pack words to bytes with unsigned saturation
+
+        movdq2q     mm0,        xmm3                ; low 8 predicted bytes -> mm0
+        movq        mm1,        [edi]               ; 8 source bytes
+        
+        psadbw      mm0,        mm1                 ; SAD of this row
+        paddd       mm7,        mm0                 ; accumulate row SAD
+        
+        // Each iteration re-reads the row that the previous iteration loaded
+        // as its second tap.  Since the recon block should be in cache this
+        // shouldn't cost much, but it is avoidable.
+        add         esi,        eax                 ; next reference row
+        add         edi,        edx                 ; next source row
+
+        dec         ecx                             ; decrement count
+        jnz         nextrow                         ; next row
+
+        movd        Error,      mm7                 ; NOTE(review): MMX state is still live here (no emms) - confirm the caller clears it
+
+    }
+    return Error;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : FilterBlock1d_hb8_SAD_wmt
+ *
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  INT32 SrcStride         : Stride of source block.
+ *                  UINT8 *RefPtr           : Pointer to reference block.
+ *                  UINT32 SrcPixelsPerLine : Stride of reference block.
+ *                  UINT32 PixelStep        : Not referenced; a step of one pixel is built in.
+ *                  INT16 *Filter           : Pointer to array of filter taps.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Produces a filtered fractional block horizontally 
+ *                  using bi-linear filters and calculates the SAD.
+ *
+ *  SPECIAL NOTE  : Works on an 8x8 block. The filter array is read as two
+ *                  8-word vectors with aligned loads; no EMMS is issued.
+ *
+ ****************************************************************************/
+_inline UINT32 FilterBlock1d_hb8_SAD_wmt
+(
+    UINT8 *SrcPtr, 
+    INT32 SrcStride, 
+    UINT8 *RefPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep, 
+    INT16 *Filter 
+)
+{
+    UINT32 Error = 0;                               // overwritten with the accumulated SAD from mm7
+
+    __asm
+    {
+
+        mov         edi,        Filter
+        movdqa      xmm1,       [edi]               ; xmm1 = first 8 words of Filter  (aligned load)
+        movdqa      xmm2,       [edi + 16]          ; xmm2 = second 8 words of Filter (aligned load)
+
+        mov         edi,        SrcPtr
+		mov			esi,        RefPtr
+		
+        pxor		xmm0,       xmm0                ; xmm0 = 00000000
+        pxor        mm7,        mm7                 ; mm7 = 0
+        
+        mov         ecx,        8                   ; 8 output rows
+
+        mov         edx,        SrcStride
+        mov         eax,        SrcPixelsPerLine;   eax = byte step between reference rows
+
+nextrow:
+        movdqu		xmm3,       [esi]               ; xmm3 = p0..p15 (16 bytes read, 9 used)
+        movdqu      xmm5,       xmm3                ; xmm5 = p0..p15 (register copy)
+
+        punpcklbw   xmm3,       xmm0                ; xmm3 = p0..p7 as words
+        pmullw      xmm3,       xmm1                ; xmm3 *= kernel 0 modifiers.
+
+        psrldq      xmm5,       1                   ; xmm5 = p1..p15, 00 (shifted down one byte)
+        punpcklbw   xmm5,       xmm0                ; xmm5 = p1..p8 as words
+        
+        pmullw      xmm5,       xmm2                ; xmm5 *= kernel 1 modifiers
+        paddw       xmm3,       xmm5                ; xmm3 += xmm5
+
+        paddw       xmm3,       rd                  ; xmm3 += round value
+        psraw       xmm3,       FILTER_SHIFT        ; xmm3 >>= FILTER_SHIFT (i.e. /128 only if FILTER_SHIFT is 7 - defined elsewhere)
+        
+        packuswb    xmm3,       xmm0                ; pack words to bytes with unsigned saturation
+        
+        movdq2q     mm0,        xmm3                ; low 8 predicted bytes -> mm0
+        movq        mm1,        [edi]               ; read src
+    
+        psadbw      mm0,        mm1                 ; SAD of this row
+        paddd       mm7,        mm0                 ; accumulate row SAD
+
+        add         esi,        eax                 ; next line
+        add         edi,        edx                 ; next source row
+
+        dec         ecx                             ; decrement count
+        jnz         nextrow                         ; next row
+
+        movd        Error,        mm7;              NOTE(review): MMX state is still live here (no emms) - confirm the caller clears it
+    }
+    return Error;
+}
+                         
+/****************************************************************************
+ *
+ *  ROUTINE       : FiltBlockBilGetSad_wmt
+ *
+ *  INPUTS        : UINT8 *SrcPtr        : Pointer to source block.
+ *                  INT32 SrcStride      : Stride of source block.
+ *                  UINT8 *ReconPtr1     : Pointer to first reference block.
+ *                  UINT8 *ReconPtr2     : Pointer to second reference block.
+ *                  INT32  PixelsPerLine : Stride of the reference blocks.
+ *                  INT32  ModX          : Fraction part of MV x-component.
+ *                  INT32  ModY          : Fraction part of MV y-component.
+ *                  UINT32 BestSoFar     : Best error found so far; also the value
+ *                                         returned when no filter case applies.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: SAD.
+ *
+ *  FUNCTION      : Produces a filtered fractional pel block using
+ *                  bi-linear filters and calculates the SAD.
+ *
+ *  SPECIAL NOTE  : ReconPtr1 and ReconPtr2 are expected to be 1, PixelsPerLine
+ *                  or PixelsPerLine +/- 1 bytes apart (in either order).
+ *
+ ****************************************************************************/
+UINT32 FiltBlockBilGetSad_wmt
+(
+    UINT8 *SrcPtr,
+    INT32 SrcStride,
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    INT32 PixelsPerLine,
+    INT32 ModX, 
+    INT32 ModY,
+    UINT32 BestSoFar
+)
+{
+    INT32  diff;
+    UINT32 Error = BestSoFar;   // Defined fallback (was uninitialised): "no better than the best so far". NOTE(review): confirm with callers.
+
+    // Signed byte distance between the two reference pointers; its size selects the filter direction below.
+    diff = (INT32)(ReconPtr2 - ReconPtr1);
+
+    // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+    // This works out to be what we want... despite the pointer swapping that goes on below.
+    // For example... if the X component of the vector is a +ve ModX = X%8.
+    //                if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+    if ( diff < 0 )
+    {                                           // swap pointers so ReconPtr1 is the smaller (above / left) one
+        UINT8 *temp = ReconPtr1;
+        ReconPtr1   = ReconPtr2;
+        ReconPtr2   = temp;
+        diff        = (INT32)(ReconPtr2 - ReconPtr1);
+    }
+
+    if ( diff == 1 )                            // Fractional pixel in horizontal only
+        Error = FilterBlock1d_hb8_SAD_wmt( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, 1, BilinearFilters_wmt[ModX] );
+    else if ( diff == PixelsPerLine )           // Fractional pixel in vertical only
+        Error = FilterBlock1d_vb8_SAD_wmt( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, PixelsPerLine, BilinearFilters_wmt[ModY] );
+    else if ( diff == PixelsPerLine - 1 )       // ReconPtr1 is Top right: start the 2-D filter one pixel to its left
+        Error = FilterBlock2dBil_SAD_wmt( SrcPtr, SrcStride, ReconPtr1-1, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+    else if ( diff == PixelsPerLine + 1 )       // ReconPtr1 is Top left
+        Error = FilterBlock2dBil_SAD_wmt( SrcPtr, SrcStride, ReconPtr1, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+
+    return Error;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtComputeBlockReconError
+ *
+ *  INPUTS        : CP_INSTANCE *cpi : Pointer to encoder instance.
+ *                  UINT32 bp        : Index of the block in pb.mbi.blockDxInfo[].
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Error measure for the block (Scaled by 2^6)
+ *
+ *  FUNCTION      : Computes a reconstruction error measure for an 8x8 block.
+ *
+ *  SPECIAL NOTES : Value is (sum of squared differences + 2 * largest absolute difference squared) * 64.
+ *
+ ****************************************************************************/
+
+UINT32 WmtComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp )
+{
+    INT32  XXSum;                               // sum of squared source/recon differences, written by the asm block
+	INT32  MaxXXDiff;                           // largest absolute source/recon difference, written by the asm block
+
+	UINT8 *NewDataPtr   = &cpi->yuv1ptr[cpi->pb.mbi.blockDxInfo[bp].Source];
+	UINT8 *RefDataPtr1  = &cpi->pb.ThisFrameRecon[cpi->pb.mbi.blockDxInfo[bp].thisRecon];
+
+    INT32  SourceStride = cpi->pb.mbi.blockDxInfo[bp].CurrentSourceStride;
+	INT32  ReconStride  = cpi->pb.mbi.blockDxInfo[bp].CurrentReconStride;
+    __asm
+    {
+
+        mov         esi,        NewDataPtr
+        mov         edi,        RefDataPtr1
+
+        mov         eax,        SourceStride
+        mov         edx,        ReconStride
+
+        lea         ecx,        [esi+eax*8]             // end marker: source pointer after 8 rows
+
+        pxor        xmm7,       xmm7                    // zero, used for unpacking
+        pxor        xmm6,       xmm6                    // running sum of squared differences (4 dwords)
+
+        pxor        xmm5,       xmm5                    // running maximum absolute difference
+        
+WmtReconErrorLoop:
+        movq        xmm0,       QWORD ptr [esi]         // s0 s1 s2 s3 s4 s5 s6 s7 xx xx xx xx xx xx xx xx
+        movq        xmm1,       QWORD ptr [edi]         // r0 r1 r2 r3 r4 r5 r6 r7 xx xx xx xx xx xx xx xx
+
+        movdqa      xmm2,       xmm0                    //  make a copy
+        movdqa      xmm3,       xmm1                    //  make a copy
+
+        psubusb     xmm0,       xmm1                    // s - r, saturated at 0
+        psubusb     xmm3,       xmm2                    // r - s, saturated at 0
+
+        por         xmm0,       xmm3                    // abs( d0 d1 d2 d3 d4 d5 d6 d7 xx xx xx xx xx xx xx xx )
+        movdqa      xmm2,       xmm0                    // make a copy
+
+        punpcklbw   xmm0,       xmm7                    // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)       
+        punpcklbw   xmm2,       xmm7                    // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)       
+
+        movdqa      xmm1,       xmm2                    // abs ( xxd0 xxd1 xxd2 xxd3 xxd4 xxd5 xxd6 xxd7)       
+        pmaddwd      xmm0,       xmm0                    // squares, adjacent pairs summed into 4 dwords
+    
+        punpcklwd   xmm1,       xmm7                    //  xxxx xxd0 xxxx xxd1 xxxx xxd2 xxxx xxd3
+        punpckhwd   xmm2,       xmm7                    //  xxxx xxd4 xxxx xxd5 xxxx xxd6 xxxx xxd7
+        
+        pmaxsw      xmm1,       xmm2                    //  xxxx xxM0 xxxx xxM1 xxxx xxM2 xxxx xxM3
+        movdqa      xmm2,       xmm1                    //  xxxx xxM0 xxxx xxM1 xxxx xxM2 xxxx xxM3
+
+        punpckldq   xmm1,       xmm7                    //  xxxx xxM0 xxxx xxxx xxxx xxM1 xxxx xxxx  (low lane first)
+        punpckhdq   xmm2,       xmm7                    //  xxxx xxM2 xxxx xxxx xxxx xxM3 xxxx xxxx  (low lane first)
+        
+        pmaxsw      xmm1,       xmm2                    //  max0 = max(M0,M2) in the low qword, max1 = max(M1,M3) in the high qword
+        movdqa      xmm2,       xmm1                    //  copy of max0 / max1
+
+        psrldq      xmm1,       8                       //  max1 shifted down into the low qword
+        pmaxsw      xmm1,       xmm2                    //  low dword = largest absolute difference in this row
+
+        pmaxsw      xmm5,       xmm1                    //  keep the largest value seen so far
+        paddd       xmm6,       xmm0                    //  accumulate this row's squared differences
+
+        lea         esi,        [esi+eax]               // next source row
+        lea         edi,        [edi+edx]               // next recon row
+
+        cmp         ecx,        esi                     // all 8 rows done?
+        jne         WmtReconErrorLoop
+
+        movd        MaxXXDiff,  xmm5                    // get the max
+                
+        movdqa      xmm4,       xmm6                    // s0 s1 s2 s3 (four dword partial sums, low lane first)
+        psrldq      xmm4,       8                       // s2 s3 00 00
+
+        paddd       xmm6,       xmm4                    // s0+s2 s1+s3 xx xx
+        movdqa      xmm4,       xmm6                    // s0+s2 s1+s3 xx xx
+
+        psrldq      xmm4,       4                       // s1+s3 xx xx 00
+        paddd       xmm6,       xmm4                    // low dword = s0+s1+s2+s3
+        
+        movd        XXSum,      xmm6
+        
+
+    }
+	return  (UINT32)(XXSum + (2 * MaxXXDiff*MaxXXDiff)) << 6;   // (sum of squares + 2 * max difference squared), times 64
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm
new file mode 100644
index 00000000..39fa7c3b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/MmxEncodeMath.asm
@@ -0,0 +1,371 @@
+;
+; **-MmxEncodeMath.asm
+;
+; MMX versions of SUB8, SUB8_AV2, SUB8 with fixed subtract of 128
+;
+;******************************************************************
+;	Revision History
+;	
+;	1.01	JBB	 23-Mar-01  Fixed frame	updating for preprocessor
+;	1.00	YWX	 dd-mmm-yy	Configuration baseline from Jong Chen's code
+;
+;******************************************************************
+
+
+
+        .586
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .MMX
+
+; macros (the macro definitions follow the structure declarations further down)
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+        ALIGN 32
+
+; local constants: four packed 16-bit words, each 0080h (128); MmxSUB8_128 loads this into mm1
+OneTwentyEight    QWORD  00080008000800080h
+
+@CurSeg ENDS
+
+
+;
+; external variables
+;
+
+; external variables go here
+
+ 
+
+; structures: overlays of the stack as seen from [esp] once the six prologue pushes are done
+SUB8Params  STRUC
+                    dd  6 dup (?)   ;6 pushed regs
+                    dd  ?           ;return address
+    FiltPtr         dd  ?           ; source block pointer (loaded into eax by MmxSUB8)
+    ReconPtr        dd  ?           ; reference block pointer (loaded into ebx)
+    DctInputPtr     dd  ?           ; destination for the 16-bit differences (loaded into ecx)
+    old_ptr1        dd  ?           ; occupies an argument slot; its load is commented out
+    new_ptr1        dd  ?           ; occupies an argument slot; its load is commented out
+    PixelsPerLine   dd  ?           ; byte step between source rows (loaded into edi)
+    ReconPixelsPerLine dd ?         ; byte step between reference rows (loaded into ebp)
+SUB8Params  ENDS
+
+SUB8_128Params  STRUC
+                    dd  6 dup (?)   ;6 pushed regs
+                    dd  ?           ;return address
+    FiltPtr2        dd  ?           ; source block pointer (loaded into eax by MmxSUB8_128)
+    DctInputPtr2    dd  ?           ; destination for the 16-bit results (loaded into ecx)
+    old_ptr12       dd  ?           ; occupies an argument slot; its load is commented out
+    new_ptr12       dd  ?           ; occupies an argument slot; its load is commented out
+    PixelsPerLine2  dd  ?           ; byte step between source rows (loaded into edi)
+SUB8_128Params  ENDS
+
+SUB8AV2Params  STRUC
+                    dd  6 dup (?)   ;6 pushed regs
+                    dd  ?           ;return address
+    FiltPtr         dd  ?           ; source block pointer (loaded into eax by MmxSUB8AV2)
+    ReconPtr1       dd  ?           ; first reference block pointer (loaded into ebx)
+    ReconPtr2       dd  ?           ; second reference block pointer (loaded into ebp)
+    DctInputPtr     dd  ?           ; destination for the 16-bit differences (loaded into ecx)
+    old_ptr1        dd  ?           ; occupies an argument slot; its load is commented out
+    new_ptr1        dd  ?           ; occupies an argument slot; its load is commented out
+    PixelsPerLine   dd  ?           ; byte step between source rows (loaded into edi)
+    ReconPixelsPerLine dd ?         ; byte step between reference rows; re-read from the stack every row
+SUB8AV2Params  ENDS
+
+; SUB8Calc8Bytes: one 8-pixel row of (source - reference), stored as eight 16-bit words
+; at [ecx+Index]. Expects eax/ebx = source/reference row, edi/ebp = their row steps and
+; mm7 = 0. Advances eax and ebx to the next row; uses mm0-mm3 as scratch.
+SUB8Calc8Bytes MACRO Index:REQ
+    movq        mm0,[eax]                   ; mm0 = 8 source bytes (FiltPtr row)
+    movq        mm1,[ebx]                   ; mm1 = 8 reference bytes (ReconPtr row)
+    movq        mm2,mm0                     ; copy: the high four bytes are unpacked from mm2
+    movq        mm3,mm1                     ; copy: the high four bytes are unpacked from mm3
+
+    ; widen UINT8 to 16-bit words by interleaving with the zero register mm7
+;    movq        mm6,[esi]
+    punpcklbw   mm0,mm7                     ; mm0 = source bytes 0-3 as words
+    punpcklbw   mm1,mm7                     ; mm1 = reference bytes 0-3 as words
+    punpckhbw   mm2,mm7                     ; mm2 = source bytes 4-7 as words
+    punpckhbw   mm3,mm7                     ; mm3 = reference bytes 4-7 as words
+
+    ; per-word difference
+    psubw       mm0,mm1                     ; mm0 = source - reference, pixels 0-3
+    psubw       mm2,mm3                     ; mm2 = source - reference, pixels 4-7
+
+    ; Disabled: copy of new_ptr1 into old_ptr1 (the "screen canvas" update)
+    ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH ); 
+;    movq        [edx],mm6
+;     add         edx,edi
+;     add         esi,edi
+
+    movq        [ecx+Index],mm0             ; store differences for pixels 0-3 (8 bytes)
+    movq        [ecx+Index+8],mm2           ; store differences for pixels 4-7 (8 bytes)
+
+    ; step both input pointers to the next row
+    add         eax,edi
+    add         ebx,ebp
+ENDM
+
+; **-SUB8_128Calc8Bytes: one 8-pixel row of (source - mm1), stored as eight 16-bit words
+; at [ecx+Index]. Expects eax = source row, edi = row step, mm7 = 0 and mm1 = the packed
+; words to subtract (MmxSUB8_128 loads OneTwentyEight). Advances eax; uses mm0 and mm2.
+SUB8_128Calc8Bytes MACRO Index:REQ
+    movq        mm0,[eax]                   ; mm0 = 8 source bytes (FiltPtr row)
+    movq        mm2,mm0                     ; copy: the high four bytes are unpacked from mm2
+
+    ; widen UINT8 to 16-bit words by interleaving with the zero register mm7
+;    movq        mm6,[esi]
+    punpcklbw   mm0,mm7                     ; mm0 = source bytes 0-3 as words
+    punpckhbw   mm2,mm7                     ; mm2 = source bytes 4-7 as words
+
+    ; per-word subtraction of the constant held in mm1
+    psubw       mm0,mm1                     ; mm0 = source - mm1, pixels 0-3
+    psubw       mm2,mm1                     ; mm2 = source - mm1, pixels 4-7
+
+    ; Disabled: copy of new_ptr1 into old_ptr1 (the "screen canvas" update)
+    ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH ); 
+;    movq        [edx],mm6
+;    add         edx,edi
+;    add         esi,edi
+
+    movq        [ecx+Index],mm0             ; store results for pixels 0-3 (8 bytes)
+    movq        [ecx+Index+8],mm2           ; store results for pixels 4-7 (8 bytes)
+
+    ; step the source pointer to the next row
+    add         eax,edi
+ENDM
+
+; **-SUB8AV2Calc8Bytes: one 8-pixel row of source - ((ref1 + ref2) >> 1), stored as eight
+; 16-bit words at [ecx+Index]. Expects eax = source row, ebx/ebp = the two reference rows,
+; edi = source row step, mm7 = 0, and esp still pointing at the six saved registers.
+SUB8AV2Calc8Bytes MACRO Index:REQ
+    movq        mm0,[eax]                   ; mm0 = 8 source bytes (FiltPtr row)
+    movq        mm1,[ebx]                   ; mm1 = 8 bytes of the first reference (ReconPtr1 row)
+    movq        mm4,[ebp]                   ; mm4 = 8 bytes of the second reference (ReconPtr2 row)
+    movq        mm2,mm0                     ; copy: the high four bytes are unpacked from mm2
+    movq        mm3,mm1                     ; copy: the high four bytes are unpacked from mm3
+    movq        mm5,mm4                     ; copy: the high four bytes are unpacked from mm5
+
+    ; widen UINT8 to 16-bit words by interleaving with the zero register mm7
+;    movq        mm6,[esi]
+    punpcklbw   mm0,mm7                     ; mm0 = source bytes 0-3 as words
+    punpcklbw   mm1,mm7                     ; mm1 = ReconPtr1 bytes 0-3 as words
+    punpcklbw   mm4,mm7                     ; mm4 = ReconPtr2 bytes 0-3 as words
+
+    punpckhbw   mm2,mm7                     ; mm2 = source bytes 4-7 as words
+    punpckhbw   mm3,mm7                     ; mm3 = ReconPtr1 bytes 4-7 as words
+    punpckhbw   mm5,mm7                     ; mm5 = ReconPtr2 bytes 4-7 as words
+
+    ; average the two references; the shift discards the low bit, so there is no rounding
+    paddw       mm1,mm4                     ; mm1 = ReconPtr1 + ReconPtr2, pixels 0-3
+    paddw       mm3,mm5                     ; mm3 = ReconPtr1 + ReconPtr2, pixels 4-7
+    psrlw       mm1,1                       ; mm1 = (ReconPtr1 + ReconPtr2) >> 1
+    psrlw       mm3,1                       ; mm3 = (ReconPtr1 + ReconPtr2) >> 1
+
+    psubw       mm0,mm1                     ; mm0 = source - average, pixels 0-3
+    psubw       mm2,mm3                     ; mm2 = source - average, pixels 4-7
+
+    ; Disabled: copy of new_ptr1 into old_ptr1 (the "screen canvas" update)
+    ;memcpy( old_ptr1, new_ptr1, BLOCK_HEIGHT_WIDTH ); 
+;    movq        [edx],mm6
+;    add         edx,edi
+;    add         esi,edi
+
+    movq        [ecx+Index],mm0             ; store differences for pixels 0-3 (8 bytes)
+    movq        [ecx+Index+8],mm2           ; store differences for pixels 4-7 (8 bytes)
+
+    ; step all three input pointers; the reference step is read from the stack frame
+    add         eax,edi
+    add         ebx,(SUB8AV2Params PTR [esp]).ReconPixelsPerLine
+    add         ebp,(SUB8AV2Params PTR [esp]).ReconPixelsPerLine
+ENDM
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0                       ; defined as 0; not referenced by the routines in this file
+
+
+;
+; **-MmxSUB8: 8x8 block difference, source minus reference, widened to 16 bits
+;
+; Input:
+;   FiltPtr              source block, 8 bytes read per row
+;   ReconPtr             reference block, 8 bytes read per row
+;   DctInputPtr          receives 8 rows x 8 words (128 bytes)
+;   old_ptr1, new_ptr1   never read; their loads below are commented out
+;   PixelsPerLine, ReconPixelsPerLine   byte step per row for FiltPtr / ReconPtr
+;
+; Output:
+;   DctInputPtr[0..63] = FiltPtr pixel - ReconPtr pixel, row by row
+;------------------------------------------------
+; void MmxSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, 
+;               INT32 PixelsPerLine, INT32 ReconPixelsPerLine )
+; Note: eax is overwritten and no EMMS is executed before returning.
+        .CODE
+
+NAME MmxSUB8
+
+PUBLIC MmxSUB8_
+PUBLIC _MmxSUB8
+MmxSUB8_:
+_MmxSUB8:
+
+    push    ecx                             ; six pushes: must match the "6 pushed regs" slot of SUB8Params
+    push    ebx 
+    push    edx
+    push    esi
+    push    edi
+    push    ebp
+    
+
+	mov         eax,(SUB8Params PTR [esp]).FiltPtr
+    mov         ebx,(SUB8Params PTR [esp]).ReconPtr
+    mov         ecx,(SUB8Params PTR [esp]).DctInputPtr
+;    mov         edx,(SUB8Params PTR [esp]).old_ptr1
+;    mov         esi,(SUB8Params PTR [esp]).new_ptr1
+    mov         edi,(SUB8Params PTR [esp]).PixelsPerLine
+    mov         ebp,(SUB8Params PTR [esp]).ReconPixelsPerLine
+
+    pxor        mm7,mm7                     ; mm7 = 0, the zero source the macro unpacks against
+
+    LoopCtr = 0                             ; assembly-time counter = byte offset into DctInputPtr
+WHILE LoopCtr LT 128
+    SUB8Calc8Bytes <LoopCtr>
+    LoopCtr = LoopCtr + 16
+ENDM
+
+theExit1:
+        pop     ebp                         ; restore in reverse push order
+        pop     edi
+        pop     esi
+        pop     edx
+        pop     ebx
+        pop     ecx
+
+
+    ret
+
+;
+; **-MmxSUB8_128: 8x8 block minus the constant 128, widened to 16 bits
+;
+; Input:
+;   FiltPtr              source block, 8 bytes read per row
+;   DctInputPtr          receives 8 rows x 8 words (128 bytes)
+;   old_ptr1             never read; its load below is commented out
+;   new_ptr1             never read; its load below is commented out
+;   PixelsPerLine        byte step per row for FiltPtr
+;
+; Output:
+;   DctInputPtr[0..63] = FiltPtr pixel - 128, row by row
+;------------------------------------------------
+; void MmxSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, 
+;               INT32 PixelsPerLine )
+; Note: eax is overwritten and no EMMS is executed before returning.
+        .CODE
+
+NAME MmxSUB8_128
+
+PUBLIC MmxSUB8_128_
+PUBLIC _MmxSUB8_128
+MmxSUB8_128_:
+_MmxSUB8_128:
+
+    push    ecx                             ; six pushes: must match the "6 pushed regs" slot of SUB8_128Params
+    push    ebx 
+    push    edx
+    push    esi
+    push    edi
+    push    ebp
+    
+
+	mov         eax,(SUB8_128Params PTR [esp]).FiltPtr2
+    mov         ecx,(SUB8_128Params PTR [esp]).DctInputPtr2
+;    mov         edx,(SUB8_128Params PTR [esp]).old_ptr12
+;    mov         esi,(SUB8_128Params PTR [esp]).new_ptr12
+    mov         edi,(SUB8_128Params PTR [esp]).PixelsPerLine2
+
+    movq        mm1,OneTwentyEight          ; mm1 = four words of 128, the value the macro subtracts
+    pxor        mm7,mm7                     ; mm7 = 0, the zero source the macro unpacks against
+
+    LoopCtr = 0                             ; assembly-time counter = byte offset into DctInputPtr
+WHILE LoopCtr LT 128
+    SUB8_128Calc8Bytes <LoopCtr>
+    LoopCtr = LoopCtr + 16
+ENDM
+
+theExit3:
+        pop     ebp                         ; restore in reverse push order
+        pop     edi
+        pop     esi
+        pop     edx
+        pop     ebx
+        pop     ecx
+
+
+    ret
+
+;
+; **-MmxSUB8AV2: 8x8 block minus the truncated average of two reference blocks
+;
+; Input:
+;   FiltPtr              source block, 8 bytes read per row
+;   ReconPtr1, ReconPtr2 the two reference blocks, 8 bytes read per row each
+;   DctInputPtr          receives 8 rows x 8 words (128 bytes)
+;   old_ptr1, new_ptr1   never read; their loads below are commented out
+;   PixelsPerLine, ReconPixelsPerLine   byte step per row for FiltPtr / both references
+;
+; Output:
+;   DctInputPtr[0..63] = FiltPtr pixel - ((ReconPtr1 pixel + ReconPtr2 pixel) >> 1)
+;------------------------------------------------
+; void MmxSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, 
+;                  INT32 PixelsPerLine, INT32 ReconPixelsPerLine )
+; Note: eax is overwritten and no EMMS is executed before returning.
+        .CODE
+
+NAME MmxSUB8AV2
+
+PUBLIC MmxSUB8AV2_
+PUBLIC _MmxSUB8AV2
+MmxSUB8AV2_:
+_MmxSUB8AV2:
+
+    push    ecx                             ; six pushes: must match the "6 pushed regs" slot of SUB8AV2Params
+    push    ebx 
+    push    edx
+    push    esi
+    push    edi
+    push    ebp
+    
+
+	mov         eax,(SUB8AV2Params PTR [esp]).FiltPtr
+    mov         ebx,(SUB8AV2Params PTR [esp]).ReconPtr1
+    mov         ecx,(SUB8AV2Params PTR [esp]).DctInputPtr
+;   mov         edx,(SUB8AV2Params PTR [esp]).old_ptr1
+;   mov         esi,(SUB8AV2Params PTR [esp]).new_ptr1
+    mov         edi,(SUB8AV2Params PTR [esp]).PixelsPerLine
+    mov         ebp,(SUB8AV2Params PTR [esp]).ReconPtr2       ; ebp is a data pointer here; the macro reads the recon step from [esp]
+
+    pxor        mm7,mm7                     ; mm7 = 0, the zero source the macro unpacks against
+
+    LoopCtr = 0                             ; assembly-time counter = byte offset into DctInputPtr
+WHILE LoopCtr LT 128
+    SUB8AV2Calc8Bytes <LoopCtr>
+    LoopCtr = LoopCtr + 16
+ENDM
+
+theExit2:
+        pop     ebp                         ; restore in reverse push order
+        pop     edi
+        pop     esi
+        pop     edx
+        pop     ebx
+        pop     ecx
+
+
+    ret
+
+;************************************************
+        END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c
new file mode 100644
index 00000000..ef0917d0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/WmtTransform.c
@@ -0,0 +1,255 @@
+/****************************************************************************
+ * 
+ *   Module Title :     WmtTransform.c
+ *
+ *   Description  :     Subtraction functions.
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include "compdll.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#ifdef _MSC_VER 
+#pragma warning(disable:4799)   /* C4799: function has no EMMS instruction */
+#pragma warning(disable:4731)   /* C4731: frame pointer register 'ebp' modified by inline assembly (see WmtSUB8AV2) */
+#endif
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+_declspec(align(16)) static UINT16 Eight128s[8] = { 128, 128, 128, 128, 128, 128, 128, 128 };   /* loaded with movdqa in WmtSUB8_128, hence the 16-byte alignment */
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtSUB8
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Source block; 8 bytes are read per row.
+ *                  UINT8 *ReconPtr    : Reference block; 8 bytes are read per row.
+ *                  INT16 *DctInputPtr : Destination for 64 words; written with movdqa (aligned store).
+ *                  UINT8 *old_ptr1    : Unused.
+ *                  UINT8 *new_ptr1    : Unused.
+ *                  INT32 SourceStride : Byte step between FiltPtr rows.
+ *                  INT32 ReconStride  : Byte step between ReconPtr rows.
+ *					
+ *  OUTPUTS       : DctInputPtr[0..63] = FiltPtr pixel - ReconPtr pixel, row by row.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Subtracts 2 8x8 blocks.
+ *
+ *  SPECIAL NOTES : The loop branch (jc) consumes the flags of the earlier cmp;
+ *                  only lea and movdqa sit in between, and neither writes flags.
+ ****************************************************************************/
+void WmtSUB8
+(   
+    UINT8 *FiltPtr, 
+    UINT8 *ReconPtr, 
+    INT16 *DctInputPtr, 
+    UINT8 *old_ptr1, 
+    UINT8 *new_ptr1, 
+    INT32 SourceStride, 
+    INT32 ReconStride 
+)
+{
+    (void) old_ptr1;
+    (void) new_ptr1;
+
+    _asm
+    {      
+        mov         eax,    [FiltPtr]
+        mov         ebx,    [ReconPtr]
+
+        mov         ecx,    [DctInputPtr]
+        mov         edi,    [SourceStride]
+
+        mov         esi,    [ReconStride]
+        pxor        xmm7,   xmm7                // xmm7 = 0, zero source for the byte-to-word unpack
+
+        lea         edx,    [ecx+128]           // edx = end of output: 8 rows x 16 bytes
+
+WmtSub8Loop:
+     
+        movq        xmm0,   QWORD ptr [eax]     // 8 source bytes
+        movq        xmm1,   QWORD ptr [ebx]     // 8 reference bytes
+                
+        punpcklbw   xmm0,   xmm7                // widen to 8 words
+        punpcklbw   xmm1,   xmm7
+
+        psubw       xmm0,   xmm1                // xmm0 = source - reference
+        lea         ecx,    [ecx+16]            // advance the output pointer before the store
+
+        cmp         ecx,     edx                // carry set while ecx < edx (unsigned)
+        
+        lea         eax,    [eax+edi]           // next source row
+        movdqa      [ecx-16],  xmm0             // store this row at the pre-increment address
+
+        lea         ebx,    [ebx+esi]           // next reference row
+        jc          WmtSub8Loop                 // loop while output pointer is below the end
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtSUB8_128
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Source block; 8 bytes are read per row.
+ *                  INT16 *DctInputPtr : Destination for 64 words; written with movdqa (aligned store).
+ *                  UINT8 *old_ptr1    : Unused.
+ *                  UINT8 *new_ptr1    : Unused.
+ *                  INT32 SourceStride : Byte step between FiltPtr rows.
+ *					
+ *  OUTPUTS       : DctInputPtr[0..63] = FiltPtr pixel - 128, row by row.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Subtracts 128 from each pixel in an 8x8 block.
+ *
+ *  SPECIAL NOTES : The loop branch (jc) consumes the flags of the earlier cmp;
+ *                  only movdqa and lea sit in between, and neither writes flags.
+ ****************************************************************************/
+void WmtSUB8_128
+(   
+    UINT8 *FiltPtr, 
+    INT16 *DctInputPtr, 
+    UINT8 *old_ptr1, 
+    UINT8 *new_ptr1, 
+    INT32 SourceStride 
+)
+{
+    (void) old_ptr1;
+    (void) new_ptr1;
+
+    _asm
+    {
+        mov     eax,    [FiltPtr]
+        mov     edx,    [DctInputPtr]
+        
+        mov     ecx,    [SourceStride]
+        lea     edi,    [edx + 128]             // edi = end of output: 8 rows x 16 bytes
+
+        pxor    xmm7,   xmm7                    // xmm7 = 0, zero source for the byte-to-word unpack
+        movdqa  xmm1,   [Eight128s]             // xmm1 = eight words of 128
+
+wmtsub8_128loop:
+        
+        movq    xmm0,   QWORD PTR [eax]         // 8 source bytes
+        punpcklbw   xmm0,   xmm7                // widen to 8 words
+
+        psubw   xmm0,   xmm1;        
+        lea     edx,    [edx+16]                // advance the output pointer before the store
+
+        cmp     edx,    edi                     // carry set while edx < edi (unsigned)
+        movdqa  [edx-16], xmm0                  // store this row at the pre-increment address
+
+        lea     eax,    [eax+ecx]               // next source row
+        jc     wmtsub8_128loop                  // loop while output pointer is below the end
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     Sub8AV2 (older header, kept in place)
+ *
+ *  INPUTS        :     See the WmtSUB8AV2 header
+ *						that follows this one.
+ *
+ *  OUTPUTS       :     See the WmtSUB8AV2 header that follows.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Subtracts 2 8x8 blocks
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtSUB8AV2
+ *
+ *  INPUTS        : UINT8 *FiltPtr     : Source block; 8 bytes are read per row.
+ *                  UINT8 *ReconPtr1    : First reference block; 8 bytes are read per row.
+ *                  UINT8 *ReconPtr2    : Second reference block; 8 bytes are read per row.
+ *                  INT16 *DctInputPtr : Destination for 64 words; written with movdqa (aligned store).
+ *                  UINT8 *old_ptr1    : Unused.
+ *                  UINT8 *new_ptr1    : Unused.
+ *                  INT32 SourceStride : Byte step between FiltPtr rows.
+ *                  INT32 ReconStride  : Byte step between rows of both reference blocks.
+ *					
+ *  OUTPUTS       : DctInputPtr[0..63] = FiltPtr pixel - ((ReconPtr1 pixel + ReconPtr2 pixel) >> 1).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Subtracts the truncated average of two 8x8 reference blocks from an 8x8 source block.
+ *
+ *  SPECIAL NOTES : ebp is saved, reused as the end-of-output pointer and restored;
+ *                  every named argument is loaded before ebp is overwritten.
+ ****************************************************************************/
+void WmtSUB8AV2
+(    
+    UINT8 *FiltPtr, 
+    UINT8 *ReconPtr1, 
+    UINT8 *ReconPtr2, 
+    INT16 *DctInputPtr, 
+    UINT8 *old_ptr1, 
+    UINT8 *new_ptr1, 
+    INT32 SourceStride, 
+    INT32 ReconStride 
+)
+{
+    (void) old_ptr1;
+    (void) new_ptr1;
+
+    _asm
+    {
+        push        ebp                         // saved because ebp is reused below
+
+        mov         esi,    [FiltPtr]
+        mov         edi,    [DctInputPtr]
+
+        mov         eax,    [ReconPtr1]
+        mov         ebx,    [ReconPtr2]
+
+        mov         ecx,    [SourceStride]
+        mov         edx,    [ReconStride]
+
+        lea         ebp,    [edi+128]           // ebp = end of output: 8 rows x 16 bytes
+        pxor        xmm7,   xmm7                // xmm7 = 0, zero source for the byte-to-word unpack
+
+WmtSUB8AV2loop:
+
+        movq        xmm0,   QWORD PTR [eax]     // 8 bytes of reference 1
+        movq        xmm1,   QWORD PTR [ebx]     // 8 bytes of reference 2
+
+        punpcklbw   xmm0,   xmm7                // widen both references to words
+        punpcklbw   xmm1,   xmm7
+
+        paddw       xmm0,   xmm1                // xmm0 = ref1 + ref2
+        movq        xmm2,   QWORD PTR [esi]     // 8 source bytes
+        
+        psraw       xmm0,   1                   // xmm0 = (ref1 + ref2) >> 1, no rounding
+        psubw       xmm2,   xmm0                // subtracts from the source words (no unpack of xmm2 precedes this)
+
+        lea         edi,    [edi+16]            // advance the output pointer before the store
+        cmp         edi,    ebp                 // carry set while edi < ebp (unsigned)
+
+        movdqa      [edi-16],   xmm2            // store this row at the pre-increment address
+        lea         eax,    [eax+edx]           // next row of reference 1
+
+        lea         ebx,    [ebx+edx]           // next row of reference 2
+        lea         esi,    [ecx+esi]           // next source row
+
+        jc          WmtSUB8AV2loop              // loop while output pointer is below the end
+
+        pop         ebp
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm
new file mode 100644
index 00000000..4d56ad37
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetError.asm
@@ -0,0 +1,308 @@
+; structures: overlay of the stack as seen from [esp] once all six registers are pushed
+XmmGetErrorParams  STRUC
+                    dd  6 dup (?)   ;6 pushed regs
+                    dd  ?           ;return address
+    NewDataPtr		dd  ?           ; new-data block pointer (loaded into eax)
+    PixelsPerLine   dd  ?           ; byte step between new-data rows (loaded into ecx)
+    ReconPtr1       dd  ?           ; reference block pointer (loaded into ebx)
+    ReconPixelsPerLine   dd  ?      ; byte step between reference rows (loaded into edx)
+	XSum			dd	?           ; pointer that receives the sum of differences (loaded into esi)
+	XXSum			dd	?           ; pointer that receives the sum of squared differences (loaded into edi)
+XmmGetErrorParams  ENDS
+
+
+ 
+        .686P
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .XMM
+
+; macros
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+        ALIGN 32
+
+
+        .CODE
+
+NAME XmmGetError
+
+PUBLIC XmmGetError_
+PUBLIC _XmmGetError
+ 
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+;------------------------------------------------
+; 		XmmGetError(UINT8*	NewDataPtr, 		; new block, 8 bytes read per row, 8 rows
+;					UINT32	PixelsPerLine, 		; byte step between new-data rows
+;					UINT8*	RefDataPtr1,		; reference block, 8 bytes read per row
+;					UINT32	RefPixelsPerLine, 	; byte step between reference rows
+;					INT32*	XSum, 				; out: sum of (new - ref), low 16 bits sign-extended
+;					INT32*	XXSum)				; out: sum of (new - ref) squared
+		
+XmmGetError_:
+_XmmGetError:
+
+		push    ecx
+	    push    ebx 
+	    push    edx
+	    push    esi
+
+	    mov         ecx,(XmmGetErrorParams PTR [esp-8]).PixelsPerLine	; only 4 of the 6 registers are pushed so far, hence the -8 bias
+		mov         eax,(XmmGetErrorParams PTR [esp-8]).NewDataPtr
+
+	    push    edi
+
+	    mov	        ebx,(XmmGetErrorParams PTR [esp-4]).ReconPtr1		; 5 of 6 pushed, hence the -4 bias
+		mov			edx,(XmmGetErrorParams PTR [esp-4]).ReconPixelsPerLine
+
+	    push    ebp
+	
+		mov			esi,(XmmGetErrorParams PTR [esp]).XSum				; all 6 pushed: the struct now lines up with esp
+		mov			edi,(XmmGetErrorParams PTR [esp]).XXSum
+
+	    prefetcht0	[eax+ecx]
+		prefetcht0	[ebx+edx]
+		
+		pxor        mm5, mm5					; mm5 = 0: running sum of differences (four 16-bit lanes)
+	    pxor        mm6, mm6					; mm6 = 0: zero source for the unpacks below
+
+		;Row 1
+		
+		movq		mm1, [ebx]					; Copy eight bytes to mm1
+		movq		mm0, [eax]					; Copy eight bytes to mm0
+
+	    pxor        mm7, mm7					; mm7 = 0: running sum of squared differences (two 32-bit lanes)
+		
+		prefetcht0	[eax+ecx*2]
+		prefetcht0	[ebx+edx*2]
+		movq		mm2, mm0					; Take copies
+		movq		mm3, mm1					; Take copies
+
+		punpcklbw   mm0, mm6					; unpack to higher precision
+		punpcklbw   mm1, mm6					
+		punpckhbw   mm2, mm6					; unpack to higher precision
+		punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0; A = new data, B = reference
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+		paddw       mm5, mm0					; accumulate differences in mm5
+		paddw       mm5, mm2					; accumulate differences in mm5
+
+		pmaddwd     mm0, mm0					; square and accumulate
+		pmaddwd     mm2, mm2					; square and accumulate
+		add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1 (reference row for the next step)
+		prefetcht0	[ebx+edx*2]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+
+        ; Row 2
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+	    prefetcht0	[eax+ecx*2]
+		movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+		prefetcht0	[ebx+edx*2]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 3
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+		prefetcht0	[eax+ecx*2]
+	    movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+		prefetcht0	[ebx+edx*2]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 4
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+		prefetcht0	[eax+ecx*2]
+	    movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+		prefetcht0 [ebx+edx*2]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 5
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+	    prefetcht0	[eax+ecx*2]
+		movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+    	punpcklbw   mm1, mm6					
+    	punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx, edx						; Inc pointer into ref data
+	    add         eax, ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+		prefetcht0 [ebx+edx*2]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 6
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+	    prefetcht0	[eax+ecx*2]
+		movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+		prefetcht0 [ebx+edx]
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 7
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+		prefetcht0	[eax+ecx]
+		
+	    movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    add         ebx,edx						; Inc pointer into ref data
+	    add         eax,ecx						; Inc pointer into the new data
+	    movq		mm1, [ebx]					; Copy eight bytes to mm1
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+        ; Row 8
+	    movq		mm0, [eax]					; Copy eight bytes to mm0
+	    movq		mm2, mm0					; Take copies
+	    movq		mm3, mm1					; Take copies
+
+	    punpcklbw   mm0, mm6					; unpack to higher precision
+	    punpcklbw   mm1, mm6					
+	    punpckhbw   mm2, mm6					; unpack to higher precision
+	    punpckhbw   mm3, mm6					
+        psubsw		mm0, mm1					; A-B (low order) to MM0
+        psubsw		mm2, mm3					; A-B (high order) to MM2
+
+	    paddw       mm5, mm0					; accumulate differences in mm5
+	    paddw       mm5, mm2					; accumulate differences in mm5
+
+	    pmaddwd     mm0, mm0					; square and accumulate
+	    pmaddwd     mm2, mm2					; square and accumulate
+	    paddd       mm7, mm0					; accumulate in mm7
+	    paddd       mm7, mm2					; accumulate in mm7
+
+
+	    ; Now accumulate the final results: fold the lanes of mm5 and mm7 down to one value each.
+		
+		movq		mm4, mm5					; copy of the four 16-bit difference sums
+		punpcklwd	mm5, mm6					; mm5 = sums 0 and 1, each in the low word of a dword
+		punpckhwd	mm4, mm6					; mm4 = sums 2 and 3, each in the low word of a dword
+		movq		mm0, mm7					; copy of the two 32-bit square sums
+		paddw		mm5, mm4					; mm5 = (sum0+sum2), (sum1+sum3) as 16-bit values
+
+		punpckhdq	mm0, mm6					; mm0 low dword = upper square sum
+		punpckldq	mm7, mm6					; mm7 low dword = lower square sum
+		movq		mm4, mm5
+		paddd		mm0, mm7					; mm0 low dword = total of squared differences
+		punpckhdq	mm4, mm6					; mm4 low dword = (sum1+sum3)
+		punpckldq	mm5, mm6					; mm5 low dword = (sum0+sum2)
+		movd		eax, mm0					; eax = total of squared differences
+		paddw	    mm4, mm5					; low word = 16-bit total of all four difference sums
+		movd		ebp, mm4
+		movsx		ebx, bp;					sign-extend the 16-bit total to 32 bits
+
+        pop     ebp
+		mov		DWORD PTR [edi], eax			;XXSum
+		mov		DWORD PTR [esi], ebx;			;XSum
+        pop     edi
+    	emms									; Clear the MMX state.        
+		pop     esi
+        pop     edx
+        pop     ebx
+        pop     ecx
+		ret
+;------------------------------------------------------------------------
+		END
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm
new file mode 100644
index 00000000..6cbbbaab
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmGetSAD8.asm
@@ -0,0 +1,153 @@
+;------------------------------------------------
+XmmGetSAD8Params  STRUC
+                    dd  6 dup (?)   ;6 pushed regs
+                    dd  ?           ;return address
+    NewDataPtr      dd  ?           ; new-data block pointer (loaded into eax)
+    RefDataPtr      dd  ?           ; reference block pointer (loaded into ebx)
+	OffsetN			dd  ?           ; byte step between new-data rows (loaded into ecx)
+	OffsetR			dd	?           ; byte step between reference rows (loaded into edx)
+XmmGetSAD8Params  ENDS
+;------------------------------------------------
+
+ 
+        .686P
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .XMM
+
+; macros
+
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+        ALIGN 32
+
+
+        .CODE
+
+NAME XmmGetSAD8
+
+PUBLIC XmmGetSAD8_
+PUBLIC _XmmGetSAD8
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+
+;------------------------------------------------
+;INT32 XmmGetSAD8( UINT8 * NewDataPtr, UINT8  * RefDataPtr, 
+;					INT32 OffsetN, INT32 OffsetR) 
+; Returns in eax the sum of absolute differences over 8 rows of 8 bytes. No EMMS is executed here.
+XmmGetSAD8_:
+_XmmGetSAD8:
+
+   push    ecx
+    push    ebx 
+    push    edx
+
+    push    esi
+	mov         ecx,(XmmGetSAD8Params PTR [esp-8]).OffsetN		; only 4 of the 6 registers are pushed so far, hence the -8 bias
+	mov         eax,(XmmGetSAD8Params PTR [esp-8]).NewDataPtr	; Load base addresses
+
+    push    edi
+	mov         ebx,(XmmGetSAD8Params PTR [esp-4]).RefDataPtr	; 5 of 6 pushed, hence the -4 bias
+    mov         edx,(XmmGetSAD8Params PTR [esp-4]).OffsetR
+
+    push    ebp
+    
+
+;
+; ESP = Stack Pointer                      MM0 = Free
+; ESI = Free                               MM1 = Free
+; EDI = Free                               MM2 = Free
+; EBP = Free                               MM3 = Free
+; EBX = RefDataPtr                         MM4 = Free
+; ECX = OffsetN		                       MM5 = Free
+; EDX =	OffsetR							   MM6 = Free
+; EAX = NewDataPtr                         MM7 = Free
+;
+
+
+        ; Row 1
+		movq		mm0, [eax]					; Copy eight bytes to mm0
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm0, [ebx]					; mm0 = SAD of row 1
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 2
+		movq		mm1, [eax]					; Copy eight bytes to mm1
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm1, [ebx]					; mm1 = SAD of row 2
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 3
+		movq		mm2, [eax]					; Copy eight bytes to mm2
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm2, [ebx]					; mm2 = SAD of row 3
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 4
+		movq		mm3, [eax]					; Copy eight bytes to mm3
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm3, [ebx]					; mm3 = SAD of row 4
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 5
+		movq		mm4, [eax]					; Copy eight bytes to mm4
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm4, [ebx]					; mm4 = SAD of row 5
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 6
+		movq		mm5, [eax]					; Copy eight bytes to mm5
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm5, [ebx]					; mm5 = SAD of row 6
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 7
+		movq		mm6, [eax]					; Copy eight bytes to mm6
+		add         eax,ecx						; Inc pointer into the new data
+        psadbw      mm6, [ebx]					; mm6 = SAD of row 7
+
+		add         ebx,edx						; Inc pointer into ref data
+
+        ; Row 8
+		movq		mm7, [eax]					; Copy eight bytes to mm7
+        psadbw      mm7, [ebx]					; mm7 = SAD of row 8
+
+        ; sum the eight row SADs pairwise; the register restores are interleaved with the adds
+        paddd       mm0,mm1
+        paddd       mm2,mm3
+
+        pop     ebp
+        paddd       mm4,mm5
+        paddd       mm6,mm7
+
+        pop     edi
+        paddd       mm0,mm2
+        paddd       mm4,mm6
+
+        pop     esi
+        paddd       mm0,mm4                      ; mm0 = total of all eight rows
+        movd        ecx,mm0                      ; held in ecx until the remaining pops are done
+
+theExit:
+        pop     edx
+    	mov         eax, ecx                     ; return value: the SAD total
+
+        pop     ebx
+        pop     ecx
+
+	    ret
+
+;************************************************
+        END
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash
new file mode 100644
index 00000000..8022acba
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.ash
@@ -0,0 +1,12 @@
+; Stack overlay used as (XMMGetSADParams PTR [esp]) after the routine has pushed six registers.
+XMMGetSADParams  STRUC
+                    dd  6 dup (?)   ;6 pushed regs (must match the number of pushes in the prologue)
+                    dd  ?           ;return address
+    NewDataPtr      dd  ?           ;arg 1: pointer to the new data rows
+    PixelsPerLine   dd  ?           ;arg 2: byte step between rows of the new data
+    RefDataPtr      dd  ?           ;arg 3: pointer to the reference data rows
+    RefPixelsPerLine   dd  ?        ;arg 4: byte step between rows of the reference data
+    ErrorSoFar      dd  ?           ;arg 5: value the computed SAD is added to
+    BestSoFar       dd  ?           ;arg 6: not read by XMMGetSAD in XmmSAD.asm
+XMMGetSADParams  ENDS
+;------------------------------------------------
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm
new file mode 100644
index 00000000..867b7200
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/XmmSAD.asm
@@ -0,0 +1,141 @@
+
+.686P
+.387
+.MODEL  flat, SYSCALL, os_dos
+.XMM
+
+; macros
+
+.DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+ALIGN 32
+
+
+.CODE
+
+NAME XmmGetSAD
+
+PUBLIC XMMGetSAD_
+PUBLIC _XMMGetSAD
+
+INCLUDE XmmSAD.ash
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+
+;------------------------------------------------
+;INT32 XMMGetSAD( UINT8 * NewDataPtr, INT32 PixelsPerLine,
+;                 UINT8 * RefDataPtr, INT32 RefPixelsPerLine,
+;                 INT32 ErrorSoFar, INT32 BestSoFar )
+; Returns (in eax) ErrorSoFar plus the sum of absolute differences over 8 rows of 8 bytes.
+XMMGetSAD_:
+_XMMGetSAD:
+
+push    ecx                             ; six pushes: must match the 6 saved slots in XMMGetSADParams
+push    ebx 
+push    edx
+
+push    esi
+push    edi
+push    ebp
+
+mov     ecx,	(XMMGetSADParams PTR [esp]).PixelsPerLine
+mov     eax,	(XMMGetSADParams PTR [esp]).NewDataPtr	
+mov     ebx,	(XMMGetSADParams PTR [esp]).RefDataPtr
+
+movq	mm0,	[eax]					; Copy eight bytes (row 1 of the new data) to mm0
+; Register use for the rest of the routine (rows are numbered 1..8 as in the comments below):
+; ESP = Stack Pointer                      MM0 = row 1 SAD, then running total
+; ESI = new data row 3, then row 4         MM1 = row 2 SAD
+; EDI = reference row 3, then row 4        MM2 = row 3 SAD
+; EBP = saved/restored, otherwise unused   MM3 = row 4 SAD
+; EBX = RefDataPtr (row 1, later row 6)    MM4 = row 5 SAD
+; ECX = PixelsPerLine                      MM5 = row 6 SAD
+; EDX = RefPixelsPerLine                   MM6 = row 7 SAD
+; EAX = NewDataPtr (row 1, later row 6)    MM7 = row 8 SAD
+; NOTE: this routine issues no emms; the MMX state is still in use when it returns.
+
+; Row 1
+mov         edx, (XMMGetSADParams PTR [esp]).RefPixelsPerLine
+lea		esi, [eax+2*ecx];			; esi = new data row 3 (stepped to row 4 below)
+psadbw      mm0, [ebx]
+
+; Row 2
+movq		mm1, [eax+ecx]				; Copy eight bytes to mm1
+lea		edi, [ebx+2*edx]			; edi = reference row 3 (stepped to row 4 below)
+psadbw      mm1, [ebx+edx]
+
+
+
+; Row 3
+movq		mm2, [eax+2*ecx]			; Copy eight bytes to mm2
+add		esi, ecx;					; esi = new data row 4
+psadbw      mm2, [ebx+2*edx]
+
+
+add		edi, edx;					; edi = reference row 4
+
+; Row 4
+movq		mm3, [esi]					; Copy eight bytes to mm3
+psadbw      mm3, [edi]
+
+
+
+; Row 5
+movq		mm4, [eax+4*ecx]			; Copy eight bytes to mm4
+paddd       mm0,mm1						; mm0 = rows 1+2
+psadbw      mm4, [ebx+4*edx]
+
+
+
+; Row 6
+movq		mm5, [esi+2*ecx]			; Copy eight bytes to mm5
+lea		eax, [esi+2*ecx]			; eax = new data row 6
+psadbw      mm5, [edi+2*edx]
+
+
+lea		ebx, [edi+2*edx]			; ebx = reference row 6
+
+; Row 7
+movq		mm6, [eax+ecx]					; Copy eight bytes to mm6
+psadbw      mm6, [ebx+edx]
+paddd       mm2,mm3						; mm2 = rows 3+4
+
+
+
+; Row 8
+movq		mm7, [esi+4*ecx]					; Copy eight bytes to mm7
+psadbw      mm7, [edi+4*edx]
+
+; start accumulating differences
+
+mov		eax,	(XMMGetSADParams PTR [esp]).ErrorSoFar	; read while esp still matches the overlay (before the pops)
+
+pop		ebp
+paddd       mm4,mm5						; mm4 = rows 5+6
+paddd       mm6,mm7						; mm6 = rows 7+8
+
+pop		edi
+paddd       mm0,mm2						; mm0 = rows 1..4
+paddd       mm4,mm6						; mm4 = rows 5..8
+
+pop		esi
+paddd       mm0,mm4						; mm0 = rows 1..8
+movd        ecx,mm0						; ecx = low dword of the total
+
+theExit:
+pop		edx
+add         eax,ecx                     ; add in calculated error
+
+pop     ebx
+pop     ecx
+
+
+ret
+
+;************************************************
+END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c
new file mode 100644
index 00000000..49d2d0ff
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/csystemdependant.c
@@ -0,0 +1,181 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "compdll.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define MMX_ENABLED 1
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+// Functions that should only be used in assembly versions of the code
+extern unsigned long VP6_GetProcessorFrequency();
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+extern UINT32 ComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp  );
+extern UINT32 GetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8  * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 GetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 WmtGetSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8  * RefPtr,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+extern UINT32 WmtGetHalfPixelSumAbsDiffs16(UINT8 * SrcPtr,INT32 SourceStride,UINT8 * RefPtr,UINT8 * RefPtr2,INT32 ReconStride,UINT32 ErrorSoFar,UINT32 BestSoFar);
+
+extern UINT32 GetIntraErrorC( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 GetInterErr(  UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1,  UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 GetSumAbsDiffs( UINT8 * NewDataPtr, INT32 SourceStride, UINT8  * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 GetHalfPixelSumAbsDiffs( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+
+extern UINT32 MmxGetSAD( UINT8 * NewDataPtr, INT32 SourceStride, UINT8  * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 MmxGetHalfPixelSAD( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 MmxGetInterErr(  UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1,  UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 MmxGetIntraError( UINT8* DataPtr, INT32 SourceStride);
+extern void   MmxSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+extern void   MmxSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void   MmxSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+
+extern UINT32 WmtComputeBlockReconError ( CP_INSTANCE *cpi, UINT32 bp  );
+extern void   WmtSUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+extern void   WmtSUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void   WmtSUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconSourceStride );
+
+extern UINT32 XmmGetInterErr(  UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1,  UINT8 * RefDataPtr2, INT32 RefStride );
+extern UINT32 XMMGetSAD( UINT8 * NewDataPtr, INT32 SourceStride, UINT8  * RefDataPtr, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 WmtGetIntraError( UINT8* DataPtr, INT32 SourceStride);
+extern UINT32 WmtGetHalfPixelSAD( UINT8 * SrcData, INT32 SourceStride, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2, INT32 RefStride, UINT32 ErrorSoFar, UINT32 BestSoFar  );
+extern UINT32 WmtGetInterErr(  UINT8 * NewDataPtr, INT32 SourceStride, UINT8 * RefDataPtr1,  UINT8 * RefDataPtr2, INT32 RefStride );
+
+extern void VP6_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void VP6_quantize_wmt( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+extern void VP6_quantize_mmx( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+
+
+extern UINT32 GetMBFrameVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 MmxGetMBFrameVertVar( CP_INSTANCE *cpi);
+extern UINT32 WmtGetMBFrameVertVar( CP_INSTANCE *cpi);
+
+extern UINT32 GetMBFieldVerticalVariance( CP_INSTANCE *cpi);
+extern UINT32 MmxGetMBFieldVertVar( CP_INSTANCE *cpi);
+extern UINT32 WmtGetMBFieldVertVar( CP_INSTANCE *cpi);
+
+extern UINT32 FiltBlockBilGetSad_C(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 FiltBlockBilGetSad_mmx(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+extern UINT32 FiltBlockBilGetSad_wmt(UINT8 *SrcPtr,INT32 SrcStride,UINT8 *ReconPtr1,UINT8 *ReconPtr2,INT32 PixelsPerLine,INT32 ModX, INT32 ModY,UINT32 BestSoFar);
+
+/****************************************************************************
+ *
+ *  ROUTINE       : CMachineSpecificConfig
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Queries the processor feature flags and binds the
+ *                  compressor function pointers to the matching routines.
+ *
+ *  SPECIAL NOTES : Precedence is WMT, then XMM, then MMX, then plain C.
+ *
+ ****************************************************************************/
+void CMachineSpecificConfig( void )
+{
+    INT32 HasMmx;
+    INT32 HasXmm;
+    INT32 HasWmt;
+
+    GetProcessorFlags( &HasMmx, &HasXmm, &HasWmt );
+
+    // Start from the C versions of the three core error metrics; every
+    // path below assigns them again.
+    GetSAD          = GetSumAbsDiffs;
+    GetSadHalfPixel = GetHalfPixelSumAbsDiffs;
+    GetInterError   = GetInterErr;
+
+    // WMT path: Wmt* routines throughout, except the 8x8 SAD, which is
+    // bound to XMMGetSAD.
+    if ( HasWmt )
+    {
+        // Error metrics
+        GetSAD             = XMMGetSAD;
+        GetSAD16           = WmtGetSumAbsDiffs16;
+        GetSadHalfPixel    = WmtGetHalfPixelSAD;
+        GetSadHalfPixel16  = WmtGetHalfPixelSumAbsDiffs16;
+        GetInterError      = WmtGetInterErr;
+        GetIntraError      = WmtGetIntraError;
+        GetBlockReconErr   = WmtComputeBlockReconError;
+        FiltBlockBilGetSad = FiltBlockBilGetSad_wmt;
+
+        // Macroblock vertical variance
+        GetMBFrameVertVar  = WmtGetMBFrameVertVar;
+        GetMBFieldVertVar  = WmtGetMBFieldVertVar;
+
+        // Block subtraction and quantization
+        Sub8               = WmtSUB8;
+        Sub8_128           = WmtSUB8_128;
+        Sub8Av2            = WmtSUB8AV2;
+        VP6_quantize       = VP6_quantize_wmt;
+
+        return;
+    }
+
+    // Without WMT, the 16-wide SAD routines and the block reconstruction
+    // error use the C versions on every remaining path.
+    GetSAD16          = GetSumAbsDiffs16;
+    GetSadHalfPixel16 = GetHalfPixelSumAbsDiffs16;
+    GetBlockReconErr  = ComputeBlockReconError;
+
+    // The XMM and MMX paths share every binding except the 8x8 SAD,
+    // where XMM wins when both flags are set.
+    if ( HasXmm || HasMmx )
+    {
+        // Error metrics
+        GetSAD             = HasXmm ? XMMGetSAD : MmxGetSAD;
+        GetSadHalfPixel    = MmxGetHalfPixelSAD;
+        GetInterError      = MmxGetInterErr;
+        GetIntraError      = MmxGetIntraError;
+        FiltBlockBilGetSad = FiltBlockBilGetSad_mmx;
+
+        // Macroblock vertical variance
+        GetMBFrameVertVar  = MmxGetMBFrameVertVar;
+        GetMBFieldVertVar  = MmxGetMBFieldVertVar;
+
+        // Block subtraction and quantization
+        Sub8               = MmxSUB8;
+        Sub8_128           = MmxSUB8_128;
+        Sub8Av2            = MmxSUB8AV2;
+        VP6_quantize       = VP6_quantize_mmx;
+
+        return;
+    }
+
+    // Plain C path: none of the three feature flags is set.
+    // Error metrics
+    GetSAD             = GetSumAbsDiffs;
+    GetSadHalfPixel    = GetHalfPixelSumAbsDiffs;
+    GetInterError      = GetInterErr;
+    GetIntraError      = GetIntraErrorC;
+    FiltBlockBilGetSad = FiltBlockBilGetSad_C;
+
+    // Macroblock vertical variance
+    GetMBFrameVertVar  = GetMBFrameVerticalVariance;
+    GetMBFieldVertVar  = GetMBFieldVerticalVariance;
+
+    // Block subtraction, quantization and forward DCT; fdct_short is
+    // assigned on this path only.
+    Sub8               = SUB8;
+    Sub8_128           = SUB8_128;
+    Sub8Av2            = SUB8AV2;
+    VP6_quantize       = VP6_quantize_c;
+    fdct_short         = fdct_short_C;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm
new file mode 100644
index 00000000..01d694f1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/cx/Win32/fdct_m.asm
@@ -0,0 +1,1000 @@
+;***********************************************************************
+;	File:			fdct_m.asm
+;
+;	Description:
+;					This function performs a 2-D forward DCT on an 8x8 block
+;					
+;
+;	Input:			Pointers to input source data buffer and destination 
+;					buffer.
+;
+;	Note:			none
+;
+;	Special Notes:	We try to do the truncation right to match the result 
+;					of the c version. 
+;
+;************************************************************************
+;
+;
+;
+ 
+        .586
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .MMX
+;
+; macro functions
+;
+Fdct MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+    ; One forward-DCT pass over the quadwords ip0..ip7 (four 16-bit lanes each); results are
+    ; written back to the same ip0..ip7 operands. Uses mm0-mm7 and the TIRX/TIRY/TIC1/TIC2/
+	; TID07/TID34 scratch quadwords. Stage 1: sums and differences of the inputs.
+	movq        mm0,ip0             ; mm0 = ip0
+    movq        mm1,ip1             ; mm1 = ip1
+    movq        mm2,ip3             ; mm2 = ip3
+    movq        mm3,ip5             ; mm3 = ip5
+    movq        mm4,mm1             ; mm4 = ip1
+    movq        mm5,mm3             ; mm5 = ip5
+    movq        mm6,mm0             ; mm6 = ip0
+    movq        mm7,mm2             ; mm7 = ip3
+
+    paddsw      mm0,ip7             ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,ip2             ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,ip4             ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,ip6             ; mm3 = ip5 + ip6 = is56
+    psubsw      mm6,ip7             ; mm6 = ip0 - ip7 = id07
+    psubsw      mm7,ip4             ; mm7 = ip3 - ip4 = id34
+    psubsw      mm4,ip2             ; mm4 = ip1 - ip2 = id12
+    psubsw      mm5,ip6             ; mm5 = ip5 - ip6 = id56
+
+    movq        TID07,mm6           ; save id07
+    movq        TID34,mm7           ; save id34
+
+    ; free = mm6, mm7
+
+    movq        mm6,mm4             ; mm6 = id12
+    psubsw      mm4,mm5             ; mm4 = id12 - id56 = irot_input_x
+
+	movq        TIRX,mm4            ; save irot_input_x    
+    paddsw      mm6,mm5             ; mm6 = id12 + id56
+	movq		mm5,mm6				; mm5 = copy of (id12 + id56)
+
+    pmulhw      mm6,xC4S4           ; (xC4S4 * (id12 + id56)) - (id12 + id56) 
+	paddw		mm6,mm5				; (xC4S4 * (id12 + id56))
+	psrlw		mm5,15				; mm5 = sign bit of each lane (1 where negative)
+
+	paddw		mm6,mm5;			; mm6 = icommon_product2 (sign bit added as truncation fix-up)
+	
+
+    ; free = mm4 ,mm5, mm7
+
+    movq        mm4,mm0             ; mm4 = is07
+    psubsw      mm0,mm2             ; mm0 = is07 - is34 = irot_input_y
+
+    movq        TIRY,mm0            ; save irot_input_y
+
+    ; free = mm0, mm5, mm7
+
+    movq        mm0,mm1             ; mm0 = is12
+    psubsw      mm1,mm3             ; mm1 = is12 - is56
+
+    movq        TIC2,mm6            ; save icommon_product2
+	movq		mm7, mm1			; mm7 = copy of (is12 - is56)
+
+    pmulhw      mm1,xC4S4           ; mm1 = (xC4S4 * (is12 - is56)) - (is12 - is56)
+	paddw		mm1, mm7			; mm1 = (xC4S4 * (is12 - is56))	
+	psrlw		mm7, 15				; mm7 = sign bit of each lane
+	
+	paddw		mm1, mm7			; mm1 = icommon_product1
+    movq        TIC1,mm1            ; save icommon_product1
+
+    ; free = mm1, mm5, mm6, mm7
+
+    paddsw      mm4,mm2             ; mm4 = is07 + is34 = is0734
+    paddsw      mm0,mm3             ; mm0 = is12 + is56 = is1256
+    movq        mm1,mm4             ; mm1 = is07 + is34 = is0734
+
+    paddsw      mm4,mm0             ; mm4 = is0734 + is1256
+    psubsw      mm1,mm0             ; mm1 = is0734 - is1256
+
+	movq		mm7,mm4				; mm7 = copy of (is0734 + is1256)
+	movq		mm6,mm1				; mm6 = copy of (is0734 - is1256)
+
+    pmulhw      mm4,xC4S4           ; mm4 = (xC4S4 * (is0734 + is1256)) - (is0734 + is1256)
+    pmulhw      mm1,xC4S4           ; mm1 = (xC4S4 * (is0734 - is1256)) - (is0734 - is1256)
+	paddw		mm4,mm7				; mm4 = (xC4S4 * (is0734 + is1256))
+	paddw       mm1,mm6				; mm1 = (xC4S4 * (is0734 - is1256))
+
+	psrlw		mm7, 15				; sign bits of (is0734 + is1256)
+	psrlw		mm6, 15				; sign bits of (is0734 - is1256)
+
+	paddw		mm4, mm7
+    movq        ip0,mm4             ; write out ip0
+
+	paddw		mm1, mm6
+    movq        ip4,mm1             ; write out ip4
+
+    ; free = mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
+
+    movq        mm0,TIRY            ; mm0 = irot_input_y
+    movq        mm1,TIRX            ; mm1 = irot_input_x
+
+    movq        mm2,mm0             ; mm2 = irot_input_y
+	movq		mm3,mm1				; mm3 = irot_input_x
+
+	movq		mm4,mm0				; mm4 = irot_input_y (becomes its sign bits)
+	movq		mm5,mm1				; mm5 = irot_input_x (becomes its sign bits)
+	
+	movq		mm6,xC2S6			; mm6 = xC2S6
+	movq		mm7,xC6s2			; mm7 = xC6S2. NOTE(review): spelled xC6s2 here but defined as xC6S2 - presumably resolved case-insensitively; confirm
+
+    pmulhw      mm0,mm6             ; mm0 = xC2S6*irot_input_y - irot_input_y
+    pmulhw      mm3,mm6             ; mm3 = xC2S6*irot_input_x - irot_input_x
+	psrlw		mm4, 15
+	psrlw		mm5, 15
+	paddw		mm0,mm2				; mm0 = xC2S6*irot_input_y
+	paddw		mm3,mm1				; mm3 = xC2S6*irot_input_x
+	paddw		mm0,mm4
+	paddw		mm3,mm5;
+
+	pmulhw      mm1,mm7             ; mm1 = xC6S2*irot_input_x
+    pmulhw      mm2,mm7             ; mm2 = xC6S2*irot_input_y 
+
+	paddw		mm1,mm5				; add sign bit of irot_input_x
+	paddw		mm2,mm4				; add sign bit of irot_input_y
+    
+    paddsw      mm0,mm1             ; mm0 = xC2S6*irot_input_y + xC6S2*irot_input_x = ip2
+    psubsw      mm2,mm3             ; mm2 = xC6S2*irot_input_y - xC2S6*irot_input_x = ip6
+
+    movq        ip2,mm0             ; write out ip2
+    movq        ip6,mm2             ; write out ip6
+
+    ;
+
+    movq        mm6,TIC1            ; mm6 = icommon_product1
+    movq        mm4,TID07           ; mm4 = id07
+
+    movq        mm5,TID34           ; mm5 = id34
+    movq        mm7,TIC2            ; mm7 = icommon_product2
+
+    movq        mm1,mm6             ; mm1 = icommon_product1
+    movq        mm3,mm7             ; mm3 = icommon_product2
+
+    pxor        mm0,mm0             ; clear mm0
+    paddsw      mm7,mm5             ; mm7 = icommon_product2 + id34
+    
+	paddsw      mm6,mm4             ; mm6 = icommon_product1 + id07 = irot_input_x
+    psubsw      mm0,mm7             ; mm0 = -(icommon_product2 + id34) = irot_input_y
+
+
+    ; free = mm2, mm7, mm4, mm5;
+
+    movq        mm2,mm6             ; mm2 = irot_input_x 
+    movq        mm7,mm0             ; mm7 = irot_input_y 
+
+	movq		mm4,mm6;
+	movq		mm5,mm0;			
+
+    pmulhw      mm6,xC1S7           ; mm6 = xC1S7*irot_input_x -irot_input_x
+	psrlw		mm4,15;
+
+	psrlw		mm5,15;			
+   	pmulhw      mm7,xC1S7           ; mm7 = xC1S7*irot_input_y -irot_input_y
+
+	paddw		mm6,mm2				; mm6 = xC1S7*irot_input_x 
+	paddw		mm7,mm0				; mm7 = xC1S7*irot_input_y 
+
+    pmulhw      mm0,xC7S1           ; mm0 = xC7S1*irot_input_y 
+	paddw		mm6,mm4				; add sign bit of irot_input_x
+
+	paddw		mm7,mm5				; add sign bit of irot_input_y
+    pmulhw      mm2,xC7S1           ; mm2 = xC7S1*irot_input_x 
+
+	paddw		mm0,mm5				; add sign bit of irot_input_y
+	paddw		mm2,mm4				; add sign bit of irot_input_x
+
+    psubsw      mm6,mm0             ; mm6 = xC1S7*irot_input_x - xC7S1*irot_input_y = ip1
+    paddsw      mm2,mm7             ; mm2 = xC7S1*irot_input_x + xC1S7*irot_input_y = ip7
+
+    movq        ip1,mm6             ; write out ip1
+
+    movq        mm4,TID07           ; mm4 = id07
+    movq        mm5,TID34           ; mm5 = id34
+
+    movq        ip7,mm2             ; write out ip7
+
+
+    psubsw      mm4,mm1             ; mm4 = id07 - icommon_product1 = irot_input_x
+    psubsw      mm5,mm3             ; mm5 = id34 - icommon_product2 = irot_input_y
+
+    movq        mm6,mm4             ; mm6 = irot_input_x 
+	movq		mm0,mm4				; mm0 = irot_input_x 
+
+    movq        mm7,mm5             ; mm7 = irot_input_y 
+	movq		mm2,mm5				; mm2 = irot_input_y 
+
+	movq		mm1,xC3S5
+	movq		mm3,xC5S3
+
+    pmulhw      mm4,mm1             ; mm4 = xC3S5*irot_input_x - irot_input_x
+    pmulhw      mm6,mm3             ; mm6 = xC5S3*irot_input_x - irot_input_x
+	pmulhw      mm5,mm3             ; mm5 = xC5S3*irot_input_y - irot_input_y
+    pmulhw      mm7,mm1             ; mm7 = xC3S5*irot_input_y - irot_input_y
+
+	paddw		mm4, mm0			; mm4 = xC3S5*irot_input_x
+	paddw       mm6, mm0			; mm6 = xC5S3*irot_input_x
+	paddw		mm5, mm2			; mm5 = xC5S3*irot_input_y
+	paddw		mm7, mm2			; mm7 = xC3S5*irot_input_y
+
+	
+	psrlw		mm0, 15				; mm0 = sign bits of irot_input_x
+	psrlw		mm2, 15				; mm2 = sign bits of irot_input_y
+	
+	paddw		mm4, mm0			; add sign bit of irot_input_x
+	paddw		mm6, mm0			; add sign bit of irot_input_x
+	paddw		mm5, mm2			; add sign bit of irot_input_y
+	paddw		mm7, mm2			; add sign bit of irot_input_y
+
+    psubsw      mm4,mm5             ; mm4 = xC3S5*irot_input_x - xC5S3*irot_input_y  = ip3
+    paddsw      mm6,mm7             ; mm6 = xC5S3*irot_input_x + xC3S5*irot_input_y  = ip5
+
+    movq        ip3,mm4             ; write out ip3
+    movq        ip5,mm6             ; write out ip5
+
+
+ENDM
+
+Fdct_new MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+    ; One forward-DCT pass over the quadwords ip0..ip7 (four 16-bit lanes each); results are
+    ; written back to the same ip0..ip7 operands. Uses mm0-mm7 and the TIRY scratch quadword.
+	; Stage 1: sums and differences of the inputs.
+	movq        mm0,ip0             ; mm0 = ip0
+    movq        mm1,ip1             ; mm1 = ip1
+    movq        mm2,ip3             ; mm2 = ip3
+    movq        mm3,ip5             ; mm3 = ip5
+	movq        mm4,ip0             ; mm4 = ip0
+    movq        mm5,ip1             ; mm5 = ip1
+    movq        mm6,ip3             ; mm6 = ip3
+    movq        mm7,ip5             ; mm7 = ip5
+
+
+    paddsw      mm0,ip7             ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,ip2             ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,ip4             ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,ip6             ; mm3 = ip5 + ip6 = is56
+    psubsw      mm4,ip7             ; mm4 = ip0 - ip7 = id07
+    psubsw      mm5,ip2             ; mm5 = ip1 - ip2 = id12
+
+	 psubsw		mm0,mm2				; mm0 = is07 - is34
+
+	 paddsw		mm2,mm2				; mm2 = 2 * is34
+
+    psubsw      mm6,ip4             ; mm6 = ip3 - ip4 = id34
+	 
+     paddsw		mm2,mm0				; mm2 = is07 + is34 = is0734
+	 psubsw		mm1,mm3				; mm1 = is12 - is56
+	 movq		TIRY,mm0			; Save is07 - is34 to free mm0;
+	 paddsw		mm3,mm3				; mm3 = 2 * is56
+     paddsw		mm3,mm1				; mm3 = is12 + is56	= is1256
+
+    psubsw      mm7,ip6             ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+	psubsw		mm5,mm7				; mm5 = id12 - id56
+	paddsw		mm7,mm7				; mm7 = 2 * id56
+	paddsw		mm7,mm5				; mm7 = id12 + id56
+
+									; mm4 = id07
+									
+									; mm6 = id34	
+;---------------------------------------------------------------------
+; ip[0], ip[4]
+;	mm0			Free
+;	mm2			is0734
+;	mm3			is1256
+	
+
+	psubsw		mm2,mm3				; mm2 = is0734 - is1256
+	paddsw		mm3,mm3				; mm3 = 2 * is1256
+
+	movq		mm0,mm2				; make a copy 
+	paddsw		mm3,mm2				; mm3 = is0734 + is1256
+
+	pmulhw		mm0,xC4S4			; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+	paddw		mm0,mm2				; mm0 = xC4S4 * ( is0734 - is1256 )
+	psrlw		mm2,15				; mm2 = sign bit of each lane (1 where negative)
+	paddw		mm0,mm2				; Truncate mm0, now it is op[4]
+
+	movq		mm2,mm3				; mm2 = copy of (is0734 + is1256), becomes its sign bits
+	movq		ip4,mm0				; save ip4, now mm0,mm2 are free
+
+	movq		mm0,mm3				; mm0 = copy of (is0734 + is1256)
+	pmulhw		mm3,xC4S4			; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+	psrlw		mm2,15				; 
+	paddw		mm3,mm0				; mm3 = xC4S4 * ( is0734 +is1256 )	
+	paddw		mm3,mm2				; Truncate mm3, now it is op[0]
+
+	movq		ip0,mm3				; write out ip0
+
+;----------------------------------------------------------------------
+; ip[2], ip[6]
+;	mm0			Free
+;	mm2			Free
+;	mm3			Free
+;	mm5			id12 - id56			irot_input_x
+;	TIRY		is07 - is34			irot_input_y
+
+	movq		mm3,TIRY			; mm3 = irot_input_y
+	pmulhw		mm3,xC2S6			; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+	movq		mm2,TIRY			; mm2 = irot_input_y (becomes its sign bits)
+	movq		mm0,mm2				; mm0 = irot_input_y
+	
+	psrlw		mm2,15				; mm2 = sign bits of irot_input_y
+	paddw		mm3,mm0				; mm3 = xC2S6 * irot_input_y
+	
+	paddw       mm3,mm2				; Truncated
+	movq		mm0, mm5;			; mm0 = irot_input_x
+
+
+	movq		mm2, mm5;
+	pmulhw		mm0, xC6S2			; mm0 = xC6S2 * irot_input_x
+
+	psrlw		mm2, 15			
+	paddw		mm0, mm2			; Truncated
+
+	paddsw		mm3, mm0			; ip[2]
+	movq		ip2, mm3			; Save ip2
+
+
+	movq		mm0, mm5			; mm0 = irot_input_x
+	movq		mm2, mm5			; mm2 = irot_input_x (becomes its sign bits)
+	
+	pmulhw		mm5, xC2S6			; mm5 = xC2S6 * irot_input_x - irot_input_x
+	psrlw		mm2, 15				;
+
+	movq		mm3, TIRY			; mm3 = irot_input_y
+	paddw		mm5, mm0		    ; mm5 = xC2S6 * irot_input_x
+
+	paddw		mm5, mm2			; Truncated
+	movq		mm2, mm3			
+	
+	pmulhw		mm3, xC6S2			; mm3 = xC6S2 * irot_input_y
+	psrlw		mm2, 15
+
+	paddw		mm3, mm2			; Truncated
+	psubsw		mm3, mm5			; mm3 = xC6S2 * irot_input_y - xC2S6 * irot_input_x = ip[6]
+
+	movq		ip6, mm3			; Save ip6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			is12 - is56
+;	mm7			id12 + id56
+
+	movq		mm0, xC4S4
+	movq		mm2, mm1
+	movq		mm3, mm1
+
+	pmulhw		mm1, mm0			; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+	psrlw		mm2, 15				
+
+	paddw		mm1, mm3			; mm1 = xC4S4 * ( is12 - is56 )
+	paddw		mm1, mm2			; Truncate mm1, now it is icommon_product1
+
+	movq		mm2, mm7
+	movq		mm3, mm7			
+
+	pmulhw		mm7, mm0			; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+	psrlw		mm2, 15			
+
+	paddw		mm7, mm3			; mm7 = xC4S4 * ( id12 + id56 )
+	paddw		mm7, mm2			; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			icommon_product1
+;	mm7			icommon_product2
+;   mm4			id07
+;	mm6			id34
+
+	
+	pxor		mm0, mm0			; Clear mm0
+	psubsw		mm0, mm6			; mm0 = - id34
+
+	psubsw		mm0, mm7			; mm0 = - ( id34 + icommon_product2 )
+	paddsw		mm6, mm6			; mm6 = 2 * id34
+	paddsw		mm6, mm0			; mm6 = id34 - icommon_product2
+
+	psubsw		mm4, mm1			; mm4 = id07 - icommon_product1
+	paddsw		mm1, mm1			; mm1 = 2 * icommon_product1
+	paddsw		mm1, mm4			; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; ip1, ip7
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			irot_input_x
+;	mm0			irot_input_y
+
+	movq		mm7, xC1S7
+	movq		mm2, mm1
+
+	movq		mm3, mm1;
+	pmulhw		mm1, mm7			; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+	movq		mm7, xC7S1			;
+	psrlw		mm2, 15				
+	
+	paddw		mm1, mm3			; mm1 = xC1S7 * irot_input_x
+	paddw		mm1, mm2			; Truncated
+
+	pmulhw		mm3, mm7			; mm3 = xC7S1 * irot_input_x
+	paddw		mm3, mm2			; Truncated
+
+	movq		mm5, mm0			
+	movq	    mm2, mm0
+
+	movq		mm7, xC1S7			
+	pmulhw		mm0, mm7			; mm0 = xC1S7 * irot_input_y - irot_input_y
+	
+	movq		mm7, xC7S1			
+	psrlw		mm2, 15			
+	
+	paddw		mm0, mm5			; mm0 = xC1S7 * irot_input_y
+	paddw		mm0, mm2			; Truncated
+
+	pmulhw		mm5, mm7			; mm5 = xC7S1 * irot_input_y
+	paddw		mm5, mm2			; Truncated
+
+	psubsw		mm1, mm5			; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1
+	paddsw		mm3, mm0			; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7
+	
+	movq		ip1, mm1
+	movq		ip7, mm3
+;-----------------------------------------------------------------------------
+; ip3, ip5
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			Free
+;	mm0			Free
+;   mm4         id07 - icommon_product1 = irot_input_x
+;   mm6			id34 - icommon_product2 = irot_input_y
+
+	movq		mm0, xC3S5
+	movq		mm1, xC5S3
+
+	movq		mm5, mm6
+	movq		mm7, mm6
+
+	movq		mm2, mm4
+	movq		mm3, mm4
+
+	pmulhw		mm4, mm0			; mm4 = xC3S5 * irot_input_x - irot_input_x
+	pmulhw		mm6, mm1			; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+	psrlw		mm2, 15				; mm2 = sign bits of irot_input_x
+	psrlw		mm5, 15				; mm5 = sign bits of irot_input_y
+
+	paddw		mm4, mm3			; mm4 = xC3S5 * irot_input_x
+	paddw		mm6, mm7			; mm6 = xC5S3 * irot_input_y
+
+	paddw		mm4, mm2			; Truncated
+	paddw		mm6, mm5			; Truncated
+
+	psubsw		mm4, mm6			; ip3
+	movq		ip3, mm4			;
+
+	movq		mm4, mm3			; mm4 = irot_input_x
+	movq		mm6, mm7			; mm6 = irot_input_y
+
+	pmulhw		mm3, mm1			; mm3 = xC5S3 * irot_input_x - irot_input_x
+	pmulhw		mm7, mm0			; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+	paddw		mm4, mm2			; mm4 = irot_input_x + its sign bit
+	paddw		mm6, mm5			; mm6 = irot_input_y + its sign bit
+
+	paddw		mm3, mm4			; mm3 = xC5S3 * irot_input_x
+	paddw		mm7, mm6			; mm7 = xC3S5 * irot_input_y
+
+	paddw		mm3, mm7			; ip5 (non-saturating paddw; the other outputs use paddsw/psubsw)
+	movq		ip5, mm3			;
+
+ENDM
+
+Transpose MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7,
+                op0, op1, op2, op3, op4, op5, op6, op7
+    movq        mm0,ip0             ; mm0 = a3 a2 a1 a0 (words listed high to low)
+    movq        mm4,ip4             ; mm4 = e3 e2 e1 e0
+    movq        mm1,ip1             ; mm1 = b3 b2 b1 b0
+    movq        mm5,ip5             ; mm5 = f3 f2 f1 f0
+    movq        mm2,ip2             ; mm2 = c3 c2 c1 c0
+    movq        mm6,ip6             ; mm6 = g3 g2 g1 g0
+    movq        mm3,ip3             ; mm3 = d3 d2 d1 d0
+    movq        op1,mm1             ; park b3 b2 b1 b0 in op1 (used as scratch until its final store)
+    movq        mm7,ip7             ; mm7 = h3 h2 h1 h0
+
+    ; Two 4x4 word transposes: rows e..h (ip4..ip7) go to op4..op7 first, then rows a..d (ip0..ip3) go to op0..op3
+    movq		mm1, mm4            ; mm1 = e3 e2 e1 e0      
+	 punpcklwd	mm4, mm5            ; mm4 = f1 e1 f0 e0      
+	movq		op0, mm0            ; park a3 a2 a1 a0 in op0 (used as scratch until its final store)
+	 punpckhwd	mm1, mm5            ; mm1 = f3 e3 f2 e2      
+	movq		mm0, mm6            ; mm0 = g3 g2 g1 g0      
+	 punpcklwd	mm6, mm7            ; mm6 = h1 g1 h0 g0      
+	movq		mm5, mm4            ; mm5 = f1 e1 f0 e0      
+	 punpckldq	mm4, mm6            ; mm4 = h0 g0 f0 e0 = MM4 
+	punpckhdq	mm5, mm6            ; mm5 = h1 g1 f1 e1 = MM5 
+	 movq		mm6, mm1            ; mm6 = f3 e3 f2 e2      
+	movq		op4, mm4            ; op4 = h0 g0 f0 e0
+	 punpckhwd	mm0, mm7            ; mm0 = h3 g3 h2 g2      
+	movq		op5, mm5            ; op5 = h1 g1 f1 e1
+	 punpckhdq	mm6, mm0            ; mm6 = h3 g3 f3 e3 = MM7 
+	movq		mm4, op0            ; mm4 = a3 a2 a1 a0      
+	 punpckldq	mm1, mm0            ; mm1 = h2 g2 f2 e2 = MM6 
+	movq		mm5, op1            ; mm5 = b3 b2 b1 b0      
+	 movq		mm0, mm4            ; mm0 = a3 a2 a1 a0      
+	movq		op7, mm6            ; op7 = h3 g3 f3 e3
+	 punpcklwd	mm0, mm5            ; mm0 = b1 a1 b0 a0      
+	movq		op6, mm1            ; op6 = h2 g2 f2 e2
+	 punpckhwd	mm4, mm5            ; mm4 = b3 a3 b2 a2      
+	movq		mm5, mm2            ; mm5 = c3 c2 c1 c0      
+	 punpcklwd	mm2, mm3            ; mm2 = d1 c1 d0 c0      
+	movq		mm1, mm0            ; mm1 = b1 a1 b0 a0      
+	 punpckldq	mm0, mm2            ; mm0 = d0 c0 b0 a0 = MM0 
+	punpckhdq	mm1, mm2            ; mm1 = d1 c1 b1 a1 = MM1 
+	 movq		mm2, mm4            ; mm2 = b3 a3 b2 a2      
+	movq		op0, mm0            ; op0 = d0 c0 b0 a0
+	 punpckhwd	mm5, mm3            ; mm5 = d3 c3 d2 c2      
+	movq		op1, mm1            ; op1 = d1 c1 b1 a1
+	 punpckhdq	mm4, mm5            ; mm4 = d3 c3 b3 a3 = MM3 
+	punpckldq	mm2, mm5            ; mm2 = d2 c2 b2 a2 = MM2 
+	movq		op3, mm4            ; op3 = d3 c3 b3 a3
+	movq		op2, mm2	            ; op2 = d2 c2 b2 a2
+ENDM
+
+; Stack overlay used as (fdctParams PTR [esp]) after fdct_MMX has pushed six registers.
+fdctParams  STRUC
+                    dd  6 dup (?)   ;6 pushed regs (must match the number of pushes in the prologue)
+                    dd  ?           ;return address
+    InputPtr        dd  ?           ;arg 1: INT16 * InputData
+    OutputPtr       dd  ?           ;arg 2: INT16 * OutputData
+fdctParams  ENDS
+;------------------------------------------------
+
+
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+        ALIGN 32
+
+xC1S7  QWORD   0fb15fb15fb15fb15h   ; 0fb15h = 64277 ~ cos(1*pi/16) * 65536, repeated in all four 16-bit lanes
+xC2S6  QWORD   0ec83ec83ec83ec83h   ; 0ec83h = 60547 ~ cos(2*pi/16) * 65536
+xC3S5  QWORD   0d4dbd4dbd4dbd4dbh   ; 0d4dbh = 54491 ~ cos(3*pi/16) * 65536
+xC4S4  QWORD   0b505b505b505b505h   ; 0b505h = 46341 ~ cos(4*pi/16) * 65536
+xC5S3  QWORD   08e3a8e3a8e3a8e3ah   ; 08e3ah = 36410 ~ cos(5*pi/16) * 65536
+xC6S2  QWORD   061f861f861f861f8h   ; 061f8h = 25080 ~ cos(6*pi/16) * 65536
+xC7S1  QWORD   031f131f131f131f1h   ; 031f1h = 12785 ~ cos(7*pi/16) * 65536
+TIRX   QWORD   00000000000000000h   ; scratch: irot_input_x (written and read by the Fdct macro)
+TIRY   QWORD   00000000000000000h   ; scratch: irot_input_y (Fdct and Fdct_new macros)
+TIC1   QWORD   00000000000000000h   ; scratch: icommon_product1 (Fdct macro)
+TIC2   QWORD   00000000000000000h   ; scratch: icommon_product2 (Fdct macro)
+TID07  QWORD   00000000000000000h   ; scratch: id07 (Fdct macro)
+TID34  QWORD   00000000000000000h   ; scratch: id34 (Fdct macro)
+
+; NOTE(review): the T* quadwords are fixed memory shared by every macro expansion - presumably unsafe for concurrent calls; confirm.
+
+        .CODE
+
+NAME fdct
+
+PUBLIC fdct_MMX_
+PUBLIC _fdct_MMX
+ 
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+
+;------------------------------------------------
+; void fdct_MMX ( INT16 * InputData, INT16 * OutputData )
+;
+; Runs the Transpose and Fdct_new macros four times over an 8x8 block of
+; INT16 values (16 bytes per row).  The first two passes read InputData and
+; write OutputData; the last two work in place on OutputData.
+; Only eax and edx are referenced directly here; all six pushed registers
+; are restored before returning and emms is issued on exit.
+;
+fdct_MMX_:
+_fdct_MMX:
+    push    esi
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;
+; ESP = Stack Pointer                      MM0 = Free
+; ESI = Free                               MM1 = Free
+; EDI = Free                               MM2 = Free
+; EBP = Free                               MM3 = Free
+; EBX = Free                               MM4 = Free
+; ECX = Free                               MM5 = Free
+; EDX = Free                               MM6 = Free
+; EAX = Free                               MM7 = Free
+;
+
+    mov         eax,(fdctParams PTR [esp]).InputPtr             ; load pointer to input data
+    mov         edx,(fdctParams PTR [esp]).OutputPtr            ; load pointer to output data
+    
+    ;
+    ; Input data is an 8x8 block.  To make processing of the data more efficient
+    ; we will transpose the block of data to two 4x8 blocks???
+    ;
+    ; Each macro call below takes qword operands; for Transpose the first
+    ; eight appear to be sources and the last eight (op0..op7) destinations.
+    ;
+
+    ; Pass 1: rows 0-3 of the input (left halves at +0..+48, right halves at +8..+56)
+    Transpose [eax], [eax+16], [eax+32], [eax+48], [eax+8], [eax+24], [eax+40], [eax+56], [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+	Fdct_new [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]	
+
+    ; Pass 2: rows 4-7 of the input, same layout 64 bytes further on
+    Transpose [eax+64], [eax+80], [eax+96], [eax+112], [eax+72], [eax+88], [eax+104], [eax+120], [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+    Fdct_new [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+
+    ; Pass 3: left halves of all eight output rows, in place
+    Transpose [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112], [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112] 
+    Fdct_new [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+
+    ; Pass 4: right halves of all eight output rows, in place
+    Transpose [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120], [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+    Fdct_new [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+    
+    
+theExit:
+
+    emms                                ; leave MMX state before returning to the caller
+
+    pop     edx
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+
+    ret
+
+
+NAME FDct1D4Mmx
+
+PUBLIC FDct1D4Mmx_
+PUBLIC _FDct1D4Mmx
+ 
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+;------------------------------------------------
+; void FDct1D4Mmx ( INT16 * InputData, INT16 * OutputData )
+;
+; One-dimensional 8-point forward DCT on four columns at once.
+;   ip[k] is the qword (four INT16 lanes) at InputData  + 8*k, k = 0..7
+;   op[k] is the qword written to            OutputData + 8*k
+;
+; Multiplies use pmulhw (signed high word of the product).  Constants with
+; the top bit set (xC1S7..xC5S3) therefore give c*x - x, and x is added back
+; afterwards; xC6S2 and xC7S1 are used directly.
+; The steps marked "Truncated" add the sign bit of the multiplicand
+; (psrlw 15), i.e. +1 in every lane where the input was negative.
+;
+; TIRY is a static scratch qword that is written and re-read here, so
+; concurrent calls would share it.
+; Only eax and edx are used as general registers; the other pushed
+; registers are saved and restored untouched.
+;
+FDct1D4Mmx_:
+_FDct1D4Mmx:
+    push    esi
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;
+; ESP = Stack Pointer                      MM0 = Free
+; ESI = Free                               MM1 = Free
+; EDI = Free                               MM2 = Free
+; EBP = Free                               MM3 = Free
+; EBX = Free                               MM4 = Free
+; ECX = Free                               MM5 = Free
+; EDX = Free                               MM6 = Free
+; EAX = Free                               MM7 = Free
+;
+
+    mov         eax,(fdctParams PTR [esp]).InputPtr             ; load pointer to input data
+    mov         edx,(fdctParams PTR [esp]).OutputPtr            ; load pointer to output data
+
+
+	movq        mm0,[eax]           ; mm0 = ip0
+    movq        mm1,[eax + 8]       ; mm1 = ip1
+    movq        mm2,[eax + 24]      ; mm2 = ip3
+    movq        mm3,[eax + 40]      ; mm3 = ip5
+	movq        mm4,[eax]           ; mm4 = ip0
+    movq        mm5,[eax + 8]       ; mm5 = ip1
+    movq        mm6,[eax + 24]      ; mm6 = ip3
+    movq        mm7,[eax + 40]      ; mm7 = ip5
+
+
+    paddsw      mm0,[eax + 56]      ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,[eax + 16]      ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,[eax + 32]      ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,[eax + 48]      ; mm3 = ip5 + ip6 = is56
+    psubsw      mm4,[eax + 56]      ; mm4 = ip0 - ip7 = id07
+    psubsw      mm5,[eax + 16]      ; mm5 = ip1 - ip2 = id12
+
+	 psubsw		mm0,mm2				; mm0 = is07 - is34
+
+	 paddsw		mm2,mm2				; mm2 = 2 * is34
+
+     psubsw      mm6,[eax + 32]     ; mm6 = ip3 - ip4 = id34
+	 
+     paddsw		mm2,mm0				; mm2 = is07 + is34 = is0734
+	 psubsw		mm1,mm3				; mm1 = is12 - is56
+	 movq		TIRY,mm0			; Save is07 - is34 to free mm0;
+	 paddsw		mm3,mm3				; mm3 = 2 * is56
+     paddsw		mm3,mm1				; mm3 = is12 + is56	= is1256
+
+	 psubsw      mm7,[eax + 48]     ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+	psubsw		mm5,mm7				; mm5 = id12 - id56
+	paddsw		mm7,mm7				; mm7 = 2 * id56
+	paddsw		mm7,mm5				; mm7 = id12 + id56
+
+									; mm4 = id07
+									
+									; mm6 = id34	
+;---------------------------------------------------------------------
+; op[0], op[4]
+;	mm0			Free
+;	mm2			is0734
+;	mm3			is1256
+	
+
+	psubsw		mm2,mm3				; mm2 = is0734 - is1256
+	paddsw		mm3,mm3				; mm3 = 2 * is1256
+
+	movq		mm0,mm2				; make a copy 
+	paddsw		mm3,mm2				; mm3 = is0734 + is1256
+
+	pmulhw		mm0,xC4S4			; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+	paddw		mm0,mm2				; mm0 = xC4S4 * ( is0734 - is1256 )
+	psrlw		mm2,15				; mm2 = sign bit of ( is0734 - is1256 )
+	paddw		mm0,mm2				; Truncate mm0, now it is op[4]
+
+	movq		mm2,mm3				;
+	movq		[edx + 32],mm0		; save op4, now mm0,mm2 are free
+
+	movq		mm0,mm3				;
+	pmulhw		mm3,xC4S4			; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+	psrlw		mm2,15				; mm2 = sign bit of ( is0734 +is1256 )
+	paddw		mm3,mm0				; mm3 = xC4S4 * ( is0734 +is1256 )	
+	paddw		mm3,mm2				; Truncate mm3, now it is op[0]
+
+	movq		[edx],mm3				; save op0
+
+;----------------------------------------------------------------------
+; op[2], op[6]
+;	mm0			Free
+;	mm2			Free
+;	mm3			Free
+;	mm5			id12 - id56			irot_input_x
+;	TIRY		is07 - is34			irot_input_y
+
+	movq		mm3,TIRY			; mm3 = irot_input_y
+	pmulhw		mm3,xC2S6			; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+	movq		mm2,TIRY			;
+	movq		mm0,mm2				;
+	
+	psrlw		mm2,15				; mm2 = sign bit of irot_input_y
+	paddw		mm3,mm0				; mm3 = xC2S6 * irot_input_y
+	
+	paddw       mm3,mm2				; Truncated
+	movq		mm0, mm5;			;
+
+
+	movq		mm2, mm5;
+	pmulhw		mm0, xC6S2			; mm0 = xC6S2 * irot_input_x
+
+	psrlw		mm2, 15			
+	paddw		mm0, mm2			; Truncated
+
+	paddsw		mm3, mm0			; op[2] = xC2S6 * irot_input_y + xC6S2 * irot_input_x
+	movq		[edx + 16], mm3			; Save op2
+
+
+	movq		mm0, mm5			;
+	movq		mm2, mm5			;
+	
+	pmulhw		mm5, xC2S6			; mm5 = xC2S6 * irot_input_x - irot_input_x
+	psrlw		mm2, 15				;
+
+	movq		mm3, TIRY			;
+	paddw		mm5, mm0		    ; mm5 = xC2S6 * irot_input_x
+
+	paddw		mm5, mm2			; Truncated
+	movq		mm2, mm3			
+	
+	pmulhw		mm3, xC6S2			; mm3 = xC6S2 * irot_input_y
+	psrlw		mm2, 15
+
+	paddw		mm3, mm2			; Truncated
+	psubsw		mm3, mm5			; op[6] = xC6S2 * irot_input_y - xC2S6 * irot_input_x
+
+	movq		[edx + 48], mm3			; Save op6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			is12 - is56
+;	mm7			id12 + id56
+
+	movq		mm0, xC4S4
+	movq		mm2, mm1
+	movq		mm3, mm1
+
+	pmulhw		mm1, mm0			; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+	psrlw		mm2, 15				
+
+	paddw		mm1, mm3			; mm1 = xC4S4 * ( is12 - is56 )
+	paddw		mm1, mm2			; Truncate mm1, now it is icommon_product1
+
+	movq		mm2, mm7
+	movq		mm3, mm7			
+
+	pmulhw		mm7, mm0			; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+	psrlw		mm2, 15			
+
+	paddw		mm7, mm3			; mm7 = xC4S4 * ( id12 + id56 )
+	paddw		mm7, mm2			; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			icommon_product1
+;	mm7			icommon_product2
+;   mm4			id07
+;	mm6			id34
+
+	
+	pxor		mm0, mm0			; Clear mm0
+	psubsw		mm0, mm6			; mm0 = - id34
+
+	psubsw		mm0, mm7			; mm0 = - ( id34 + icommon_product2 )
+	paddsw		mm6, mm6			; mm6 = 2 * id34
+	paddsw		mm6, mm0			; mm6 = id34 - icommon_product2
+
+	psubsw		mm4, mm1			; mm4 = id07 - icommon_product1
+	paddsw		mm1, mm1			; mm1 = 2 * icommon_product1
+	paddsw		mm1, mm4			; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; op[1], op[7]
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			irot_input_x   ( id07 + icommon_product1 )
+;	mm0			irot_input_y   ( - ( id34 + icommon_product2 ) )
+
+	movq		mm7, xC1S7
+	movq		mm2, mm1
+
+	movq		mm3, mm1;
+	pmulhw		mm1, mm7			; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+	movq		mm7, xC7S1			;
+	psrlw		mm2, 15				
+	
+	paddw		mm1, mm3			; mm1 = xC1S7 * irot_input_x
+	paddw		mm1, mm2			; Truncated
+
+	pmulhw		mm3, mm7			; mm3 = xC7S1 * irot_input_x
+	paddw		mm3, mm2			; Truncated
+
+	movq		mm5, mm0			
+	movq	    mm2, mm0
+
+	movq		mm7, xC1S7			
+	pmulhw		mm0, mm7			; mm0 = xC1S7 * irot_input_y - irot_input_y
+	
+	movq		mm7, xC7S1			
+	psrlw		mm2, 15			
+	
+	paddw		mm0, mm5			; mm0 = xC1S7 * irot_input_y
+	paddw		mm0, mm2			; Truncated
+
+	pmulhw		mm5, mm7			; mm5 = xC7S1 * irot_input_y
+	paddw		mm5, mm2			; Truncated
+
+	psubsw		mm1, mm5			; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op1
+	paddsw		mm3, mm0			; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = op7
+	
+	movq		[edx + 8], mm1
+	movq		[edx + 56], mm3
+;-----------------------------------------------------------------------------
+; op[3], op[5]
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			Free
+;	mm0			Free
+;   mm4         id07 - icommon_product1 = irot_input_x
+;   mm6			id34 - icommon_product2 = irot_input_y
+
+	movq		mm0, xC3S5
+	movq		mm1, xC5S3
+
+	movq		mm5, mm6
+	movq		mm7, mm6
+
+	movq		mm2, mm4
+	movq		mm3, mm4
+
+	pmulhw		mm4, mm0			; mm4 = xC3S5 * irot_input_x - irot_input_x
+	pmulhw		mm6, mm1			; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+	psrlw		mm2, 15				; mm2 = sign bit of irot_input_x
+	psrlw		mm5, 15				; mm5 = sign bit of irot_input_y
+
+	paddw		mm4, mm3			; mm4 = xC3S5 * irot_input_x
+	paddw		mm6, mm7			; mm6 = xC5S3 * irot_input_y
+
+	paddw		mm4, mm2			; Truncated
+	paddw		mm6, mm5			; Truncated
+
+	psubsw		mm4, mm6			; op3
+	movq		[edx + 24], mm4			;
+
+	movq		mm4, mm3			;
+	movq		mm6, mm7			;
+
+	pmulhw		mm3, mm1			; mm3 = xC5S3 * irot_input_x - irot_input_x
+	pmulhw		mm7, mm0			; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+	paddw		mm4, mm2			; mm4 = irot_input_x + its sign bit
+	paddw		mm6, mm5			; mm6 = irot_input_y + its sign bit
+
+	paddw		mm3, mm4			; mm3 = xC5S3 * irot_input_x
+	paddw		mm7, mm6			; mm7 = xC3S5 * irot_input_y
+
+	paddw		mm3, mm7			; op5
+	movq		[edx + 40], mm3			;
+
+    
+    emms
+
+    pop     edx
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+
+    ret
+
+
+;************************************************
+        END
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c
new file mode 100644
index 00000000..a2e7f774
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DFrameR.c
@@ -0,0 +1,470 @@
+/****************************************************************************
+*
+*   Module Title :     DFrameR.C
+*
+*   Description  :     Functions to read from the input bitstream.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "postproc_if.h"
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_bitread
+ *
+ *  INPUTS        :     BOOL_CODER *br : Pointer to a Bool Decoder instance.
+ *                      int bits       : Number of bits to be read from input stream
+ *                                       (at most 32; values <= 0 read nothing).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: The requested bits; the first bit decoded ends
+ *                              up in the highest requested position.
+ *
+ *  FUNCTION      :     Decodes the requested number of bits from the encoded data buffer.
+ *
+ *  SPECIAL NOTES :     Each decoded bit is widened to UINT32 before it is
+ *                      shifted, so a 32 bit read never shifts a signed int
+ *                      into its sign bit. 
+ *
+ ****************************************************************************/
+UINT32 VP6_bitread ( BOOL_CODER *br, int bits )
+{
+	UINT32 z = 0;
+	int bit;
+
+	// Most significant bit first.
+	for ( bit=bits-1; bit>=0; bit-- )
+	{
+		z |= ((UINT32)(VP6_DecodeBool128(br)) << bit);
+	}
+	return z;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_bitread1
+ *
+ *  INPUTS        :     BOOL_CODER *br : Bool Decoder instance to pull from.
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     UINT32: Result of one VP6_DecodeBool128 call.
+ *
+ *  FUNCTION      :     Single-bit counterpart of VP6_bitread.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+INLINE 
+UINT32 VP6_bitread1 ( BOOL_CODER *br ) 
+{
+	UINT32 DecodedBit = (VP6_DecodeBool128(br));
+
+	return DecodedBit;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InitHeaderBuffer
+ *
+ *  INPUTS        :     FRAME_HEADER *Header  : Pointer to FRAME_HEADER data structure.
+ *                      unsigned char *Buffer : Pointer to buffer containing bitstream header.
+ *                                              At least 4 bytes are read from it here.
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ *
+ *
+ *  FUNCTION      :     Initialises extraction of bits from header buffer:
+ *                      loads the first four bytes, most significant first,
+ *                      into the 32 bit cache and marks all 32 bits available.
+ *
+ *  SPECIAL NOTES :     Bytes are widened to UINT32 before shifting; shifting
+ *                      the promoted int left by 24 is undefined for bytes
+ *                      >= 0x80. 
+ *
+ ****************************************************************************/
+void InitHeaderBuffer ( FRAME_HEADER *Header, unsigned char *Buffer )
+{
+    Header->buffer = Buffer;
+    Header->value  = ((UINT32)Buffer[0]<<24) | ((UINT32)Buffer[1]<<16) | ((UINT32)Buffer[2]<<8) | (UINT32)Buffer[3];
+    Header->bits_available = 32;
+
+    // Next refill starts right after the four bytes just cached.
+	Header->pos = 4;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ReadHeaderBits
+ *
+ *  INPUTS        :     FRAME_HEADER *Header : Pointer to FRAME_HEADER data structure.
+ *                      UINT32 BitsRequired  : Number of bits to extract (0..32).
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     UINT32: Bits requested, right aligned. 0 when no bits
+ *                              are requested.
+ *
+ *  FUNCTION      :     Extracts requested number of bits from header buffer.
+ *                      Unread bits are kept left aligned in Header->value;
+ *                      when they run out the next four bytes of the buffer
+ *                      are loaded, most significant byte first.
+ *
+ *  SPECIAL NOTES :     No shift by 32 is ever performed (an empty cache, a
+ *                      zero bit request and a full 32 bit read are handled
+ *                      explicitly) and bytes are widened to UINT32 before
+ *                      being shifted. The refill reads four bytes without
+ *                      any bounds check; the buffer length is not known here. 
+ *
+ ****************************************************************************/
+UINT32 ReadHeaderBits ( FRAME_HEADER *Header, UINT32 BitsRequired )
+{
+    UINT32 pos       = Header->pos;
+    UINT32 available = Header->bits_available;
+    UINT32 value     = Header->value;
+    UINT8 *Buffer    = &Header->buffer[pos];
+    UINT32 RetVal    = 0;
+
+    // Nothing requested: leave the reader state untouched.
+    if ( BitsRequired == 0 )
+        return 0;
+
+    if ( available < BitsRequired )
+    {
+        // Need more bits from input buffer...
+        // Use up what is left in the cache first and make room below it
+        // for the bits that come from the next word.
+        if ( available > 0 )
+            RetVal = (value >> (32-available)) << (BitsRequired-available);
+        BitsRequired -= available;
+
+        value  = ((UINT32)Buffer[0]<<24) | ((UINT32)Buffer[1]<<16) | ((UINT32)Buffer[2]<<8) | (UINT32)Buffer[3];
+        pos += 4;
+        available = 32;
+    }
+
+    // BitsRequired is in 1..32 here, so the shift count is in 0..31.
+    RetVal |= value >> (32-BitsRequired);
+    
+    // Update data structure
+    Header->value          = (BitsRequired < 32) ? (value<<BitsRequired) : 0;
+    Header->bits_available = available-BitsRequired;
+    Header->pos = pos;
+
+    return RetVal;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LoadFrameHeader
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     BOOL: FALSE in case of error, TRUE otherwise.
+ *
+ *  FUNCTION      :     Loads a frame header & carries out some initialization.
+ *
+ *  SPECIAL NOTES :     The first fields come from the raw header bits
+ *                      (ReadHeaderBits on pbi->Header); the rest is read
+ *                      through the bool decoder pbi->br, which is started
+ *                      at a byte offset that depends on the frame type and
+ *                      on whether a second partition offset is present.
+ *                      A key frame whose version exceeds
+ *                      CURRENT_DECODE_VERSION returns FALSE immediately; a
+ *                      version below 3 sets FALSE but the remaining
+ *                      fields are still processed. 
+ *
+ ****************************************************************************/
+#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) ) 
+
+static BOOL LoadFrameHeader ( PB_INSTANCE *pbi )
+{
+    UINT8  DctQMask;
+    FRAME_HEADER *Header = &pbi->Header;
+    BOOL   RetVal = TRUE;
+
+    // Is the frame an inter frame or a key frame
+    pbi->FrameType = (UINT8)ReadHeaderBits(Header, 1);
+
+    // Quality (Q) index
+    DctQMask = (UINT8)ReadHeaderBits(Header, 6);
+
+	// Are we using two BOOL coder data streams/partitions
+    pbi->MultiStream = (UINT8)ReadHeaderBits(Header, 1);
+
+	// If the frame was a base frame then read the frame dimensions and build a bitmap structure. 
+	if ( (pbi->FrameType == BASE_FRAME) )
+	{
+        // Read the bitstream version number (5 bits) and the profile (2 bits)
+	    pbi->Vp3VersionNo = (UINT8)ReadHeaderBits(Header,   5 );
+	    pbi->VpProfile = (UINT8)ReadHeaderBits(Header,   2 );
+
+		// Bitstream is newer than this decoder: give up before touching any more state
+		if(pbi->Vp3VersionNo > CURRENT_DECODE_VERSION)
+		{
+			RetVal = FALSE;
+			return RetVal;
+		}
+
+		// Initialise version specific quantiser values
+		VP6_InitQTables( pbi->quantizer, pbi->Vp3VersionNo );
+
+		// is this keyframe section of the file interlaced
+		pbi->Configuration.Interlaced = (UINT8)ReadHeaderBits(Header, 1);	
+
+		// Start the first bool decoder (modes, mv, probs and some flags)
+		// The offset depends on whether or not we are using multiple bool code streams
+		// 16 header bits have been read so far (2 bytes); the optional 16 bit
+		// partition offset takes 2 more, hence +4 versus +2.
+		if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+		{
+			VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 4)));
+
+			// Read the buffer offset for the second bool decoder buffer if it is being used
+		    pbi->Buff2Offset = (UINT32)ReadHeaderBits(Header, 16);
+		}
+		else
+			VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 2)));
+
+		// SCALING related stuff
+		SetPPInterlacedMode(pbi->postproc, pbi->Configuration.Interlaced);
+        if(pbi->Configuration.Interlaced)
+        {
+            SetDeInterlaceMode(pbi->postproc, pbi->DeInterlaceMode);
+        }
+
+        {             
+             UINT32 HFragments;             
+             UINT32 VFragments;             
+             UINT32 HOldScaled;
+             UINT32 VOldScaled;
+             UINT32 HNewScaled;
+             UINT32 VNewScaled;
+			 UINT32 OutputHFragments;
+			 UINT32 OutputVFragments;
+
+             // Each dimension is an 8 bit value from the bool decoder, doubled
+             // to give a count of 8 pixel fragments. Vertical comes first.
+             VFragments = 2 * ((UINT8)VP6_bitread( &pbi->br,   8 ));             
+             HFragments = 2 * ((UINT8)VP6_bitread( &pbi->br,   8 ));              
+
+             OutputVFragments = 2 * ((UINT8)VP6_bitread( &pbi->br,   8 ));             
+             OutputHFragments = 2 * ((UINT8)VP6_bitread( &pbi->br,   8 ));              
+
+             // Guard the divisions below against a ratio that was never set
+             if(pbi->Configuration.HRatio == 0)
+                 pbi->Configuration.HRatio = 1;
+
+             if(pbi->Configuration.VRatio == 0)
+                 pbi->Configuration.VRatio = 1;
+
+             // Scaled size implied by the settings left from the previous key frame
+             HOldScaled = pbi->Configuration.HScale * pbi->HFragments * 8 / pbi->Configuration.HRatio;
+             VOldScaled = pbi->Configuration.VScale * pbi->VFragments * 8 / pbi->Configuration.VRatio;
+
+			 pbi->Configuration.ExpandedFrameWidth = OutputHFragments * 8;
+			 pbi->Configuration.ExpandedFrameHeight = OutputVFragments * 8;
+
+			 // Pick a vertical Scale/Ratio pair (1/1, 5/4, 5/3 or 2/1) from the
+			 // coded size relative to the output size
+			 if(VFragments >= OutputVFragments)
+			 {
+	             pbi->Configuration.VScale = 1;
+		         pbi->Configuration.VRatio = 1;
+			 }
+			 else if (5*VFragments >= 4*OutputVFragments)
+			 {
+	             pbi->Configuration.VScale = 5;
+		         pbi->Configuration.VRatio = 4;
+			 }
+			 else if (5*VFragments >= 3*OutputVFragments)
+			 {
+	             pbi->Configuration.VScale = 5;
+		         pbi->Configuration.VRatio = 3;
+			 }
+			 else
+			 {
+	             pbi->Configuration.VScale = 2;
+		         pbi->Configuration.VRatio = 1;
+			 }
+
+			 // Same selection for the horizontal direction
+			 if(HFragments >= OutputHFragments)
+			 {
+	             pbi->Configuration.HScale = 1;
+		         pbi->Configuration.HRatio = 1;
+			 }
+			 else if (5*HFragments >= 4*OutputHFragments)
+			 {
+	             pbi->Configuration.HScale = 5;
+		         pbi->Configuration.HRatio = 4;
+			 }
+			 else if (5*HFragments >= 3*OutputHFragments)
+			 {
+	             pbi->Configuration.HScale = 5;
+		         pbi->Configuration.HRatio = 3;
+			 }
+			 else
+			 {
+	             pbi->Configuration.HScale = 2;
+		         pbi->Configuration.HRatio = 1;
+			 }
+
+             HNewScaled = pbi->Configuration.HScale * HFragments * 8 / pbi->Configuration.HRatio;
+             VNewScaled = pbi->Configuration.VScale * VFragments * 8 / pbi->Configuration.VRatio;
+
+			 pbi->ScaleWidth = HNewScaled;
+			 pbi->ScaleHeight = VNewScaled; 
+
+             pbi->Configuration.ScalingMode = ((UINT32)VP6_bitread( &pbi->br, 2 ));
+
+             // we have a new input size
+             if( VFragments != pbi->VFragments || HFragments != pbi->HFragments )
+             {
+                 // Validate the combination of height and width.                 
+                 pbi->Configuration.VideoFrameWidth = HFragments*8;                 
+                 pbi->Configuration.VideoFrameHeight = VFragments*8;                  
+				 VP6_InitFrameDetails(pbi);
+             }
+
+             // we have a new intermediate buffer clean the screen 
+             if( pbi->ScaleBuffer != 0 &&
+                 (HOldScaled != HNewScaled || VOldScaled != VNewScaled) )
+             {
+                 // turn the screen black!!                 
+                 // First region is cleared to 0x00, the half sized region after it to 0x80
+                 memset(pbi->ScaleBuffer, 0x0, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) );                 
+                 memset(pbi->ScaleBuffer + 	(pbi->OutputWidth+32) * (pbi->OutputHeight+32),
+					 0x80, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) / 2 );                                   
+             }
+		}         
+
+		// Unless in SIMPLE_PROFILE read the filter strategy for fractional pels
+		if ( pbi->VpProfile != SIMPLE_PROFILE )
+		{
+			// Find out if selective bicubic filtering should be used for motion prediction.
+			if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+			{
+				pbi->PredictionFilterMode = AUTO_SELECT_PM;
+
+				// Read in the variance threshold to be used
+				// (versions up to 7 scale the 5 bit value up by 5 more bits)
+				pbi->PredictionFilterVarThresh = ((UINT32)VP6_bitread( &pbi->br, 5) << ((pbi->Vp3VersionNo > 7) ? 0 : 5) );
+
+				// Read the bicubic vector length limit (0 actually means ignore vector length)
+				pbi->PredictionFilterMvSizeThresh = (UINT8)VP6_bitread( &pbi->br, 3);
+			}
+			else
+			{
+				if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+					pbi->PredictionFilterMode = BICUBIC_ONLY_PM;
+				else
+					pbi->PredictionFilterMode = BILINEAR_ONLY_PM;
+			}
+
+			if ( pbi->Vp3VersionNo > 7 )
+				pbi->PredictionFilterAlpha = VP6_bitread( &pbi->br, 4);
+			else
+				pbi->PredictionFilterAlpha = 16;	// VP61 backwards compatibility
+		}
+    }
+	// Non key frame specific stuff
+	else
+	{
+		// Start the first bool decoder (modes, mv, probs and some flags)
+		// The offset depends on whether or not we are using multiple bool code streams
+		// Only 8 header bits (1 byte) precede it on inter frames; the optional
+		// 16 bit partition offset adds 2 bytes, hence +3 versus +1.
+		if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+		{
+			VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 3)));
+			
+			// Read the buffer offset for the second bool decoder buffer if it is being used
+		    pbi->Buff2Offset = (UINT32)ReadHeaderBits(Header, 16);
+		}
+		else
+			VP6_StartDecode(&pbi->br, ((unsigned char*)(Header->buffer + 1)));
+
+		// Find out if the golden frame should be refreshed this frame - use bool decoder
+		pbi->RefreshGoldenFrame = (BOOL)VP6_DecodeBool(&pbi->br, 128);
+
+		if ( pbi->VpProfile != SIMPLE_PROFILE )
+		{
+			// Determine if loop filtering is on and if so what type should be used
+			// (a second bit is appended below the first only when the first is set)
+			pbi->UseLoopFilter = VP6_DecodeBool(&pbi->br, 128);
+			if ( pbi->UseLoopFilter )
+			{
+				pbi->UseLoopFilter = (pbi->UseLoopFilter << 1) | VP6_DecodeBool(&pbi->br, 128);
+			}
+
+			if ( pbi->Vp3VersionNo > 7 )
+			{
+				// Are the prediction characteristics being updated this frame
+				if ( VP6_DecodeBool(&pbi->br, 128) )
+				{
+					// Find out if selective bicubic filtering should be used for motion prediction.
+					if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+					{
+						pbi->PredictionFilterMode = AUTO_SELECT_PM;
+
+						// Read in the variance threshold to be used
+						pbi->PredictionFilterVarThresh = (UINT32)VP6_bitread( &pbi->br, 5);
+
+						// Read the bicubic vector length limit (0 actually means ignore vector length)
+						pbi->PredictionFilterMvSizeThresh = (UINT8)VP6_bitread( &pbi->br, 3);
+					}
+					else
+					{
+						if ( (BOOL)VP6_DecodeBool(&pbi->br, 128) )
+							pbi->PredictionFilterMode = BICUBIC_ONLY_PM;
+						else
+							pbi->PredictionFilterMode = BILINEAR_ONLY_PM;
+					}
+
+					pbi->PredictionFilterAlpha = VP6_bitread( &pbi->br, 4 );
+				}
+			}
+			else
+				pbi->PredictionFilterAlpha = 16;	// VP61 backwards compatibility
+		}
+	}
+
+	// All frames (Key & Inter frames)
+	// Versions below 3 are reported as an error, but decoding of the
+	// remaining header fields still goes ahead.
+	if(pbi->Vp3VersionNo < 3 )
+		RetVal = FALSE;
+
+	// Should this frame use huffman for the dct data
+	pbi->UseHuffman = (BOOL)VP6_DecodeBool(&pbi->br, 128);
+
+	// Set this frame quality value from Q Index
+	pbi->quantizer->FrameQIndex = DctQMask;
+	VP6_UpdateQ( pbi->quantizer, pbi->Vp3VersionNo );  
+
+    return RetVal;                    
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_LoadFrame
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance whose header is parsed.
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     BOOL: TRUE when LoadFrameHeader succeeds, FALSE otherwise.
+ *
+ *  FUNCTION      :     Public entry point for frame loading; all of the work
+ *                      is done by LoadFrameHeader.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+BOOL VP6_LoadFrame ( PB_INSTANCE *pbi )
+{ 
+    // Parse the frame header (this also picks up the frame size).
+    if ( LoadFrameHeader(pbi) )
+        return TRUE;
+
+    return FALSE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_SetFrameType
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance to update.
+ *                      UINT8 FrType     : New frame type value.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Stores FrType in pbi->FrameType.
+ *
+ *  SPECIAL NOTES :     The value is stored as given; no checking is done. 
+ *
+ ****************************************************************************/
+void VP6_SetFrameType ( PB_INSTANCE *pbi, UINT8 FrType )
+{ 
+    // Plain setter: record the caller's frame type.
+    pbi->FrameType = FrType;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_GetFrameType
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance to query.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT8: Value currently held in pbi->FrameType.
+ *
+ *  FUNCTION      :     Read accessor matching VP6_SetFrameType.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+UINT8 VP6_GetFrameType ( PB_INSTANCE *pbi )
+{
+    UINT8 CurrentType = pbi->FrameType;
+
+    return CurrentType; 
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c
new file mode 100644
index 00000000..311cb78a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/DSystemDependant.c
@@ -0,0 +1,160 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_SetPbParam
+*
+*  INPUTS        :     PB_INSTANCE *pbi        : Pointer to decoder instance.
+*                      PB_COMMAND_TYPE Command : Command action specifier.
+*                      uintptr_t Parameter     : Command dependent value (a plain
+*                                                number, or a YUV_BUFFER_CONFIG
+*                                                pointer for PBC_SET_REFERENCEFRAME).
+*                      
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*  
+*  FUNCTION      :     Generalised command interface to decoder.
+*
+*  SPECIAL NOTES :     Does nothing unless POSTPROCESS is defined.
+*                      PBC_SET_POSTPROC with a value of 9 is forwarded as
+*                      PBC_SET_CPUFREE with a value of 70. 
+*
+****************************************************************************/
+#if defined(POSTPROCESS)
+/* Picks the post processing level for the load figure computed by PBC_SET_CPUFREE. */
+static int VP6_CpuFreeToPostProcLevel ( double PixelsPerMhz )
+{
+	if( PixelsPerMhz > 150 )
+		return 0;
+	if( PixelsPerMhz > 100 )
+		return 8;
+	if( PixelsPerMhz > 90 )
+		return 4;
+	if( PixelsPerMhz > 80 )
+		return 5;
+	return 6;
+}
+#endif
+
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+	switch ( Command )
+	{
+	case PBC_SET_CPUFREE:
+		{
+#if defined(_MSC_VER)
+			double Pixels = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+			double FreeMhz = pbi->ProcessorFrequency * Parameter / 100;
+			double PixelsPerMhz = 100 * sqrt(1.0*Pixels) / FreeMhz;
+#else
+			// No frequency estimate on other compilers: fixed figure
+			double PixelsPerMhz = 100 *10;
+#endif
+			pbi->CPUFree = Parameter; 
+			pbi->PostProcessingLevel = VP6_CpuFreeToPostProcLevel( PixelsPerMhz );
+			break;
+		}
+
+	case PBC_SET_ADDNOISE:
+		pbi->AddNoiseMode = Parameter;
+		//SetAddNoiseMode(pbi->postproc, Parameter);
+		break;
+
+	case PBC_SET_REFERENCEFRAME:
+		// The supplied frame replaces both reference frames
+		CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+		CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+		break;
+
+	case PBC_SET_POSTPROC:
+		if( Parameter != 9 )
+		{
+			// Explicit level: switch the CPU based selection off
+			pbi->CPUFree = 0;
+			pbi->PostProcessingLevel = Parameter;
+		}
+		else
+			VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+		break;
+
+	case PBC_SET_DEINTERLACEMODE:
+		pbi->DeInterlaceMode = Parameter;
+		break;
+
+	case PBC_SET_BLACKCLAMP:
+		pbi->BlackClamp = Parameter;
+		break;
+
+	case PBC_SET_WHITECLAMP:
+		pbi->WhiteClamp = Parameter;
+		break;
+
+	default:
+		break;
+	}
+#endif
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_GetProcessorFrequency()
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     unsigned long: always 0 in this generic build.
+ *
+ *  FUNCTION      :     Placeholder for a query of the host processor's
+ *                      clock frequency.
+ *
+ *  SPECIAL NOTES :     Stub function; nothing is measured. 
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency ( void )
+{
+    const unsigned long Frequency = 0;
+
+    return Frequency;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DMachineSpecificConfig
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Binds the VP6_BuildQuantIndex function pointer to
+ *                      the generic implementation.
+ *
+ *  SPECIAL NOTES :     This is the only pointer set up here. 
+ *
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+    // Generic build: portable routine, no CPU specific variant.
+    VP6_BuildQuantIndex = VP6_BuildQuantIndex_Generic;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_IssueWarning
+ *
+ *  INPUTS        :     char *WarningMessage : Text of the warning.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Warning hook for the decoder.
+ *
+ *  SPECIAL NOTES :     This generic version discards the message. 
+ *
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+	// Intentionally empty: the cast only silences the unused parameter warning.
+	(void) WarningMessage;
+	return;
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c
new file mode 100644
index 00000000..704f3fe0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/FrameIni.c
@@ -0,0 +1,478 @@
+/****************************************************************************
+*
+*   Module Title :     FrameIni.c
+*
+*   Description  :     Initialization functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "duck_mem.h"
+#include <intsafe.h> // TODO: make a mac version of this
+/****************************************************************************
+*  Module Static Variables
+****************************************************************************/  
+static const struct // table of 12 (row, col) pairs; VP6_InitFrameDetails turns each into pbi->mvNearOffset[i] via MBOffset()
+{
+	INT32 row;	// passed as the first argument of MBOffset()
+	INT32 col;	// passed as the second argument of MBOffset()
+} NearMacroBlocks[12] = // NOTE(review): presumably offsets relative to the current macroblock - confirm against MBOffset()
+{
+	{ -1,  0 },
+	{  0, -1 },
+	{ -1, -1 },
+	{ -1,  1 },
+	{ -2,  0 },
+	{  0, -2 },
+	{ -1, -2 },
+	{ -2, -1 },
+	{ -2,  1 },
+	{ -1,  2 },
+	{ -2, -2 },
+	{ -2,  2 }
+};
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_InitMBI
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Initialize MBI structure.
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+void 
+VP6_InitMBI(PB_INSTANCE *pbi)
+{
+	// Fills all six entries of pbi->mbi.blockDxInfo[]: entries 0-3 take the Y values, entries 4 and 5 the U/V values.
+	pbi->mbi.blockDxInfo[0].ZeroRunProbsBasePtr = 
+		pbi->mbi.blockDxInfo[1].ZeroRunProbsBasePtr = 
+		pbi->mbi.blockDxInfo[2].ZeroRunProbsBasePtr = 
+		pbi->mbi.blockDxInfo[3].ZeroRunProbsBasePtr = 
+		pbi->mbi.blockDxInfo[4].ZeroRunProbsBasePtr = 
+		pbi->mbi.blockDxInfo[5].ZeroRunProbsBasePtr = (UINT8 *)pbi->ZeroRunProbs;
+	// AC probabilities: blocks 0-3 start at ACProbOffset(0,0,0,0), blocks 4-5 at ACProbOffset(1,0,0,0).
+	pbi->mbi.blockDxInfo[0].AcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[1].AcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[2].AcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[3].AcProbsBasePtr = pbi->AcProbs + ACProbOffset(0,0,0,0);
+	pbi->mbi.blockDxInfo[4].AcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[5].AcProbsBasePtr = pbi->AcProbs + ACProbOffset(1,0,0,0);
+	// DC probabilities: same split, using DCProbOffset(0,0) and DCProbOffset(1,0).
+	pbi->mbi.blockDxInfo[0].DcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[1].DcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[2].DcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[3].DcProbsBasePtr = pbi->DcProbs + DCProbOffset(0,0);
+	pbi->mbi.blockDxInfo[4].DcProbsBasePtr = 
+		pbi->mbi.blockDxInfo[5].DcProbsBasePtr = pbi->DcProbs + DCProbOffset(1,0);
+	// DC node contexts: same split, using DcNodeOffset(0,0,0) and DcNodeOffset(1,0,0).
+	pbi->mbi.blockDxInfo[0].DcNodeContextsBasePtr = 
+		pbi->mbi.blockDxInfo[1].DcNodeContextsBasePtr = 
+		pbi->mbi.blockDxInfo[2].DcNodeContextsBasePtr = 
+		pbi->mbi.blockDxInfo[3].DcNodeContextsBasePtr = pbi->DcNodeContexts + DcNodeOffset(0,0,0);
+	pbi->mbi.blockDxInfo[4].DcNodeContextsBasePtr = 
+		pbi->mbi.blockDxInfo[5].DcNodeContextsBasePtr = pbi->DcNodeContexts + DcNodeOffset(1,0,0);
+	// The dequantisation table of each block is picked through VP6_QTableSelect[block].
+	pbi->mbi.blockDxInfo[0].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[0]];
+	pbi->mbi.blockDxInfo[1].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[1]];
+	pbi->mbi.blockDxInfo[2].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[2]];
+	pbi->mbi.blockDxInfo[3].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[3]];
+	pbi->mbi.blockDxInfo[4].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[4]];
+	pbi->mbi.blockDxInfo[5].dequantPtr = pbi->quantizer->dequant_coeffs[VP6_QTableSelect[5]];
+	// LastDc: blocks 0-3 share fc.LastDcY; block 4 uses fc.LastDcU and block 5 fc.LastDcV.
+	pbi->mbi.blockDxInfo[0].LastDc = 
+		pbi->mbi.blockDxInfo[1].LastDc = 
+		pbi->mbi.blockDxInfo[2].LastDc = 
+		pbi->mbi.blockDxInfo[3].LastDc = pbi->fc.LastDcY;
+	pbi->mbi.blockDxInfo[4].LastDc = pbi->fc.LastDcU;
+	pbi->mbi.blockDxInfo[5].LastDc = pbi->fc.LastDcV;
+	// Left context: blocks 0/1 share fc.LeftY[0], blocks 2/3 share fc.LeftY[1]; blocks 4 and 5 use fc.LeftU / fc.LeftV.
+	pbi->mbi.blockDxInfo[0].Left = &pbi->fc.LeftY[0];
+	pbi->mbi.blockDxInfo[1].Left = &pbi->fc.LeftY[0];
+	pbi->mbi.blockDxInfo[2].Left = &pbi->fc.LeftY[1];
+	pbi->mbi.blockDxInfo[3].Left = &pbi->fc.LeftY[1];
+	pbi->mbi.blockDxInfo[4].Left = &pbi->fc.LeftU;
+	pbi->mbi.blockDxInfo[5].Left = &pbi->fc.LeftV;
+	// Motion vector shift and modulo mask: Y_* constants for blocks 0-3, UV_* constants for blocks 4-5.
+	pbi->mbi.blockDxInfo[0].MvShift =
+		pbi->mbi.blockDxInfo[1].MvShift =
+		pbi->mbi.blockDxInfo[2].MvShift =
+		pbi->mbi.blockDxInfo[3].MvShift = Y_MVSHIFT;
+	pbi->mbi.blockDxInfo[4].MvShift =
+		pbi->mbi.blockDxInfo[5].MvShift = UV_MVSHIFT;
+
+	pbi->mbi.blockDxInfo[0].MvModMask =
+		pbi->mbi.blockDxInfo[1].MvModMask =
+		pbi->mbi.blockDxInfo[2].MvModMask =
+		pbi->mbi.blockDxInfo[3].MvModMask = Y_MVMODMASK;
+	pbi->mbi.blockDxInfo[4].MvModMask =
+		pbi->mbi.blockDxInfo[5].MvModMask = UV_MVMODMASK;
+	// Both reconstruction strides are taken from Configuration.YStride (blocks 0-3) and Configuration.UVStride (blocks 4-5).
+	pbi->mbi.blockDxInfo[0].CurrentReconStride =
+		pbi->mbi.blockDxInfo[1].CurrentReconStride =
+		pbi->mbi.blockDxInfo[2].CurrentReconStride =
+		pbi->mbi.blockDxInfo[3].CurrentReconStride = pbi->Configuration.YStride;
+	pbi->mbi.blockDxInfo[4].CurrentReconStride =
+		pbi->mbi.blockDxInfo[5].CurrentReconStride = pbi->Configuration.UVStride;
+
+	pbi->mbi.blockDxInfo[0].FrameReconStride =
+		pbi->mbi.blockDxInfo[1].FrameReconStride =
+		pbi->mbi.blockDxInfo[2].FrameReconStride =
+		pbi->mbi.blockDxInfo[3].FrameReconStride = pbi->Configuration.YStride;
+	pbi->mbi.blockDxInfo[4].FrameReconStride =
+		pbi->mbi.blockDxInfo[5].FrameReconStride = pbi->Configuration.UVStride;
+
+	// Zero all 6*64 coefficients in one call: VP6_AllocateFragmentInfo makes the six coeffsPtr buffers one contiguous allocation.
+	memset(pbi->mbi.blockDxInfo[0].coeffsPtr, 0, 6*64*sizeof(Q_LIST_ENTRY));
+
+	//______ compressor only ______
+	pbi->mbi.blockDxInfo[0].FrameSourceStride =
+		pbi->mbi.blockDxInfo[1].FrameSourceStride =
+		pbi->mbi.blockDxInfo[2].FrameSourceStride =
+		pbi->mbi.blockDxInfo[3].FrameSourceStride = pbi->Configuration.VideoFrameWidth;
+	pbi->mbi.blockDxInfo[4].FrameSourceStride =
+		pbi->mbi.blockDxInfo[5].FrameSourceStride = pbi->Configuration.VideoFrameWidth/2;
+
+	pbi->mbi.blockDxInfo[0].CurrentSourceStride =
+		pbi->mbi.blockDxInfo[1].CurrentSourceStride =
+		pbi->mbi.blockDxInfo[2].CurrentSourceStride =
+		pbi->mbi.blockDxInfo[3].CurrentSourceStride = pbi->Configuration.VideoFrameWidth;
+	pbi->mbi.blockDxInfo[4].CurrentSourceStride =
+		pbi->mbi.blockDxInfo[5].CurrentSourceStride = pbi->Configuration.VideoFrameWidth/2;
+	// Plane index: 0 for blocks 0-3, 1 for both blocks 4 and 5.
+	pbi->mbi.blockDxInfo[0].Plane =
+		pbi->mbi.blockDxInfo[1].Plane =
+		pbi->mbi.blockDxInfo[2].Plane =
+		pbi->mbi.blockDxInfo[3].Plane = 0;
+	pbi->mbi.blockDxInfo[4].Plane =
+		pbi->mbi.blockDxInfo[5].Plane = 1;
+	//______ compressor only ______
+
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_DeleteFragmentInfo
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     De-allocates memory associated with decoder data structures.
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+void VP6_DeleteFragmentInfo ( PB_INSTANCE *pbi )
+{
+	// Each buffer is freed only when its pointer is non-zero, then the pointer is zeroed so a repeated call is harmless.
+	if(pbi->mbi.blockDxInfo[0].coeffsPtr)
+		duck_free(pbi->mbi.blockDxInfo[0].coeffsPtr);
+	pbi->mbi.blockDxInfo[0].coeffsPtr = 0;	// NOTE(review): blockDxInfo[1..5].coeffsPtr point into this same allocation and are not reset here
+
+	if(	pbi->FragInfo)
+		duck_free(pbi->FragInfo);
+	pbi->FragInfo      = 0;
+
+	if(	pbi->fc.AboveY)
+		duck_free(pbi->fc.AboveY);
+	pbi->fc.AboveY      = 0;
+
+	if(	pbi->fc.AboveU)
+		duck_free(pbi->fc.AboveU);
+	pbi->fc.AboveU      = 0;
+
+	if(	pbi->fc.AboveV)
+		duck_free(pbi->fc.AboveV);
+	pbi->fc.AboveV      = 0;
+
+	if(	pbi->MBInterlaced)
+		duck_free(pbi->MBInterlaced);
+	pbi->MBInterlaced      = 0;
+
+	if(	pbi->MBMotionVector)
+		duck_free(pbi->MBMotionVector);
+	pbi->MBMotionVector      = 0;
+
+	if(	pbi->predictionMode)
+		duck_free(pbi->predictionMode);
+	pbi->predictionMode      = 0;
+
+#ifdef DMAREADREFERENCE
+	if(pbi->ReferenceBlocks)
+		duck_free(pbi->ReferenceBlocks);
+	pbi->ReferenceBlocks      = 0;
+#endif
+#ifdef DMAWRITERECON
+	if(pbi->ReconstructedMBs)
+		duck_free(pbi->ReconstructedMBs);
+	pbi->ReconstructedMBs      = 0;
+#endif
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_AllocateFragmentInfo
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     BOOL: TRUE if successful, FALSE on error.
+*
+*  FUNCTION      :     Initializes the Playback instance passed in.
+*
+*  SPECIAL NOTES :     Uses duck_memalign to ensure data structures are aligned
+*                      on 32-byte boundaries to improve cache performance. 
+*
+****************************************************************************/
+BOOL VP6_AllocateFragmentInfo ( PB_INSTANCE *pbi )
+{
+	// Release whatever a previous call allocated; any failure below frees everything again and returns FALSE.
+	VP6_DeleteFragmentInfo(pbi);
+	// One block of 6*64 coefficients; blockDxInfo[1..5].coeffsPtr are 64-entry slices of it, not separate allocations.
+	pbi->mbi.blockDxInfo[0].coeffsPtr = (Q_LIST_ENTRY *) duck_memalign(32, sizeof(Q_LIST_ENTRY)*64*6, DMEM_GENERAL);
+	if(!pbi->mbi.blockDxInfo[0].coeffsPtr) {VP6_DeleteFragmentInfo(pbi); return FALSE;}
+	pbi->mbi.blockDxInfo[1].coeffsPtr = pbi->mbi.blockDxInfo[0].coeffsPtr + 64;
+	pbi->mbi.blockDxInfo[2].coeffsPtr = pbi->mbi.blockDxInfo[1].coeffsPtr + 64;
+	pbi->mbi.blockDxInfo[3].coeffsPtr = pbi->mbi.blockDxInfo[2].coeffsPtr + 64;
+	pbi->mbi.blockDxInfo[4].coeffsPtr = pbi->mbi.blockDxInfo[3].coeffsPtr + 64;
+	pbi->mbi.blockDxInfo[5].coeffsPtr = pbi->mbi.blockDxInfo[4].coeffsPtr + 64;
+
+	// Above-context rows: HFragments entries for Y and HFragments/2 for each of U and V, each with 8 extra entries.
+	pbi->fc.AboveY = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+	if(!pbi->fc.AboveY) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+	pbi->fc.AboveU = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+	if(!pbi->fc.AboveU) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+	pbi->fc.AboveV = (BLOCK_CONTEXT *) duck_memalign(32, (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+	if(!pbi->fc.AboveV) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+	// Per-macroblock arrays, pbi->MacroBlocks entries each. Original TODO: only the encoder uses MBInterlaced; move it to compdll.
+	pbi->MBInterlaced = (char *) duck_memalign(32, pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+	if(!pbi->MBInterlaced) { VP6_DeleteFragmentInfo(pbi); 	return FALSE; }
+
+	pbi->predictionMode = (char *) duck_memalign(32, pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+	if(!pbi->predictionMode) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+	pbi->MBMotionVector = (MOTION_VECTOR *) duck_memalign(32, pbi->MacroBlocks * sizeof(MOTION_VECTOR ), DMEM_GENERAL);
+	if(!pbi->MBMotionVector) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+	// One FRAG_INFO per fragment (pbi->UnitFragments). Original TODO: only the encoder uses this; move it to compdll.
+	pbi->FragInfo = (FRAG_INFO *) duck_memalign(32, pbi->UnitFragments * sizeof(FRAG_INFO), DMEM_GENERAL);
+	if(!pbi->FragInfo) { VP6_DeleteFragmentInfo(pbi); return FALSE;}
+
+#ifdef DMAREADREFERENCE
+	pbi->ReferenceBlocks=(UINT8(*)[192])duck_memalign(32, 6*192, DMEM_GENERAL);
+	if(!pbi->ReferenceBlocks){ VP6_DeleteFragmentInfo(pbi); return FALSE;}
+#endif
+
+#ifdef DMAWRITERECON
+	pbi->ReconstructedMBs = (UINT8*) duck_memalign(32, 768, DMEM_GENERAL);
+	if(!pbi->ReconstructedMBs){ VP6_DeleteFragmentInfo(pbi); return FALSE;}
+#endif
+
+	return TRUE;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_DeleteFrameInfo
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     De-allocate memory associated with frame level data
+*                      structures.
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+void VP6_DeleteFrameInfo ( PB_INSTANCE *pbi )
+{
+	if(pbi->ThisFrameRecon )	// each of the four frame buffers is freed only when its pointer is non-zero
+		duck_free(pbi->ThisFrameRecon );
+	if(pbi->GoldenFrame)
+		duck_free(pbi->GoldenFrame);
+	if(pbi->LastFrameRecon)
+		duck_free(pbi->LastFrameRecon);
+	if(pbi->PostProcessBuffer)
+		duck_free(pbi->PostProcessBuffer);
+	// Zero the pointers so that calling this function again does not free them twice.
+	pbi->ThisFrameRecon         = 0;
+	pbi->GoldenFrame            = 0;
+	pbi->LastFrameRecon         = 0;
+	pbi->PostProcessBuffer      = 0;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_AllocateFrameInfo
+*
+*  INPUTS        :     PB_INSTANCE * pbi      : Pointer to decoder instance.
+*                      unsigned int FrameSize : Size of the YUV frame in bytes.
+*
+*  OUTPUTS       :     None
+*
+*  RETURNS       :     BOOL: TRUE if successful, FALSE on error.
+*
+*  FUNCTION      :     Initializes the Playback instance passed in
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+BOOL VP6_AllocateFrameInfo ( PB_INSTANCE *pbi, unsigned int FrameSize )
+{
+	// Release any buffers from a previous call; any failure below frees everything again and returns FALSE.
+	VP6_DeleteFrameInfo(pbi);
+
+	// Allocate four identically sized frame buffers: FrameSize entries plus YStride extra bytes each.
+	// The extra YStride bytes are the "2 extra lines" added so that copy12x12 doesn't fail
+	// when we have a large motion vector in V on the last v block.  
+	// Note : We never use these pixels anyway so this doesn't hurt.
+	pbi->ThisFrameRecon = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+	if(!pbi->ThisFrameRecon) { VP6_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->GoldenFrame = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY ), DMEM_GENERAL);
+	if(!pbi->GoldenFrame) { VP6_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->LastFrameRecon = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+	if(!pbi->LastFrameRecon) { VP6_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->PostProcessBuffer = (UINT8 *)duck_memalign(32, pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+	if(!pbi->PostProcessBuffer) { VP6_DeleteFrameInfo(pbi); return FALSE;}
+
+	return TRUE;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_InitFrameDetails
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     BOOL: TRUE on success, FALSE on failure.
+*
+*  FUNCTION      :     Initialises various details about the frame.
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+BOOL VP6_InitFrameDetails ( PB_INSTANCE *pbi )
+{
+	UINT32 i;
+	int FrameSize;	// NOTE(review): signed, but computed from the recon plane sizes and passed on as unsigned int
+
+	if ( pbi->CPUFree > 0 )
+		VP6_SetPbParam( pbi, PBC_SET_CPUFREE, pbi->CPUFree );
+
+	/* Plane sizes: each UIntMult must return S_OK, otherwise control falls to the final return FALSE. */                                                        
+	if (UIntMult(pbi->Configuration.VideoFrameWidth, pbi->Configuration.VideoFrameHeight, &pbi->YPlaneSize) == S_OK)
+	{
+		pbi->UVPlaneSize = pbi->YPlaneSize / 4;  
+		pbi->HFragments = pbi->Configuration.VideoFrameWidth / pbi->Configuration.HFragPixels;	// NOTE(review): assumes HFragPixels/VFragPixels are non-zero (VP6_InitialiseConfiguration sets 8) - confirm call order
+		pbi->VFragments = pbi->Configuration.VideoFrameHeight / pbi->Configuration.VFragPixels;
+		if (UIntMult(pbi->VFragments, pbi->HFragments, &pbi->YPlaneFragments) == S_OK && 
+			UIntMult(pbi->YPlaneFragments, 3, &pbi->UnitFragments) == S_OK)
+		{
+			pbi->UnitFragments /= 2;	// together with the *3 above: UnitFragments = YPlaneFragments * 3 / 2
+			pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
+
+			pbi->Configuration.YStride = (pbi->Configuration.VideoFrameWidth + STRIDE_EXTRA);
+			pbi->Configuration.UVStride = pbi->Configuration.YStride / 2;
+			// NOTE(review): the "+ STRIDE_EXTRA" additions here and the FrameSize sum below are plain additions, not overflow-checked.
+			if (UIntMult(pbi->Configuration.YStride, pbi->Configuration.VideoFrameHeight + STRIDE_EXTRA, &pbi->ReconYPlaneSize) == S_OK)
+			{
+				pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
+				// Reconstruction buffer size: the Y plane plus two UV planes of a quarter of its size each.
+				FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
+				// Plane start offsets, in Y, U, V order, for the unpadded layout and for the recon layout.
+				pbi->YDataOffset = 0;
+				pbi->UDataOffset = pbi->YPlaneSize;
+				pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
+				pbi->ReconYDataOffset = 0;
+				pbi->ReconUDataOffset = pbi->ReconYPlaneSize;
+				pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize;
+
+				// Image dimensions in Macro-Blocks: dimension/16 rounded up, plus BORDER_MBS on each side
+				pbi->MBRows  = (2*BORDER_MBS)+(pbi->Configuration.VideoFrameHeight/16)  + ( pbi->Configuration.VideoFrameHeight%16 ? 1 : 0 );
+				pbi->MBCols  = (2*BORDER_MBS)+(pbi->Configuration.VideoFrameWidth/16)  + ( pbi->Configuration.VideoFrameWidth%16 ? 1 : 0 );
+				pbi->MacroBlocks = pbi->MBRows * pbi->MBCols;
+				// Convert the 12 (row, col) pairs of NearMacroBlocks into offsets with MBOffset().
+				for( i=0; i<12; i++ )
+					pbi->mvNearOffset[i] = MBOffset(NearMacroBlocks[i].row, NearMacroBlocks[i].col);
+
+				ChangePostProcConfiguration(pbi->postproc, &pbi->Configuration);
+				// VP6_AllocateFragmentInfo frees its own partial allocations before it reports failure.
+				if ( !VP6_AllocateFragmentInfo(pbi) )
+					return FALSE;
+
+				if ( !VP6_AllocateFrameInfo(pbi, FrameSize) )
+				{
+					VP6_DeleteFragmentInfo(pbi);
+					return FALSE;
+				}
+
+				// No scale buffer yet, an output width is set, and the output size differs from the coded frame size
+				if ( pbi->ScaleBuffer == 0 && pbi->OutputWidth &&
+					(pbi->Configuration.VideoFrameWidth != pbi->OutputWidth ||
+					pbi->Configuration.VideoFrameHeight != pbi->OutputHeight ) )
+				{
+					// Add 32 to outputwidth to ensure that we have enough to overscale 
+					// (ie scale to a size that's bigger than our output size). Do this
+					// now even though we don't use it so we don't have to check border conditions.
+					pbi->ScaleBuffer = (UINT8 *) 
+						duck_malloc(32 + 3 * 
+						(pbi->OutputWidth + 32) * 
+						(pbi->OutputHeight + 32)* 
+						sizeof(YUV_BUFFER_ENTRY) / 2, DMEM_GENERAL);
+					// NOTE(review): the duck_malloc result is not checked here, and the size expression is not overflow-checked.
+				}
+
+				// VP6_InitMBI writes through the coeffsPtr buffers, so it has to follow the allocations above.
+				VP6_InitMBI(pbi);
+
+				return TRUE;
+			}
+		}
+	}
+	return FALSE;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_InitialiseConfiguration
+*
+*  INPUTS        :     PB_INSTANCE * pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Sets the base size of a coding block (8x8).
+*
+*  SPECIAL NOTES :     None. 
+*
+****************************************************************************/
+void VP6_InitialiseConfiguration ( PB_INSTANCE *pbi )
+{  
+	pbi->Configuration.HFragPixels = 8;	// fragment width in pixels; divisor for HFragments in VP6_InitFrameDetails
+	pbi->Configuration.VFragPixels = 8;	// fragment height in pixels; divisor for VFragments in VP6_InitFrameDetails
+} 
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c
new file mode 100644
index 00000000..c6dcffdd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/Huffman.c
@@ -0,0 +1,350 @@
+/****************************************************************************
+*
+*   Module Title :     Huffman.c
+*
+*   Description  :     Huffman coding routines.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "huffman.h"
+#include "pbdll.h"
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/              
+typedef struct _SORT_NODE	// NOTE(review): not referenced by any function in this part of the file; the code below uses sortnode
+{
+    int next;
+    int freq;
+    unsigned char value;
+} SORT_NODE;
+
+typedef struct _sortnode	// element of the frequency-sorted, index-linked list built by VP6_BuildHuffTree
+{
+    int next;           // index of the next list element in the same array, or -1 at the end of the list
+    int freq;           // frequency of the token, or summed frequency of a merged node
+    tokenorptr value;   // selector 1: value is a token; selector 0: value is an index into the HUFF_NODE array
+} sortnode;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InsertSorted
+ *
+ *  INPUTS        :     sortnode *sn   : Array of sort nodes.
+ *                      int node       : Index of node to be inserted.
+ *                      int *startnode : Pointer to _head of linked-list.
+ *
+ *  OUTPUTS       :     int *startnode : Pointer to _head of linked-list.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Inserts a node into a sorted linklist.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+static void InsertSorted ( sortnode *sn, int node, int *startnode )
+{
+    int which = *startnode;   // candidate successor of the new node (-1 once the end of the list is passed)
+    int prior = *startnode;   // element visited just before 'which'
+
+    // find the position at which to insert the node: skip every element with a strictly lower freq
+    while( which != -1 && sn[node].freq > sn[which].freq )
+    {
+        prior = which;
+        which = sn[which].next;
+    }
+
+    if(which == *startnode)   // nothing was skipped (this also covers an empty list, *startnode == -1): new head
+    {
+        *startnode = node;
+        sn[node].next = which;
+    }
+    else                      // splice the node in between 'prior' and 'which'
+    {
+        sn[prior].next = node;
+        sn[node].next = which;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_BuildHuffTree
+ *
+ *  INPUTS        :     int values           : Number of values in the tree.
+ *                      unsigned int *counts : Histogram of token frequencies.
+ *
+ *  OUTPUTS       :     HUFF_NODE *hn        : Array of nodes (containing token frequency) 
+ *                                             from which to create tree.
+ *                      unsigned int *counts : Histogram of token frequencies (0 freq clipped to 1).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Creates a Huffman tree data structure from list
+ *                      of token frequencies.
+ *
+ *  SPECIAL NOTES :     sn[] has 256 entries and 2*values-1 are used, so values must be <= 128. 
+ *
+ ****************************************************************************/
+void VP6_BuildHuffTree ( HUFF_NODE *hn, unsigned int *counts, int values )
+{
+    int i;
+    sortnode sn[256];   // leaves occupy sn[0..values-1]; every merge below appends one more entry at sn[sncount]
+    int sncount=0;
+    int startnode=0;    // head of the sorted list; initially the single element 0
+
+    // NOTE:
+    // Create huffman tree in reverse order so that the root will always be 0
+    int huffptr=values-1;   // decremented before each write: first merged node goes to hn[values-2]
+
+    // Set up sorted linked list of values/pointers into the huffman tree
+    for ( i=0; i<values; i++ )
+    {
+        sn[i].value.selector = 1;   // selector 1 marks a token (leaf)
+        sn[i].value.value = i;
+        if ( counts[i] == 0 )       // zero counts are clipped to 1, and written back to the caller's array
+            counts[i] = 1;
+        sn[i].freq = counts[i];
+        sn[i].next = -1;
+    }
+
+    sncount = values;
+
+    // Connect above list into a linked list (element 0 is already the head, so start at 1)
+    for ( i=1; i<values; i++ )
+        InsertSorted ( sn, i, &startnode );
+
+    // while there is more than one node in our linked list
+    while ( sn[startnode].next != -1 )
+    {
+        int first = startnode;              // lowest-frequency element
+        int second = sn[startnode].next;    // next-lowest element
+        int sumfreq = sn[first].freq + sn[second].freq;
+
+        // set-up new merged huffman node
+        --huffptr;
+        hn[huffptr].leftunion.left = sn[first].value;
+        hn[huffptr].rightunion.right = sn[second].value;
+        hn[huffptr].freq = 256 * sn[first].freq / sumfreq;   // left child's share of the pair, scaled by 256
+
+        // set up new merged sort node pointing to our huffnode
+        sn[sncount].value.selector = 0;     // selector 0 marks an index into hn[]
+        sn[sncount].value.value = huffptr;
+        sn[sncount].freq = sumfreq;
+        sn[sncount].next = -1;
+
+        // remove the two nodes we just merged from the linked list
+        startnode = sn[second].next;
+
+        // insert the new sort node into the proper location
+        InsertSorted(sn, sncount, &startnode);
+
+        // account for new nodes
+        sncount++;
+    }
+
+    return;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_BuildHuffLookupTable
+ *
+ *  INPUTS        :     HUFF_NODE *HuffTreeRoot : Pointer to root of Huffman tree. 
+ *
+ *  OUTPUTS       :     UINT16 *HuffTable       : Array (LUT) of Huffman codes.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Traverse Huffman tree to create LUT of Huffman codes.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_BuildHuffLookupTable ( HUFF_NODE *HuffTreeRoot, UINT16 *HuffTable )
+{
+    int i, j;
+    int bits;
+    tokenorptr torp;
+
+    for ( i=0; i<(1<<HUFF_LUT_LEVELS); i++ )   // one table entry per possible HUFF_LUT_LEVELS-bit prefix
+    {
+        bits = i;        
+        j=0;             // number of bits consumed so far
+
+        torp.value    = 0;   // start the walk at node 0, the root
+        torp.selector = 0;
+
+        do                   // consume bits most-significant first: 1 takes the right branch, 0 the left
+        {
+            j++;
+            if ( (bits>>(HUFF_LUT_LEVELS - j)) & 1 )
+                torp = HuffTreeRoot[torp.value].rightunion.right;
+            else
+                torp = HuffTreeRoot[torp.value].leftunion.left;
+        }
+        while ( !(torp.selector) && (j < HUFF_LUT_LEVELS) );   // stop at a token, or when all prefix bits are used
+        
+//        HuffTable[i] = torp.value<<1 | torp.selector | (j << 12);
+        ((HUFF_TABLE_NODE *)HuffTable)[i].value = torp.value;      // token, or node index when flag is 0
+        ((HUFF_TABLE_NODE *)HuffTable)[i].flag = torp.selector;    // 1 if a token was reached within the prefix
+        ((HUFF_TABLE_NODE *)HuffTable)[i].length = j;              // bits consumed; NOTE(review): cast assumes HUFF_TABLE_NODE overlays one UINT16 - confirm
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_CreateCodeArray
+ *
+ *  INPUTS        :     HUFF_NODE *hn  : List of Huffman tree nodes.
+ *                      int node       : Current position within list of Huffman tree nodes.
+ *                      int codevalue  : Huffman code as found so far. 
+ *                      int codelength : Length of Huffman code so far (in bits).
+ *
+ *  OUTPUTS       :     unsigned int *codearray    : Array to hold Huffman codes.
+ *                      unsigned char *lengtharray : Array to hold lengths of Huffman codes.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Recursively traverse Huffman tree to create LUT of Huffman codes.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_CreateCodeArray
+( 
+    HUFF_NODE *hn,
+    int node,
+    unsigned int *codearray,
+    unsigned char *lengtharray,
+    int codevalue, 
+    int codelength 
+)
+{    
+    /* Left child: a token gets the code so far with a 0 bit appended and length codelength+1; */
+    /* otherwise recurse into the child node with that extended code. */
+    if( hn[node].leftunion.left.selector )
+    {
+        codearray[hn[node].leftunion.left.value] = (codevalue<<1)+0;
+        lengtharray[hn[node].leftunion.left.value] = codelength+1;
+    }
+    else
+    {
+        VP6_CreateCodeArray ( 
+            hn, 
+            hn[node].leftunion.left.value,
+            codearray,
+            lengtharray, 
+            ((codevalue << 1) + 0), 
+            (codelength + 1) );
+    }
+    
+    if( hn[node].rightunion.right.selector )   // right child: same handling, with a 1 bit appended
+    {
+        codearray[hn[node].rightunion.right.value] = (codevalue<<1)+1;
+        lengtharray[hn[node].rightunion.right.value] = codelength+1;
+    }
+    else
+    {
+        VP6_CreateCodeArray ( 
+            hn, 
+            hn[node].rightunion.right.value,
+            codearray,
+            lengtharray, 
+            ((codevalue << 1) + 1), 
+            (codelength + 1) );
+    }    
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeValue
+ *
+ *  INPUTS        :     BOOL_CODER *bc : Pointer to a Bool Coder instance.
+ *                      HUFF_NODE *hn  : List of Huffman tree nodes.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     int: Decoded token value.
+ *
+ *  FUNCTION      :     Traverse the Huffman tree by reading node decisions
+ *                      from the bitstream until a leaf node is reached. Returns
+ *                      the value associated with this leaf node.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+int VP6_DecodeValue ( BOOL_CODER *bc, HUFF_NODE *hn )
+{
+    tokenorptr torp;
+	
+    torp.value    = 0;	// start at hn[0], the root
+	torp.selector = 0;
+    
+    // At each node one bool is decoded using that node's freq as the probability: 
+    // true follows the right branch, false the left. The loop ends at the first token (selector != 0).
+    do
+    {
+		if ( VP6_DecodeBool(bc, hn[torp.value].freq) )
+	        torp = hn[torp.value].rightunion.right;
+		else
+	        torp = hn[torp.value].leftunion.left;
+    }
+	while ( !(torp.selector) );
+
+	return torp.value;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeValue
+ *
+ *  INPUTS        :     BOOL_CODER *bc : Pointer to a Bool Coder instance.
+ *                      HUFF_NODE *hn  : List of Huffman tree nodes.
+ *                      int value      : Value to be encoded.
+ *                      int length     : Length of value to be encoded (in bits).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Given a Huffman code either output its bits to the encoded
+ *                      stream or measure the cost of doing so, depending on the 
+ *                      flag bc->MeasureCost. Use VP6_EncodeBool2 if only measuring
+ *                      approximate number of bits required to encode the Huffman code
+ *                      or VP6_EncodeBool if actually producing the coded bits using
+ *                      the specified Bool Coder.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+void VP6_EncodeValue
+(
+	BOOL_CODER *bc,
+    HUFF_NODE *hn,
+    int value,
+    int length
+)
+{
+    int i;
+    int node = 0;   // current tree node; starts at hn[0], the root
+
+    for ( i=length-1; i>=0; i-- )   // emit the 'length' low bits of value, most-significant first
+    {
+        int v = (value>>i) & 1;
+
+		if ( bc->MeasureCost )   // measuring only: VP6_EncodeBool2; otherwise VP6_EncodeBool
+			VP6_EncodeBool2 ( bc, (BOOL)v, hn[node].freq );
+		else
+			VP6_EncodeBool ( bc, (BOOL)v, hn[node].freq );
+
+        if ( v )   // step to the child matching the bit; the selector is not examined here
+            node = hn[node].rightunion.right.value;
+        else
+            node = hn[node].leftunion.left.value;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c
new file mode 100644
index 00000000..4c952d04
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/TokenEntropy.c
@@ -0,0 +1,195 @@
+/****************************************************************************
+*
+*   Module Title :     TokenEntropy.c
+*
+*   Description  :     Entropy configuration routines.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking. */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "tokenentropy.h"
+#include "pbdll.h"
+
+/****************************************************************************
+*   Exports
+****************************************************************************/
+// Approximate cost in bits of coding a symbol whose probability is index/256 (index 0-255).
+// Costs are multiplied by 256, e.g. index 128 (one bit) -> 255 and index 64 (two bits) -> 511.
+const UINT32 VP6_ProbCost[256] = 
+{
+2047, 2047,	1791, 1641,	1535, 1452,	1385, 1328,	1279, 1235,	1196, 1161,	1129, 1099,	1072, 1046,
+1023, 1000,	 979,  959,	 940,  922,  905,  889,	 873,  858,	 843,  829,	 816,  803,	 790,  778,
+ 767,  755,	 744,  733,	 723,  713,	 703,  693,	 684,  675,	 666,  657,	 649,  641,	 633,  625,
+ 617,  609,	 602,  594,	 587,  580,	 573,  567,	 560,  553,	 547,  541,	 534,  528,	 522,  516,
+ 511,  505,	 499,  494,	 488,  483,	 477,  472,	 467,  462,	 457,  452,	 447,  442,	 437,  433,
+ 428,  424,	 419,  415,	 410,  406,	 401,  397,	 393,  389,	 385,  381,	 377,  373,	 369,  365,
+ 361,  357,	 353,  349,	 346,  342,	 338,  335,	 331,  328,	 324,  321,	 317,  314,	 311,  307,
+ 304,  301,	 297,  294,	 291,  288,	 285,  281,	 278,  275,	 272,  269,	 266,  263,	 260,  257,
+ 255,  252,	 249,  246,	 243,  240,	 238,  235,	 232,  229,	 227,  224,	 221,  219,	 216,  214,
+ 211,  208,	 206,  203,	 201,  198,	 196,  194,	 191,  189,	 186,  184,	 181,  179,	 177,  174,
+ 172,  170,	 168,  165,	 163,  161,	 159,  156,	 154,  152,	 150,  148,	 145,  143,	 141,  139,
+ 137,  135,	 133,  131,	 129,  127,	 125,  123,	 121,  119,	 117,  115,	 113,  111,	 109,  107,
+ 105,  103,	 101,   99,	  97,   95,	  93,   92,	  90,   88,	  86,   84,	  82,   81,	  79,   77,
+  75,   73,	  72,   70,	  68,   66,	  65,	63,	  61,   60,	  58,   56,	  55,	53,	  51,	50,
+  48,   46,	  45,   43,	  41,   40,	  38,	37,	  35,   33,	  32,	30,	  29,	27,	  25,	24,
+  22,   21,	  19,   18,	  16,   15,	  13,	12,	  10,    9,	   7,	 6,	   4,	 3,	   1,	 1
+};
+
+// Index categories for previous tokens in this block 
+const UINT8  VP6_PrevTokenIndex[MAX_ENTROPY_TOKENS] = { 0,1,2,2,2,2,2,2,2,2,2,0 };
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+const UINT8  ExtraBitLengths_VP6[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 4, 5, 6, 12, 0 };
+const UINT32 VP6_DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0 };
+
+const UINT8 VP6_DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1] = 
+{ 
+	{ 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+	{ 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 }
+};
+
+const UINT8 ScanBandUpdateProbs[BLOCK_SIZE] = 
+{  
+	255, 132, 132, 159, 153, 151, 161, 170, 
+	164, 162, 136, 110, 103, 114, 129, 118, 
+	124, 125, 132, 136, 114, 110, 142, 135, 
+	134, 123, 143, 126, 153, 183, 166, 161, 
+	171, 180, 179, 164, 203, 218, 225, 217, 
+	215, 206, 203, 217, 229, 241, 248, 243,
+	253, 255, 253, 255, 255, 255, 255, 255, 
+	255, 255, 255, 255, 255, 255, 255, 255 
+};
+
+const UINT8 ZrlUpdateProbs[ZRL_BANDS][ZERO_RUN_PROB_CASES] =
+{
+	{ 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 }, 
+	{ 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 }, 
+};
+
+// Zero run probs 
+const UINT8 ZeroRunProbDefaults[ZRL_BANDS][ZERO_RUN_PROB_CASES] = 
+{  
+	{ 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 },
+	{ 135, 201, 181, 154,  98, 117, 132, 126, 146, 169, 184, 240, 246, 254 },
+};
+
+const UINT8 VP6_AcUpdateProbs[PREC_CASES][2][VP6_AC_BANDS][MAX_ENTROPY_TOKENS-1] =
+{
+	{	// preceded by 0
+		{
+			{ 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 },
+			{ 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 },
+			{ 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+		{
+			{ 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+	},
+	{	// preceded by 1
+		{
+			{ 206, 203, 227, 239, 247, 255, 253, 255, 255, 255, 255 },
+			{ 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 },
+			{ 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 },
+			{ 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 },
+			{ 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+		{
+			{ 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+	},
+	{	// preceded by > 1
+		{
+			{ 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 },
+			{ 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 },
+			{ 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 },
+			{ 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 },
+			{ 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+		{
+			{ 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 },
+			{ 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 },
+			{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+			{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+		},
+	},
+};
+
+/****************************************************************************
+*   Module Statics
+****************************************************************************/
+// Dc context equations: Dc Token contexts are 00 0!0 and !0!0
+static const LINE_EQ VP6_DcNodeEqs[CONTEXT_NODES][DC_TOKEN_CONTEXTS] =
+{
+	{ { 122, 133 },{ 133,  51 },{ 142, -16 } },		// Zero Node	
+	{ {   0,   1 },{   0,   1 },{   0,   1 } },		// EOB Node		Dummy as no EOBs in DC
+	{ {  78, 171 },{ 169,  71 },{ 221, -30 } },		// One Node
+	{ { 139, 117 },{ 214,  44 },{ 246,  -3 } },		// Low Val Node
+	{ { 168,  79 },{ 210,  38 },{ 203,  17 } },		// Two Node (2 vs 3 or 4)
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ConfigureContexts
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Configures the context dependent entropy probabilities.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_ConfigureContexts ( PB_INSTANCE *pbi )
+{
+	UINT32 Plane;
+	UINT32 Ctx;
+	UINT32 Node;
+	INT32  Prob;
+
+	// MSVC builds only: clear MMX state so floating point can work again
+#if defined(_MSC_VER)
+	ClearSysState();
+#endif
+
+	// Derive one DC node probability for every (plane, context, node) triple
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		for ( Ctx=0; Ctx<DC_TOKEN_CONTEXTS; Ctx++ )
+		{
+			// Per tree node line equation: Prob = ((DcProb * M + 128) >> 8) + C
+			for ( Node=0; Node<CONTEXT_NODES; Node++ )
+			{
+				const LINE_EQ *Eq = &VP6_DcNodeEqs[Node][Ctx];
+
+				Prob = ( ( pbi->DcProbs[DCProbOffset(Plane,Node)] * Eq->M + 128 ) >> 8 ) + Eq->C;
+				if ( Prob > 255 ) Prob = 255;	// clamp to 1..255 before narrowing to UINT8
+				if ( Prob <   1 ) Prob = 1;
+
+				pbi->DcNodeContexts[DcNodeOffset(Plane,Ctx,Node)] = (UINT8)Prob;
+			}
+		}
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c
new file mode 100644
index 00000000..901241f1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/boolhuff.c
@@ -0,0 +1,687 @@
+/****************************************************************************
+*
+*   Module Title :     boolhuff.c
+*
+*   Description  :     Boolean Encoder/Decoder
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "boolhuff.h"
+#include "TokenEntropy.h"
+#include <stdio.h>
+
+// STATS Variables for measuring section costs
+#if defined MEASURE_SECTION_COSTS
+UINT32 Sectionbits[10] = {0,0,0,0,0,0,0,0,0,0};
+UINT32 ActiveSection = 0;
+#endif
+
+#ifdef NOTNORMALIZED
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StartDecode
+ *
+ *  INPUTS        :     BOOL_CODER *bc		  : pointer to instance of a boolean decoder.
+ *						unsigned char *buffer : pointer to buffer of data to be decoded.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initializes the boolean decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StartDecode ( BOOL_CODER *bc, unsigned char *buffer )
+{
+    bc->pos    = 0;			// read offset into buffer, advanced by VP6_DecodeBool
+    bc->value  = 0;
+    bc->range  = 0;			// range < 2 makes the first VP6_DecodeBool call load 3 bytes
+    bc->buffer = buffer;	// the buffer is referenced, not copied
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *bc  : pointer to instance of a boolean decoder.
+ *						int probability : probability next symbol is a 0 (0-255)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		Next decoded bit: 0 or 1 
+ *
+ *  FUNCTION      :     Determines the next value stored in the boolean decoder 
+ *                      based upon the probability passed in. It uses a simple 
+ *                      probability model to approximate an arithmetic coder.
+ *
+ *  SPECIAL NOTES :     The accuracy of this decoder gets worse as the range 
+ *						approaches 0. This can be avoided with more complex 
+ *						normalization functions (as in a standard arithmetic)
+ *						coder. Chosen to avoid this for speed reasons.
+ *
+ ****************************************************************************/
+int VP6_DecodeBool ( BOOL_CODER *bc, int probability )
+{
+	unsigned int split;
+
+	// The range is exhausted (< 2), so a 0 can no longer be told apart from
+	// a 1: load the next 3 bytes of the stream.
+    if( bc->range < 2)
+    {
+		unsigned char *spot = bc->buffer+bc->pos;
+		bc->v[0] = spot[0];
+		bc->v[1] = spot[1];
+		bc->v[2] = spot[2];
+
+		// Restart with range 0x01000000 so (range-1) * probability below stays
+		// within 32 bits for probability <= 255.  NOTE(review): bc->value is not
+		// assigned here; presumably bc->v[] overlays its bytes - confirm in boolhuff.h.
+        bc->range = 0x01000000;
+        bc->pos += 3;
+    }
+
+	// calculate the decision point: split = (((range-1) * probability) >> 8) + 1
+	// black magic: This code works better than if I calculate probability *
+	// range and then truncating to 1 (can't explain why)
+	split = bc->range;
+	split --;				// with range >= 2 and probability <= 255 this keeps 1 <= split <= range-1
+	split *= probability;
+	split >>= 8;
+	split ++;
+
+	if( bc->value < split )		// value in the lower part [0, split): symbol is 0
+	{
+		bc->range = split;
+		return 0;
+	}
+	else						// value in the upper part: symbol is 1, rebase both to it
+	{
+		bc->range-=split;
+		bc->value-=split;
+		return 1;
+	}
+} 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StopDecode
+ *
+ *  INPUTS        :     BOOL_CODER *bc  : pointer to instance of a boolean decoder.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs clean-up for boolean decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StopDecode ( BOOL_CODER *bc )
+{
+    (void)bc;	// the decoder holds nothing that needs releasing
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StartEncode
+ *
+ *  INPUTS        :     BOOL_CODER *bc        : pointer to instance of a boolean encoder.
+ *						unsigned char *buffer : pointer to buffer to hold encoded data.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initializes the boolean encoder
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StartEncode ( BOOL_CODER *bc, unsigned char *buffer )
+{
+    bc->pos    = 0;			// write offset into buffer, advanced by VP6_EncodeBool / VP6_StopEncode
+    bc->value  = 0;
+    bc->range  = 0x01000000;	// same value VP6_EncodeBool restarts with after flushing 3 bytes
+    bc->buffer = buffer;	// the buffer is referenced, not copied
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *bc  : pointer to instance of a boolean encoder.
+ *						int x		    : value to be encoded (0 or 1).
+ *						int probability : probability of getting a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		void.
+ *
+ *  FUNCTION      :     Encodes a boolean value (0 or 1) using the specified 
+ *                      boolean encoder.
+ *
+ *  SPECIAL NOTES :     The accuracy of this encoder gets worse as the range 
+ *						approaches 0. This can be avoided with more complex 
+ *						normalization functions (as in a standard arithmetic
+ *						coder). Chose to avoid this for speed reasons.
+ *
+ ****************************************************************************/
+void VP6_EncodeBool ( BOOL_CODER *bc, int x, int probability )
+{
+	unsigned int split;
+
+	// The range is exhausted (< 2), so a 0 can no longer be told apart from
+	// a 1: write out the 3 pending bytes and start a fresh interval.
+    if( bc->range < 2 )
+    {
+		bc->buffer[bc->pos] = bc->v[0];
+		bc->buffer[bc->pos+1] = bc->v[1];
+		bc->buffer[bc->pos+2] = bc->v[2];
+        bc->pos+=3;
+
+		// Restart with range 0x01000000 so (range-1) * probability below stays
+		// within 32 bits for probability <= 255.  NOTE(review): the bytes written
+		// above come from bc->v[]; presumably it overlays bc->value - confirm in boolhuff.h.
+        bc->range = 0x01000000;
+        bc->value = 0;
+    }
+	
+	// calculate the decision point: split = (((range-1) * probability) >> 8) + 1
+	// black magic: This code works better than if I calculate probability *
+	// range and then truncating to 1 (can't explain why)
+	split = bc->range;
+	split --;				// with range >= 2 and probability <= 255 this keeps 1 <= split <= range-1
+	split *= probability;
+	split >>= 8;
+	split ++;
+	
+	if( x )					// a 1 takes the upper part of the interval
+	{
+		bc->range-=split;
+		bc->value+=split;
+	}
+	else					// a 0 takes the lower part [0, split)
+	{
+		bc->range = split;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StopEncode
+ *
+ *  INPUTS        :     BOOL_CODER *bc  : pointer to instance of a boolean encoder.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs clean-up for boolean encoder
+ *                           
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StopEncode( BOOL_CODER *bc )
+{
+	int i;
+
+	for ( i=0; i<3; i++ )	// flush the final 3 bytes
+	{ 
+		bc->buffer[bc->pos + i] = *((unsigned char *) &bc->value + i);	// bytes of value in memory order, so host byte order dependent
+	}
+    bc->pos += 3;
+}
+
+#else 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StartEncode
+ *
+ *  INPUTS        :     BOOL_CODER *br        : pointer to instance of a boolean encoder.
+ *						unsigned char *source :	pointer to buffer to hold encoded data.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Perform initialization of the boolean encoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StartEncode ( BOOL_CODER *br, unsigned char *source )
+{
+	br->lowvalue = 0;
+	br->range    = 255;
+	br->value    = 0;
+	br->count    = -24; 	// VP6_EncodeBool emits its first byte once 24 shifts have counted this up to 0
+	br->buffer   = source;	// output buffer; referenced, not copied
+	br->pos      = 0;		// next write offset into buffer
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StopEncode
+ *
+ *  INPUTS        :     BOOL_CODER *br : pointer to instance of a boolean encoder.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs clean-up for a boolean encoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StopEncode ( BOOL_CODER *br )
+{	
+	if(br->count<-16)		// each branch shifts by -count, the shifts still owed before the next byte
+		br->lowvalue <<= (24-(br->count&7));
+	else if(br->count<-8)
+		br->lowvalue <<= (16-(br->count&7));
+	else 
+		br->lowvalue <<= (8-(br->count&7));
+
+	br->buffer[br->pos++] = (br->lowvalue>>24);			// then write lowvalue, most significant byte first
+	br->buffer[br->pos++] = (br->lowvalue>>16) & 0xff;
+	br->buffer[br->pos++] = (br->lowvalue>> 8) & 0xff;
+	br->buffer[br->pos++] = (br->lowvalue    ) & 0xff;
+	br->buffer[br->pos++] = 0;							// followed by one zero byte
+}
+	
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *br  : pointer to instance of a boolean encoder.
+ *						int bit         : value to be encoded (0 or 1).
+ *						int probability : probability of getting a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		void
+ *
+ *  FUNCTION      :     Encodes a boolean value (0 or 1) using the 
+ *                      specified boolean encoder.
+ *
+ *  SPECIAL NOTES :     This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+void VP6_EncodeBool ( BOOL_CODER *br, int bit, int probability )
+{
+	unsigned int split;
+    unsigned int count = br->count;		// kept unsigned: a negative br->count wraps and ++count still reaches 0
+    unsigned int range = br->range;
+    unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+	if (bit)
+		Sectionbits[ActiveSection] += VP6_ProbCost[255-probability];
+	else
+		Sectionbits[ActiveSection] += VP6_ProbCost[probability];
+#endif
+
+    split = 1 +  (((range-1) * probability) >> 8);	// size of the lower (0) part of the interval
+	
+    range = split;
+    if(bit)								// a 1 takes the upper part instead
+	{
+		lowvalue += split;
+		range = br->range-split;
+	}
+
+    while(range < 0x80)					// renormalise: double until range is at least 128
+	{
+		range <<= 1;
+
+		if((lowvalue & 0x80000000 ))	// bit 31 would be lost by the shift below: carry it into the bytes already written
+        {
+            int x = br->pos-1;
+            while(x>=0 && br->buffer[x] == 0xff)	// 0xff bytes wrap to 0 and pass the carry on
+            {
+                br->buffer[x] =(unsigned char)0;
+                x--;
+            }
+            br->buffer[x]+=1;			// NOTE(review): no x>=0 check here; relies on the carry never running past buffer[0]
+            
+        }
+        lowvalue  <<= 1;
+		if (!++count) 					// counter reached 0: a whole byte now sits in bits 24-31
+		{
+			count = -8;					// 8 more shifts until the next byte
+			br->buffer[br->pos++]=(lowvalue >> 24);
+			lowvalue &= 0xffffff;		// drop the byte just written
+		}
+	}
+    br->count = count;
+    br->lowvalue = lowvalue;
+    br->range = range;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeBoolOne
+ *
+ *  INPUTS        :     BOOL_CODER *br  : pointer to instance of a boolean encoder.
+ *						int bit         : value to be encoded (UNUSED).
+ *						int probability : probability of getting a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		void
+ *
+ *  FUNCTION      :     Encodes the boolean value 1 using the specified boolean encoder.
+ *
+ *  SPECIAL NOTES :     This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+void VP6_EncodeBoolOne ( BOOL_CODER *br, int bit, int probability )
+{
+	unsigned int split;
+    unsigned int count    = br->count;	// kept unsigned: a negative br->count wraps and ++count still reaches 0
+    unsigned int range    = br->range;
+    unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+	// A 1 is always coded here, so charge the cost of a 1 whatever the
+	// caller passed in the (documented as unused) bit argument.
+	(void)bit;
+	Sectionbits[ActiveSection] += VP6_ProbCost[255-probability];
+#endif
+
+    split = 1 +  (((range-1) * probability) >> 8);	// size of the lower (0) part of the interval
+	    
+    lowvalue += split;					// a 1 takes the upper part of the interval
+	range = range-split;
+
+    while(range < 0x80)					// renormalise: double until range is at least 128
+	{
+		range <<= 1;
+
+		if((lowvalue & 0x80000000 ))	// bit 31 would be lost by the shift below: carry it into the bytes already written
+        {
+            int x = br->pos-1;
+            while(x>=0 && br->buffer[x] == 0xff)	// 0xff bytes wrap to 0 and pass the carry on
+            {
+                br->buffer[x] =(unsigned char)0;
+                x--;
+            }
+            br->buffer[x]+=1;
+            
+        }
+        lowvalue  <<= 1;
+		if (!++count) 					// counter reached 0: a whole byte now sits in bits 24-31
+		{
+			count = -8;					// 8 more shifts until the next byte
+			br->buffer[br->pos++]=(lowvalue >> 24);
+			lowvalue &= 0xffffff;		// drop the byte just written
+		}
+	}
+    br->count = count;
+    br->lowvalue = lowvalue;
+    br->range = range;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeBoolZero
+ *
+ *  INPUTS        :     BOOL_CODER *br  : pointer to instance of a boolean encoder.
+ *						int bit         : value to be encoded (UNUSED).
+ *						int probability : probability of getting a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		void
+ *
+ *  FUNCTION      :     Encodes the boolean value 0 using the specified boolean encoder.
+ *
+ *  SPECIAL NOTES :     This encoder uses normalizations, and is fairly accurate,
+ *
+ ****************************************************************************/
+void VP6_EncodeBoolZero ( BOOL_CODER *br, int bit, int probability )
+{
+    unsigned int count = br->count;		// kept unsigned: a negative br->count wraps and ++count still reaches 0
+    unsigned int range = br->range;
+    unsigned int lowvalue = br->lowvalue;
+
+#if defined MEASURE_SECTION_COSTS
+	// A 0 is always coded here, so charge the cost of a 0 whatever the
+	// caller passed in the (documented as unused) bit argument.
+	(void)bit;
+	Sectionbits[ActiveSection] += VP6_ProbCost[probability];
+#endif
+
+    range = 1 +  (((range-1) * probability) >> 8);	// a 0 keeps the lower part of the interval; lowvalue is unchanged
+
+    while(range < 0x80)					// renormalise: double until range is at least 128
+	{
+		range <<= 1;
+
+		if((lowvalue & 0x80000000 ))	// bit 31 would be lost by the shift below: carry it into the bytes already written
+        {
+            int x = br->pos-1;
+            while(x>=0 && br->buffer[x] == 0xff)	// 0xff bytes wrap to 0 and pass the carry on
+            {
+                br->buffer[x] =(unsigned char)0;
+                x--;
+            }
+            br->buffer[x]+=1;
+            
+        }
+        lowvalue  <<= 1;
+		if (!++count) 					// counter reached 0: a whole byte now sits in bits 24-31
+		{
+			count = -8;					// 8 more shifts until the next byte
+			br->buffer[br->pos++]=(lowvalue >> 24);
+			lowvalue &= 0xffffff;		// drop the byte just written
+		}
+	}
+    br->count = count;
+    br->lowvalue = lowvalue;
+    br->range = range;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_EncodeBool2
+ *
+ *  INPUTS        :     BOOL_CODER *br  : pointer to instance of a boolean encoder.
+ *						int bit         : value to be encoded (0 or 1).
+ *						int probability : probability of getting a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		void
+ *
+ *  FUNCTION      :     Updates br->BitCounter with approximate cost of encoding
+ *                      bit.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+void VP6_EncodeBool2 ( BOOL_CODER *br, int bit, int probability )
+{
+	// Cost table index: probability for a 0, 255-probability for a 1.
+	const int idx = bit ? (255-probability) : probability;
+
+	br->BitCounter += VP6_ProbCost[idx];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *br  : pointer to instance of a boolean decoder.
+ *						int probability : probability that next symbol is a 0 (0-255) 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		Next decoded symbol (0 or 1)
+ *
+ *  FUNCTION      :     Decodes the next symbol (0 or 1) using the specified
+ *                      boolean decoder.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ ****************************************************************************/
+int VP6_DecodeBool ( BOOL_CODER *br, int probability ) 
+{
+    unsigned int bit=0;
+	unsigned int split;
+	unsigned int bigsplit;
+    unsigned int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+
+	split = 1 +  (((range-1) * probability) >> 8);	// size of the lower (0) part of the interval
+    bigsplit = (split<<24);				// split moved up to bits 24-31 for comparison with value
+    
+    range = split;
+	if(value >= bigsplit)				// value lies in the upper part: the symbol is a 1
+	{
+		range = br->range-split;
+		value = value-bigsplit;
+		bit = 1;
+	}
+
+	if(range>=0x80)						// no renormalisation needed; count is unchanged so it is not stored
+    {
+        br->value = value;
+        br->range = range;
+        return bit;
+    }
+    else
+	{
+		do								// renormalise: double until range is at least 128
+		{
+       	    range +=range;
+            value +=value;
+            
+        	if (!--count) 				// 8 shifts done: append the next stream byte at the bottom of value
+        	{
+    	        count = 8;
+	            value |= br->buffer[br->pos];	// NOTE(review): no end-of-buffer check is visible here
+        	    br->pos++;
+	    	}
+        } 
+        while(range < 0x80 );
+    }
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+} 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeBool128
+ *
+ *  INPUTS        :     BOOL_CODER *br : pointer to instance of a boolean decoder.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		int: Next decoded symbol (0 or 1)
+ *
+ *  FUNCTION      :     This function determines the next value stored in the 
+ *						boolean coder based upon a fixed probability of 0.5 
+ *                      (128 in normalized units).
+ *
+ *  SPECIAL NOTES :     VP6_DecodeBool128() is a special case of VP6_DecodeBool()
+ *                      where the input probability is fixed at 128.
+ *
+ ****************************************************************************/
+int VP6_DecodeBool128 (	BOOL_CODER	*br ) 
+{
+    unsigned int bit;
+	unsigned int split;
+	unsigned int bigsplit;
+    unsigned int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+
+    split = ( range + 1) >> 1;			// half of the range, rounded up
+    bigsplit = (split<<24);				// split moved up to bits 24-31 for comparison with value
+    
+	if(value >= bigsplit)				// upper half: the symbol is a 1
+	{
+		range = (range-split)<<1;		// both branches always shift exactly once
+		value = (value-bigsplit)<<1;
+		bit = 1;
+	}
+	else
+	{	
+		range = split<<1;				// NOTE(review): for range == 255 this gives 256, whereas VP6_DecodeBool(br,128) keeps 128 unshifted - confirm intended
+		value = value<<1;
+		bit = 0;
+	}
+
+    if(!--count)						// 8 shifts done: append the next stream byte at the bottom of value
+    {
+        count=8;
+        value |= br->buffer[br->pos];
+        br->pos++;        
+    }
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+        
+}    
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StartDecode
+ *
+ *  INPUTS        :     BOOL_CODER *bc		  : pointer to instance of a boolean decoder.
+ *						unsigned char *source : pointer to buffer of data to be decoded.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs initialization of the boolean decoder.
+ *                           
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StartDecode ( BOOL_CODER *br, unsigned char *source )
+{
+	br->lowvalue = 0;
+	br->range    = 255;
+	br->count    = 8;		// VP6_DecodeBool fetches another byte after every 8 shifts
+	br->buffer   = source;	// input buffer; referenced, not copied
+	br->pos      = 0;
+	br->value    = ((unsigned int)br->buffer[0]<<24)+((unsigned int)br->buffer[1]<<16)+((unsigned int)br->buffer[2]<<8)+(br->buffer[3]);	// first 4 bytes, MSB first; unsigned casts avoid shifting a promoted int into its sign bit
+	br->pos     += 4;		// the caller must supply at least 4 readable bytes
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StopDecode
+ *
+ *  INPUTS        :     BOOL_CODER *bc : pointer to instance of a boolean decoder (UNUSED).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs clean-up of the specified boolean decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_StopDecode ( BOOL_CODER *bc )
+{	// intentionally empty: bc is not touched
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c
new file mode 100644
index 00000000..c88878a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/debug.c
@@ -0,0 +1,225 @@
+#include "pbdll.h"
+#include "misc_common.h"
+#include "xprintf.h"
+
+/****************************************************************************
+ * Debugging Aid Only
+ ****************************************************************************
+*/
+
+#ifdef _MSC_VER
+
+#include <stdio.h>
+
+void vp6_writeframe(PB_INSTANCE *pbi, char * address,int x)
+{
+	// Dump ReconYPlaneSize + 2*ReconUVPlaneSize bytes from address to "y<x, zero padded to 4 digits>.raw".
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"y%04d.raw",x);
+	if ( (yframe=fopen(filename,"wb")) == NULL ) return;	// cannot create the dump: skip it instead of crashing
+	fwrite(address,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+	fclose(yframe);
+}
+
+void vp6_writeframe2(PB_INSTANCE *pbi, char * address,int x)
+{
+	// Dump pbi->YPlaneSize bytes from address to "y<x>.raw".
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"y%d.raw",x);
+	if ( (yframe=fopen(filename,"wb")) == NULL ) return;	// cannot create the dump: skip it instead of crashing
+	fwrite(address,pbi->YPlaneSize,1,yframe);
+	fclose(yframe);
+}
+
+void vp6_draw(unsigned char *prefix, int frame, char * address,int size)
+{
+	// Dump size bytes from address to "<prefix><frame, zero padded to 4 digits>.raw".
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"%.230s%04d.raw",prefix,frame);	// precision caps the prefix so filename[] cannot overflow
+	if ( (yframe=fopen(filename,"wb")) == NULL ) return;	// cannot create the dump: skip it instead of crashing
+	fwrite(address,size,1,yframe);
+	fclose(yframe);
+}
+
+void vp6_drawb(unsigned char *prefix, int frame, char * address,int pitch,int width,int height)
+{
+	// Dump height rows of width bytes (rows start pitch bytes apart) to "<prefix><frame, zero padded to 4 digits>.raw".
+	FILE *yframe;
+	int i;
+	char filename[255];
+	sprintf(filename,"%.230s%04d.raw",prefix,frame);	// precision caps the prefix so filename[] cannot overflow
+	if ( (yframe=fopen(filename,"wb")) == NULL ) return;	// cannot create the dump: skip it instead of crashing
+	for(i=0;i<height;i++)
+	{
+		fwrite(address,width,1,yframe);
+		address+=pitch;
+	}
+	fclose(yframe);
+}
+
+void vp6_drawc(char *filename, char * address,int pitch,int width,int height)
+{
+	// Append height rows of width bytes (rows start pitch bytes apart) to the file named filename.
+	FILE *yframe;
+	int i;
+	if ( (yframe=fopen(filename,"ab")) == NULL ) return;	// cannot open the file: skip the dump instead of crashing
+	for(i=0;i<height;i++)
+	{
+		fwrite(address,width,1,yframe);
+		address+=pitch;
+	}
+	fclose(yframe);
+}
+
+void vp6_showinfo2(PB_INSTANCE *pbi)
+{
+	vp6_xprintf(pbi, 									// one status line; presumably drawn into the frame at the offset below - TODO confirm
+			pbi->Configuration.YStride * UMV_BORDER + UMV_BORDER, 	// UMV_BORDER rows down, UMV_BORDER bytes in
+			"F:%d G:%d Q:%d S:%d B: %d W:%d H:%d V:%d Decode:%8d, Blit:%8d, PP:%8d, P:%d",
+			pbi->FrameType,					// F:
+            pbi->RefreshGoldenFrame,		// G:
+			pbi->quantizer->FrameQIndex,	// Q:
+			pbi->CurrentFrameSize,			// S:
+            pbi->br.pos,					// B: bool coder buffer position
+			pbi->HFragments,				// W:
+			pbi->VFragments,				// H:
+			pbi->Vp3VersionNo,				// V:
+			pbi->avgDecodeTime,				// Decode:
+			pbi->avgBlitTime,				// Blit:
+			pbi->avgPPTime[8],				// PP: NOTE(review): fixed index 8 - meaning not visible here
+			pbi->PostProcessingLevel);		// P:
+}
+
+void vp6_appendframe(PB_INSTANCE *pbi)
+{
+	// Append ReconYPlaneSize + 2*ReconUVPlaneSize bytes of pbi->LastFrameRecon to "test.raw".
+	FILE *yframe;
+	if ( (yframe=fopen("test.raw","ab")) == NULL ) return;	// cannot open the file: skip the dump instead of crashing
+	fwrite(pbi->LastFrameRecon,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+	fclose(yframe);
+}
+
+void vp6_showinfo(PB_INSTANCE *pbi)
+{
+	const UINT32 nRows = pbi->MBRows;
+	const UINT32 nCols = pbi->MBCols;
+	UINT32 r, c;
+
+	// Walk the macroblock grid row by row ...
+	for ( r=0; r<nRows; r++ )
+	{
+		// ... and print each macroblock's predictionMode at offset (r*16+5)*YStride + c*16+5
+		for ( c=0; c<nCols; c++ )
+		{
+			vp6_xprintf(pbi,
+				(r*16+5) * pbi->Configuration.YStride + c*16+5,
+				"%d",
+				pbi->predictionMode[MBOffset(r,c)]);
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictBlockToPostProcessBuffer
+ *
+ *  INPUTS        :     
+ *						
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Currently does nothing: the whole body is commented out.
+ *
+ *                      Motion vectors and modes assumed to be defined at the MB level.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP6_PredictBlockToPostProcessBuffer ( PB_INSTANCE *pbi, BLOCK_POSITION bp )
+{
+/*
+Disabled code, so this function is a no-op. Original note: we need a VP6_PredictMacroBlockToPostProcessBuffer
+
+	memset(pbi->ReconDataBuffer,0,64*sizeof(short));
+
+	// Action depends on decode mode.
+	if ( pbi->mbi.Mode == CODE_INTER_NO_MV )       // Inter with no motion vector
+	{
+		ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], 
+			(UINT8 *)&pbi->LastFrameRecon[pbi->mbi.Recon], 
+			pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride);
+	}
+	else if ( VP6_ModeUsesMC[pbi->mbi.Mode] )          // The mode uses a motion vector.
+	{
+		// For the compressor we did this already ( possible optimization).
+		VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+		ReconBlock( 
+			pbi->TmpDataBuffer,
+			pbi->ReconDataBuffer,
+			(UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+			pbi->mbi.CurrentReconStride );
+	}
+	else if ( pbi->mbi.Mode == CODE_USING_GOLDEN )     // Golden frame with motion vector
+	{
+		// Reconstruct the pixel data using the golden frame reconstruction and change data
+		ReconInter( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], 
+			(UINT8 *)&pbi->GoldenFrame[ pbi->mbi.Recon ], 
+			pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+	}
+	else                                            // Simple Intra coding
+	{
+		// Get the pixel index for the first pixel in the fragment.
+		ReconIntra( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], (UINT16 *)pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+	} 
+*/
+}
+
+void VP6_printmodes(PB_INSTANCE *pbi)
+{
+    static int nFrame = 0;  //  PB_INSTANCE doesn't provide a frame number, does it?
+    FILE *f=fopen("modes.txt","a");
+    unsigned int i,j;
+    if ( f == NULL ) { ++nFrame; return; }  // log not writable: keep the frame counter in step and skip the output
+    fprintf(f, "Frame %d\n\n", nFrame);
+
+    // One text line per macroblock row (border macroblocks excluded)
+    for(i=BORDER_MBS;i<pbi->MBRows-BORDER_MBS;i++)
+    {
+		if(pbi->Configuration.Interlaced == 1)	// interlaced streams: per-macroblock MBInterlaced values first
+		{
+			for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)
+			{
+				fprintf(f,"%d",pbi->MBInterlaced[MBOffset(i,j)]);
+			}
+			fprintf(f,"   ");
+		}
+		for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)        // then the prediction modes
+		{
+            fprintf(f,"%d",pbi->predictionMode[MBOffset(i,j)]);
+        }
+        fprintf(f,"   ");
+		for(j=BORDER_MBS;j<pbi->MBCols-BORDER_MBS;j++)        // then the motion vectors as x:y
+        {
+            fprintf(f,"%3d:%-3d",pbi->MBMotionVector[MBOffset(i,j)].x,pbi->MBMotionVector[MBOffset(i,j)].y);
+        }
+        fprintf(f,"\n");
+	}
+
+    fprintf(f,"\n");
+    fprintf(f,"\n");
+    fclose(f);
+
+    ++nFrame;
+
+    return;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c
new file mode 100644
index 00000000..168e5a80
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodembs.c
@@ -0,0 +1,2125 @@
+/****************************************************************************
+*
+*   Module Title :     Decodembs.c
+*
+*   Description  :     Compressor functions for block order transmittal
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/     
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+typedef struct 
+{    
+    UINT16  MinVal;     // base value of the token; the VP6_TokenExtraBits2 rows use 0..5, 7, 11, 19, 35, 67
+    INT16   Length;     // -1 in the ZERO and EOB rows, 0..10 otherwise; presumably the index of the top extra bit - TODO confirm
+    UINT8   Probs[11];  // non-zero only in the DCT_VAL_CATEGORY rows; presumably one bool-decoder probability per extra bit - TODO confirm
+} TOKENEXTRABITS;
+
+/****************************************************************************
+*  Module constants
+****************************************************************************/     
+static const UINT32 VP6_HuffTokenMinVal[MAX_ENTROPY_TOKENS] = { 0,1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0};  // per-token base value; same numbers as the MinVal column of VP6_TokenExtraBits2
+
+static const TOKENEXTRABITS VP6_TokenExtraBits2[MAX_ENTROPY_TOKENS] =   // columns: { MinVal, Length, Probs[11] }; one row per token, in the order named by the row comments
+{
+    {  0,-1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ZERO_TOKEN
+    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
+    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
+    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
+    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
+    {  5, 0, { 159,0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY1
+    {  7, 1, { 145,165,0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY2
+    { 11, 2, { 140,148,173,0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY3
+    { 19, 3, { 135,140,155,176,0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY4
+    { 35, 4, { 130,134,141,157,180,0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY5
+    { 67,10, { 129,130,133,140,153,177,196,230,243,254,254 } },   //DCT_VAL_CATEGORY6
+    {  0,-1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   // EOB TOKEN
+};
+
+// Definition of AC coefficient banding: 65 entries. Entry 0 is -1, entries 1..63 hold bands 0..5 in non-decreasing order, and the extra entry 64 holds 7.
+const INT32 VP6_CoeffToBand[65] = 
+{  
+   -1,0,1,1,1,2,2,2,
+	2,2,2,3,3,3,3,3,
+	3,3,3,3,3,3,4,4,
+	4,4,4,4,4,4,4,4,
+	4,4,4,4,4,5,5,5,
+	5,5,5,5,5,5,5,5,
+	5,5,5,5,5,5,5,5,
+	5,5,5,5,5,5,5,5,7
+};
+
+static const INT32 VP6_CoeffToHuffBand[65] =   // 65 entries using bands 0..3 only: entry 0 is -1, entry 1 is 0, 2..4 are 1, 5..10 are 2, 11..64 are 3
+{  
+   -1,0,1,1,1,2,2,2,
+	2,2,2,3,3,3,3,3,
+	3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,3
+};
+
+// Default scan bands for non-interlaced frames: one band number (0..15) per entry, non-decreasing across the table.
+const UINT8 DefaultNonInterlacedScanBands[BLOCK_SIZE] = 
+{
+   0, 0, 1, 1, 1, 2, 2, 2, 
+   2, 2, 2, 3, 3, 4, 4, 4, 
+   5, 5, 5, 5, 6, 6, 7, 7, 
+   7, 7, 7, 8, 8, 9, 9, 9, 
+   9, 9, 9,10,10,11,11,11,
+  11,11,11,12,12,12,12,12,
+  12,13,13,13,13,13,14,14,
+  14,14,15,15,15,15,15,15
+};
+
+// Default scan bands for interlaced frames: one band number (0..15) per entry; unlike the non-interlaced table the values are not monotonic.
+const UINT8 DefaultInterlacedScanBands[BLOCK_SIZE] = 
+{
+   0, 1, 0, 1, 1, 2, 5, 3, 
+   2, 2, 2, 2, 4, 7, 8,10, 
+   9, 7, 5, 4, 2, 3, 5, 6, 
+   8, 9,11,12,13,12,11,10, 
+   9, 7, 5, 4, 6, 7, 9,11, 
+  12,12,13,13,14,12,11, 9, 
+   7, 9,11,12,14,14,14,15, 
+  13,11,13,15,15,15,15,15, 
+};
+
+// Coding mode -> reference frame code (judging by the mode names: 0 = intra, 1 = last frame, 2 = golden frame). AWG: Should export this in decodembs.h rather than pbdll.h
+const int VP6_Mode2Frame[] =
+{
+	1,	// CODE_INTER_NO_MV		0 => Encoded diff from same MB last frame 
+	0,	// CODE_INTRA			1 => DCT Encoded Block
+	1,	// CODE_INTER_PLUS_MV	2 => Encoded diff from included MV MB last frame
+	1,	// CODE_INTER_LAST_MV	3 => Encoded diff from MRU MV MB last frame
+	1,	// CODE_INTER_PRIOR_MV	4 => Encoded diff from included 4 separate MV blocks (NOTE(review): same text as mode 7 - looks copy-pasted, confirm)
+	2,	// CODE_USING_GOLDEN	5 => Encoded diff from same MB golden frame
+	2,	// CODE_GOLDEN_MV		6 => Encoded diff from included MV MB golden frame
+	1,  // CODE_INTER_FOUR_MV	7 => Encoded diff from included 4 separate MV blocks
+	2,	// CODE_GOLD_NEAREST_MV 8 => Encoded diff from MRU MV MB last frame (NOTE(review): value 2 matches the golden modes, "last frame" is presumably stale)
+	2,	// CODE_GOLD_NEAR_MV	9 => Encoded diff from included 4 separate MV blocks (NOTE(review): same text as mode 7 - looks copy-pasted, confirm)
+};
+
+// For Bitread functions: entry n (n = 0..32) has the low n bits set; bitread() indexes it with br->bitsinremainder.
+static const UINT32 loMaskTbl_VP60[] = 
+{   
+    0x00000000,
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+	0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+	0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+	0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+	0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+	0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+	0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     NextWord (MACRO)
+ *
+ *  INPUTS        :     None (operates on the reader pointer named 'br' in the expanding scope).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Loads the next 4 bytes at br->position into br->remainder (first byte most
+ *                      significant when BitsAreBigEndian is set) and advances br->position by 4.
+ *
+ *  SPECIAL NOTES :     Each byte is widened to UINT32 before shifting so "<< 24" cannot overflow a signed int.
+ *
+ ****************************************************************************/
+#	define BitsAreBigEndian 1
+#	if BitsAreBigEndian
+#		define NextWord \
+{ br->remainder = ((UINT32)br->position[0] << 24) + ((UINT32)br->position[1] << 16) + ((UINT32)br->position[2] << 8) + (UINT32)br->position[3];  br->position += 4;}
+#	else
+#		define NextWord \
+{ br->remainder = ((UINT32)br->position[3] << 24) + ((UINT32)br->position[2] << 16) + ((UINT32)br->position[1] << 8) + (UINT32)br->position[0];  br->position += 4;}
+#	endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     bitread
+ *
+ *  INPUTS        :     BITREADER *br : Wrapper for the encoded data buffer.
+ *                      int bits      : Number of bits to read.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The requested bits, right-aligned (as UINT32)
+ *
+ *  FUNCTION      :     Extracts requested number of bits from the encoded data buffer,
+ *                      loading one further word when br->remainder does not hold enough.
+ *  SPECIAL NOTES :     Uses the NextWord macro. 
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitread ( BITREADER *br, int bits )
+{
+	UINT32 z = 0;
+
+	br->remainder &= loMaskTbl_VP60[br->bitsinremainder];	// keep only the bits that have not been consumed yet
+	
+	if( (bits -= br->bitsinremainder) > 0) 	// more bits wanted than the remainder holds
+	{
+		z |= br->remainder << bits;	// what is left becomes the high part of the result
+		NextWord
+			bits -= 32;	// bits is now minus the number of bits that will stay unread in the new word
+	}
+	return z | br->remainder >> (br->bitsinremainder = -bits);	// low part of the result; also records the new unread count
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     bitreadonly
+ *
+ *  INPUTS        :     BITREADER *br : Wrapper for the encoded data buffer.
+ *                      UINT32 bits   : Number of bits to look at.
+ *
+ *  OUTPUTS       :     None (the reader state is not modified).
+ *
+ *  RETURNS       :     The next 'bits' bits, right-aligned (as UINT32)
+ *
+ *  FUNCTION      :     Peeks at the requested number of bits without consuming them.
+ *
+ *  SPECIAL NOTES :     This reader variant will only read a further byte from the
+ *                      encoded data buffer, so at most 8 bits beyond the remainder are available.
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitreadonly ( BITREADER *br, UINT32 bits )
+{
+    UINT32 x = br->bitsinremainder;
+    UINT32 z = ((UINT32)1<<x)-1;    // unsigned 1: a plain int "1<<31" would overflow when x is 31
+
+    z &= br->remainder;
+    if ( x >= bits )
+    {        
+        return z>>(x-bits);
+    }    
+    z <<= 8;                        // not enough bits cached: append the next byte of the buffer
+    z  |= br->position[0];
+	return (z>>(8+x-bits));
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     bitShift
+ *
+ *  INPUTS        :     BITREADER *br : Wrapper for the encoded data buffer.
+ *                      int bits      : Number of bits to discard (shift off).
+ *
+ *  OUTPUTS       :     br->bitsinremainder (and, on refill, br->remainder / br->position) are updated.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Discards requested number of bits from the encoded data buffer.
+ *
+ *  SPECIAL NOTES :     Uses the NextWord macro; at most one new word is loaded per call.
+ *
+ ****************************************************************************/
+FORCEINLINE
+void bitShift ( BITREADER *br, int bits )
+{
+    // Drop the bits from the cached word; a negative balance means we ran past its end.
+    br->bitsinremainder -= bits;
+    if ( br->bitsinremainder >= 0 )
+        return;
+    NextWord
+    br->bitsinremainder += 32;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     bitread1
+ *  INPUTS        :     BITREADER *br : Wrapper for the encoded data buffer.
+ *  OUTPUTS       :     None.
+ *  RETURNS       :     A single bit (as UINT32)
+ *
+ *  FUNCTION      :     Extracts a single bit from the encoded data buffer.
+ *  SPECIAL NOTES :     Uses the NextWord macro when no cached bits are left.
+ *
+ ****************************************************************************/
+FORCEINLINE
+UINT32 bitread1 ( BITREADER *br ) 
+{
+    if ( br->bitsinremainder != 0 )
+    {
+        br->bitsinremainder -= 1;
+        return (br->remainder >> br->bitsinremainder) & 1;
+    }
+    NextWord
+    br->bitsinremainder = 31;
+    return br->remainder >> 31;
+}
+
+#undef NextWord
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     nDecodeBool
+ *
+ *  INPUTS        :     BOOL_CODER *br  : Boolean decoder state (range, value, count, buffer, pos).
+ *                      int probability : Probability that next symbol in Boolean 
+ *                                        Coded buffer is a 0.
+ *
+ *  OUTPUTS       :     The decoder state in *br is updated in place.
+ *
+ *  RETURNS       :     Value of the next encoded token 0 or 1 (as int)
+ *
+ *  FUNCTION      :     Extracts next token (0 or 1) from the Boolean encoded data buffer.
+ *
+ *  SPECIAL NOTES :     br->pos is not checked against the end of the buffer here.
+ *
+ ****************************************************************************/
+FORCEINLINE
+int nDecodeBool ( BOOL_CODER *br, int probability ) 
+{
+    // Work on local copies of the coder state; they are written back
+    // to *br once the symbol has been decoded and the state renormalised.
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    int count = br->count;
+    unsigned int split;
+    unsigned int bigsplit;
+    int bit;
+
+    // Place the split inside the current range in proportion to
+    // probability/256. The comparison against value is made with the
+    // split moved up by 24 bits.
+    split    = 1 +  (((range-1) * probability) >> 8);
+    bigsplit = split << 24;
+
+    if ( value >= bigsplit )
+    {
+        // value lies at or above the split: the symbol is a 1
+        bit    = 1;
+        value -= bigsplit;
+        range -= split;
+    }
+    else
+    {
+        // value lies below the split: the symbol is a 0
+        bit   = 0;
+        range = split;
+    }
+
+    // Renormalise: double range and value until range reaches 0x80.
+    // Each time the shift counter runs out it is reloaded with 8 and
+    // the next byte of the buffer is OR-ed into value.
+    while ( range < 0x80 )
+    {
+        range += range;
+        value += value;
+
+        if ( --count == 0 )
+        {
+            count  = 8;
+            value |= br->buffer[br->pos];
+            br->pos++;
+        }
+    }
+
+    // Store the updated coder state
+    br->count = count;
+    br->value = value;
+    br->range = range;
+
+    return bit;
+}
+
+/****************************************************************************
+ *  APPLYSIGN (MACRO) : Decodes one bool with an even split ((range + 1) >> 1) and sets dest to -valueToSign when it is 1, else to valueToSign.
+ *  Reads and updates the locals range, value, count and brBuffer of the expanding scope; renormalises with a single doubling (no loop).
+ ****************************************************************************/
+#define APPLYSIGN(dest, valueToSign) \
+{ \
+	unsigned int split; \
+    split    = (range + 1) >> 1; \
+	if ( value >= split<<24 ) \
+	{ \
+		value = value - (split<<24); \
+        value += value; \
+		range = range - split; \
+        range += range; \
+        if( !--count ) \
+        { \
+            count  = 8; \
+            value |= *brBuffer; \
+            brBuffer++; \
+        } \
+        dest = -valueToSign; \
+    } \
+    else \
+    { \
+        range = split; \
+        range += range; \
+        value += value; \
+        if( !--count ) \
+        { \
+            count  = 8; \
+            value |= *brBuffer; \
+            brBuffer++; \
+        } \
+        dest = valueToSign; \
+    } \
+}
+
+/****************************************************************************
+ *  NDECODEBOOL_AND_BRANCH_IF_ONE (MACRO) : Same split and renormalise steps as nDecodeBool(), run on the locals range, value, count
+ *  and brBuffer of the expanding scope. Jumps to the label 'branch' when the decoded bool is 1; falls through when it is 0.
+ ****************************************************************************/
+//    register int count = _mm_cvtsi64_si32(m64_brCount); 
+#define NDECODEBOOL_AND_BRANCH_IF_ONE(probability, branch) \
+{ \
+	unsigned int split; \
+	split = 1 +  (((range-1) * probability) >> 8); \
+	if ( value >= split<<24 ) \
+	{ \
+		value -= (split<<24); \
+		range = range - split; \
+        while(range < 0x80 ) \
+        { \
+	        range += range; \
+	        value += value; \
+	        if ( !--count ) \
+	        { \
+		        count  = 8; \
+		        value |= *brBuffer; \
+		        brBuffer++; \
+	        } \
+        } \
+        goto branch; \
+	} \
+	range = split; \
+    while(range < 0x80 ) \
+	{ \
+		range += range; \
+		value += value; \
+		if ( !--count ) \
+		{ \
+			count  = 8; \
+			value |= *brBuffer; \
+			brBuffer++; \
+		} \
+	} \
+}
+/****************************************************************************
+ *  NDECODEBOOL_AND_BRANCH_IF_ZERO (MACRO) : Same split and renormalise steps as nDecodeBool(), run on the locals range, value, count
+ *  and brBuffer of the expanding scope. Jumps to the label 'branch' when the decoded bool is 0; falls through when it is 1.
+ ****************************************************************************/
+#define NDECODEBOOL_AND_BRANCH_IF_ZERO(probability, branch) \
+{ \
+	unsigned int split; \
+	split = 1 +  (((range-1) * probability) >> 8); \
+	if ( value < split<<24 ) \
+	{ \
+    	range = split; \
+        while(range < 0x80 ) \
+        { \
+	        range += range; \
+	        value += value; \
+	        if ( !--count ) \
+	        { \
+		        count  = 8; \
+		        value |= *brBuffer; \
+		        brBuffer++; \
+	        } \
+        } \
+        goto branch; \
+	} \
+	value -= (split<<24); \
+	range = range - split; \
+    while(range < 0x80 ) \
+	{ \
+		range += range; \
+		value += value; \
+		if ( !--count ) \
+		{ \
+			count  = 8; \
+			value |= *brBuffer; \
+			brBuffer++; \
+		} \
+	} \
+}
+
+
+/****************************************************************************
+* 
+*  ROUTINE       :     BuildScanOrder
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to instance of a decoder.
+*                      UINT8 *ScanBands : Pointer to array containing band for 
+*                                         each DCT coeff position (entry 0 is not read).
+*
+*  OUTPUTS       :     pbi->ModifiedScanOrder and pbi->EobOffsetTable are rewritten.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Builds a custom dct scan order from a set of band data.
+*
+*  SPECIAL NOTES :     The EOB offsets are built with a running maximum, which gives the same
+*                      values as recomputing the maximum of ModifiedScanOrder[0..i] for every i.
+*
+****************************************************************************/
+void BuildScanOrder( PB_INSTANCE *pbi, UINT8 *ScanBands )
+{
+	UINT32 i, j;
+	UINT32 ScanOrderIndex = 1;
+	UINT32 MaxOffset = 0;
+
+	// DC is fixed
+	pbi->ModifiedScanOrder[0] = 0;
+
+	// Create a scan order where within each band the coefs are in ascending order
+	// (in terms of their original zig-zag positions).
+	// NOTE(review): a position whose band is >= SCAN_ORDER_BANDS is never placed, which would
+	// leave trailing ModifiedScanOrder entries unchanged - confirm callers bound the band values.
+	for ( i = 0; i < SCAN_ORDER_BANDS; i++ )
+	{
+		for ( j = 1; j < BLOCK_SIZE; j++ )
+		{
+			if ( ScanBands[j] == i )
+			{
+				pbi->ModifiedScanOrder[ScanOrderIndex] = j;
+				ScanOrderIndex++;
+			}
+		}
+	}
+
+	// For each of the positions in the modified scan order work out the 
+	// worst case EOB offset in zig zag order. This is used in selecting
+	// the appropriate idct variant.
+	// MaxOffset is carried from one position to the next, so after the update
+	// below it is the largest zig-zag index among ModifiedScanOrder[0..i].
+	for ( i = 0; i < BLOCK_SIZE; i++ )
+	{
+		if ( pbi->ModifiedScanOrder[i] > MaxOffset )
+			MaxOffset = pbi->ModifiedScanOrder[i];
+
+		// Versions above 6 store the offset plus one.
+		if(pbi->Vp3VersionNo > 6)
+			pbi->EobOffsetTable[i] = MaxOffset+1;
+		else
+			pbi->EobOffsetTable[i] = MaxOffset;
+	}
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     BoolTreeToHuffCodes
+*
+*  INPUTS        :     UINT8  *BoolTreeProbs : Dct coeff tree node probabilities (entries 0..10 are read)
+*  OUTPUTS       :     UINT32 *HuffProbs     : Dct coeff probability distribution, indexed by token
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Convert set of internal tree node probabilities to set of token
+*                      probabilities by multiplying along each path (>> 8 after every product).
+*
+****************************************************************************/
+void BoolTreeToHuffCodes ( UINT8 *BoolTreeProbs, UINT32 *HuffProbs )
+{
+    UINT32 p[11];   // the eleven node probabilities, widened to 32 bits
+    UINT32 Mass;    // weight carried down the main path of the tree
+    UINT32 Pair;    // weight of the sub-branch currently being split
+    UINT32 n;
+
+    for ( n = 0; n < 11; n++ )
+        p[n] = BoolTreeProbs[n];
+
+    // The p[0] share is divided between EOB and ZERO by node 1
+    HuffProbs[DCT_EOB_TOKEN]       = (p[0] * p[1]) >> 8;
+    HuffProbs[ZERO_TOKEN]          = (p[0] * (255 - p[1])) >> 8;
+    // The remaining share: node 2 splits off ONE
+    Mass = 255 - p[0];
+    HuffProbs[ONE_TOKEN]           = (Mass * p[2]) >> 8;
+    Mass = (Mass * (255 - p[2])) >> 8;
+    // Node 3 share: TWO, THREE and FOUR via nodes 4 and 5
+    Pair = (Mass * p[3]) >> 8;
+    HuffProbs[TWO_TOKEN]           = (Pair * p[4]) >> 8;
+    Pair = (Pair * (255 - p[4])) >> 8;
+    HuffProbs[THREE_TOKEN]         = (Pair * p[5]) >> 8;
+    HuffProbs[FOUR_TOKEN]          = (Pair * (255 - p[5])) >> 8;
+    // Rest of node 3: the six value categories via nodes 6..10
+    Mass = (Mass * (255 - p[3])) >> 8;
+    Pair = (Mass * p[6]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY1]   = (Pair * p[7]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY2]   = (Pair * (255 - p[7])) >> 8;
+    Mass = (Mass * (255 - p[6])) >> 8;
+    Pair = (Mass * p[8]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY3]   = (Pair * p[9]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY4]   = (Pair * (255 - p[9])) >> 8;
+    Mass = (Mass * (255 - p[8])) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY5]   = (Mass * p[10]) >> 8;
+    HuffProbs[DCT_VAL_CATEGORY6]   = (Mass * (255 - p[10])) >> 8;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     ZerosBoolTreeToHuffCodes
+*
+*  INPUTS        :     UINT8  *BoolTreeProbs : Zrl tree node probabilities (entries 0..7 are read)
+*  OUTPUTS       :     UINT32 *HuffProbs     : Zrl run-length distribution (entries 0..8 are written)
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Convert zero run-length tree node probs to set 
+*                      of run-length probs (run lengths 1--8, and >8
+*                      are the tokens).
+*
+*  SPECIAL NOTES :     Every product of two weights is followed by >> 8.
+****************************************************************************/
+void ZerosBoolTreeToHuffCodes ( UINT8 *BoolTreeProbs, UINT32 *HuffProbs )
+{
+    UINT32 p[8];    // node probabilities 0..7, widened to 32 bits
+    UINT32 Side;    // weight shared by entries 4..7
+    UINT32 Leaf;    // weight reaching a pair of adjacent entries
+    UINT32 n;
+
+    for ( n = 0; n < 8; n++ )
+        p[n] = BoolTreeProbs[n];
+
+    // p[0] share: entries 0..3, divided by node 1 and then by nodes 2 and 3
+    Leaf = (p[0] * p[1]) >> 8;
+    HuffProbs[0] = (Leaf * p[2]) >> 8;
+    HuffProbs[1] = (Leaf * (255 - p[2])) >> 8;
+    Leaf = (p[0] * (255 - p[1])) >> 8;
+    HuffProbs[2] = (Leaf * p[3]) >> 8;
+    HuffProbs[3] = (Leaf * (255 - p[3])) >> 8;
+
+    // (255 - p[0]) share: node 4 keeps entries 4..7 apart from entry 8
+    Side = ((255 - p[0]) * p[4]) >> 8;
+    Leaf = (Side * p[5]) >> 8;
+    HuffProbs[4] = (Leaf * p[6]) >> 8;
+    HuffProbs[5] = (Leaf * (255 - p[6])) >> 8;
+    Leaf = (Side * (255 - p[5])) >> 8;
+    HuffProbs[6] = (Leaf * p[7]) >> 8;
+    HuffProbs[7] = (Leaf * (255 - p[7])) >> 8;
+    HuffProbs[8] = ((255 - p[0]) * (255 - p[4])) >> 8;
+}
+
+
+/****************************************************************************
+* 
+*  ROUTINE       :     ConvertBoolTrees
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     The Dc, Zero and Ac Huff* members of pbi that are passed to the calls below.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Create set of Huffman codes for tokens from a set of
+*                      internal binary tree node probabilities.
+*
+*  SPECIAL NOTES :     Each group runs the same four steps: probs -> tree -> lookup table -> code array.
+*
+****************************************************************************/
+void ConvertBoolTrees ( PB_INSTANCE *pbi )
+{
+    UINT32  i;
+	UINT32	Plane;
+	UINT32	Band;
+	INT32   Prec;
+
+    // Convert bool tree node probabilities into array of token 
+    // probabilities. Use these to create a set of Huffman codes
+
+	// DC
+    for ( Plane = 0; Plane < 2; Plane++ )	// two planes; sibling code describes them as Y and then UV
+    {
+        BoolTreeToHuffCodes ( pbi->DcProbs+DCProbOffset(Plane,0), pbi->DcHuffProbs[Plane] );
+        VP6_BuildHuffTree ( pbi->DcHuffTree[Plane], pbi->DcHuffProbs[Plane], MAX_ENTROPY_TOKENS );
+        VP6_BuildHuffLookupTable(pbi->DcHuffTree[Plane], pbi->DcHuffLUT[Plane]);        
+        VP6_CreateCodeArray ( pbi->DcHuffTree[Plane], 0, pbi->DcHuffCode[Plane], pbi->DcHuffLength[Plane], 0, 0 );
+    }
+    
+    // ZEROS
+    for ( i = 0; i < ZRL_BANDS; i++ )
+	{
+        ZerosBoolTreeToHuffCodes ( pbi->ZeroRunProbs[i], pbi->ZeroHuffProbs[i] );
+        VP6_BuildHuffTree ( pbi->ZeroHuffTree[i], pbi->ZeroHuffProbs[i], 9 );	// 9 = the entries ZerosBoolTreeToHuffCodes() writes (0..8)
+        VP6_BuildHuffLookupTable(pbi->ZeroHuffTree[i], pbi->ZeroHuffLUT[i]);
+        VP6_CreateCodeArray ( pbi->ZeroHuffTree[i], 0, pbi->ZeroHuffCode[i], pbi->ZeroHuffLength[i], 0, 0 );
+    }
+
+    // AC
+    for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+	{
+		// Baseline probabilities for each AC band.
+		for ( Plane = 0; Plane < 2; Plane++ )
+		{
+			for ( Band = 0; Band < VP6_AC_BANDS; Band++ )
+            {
+                BoolTreeToHuffCodes ( pbi->AcProbs+ACProbOffset(Plane,Prec,Band,0), pbi->AcHuffProbs[Prec][Plane][Band] );
+                VP6_BuildHuffTree ( pbi->AcHuffTree[Prec][Plane][Band], pbi->AcHuffProbs[Prec][Plane][Band], MAX_ENTROPY_TOKENS );
+                VP6_BuildHuffLookupTable(pbi->AcHuffTree[Prec][Plane][Band],pbi->AcHuffLUT[Prec][Plane][Band]);
+                VP6_CreateCodeArray ( pbi->AcHuffTree[Prec][Plane][Band], 0, pbi->AcHuffCode[Prec][Plane][Band], pbi->AcHuffLength[Prec][Plane][Band], 0, 0 );
+            }
+        }
+    }
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_ConfigureEntropyDecoder
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+*                      UINT8 FrameType  : Type of frame.
+*
+*  OUTPUTS       :     pbi->DcProbs, pbi->AcProbs, pbi->ZeroRunProbs and (if sent) pbi->ScanBands are updated.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Configure entropy subsystem ready for decode
+*
+*  SPECIAL NOTES :     Reads from pbi->br; the order of the reads below follows the bitstream and must not change.
+*
+****************************************************************************/
+void VP6_ConfigureEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+	UINT32	i,j;
+	UINT32  Plane;
+	UINT32  Band;
+	INT32   Prec;
+	UINT8   LastProb[MAX_ENTROPY_TOKENS-1];	// most recently transmitted probability for each tree node
+
+	// Every node starts at 128; values then carry over across planes, contexts and bands.
+	// Clear down Last Probs data structure
+	memset( LastProb, 128, MAX_ENTROPY_TOKENS-1 );
+
+	// Read in the Baseline DC probabilities and initialise the DC context for Y and then UV plane
+	for ( Plane = 0; Plane < 2; Plane++ )
+	{
+		// One update flag per tree node; a set flag is followed by the new probability.
+		for ( i = 0; i < MAX_ENTROPY_TOKENS-1; i++ )
+		{
+			if ( nDecodeBool(&pbi->br, VP6_DcUpdateProbs[Plane][i] ) )
+			{
+				// 0 is not a legal value, clip to 1.
+				LastProb[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				LastProb[i] += ( LastProb[i] == 0 );
+				pbi->DcProbs[DCProbOffset(Plane,i)] = LastProb[i];
+
+			}
+			else if ( FrameType == BASE_FRAME )
+			{
+				pbi->DcProbs[DCProbOffset(Plane,i)] = LastProb[i];
+			}
+		}
+	}
+
+	// Set Zero run probabilities to defaults if this is a key frame
+	if ( FrameType == BASE_FRAME )
+	{
+		memcpy( pbi->ZeroRunProbs, ZeroRunProbDefaults, sizeof(pbi->ZeroRunProbs) );
+	}
+
+	// If this frame contains updates to the scan order then read them
+	if ( nDecodeBool( &pbi->br, 128 ) )
+	{
+		// Read in the AC scan bands and build the custom scan order
+		for ( i = 1; i < BLOCK_SIZE; i++ )
+		{
+			// Has the band for this coef been updated ?
+			if ( nDecodeBool( &pbi->br, ScanBandUpdateProbs[i] ) )
+				pbi->ScanBands[i] = VP6_bitread( &pbi->br, SCAN_BAND_UPDATE_BITS );
+		}
+		// Build the scan order
+		BuildScanOrder( pbi, pbi->ScanBands );
+	}
+
+	// Update the Zero Run probabilities
+	for ( i = 0; i < ZRL_BANDS; i++ )
+	{
+		for ( j = 0; j < ZERO_RUN_PROB_CASES; j++ )
+		{
+			if ( nDecodeBool( &pbi->br, ZrlUpdateProbs[i][j] )  )
+			{
+				// Probabilities sent
+				pbi->ZeroRunProbs[i][j] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				pbi->ZeroRunProbs[i][j] += ( pbi->ZeroRunProbs[i][j] == 0 );
+			}
+		}		
+	}
+
+	// Read in the Baseline AC band probabilities and initialise the appropriate contexts
+	// Prec=0 means last token in current block was 0: Prec=1 means it was 1. Prec=2 means it was > 1
+	for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+	{
+		// Prec itself indexes the update-probability table and the AC probability offsets below.
+		for ( Plane = 0; Plane < 2; Plane++ )
+		{
+			for ( Band = 0; Band < VP6_AC_BANDS; Band++ )
+			{
+				// One update flag per tree node; a set flag is followed by the new probability.
+				for ( i = 0; i < MAX_ENTROPY_TOKENS-1; i++ )
+				{
+					if ( nDecodeBool(&pbi->br, VP6_AcUpdateProbs[Prec][Plane][Band][i] ) )
+					{
+						// Probabilities transmitted at reduced resolution. 
+						// 0 is not a legal value, clip to 1.
+						LastProb[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+						LastProb[i] += ( LastProb[i] == 0 );                        
+						pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = LastProb[i];
+					}
+					else if ( FrameType == BASE_FRAME )
+					{
+						pbi->AcProbs[ACProbOffset(Plane,Prec,Band,i)] = LastProb[i];
+					}
+				}
+			}
+		}
+	} 
+
+	// Create all the context specific probabilities based upon the new baseline data
+	VP6_ConfigureContexts(pbi);
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ResetLeftContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     pbi->fc.LeftY[0], LeftY[1], LeftU and LeftV are overwritten.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Zeroes the four left block contexts, then gives each Mode -1 and Frame 4.
+ *
+ *  SPECIAL NOTES :     VP6_Mode2Frame only holds 0, 1 and 2, so Frame 4 equals no value taken from it.
+ *
+ ****************************************************************************/
+void VP6_ResetLeftContext ( PB_INSTANCE *pbi)
+{
+	// The two left Y contexts
+	memset((void *) &pbi->fc.LeftY[0], 0, sizeof(BLOCK_CONTEXT));
+	pbi->fc.LeftY[0].Mode  = (CODING_MODE)-1;
+	pbi->fc.LeftY[0].Frame = 4;
+	memset((void *) &pbi->fc.LeftY[1], 0, sizeof(BLOCK_CONTEXT));
+	pbi->fc.LeftY[1].Mode  = (CODING_MODE)-1;
+	pbi->fc.LeftY[1].Frame = 4;
+	// The left U and V contexts
+	memset((void *) &pbi->fc.LeftU,    0, sizeof(BLOCK_CONTEXT));
+	pbi->fc.LeftU.Mode     = (CODING_MODE)-1;
+	pbi->fc.LeftU.Frame    = 4;
+	memset((void *) &pbi->fc.LeftV,    0, sizeof(BLOCK_CONTEXT));
+	pbi->fc.LeftV.Mode     = (CODING_MODE)-1;
+	pbi->fc.LeftV.Frame    = 4;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ResetAboveContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     pbi->fc.AboveY/AboveU/AboveV and pbi->fc.LastDcY/LastDcU/LastDcV are overwritten.
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Resets the above contexts and the last-DC values.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_ResetAboveContext ( PB_INSTANCE *pbi )
+{
+	UINT32 i;
+
+	// Y: HFragments+8 entries get Mode -1, Frame 4 and zero Dc / Token
+	for ( i = 0; i < pbi->HFragments+8; i++ )
+	{
+		pbi->fc.AboveY[i].Mode  = -1;
+		pbi->fc.AboveY[i].Frame = 4;
+		pbi->fc.AboveY[i].Dc    = 0;
+		pbi->fc.AboveY[i].Token = 0;
+	}
+	// U and V: HFragments/2 + 8 entries each, same reset values
+	for ( i = 0; i < pbi->HFragments/2 + 8; i++ )
+	{
+		pbi->fc.AboveU[i].Mode  = -1;
+		pbi->fc.AboveU[i].Frame = 4;
+		pbi->fc.AboveU[i].Dc    = 0;
+		pbi->fc.AboveU[i].Token = 0;
+		pbi->fc.AboveV[i].Mode  = -1;
+		pbi->fc.AboveV[i].Frame = 4;
+		pbi->fc.AboveV[i].Dc    = 0;
+		pbi->fc.AboveV[i].Token = 0;
+	}
+	// Versions below 6: entry 1 of the U and V rows gets Mode 0 and Frame 0 instead
+	if ( pbi->Vp3VersionNo < 6 )
+	{
+		pbi->fc.AboveU[1].Mode  = 0;
+		pbi->fc.AboveU[1].Frame = 0;
+		pbi->fc.AboveV[1].Mode  = 0;
+		pbi->fc.AboveV[1].Frame = 0;
+	}
+	// Last DC values: all zero, except entry 0 of U and V which is 128
+	for ( i = 0; i < 3; i++ )
+	{
+		pbi->fc.LastDcY[i] = 0;
+		pbi->fc.LastDcU[i] = 0;
+		pbi->fc.LastDcV[i] = 0;
+	}
+	pbi->fc.LastDcU[0] = 128;
+	pbi->fc.LastDcV[0] = 128;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_UpdateContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      BLOCK_CONTEXT *c  : Pointer to the block context to fill in.
+ *                      BLOCK_POSITION bp : Position of the block in the containing MB.
+ *
+ *  OUTPUTS       :     c->Mode, c->Dc and c->Frame are overwritten.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Copies the block's mode and first coefficient into the context.
+ *
+ *  SPECIAL NOTES :     Frame is looked up from the macroblock mode (pbi->mbi.Mode), not the block mode.
+ *
+ ****************************************************************************/
+void VP6_UpdateContext ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp )
+{
+	c->Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+	c->Dc    = pbi->mbi.blockDxInfo[bp].coeffsPtr[0];
+	c->Mode  = pbi->mbi.BlockMode[bp];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_UpdateContextA
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      BLOCK_CONTEXT *c  : Pointer to the block context to fill in.
+ *                      BLOCK_POSITION bp : Position of the block in the containing MB.
+ *
+ *  OUTPUTS       :     c->Mode, c->Dc and c->Frame are overwritten.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Records the block's first coefficient, its mode and the frame code in the context.
+ *
+ *  SPECIAL NOTES :     Performs the same three assignments as VP6_UpdateContext.
+ *
+ ****************************************************************************/
+void VP6_UpdateContextA ( PB_INSTANCE *pbi, BLOCK_CONTEXT *c, BLOCK_POSITION bp )
+{
+	c->Dc    = pbi->mbi.blockDxInfo[bp].coeffsPtr[0];
+	c->Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+	c->Mode  = pbi->mbi.BlockMode[bp];
+}
+
+#define HIGHBITDUPPED(X) (((signed short) (X))  >> 15)  // argument parenthesised so the cast covers a whole expression; presumably all ones when bit 15 of X is set, else 0 (relies on arithmetic >>)
+
+/****************************************************************************
+ *  VP6_PredictDC : Adds a DC prediction to coefficient 0 of block bp and stores the result in LastDc for the frame code.
+ *  The prediction comes from the Above and/or Left context whose Frame equals the frame code, else from LastDc.
+ ****************************************************************************/
+void VP6_PredictDC
+( 
+	PB_INSTANCE *pbi,
+	BLOCK_POSITION bp
+)
+{
+	UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+	Q_LIST_ENTRY  *LastDC = pbi->mbi.blockDxInfo[bp].LastDc;
+	BLOCK_CONTEXT *Above  = pbi->mbi.blockDxInfo[bp].Above;
+	BLOCK_CONTEXT *Left   = pbi->mbi.blockDxInfo[bp].Left;
+	INT32 Avg;
+
+	if ( Frame != Above->Frame )
+	{
+		// Above does not match: use Left if it matches, else the last DC for this frame code
+		if ( Frame == Left->Frame )
+			Avg = Left->Dc;
+		else
+			Avg = LastDC[Frame];
+	}
+	else
+	{
+		Avg = Above->Dc;
+		if ( Frame == Left->Frame )
+		{
+			// Both match: halve the sum, adding the HIGHBITDUPPED bit first
+			Avg += Left->Dc;
+			Avg += (HIGHBITDUPPED(Avg)&1);
+			Avg >>= 1;
+		}
+	}
+	// Apply the prediction and remember the result
+	pbi->mbi.blockDxInfo[bp].coeffsPtr[0] += Avg;
+	LastDC[Frame] = pbi->mbi.blockDxInfo[bp].coeffsPtr[0];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_PredictDC_MB
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi     : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     Coefficient 0, LastDc and the Above/Left contexts of six blocks are updated.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Predicts coefficients in this macroblock based on the 
+ *                      contexts provided.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void 
+VP6_PredictDC_MB(PB_INSTANCE *pbi)
+{
+	UINT8 Frame = VP6_Mode2Frame[pbi->mbi.Mode];
+	BLOCK_DX_INFO *bdi = pbi->mbi.blockDxInfo;
+	UINT32 Block;
+
+	// The first six blockDxInfo entries are handled in order; a block's result is
+	// written to its Above/Left contexts before the next block is predicted.
+	for ( Block = 0; Block < 6; Block++, bdi++ )
+	{
+		Q_LIST_ENTRY  *LastDC = bdi->LastDc;
+		BLOCK_CONTEXT *Above  = bdi->Above;
+		BLOCK_CONTEXT *Left   = bdi->Left;
+		INT32 Avg;
+
+		if ( Frame != Above->Frame )
+		{
+			// Above does not match: use Left if it matches, else the last DC for this frame code
+			if ( Frame == Left->Frame )
+				Avg = Left->Dc;
+			else
+				Avg = LastDC[Frame];
+		}
+		else
+		{
+			Avg = Above->Dc;
+			if ( Frame == Left->Frame )
+			{
+				// Both neighbours match: halve their sum, adding the
+				// HIGHBITDUPPED bit before the shift
+				Avg += Left->Dc;
+				Avg += (HIGHBITDUPPED(Avg)&1);
+				Avg >>= 1;
+			}
+		}
+
+		// Apply the prediction and remember the result for this frame code
+		bdi->coeffsPtr[0] += Avg;
+		LastDC[Frame] = bdi->coeffsPtr[0];
+
+		// Make the result visible to the blocks that use these contexts next
+		Above->Dc    = bdi->coeffsPtr[0];
+		Above->Frame = Frame;
+		Left->Dc     = bdi->coeffsPtr[0];
+		Left->Frame  = Frame;
+	}
+}
+ 
+
+/****************************************************************************
+* 
+*  ROUTINE       :     VP6_ExtractTokenN
+*
+*  INPUTS        :     BITREADER *br : Pointer to bitreader to grab the bits from.
+*                      HUFF_NODE *hn : Pointer to root of huffman tree to use for decoding.
+*                      UINT16* hlt   : Pointer to the Huffman lookup table; it is
+*                                      accessed as an array of HUFF_TABLE_NODE.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     The decoded token value.
+*
+*  FUNCTION      :     Decodes one Huffman token. HUFF_LUT_LEVELS bits are
+*                      peeked and used to index the lookup table. If the entry
+*                      has its flag set, its value is the token. Otherwise its
+*                      value is the index of the tree node from which decoding
+*                      continues one bit at a time until a node with selector
+*                      set is reached.
+*
+*  SPECIAL NOTES :     Original author's note: right now this handles only left 
+*                      justified bits in the bitreader; the C version keeps
+*                      everything in place so it could not be used here.
+*
+****************************************************************************/
+FORCEINLINE
+int VP6_ExtractTokenN ( BITREADER *br, HUFF_NODE *hn, UINT16* hlt )
+{
+    HUFF_TABLE_NODE *lutEntry;
+    tokenorptr node;
+    UINT32 lutIndex;
+
+    // Peek at the next HUFF_LUT_LEVELS bits and look them up
+    lutIndex = bitreadonly(br, HUFF_LUT_LEVELS);
+    lutEntry = ((HUFF_TABLE_NODE *)hlt) + lutIndex;
+
+    // Consume only the number of bits this table entry accounts for
+    bitShift(br, (lutEntry->length));
+
+    if ( lutEntry->flag )
+    {
+        return lutEntry->value;
+    }
+
+    // Not resolved by the table: walk the tree from the node the entry names
+    node.value = lutEntry->value;
+    do
+    {
+        if ( bitread1(br) )
+        {
+            node = hn[node.value].rightunion.right;
+        }
+        else
+        {
+            node = hn[node.value].leftunion.left;
+        }
+    }
+    while ( !(node.selector) );
+
+    return node.value;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :     ReadHuffTokensPredictA_MB
+*
+*  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+*
+*  OUTPUTS       :     None.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Reads the Huffman coded tokens of the six blocks of the
+*                      current macroblock from pbi->br3, writes the decoded
+*                      coefficients through blockDxInfo[].coeffsPtr and stores
+*                      the final scan position in blockDxInfo[].EobPos.
+*
+*  SPECIAL NOTES :     Blocks 0-3 use the plane 0 tables, blocks 4-5 plane 1.
+*                      Runs of zero DC tokens and runs of EOB at the first AC
+*                      position are carried from block to block in
+*                      pbi->CurrentDcRunLen[] and pbi->CurrentAc1RunLen[].
+*                      EobPos is limited to BLOCK_SIZE-1 even when a zero run
+*                      in the bitstream steps past the end of the block.
+*
+****************************************************************************/
+void
+ReadHuffTokensPredictA_MB(PB_INSTANCE *pbi)
+{
+    BITREADER *br = &pbi->br3;
+    INT32   SignBit;
+	UINT32  Prec;
+
+	UINT32   token;
+    UINT32  blockIndex;
+
+    UINT32  Plane = 0;
+
+    INT16 *CoeffData;
+
+    UINT8 *MergedScanOrderPtr;
+
+    for(blockIndex = 0; blockIndex < 6; blockIndex++)
+    {
+        MergedScanOrderPtr = pbi->MergedScanOrder;
+
+        CoeffData = pbi->mbi.blockDxInfo[blockIndex].coeffsPtr;
+
+        // Blocks 4 and 5 use the second set of tables and run counters
+        if(blockIndex > 3)
+        {
+            Plane = 1;
+        }
+
+        if ( pbi->CurrentDcRunLen[Plane] > 0 )
+        {
+            // DC -- run of zeros in progress
+            --pbi->CurrentDcRunLen[Plane];
+            Prec = 0;        
+        }
+        else
+        {
+            // DC -- no current run of zeros
+            token = VP6_ExtractTokenN(br, pbi->DcHuffTree[Plane], pbi->DcHuffLUT[Plane]);                                                      
+
+            // EOB at DC: nothing coded for this block, EobPos becomes 0
+            if(token == DCT_EOB_TOKEN)
+                goto Finished;
+
+            if(token == ZERO_TOKEN)
+            {   
+                // Read zero run-length
+                {
+                    // Run of zeros at DC is coded as a tree
+                    UINT32 val = 1 + bitread(br, 2);
+
+                    if ( val == 3 )
+                        val += bitread(br, 2);
+                    else if ( val == 4 )
+                    {
+                        if ( bitread1(br) )
+                            val = 11 + bitread(br, 6);
+                        else
+                            val = 7 + bitread(br, 2); 
+                    }
+                    // This block is the first of the run; the rest are counted down later
+                    pbi->CurrentDcRunLen[Plane] = val - 1;
+                }
+                Prec = 0;
+            }
+            else
+            {
+                register INT32 value;
+            
+                value = VP6_HuffTokenMinVal[token];
+    
+                if(token <=FOUR_TOKEN)
+                {
+                    SignBit = bitread1(br);
+                }
+                else if(token <=DCT_VAL_CATEGORY5)
+                {
+                    value   += bitread(br, (token-4));
+                    SignBit = bitread1(br);
+                }
+                else
+                {
+                    value   += bitread(br, 11);
+                    SignBit = bitread1(br);
+                
+                }
+                // (value ^ -SignBit) + SignBit negates value when SignBit is 1
+                CoeffData[0] = (Q_LIST_ENTRY)((value ^ -SignBit) + SignBit); 
+                Prec = (value>1)?2:1;
+            }
+
+        }
+        //first AC
+    
+        MergedScanOrderPtr++;
+
+        if ( pbi->CurrentAc1RunLen[Plane] > 0 )
+        {
+            // First AC in scan order -- run of EOBs in progress
+            --pbi->CurrentAc1RunLen[Plane];
+            goto Finished;
+        }
+
+        do
+	    {
+            // Entries 64.. of MergedScanOrder are read here as the band for each scan position
+            UINT32 Band = *(MergedScanOrderPtr + 64);
+                
+            token = VP6_ExtractTokenN(br, pbi->AcHuffTree[Prec][Plane][Band], pbi->AcHuffLUT[Prec][Plane][Band]);              
+
+            if(token == ZERO_TOKEN)
+            {
+                {
+                    UINT32 ZrlBand;
+                    UINT32 ZrlToken;
+
+                    // Read zero run-length
+                    ZrlBand  = (MergedScanOrderPtr >= (pbi->MergedScanOrder + ZRL_BAND2));
+                    
+                    ZrlToken = VP6_ExtractTokenN(br, pbi->ZeroHuffTree[ZrlBand], pbi->ZeroHuffLUT[ZrlBand]);
+              
+                    if ( ZrlToken<8 )
+                        MergedScanOrderPtr += ZrlToken;             // Zero run <= 8
+                    else
+                        MergedScanOrderPtr += 8 + bitread(br, 6);   // Zero run > 8
+                }
+                Prec =0;
+                MergedScanOrderPtr ++;			
+                continue;
+            }
+        
+            if(token == DCT_EOB_TOKEN)
+            {
+                if ( MergedScanOrderPtr == (pbi->MergedScanOrder + 1) )
+                {
+                    // Read run of EOB at first AC position
+                    UINT32 val = 1 + bitread(br, 2);
+                
+                    if ( val == 3 )
+                        val += bitread(br, 2);
+                    else if ( val == 4 )
+                    {
+                        if ( bitread1(br) )
+                            val = 11 + bitread(br, 6);
+                        else
+                            val = 7 + bitread(br, 2); 
+                    }
+                    pbi->CurrentAc1RunLen[Plane] = val - 1;
+                }
+                goto Finished;
+
+            }
+
+            {
+                register INT32 value;
+
+                value = VP6_HuffTokenMinVal[token];
+        
+                if(token <=FOUR_TOKEN)
+                {
+                    SignBit = bitread1(br);
+                }
+                else if(token <=DCT_VAL_CATEGORY5)
+                {
+                    value   += bitread(br, (token-4));
+                    SignBit = bitread1(br);
+                }
+                else
+                {
+                    value   += bitread(br, 11);
+                    SignBit = bitread1(br);
+            
+                }
+            
+                CoeffData[*(MergedScanOrderPtr)] = (Q_LIST_ENTRY)((value ^ -SignBit) + SignBit);             
+                Prec = (value>1)?2:1;        
+                MergedScanOrderPtr ++;			
+            }
+
+        } while (MergedScanOrderPtr < (pbi->MergedScanOrder + BLOCK_SIZE));
+
+        // A zero run read from the stream can step past the end of the block.
+        // Pull the pointer back so EobPos never exceeds BLOCK_SIZE-1, which is
+        // the largest value a normal exit from the loop above produces.
+        if ( MergedScanOrderPtr > (pbi->MergedScanOrder + BLOCK_SIZE) )
+            MergedScanOrderPtr = pbi->MergedScanOrder + BLOCK_SIZE;
+
+	    MergedScanOrderPtr--;
+
+    Finished:
+	    pbi->mbi.blockDxInfo[blockIndex].EobPos =  (unsigned int)(MergedScanOrderPtr - pbi->MergedScanOrder);
+
+    } //for(blockIndex = 0; blockIndex < 6; blockIndex++)
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ReadTokensPredictA_MB
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Decodes the DC and AC tokens of the six blocks in
+ *                      pbi->mbi.blockDxInfo from the bool coder pbi->mbi.br,
+ *                      writes the coefficient values through coeffsPtr,
+ *                      updates the Left/Above token contexts and stores each
+ *                      block's final scan position in EobPos.
+ *
+ *  SPECIAL NOTES :     The bool coder state (count, range, value and the read
+ *                      position) is copied into locals on entry and written
+ *                      back on exit.
+ *                      NOTE(review): NDECODEBOOL_AND_BRANCH_IF_ONE/ZERO and
+ *                      APPLYSIGN are defined outside this function; they
+ *                      presumably decode one bool using these locals and jump
+ *                      to the given label -- confirm against their definitions.
+ *                      The three #defines inside the body are never #undef'd
+ *                      and therefore stay active for the rest of the file.
+ *
+ ****************************************************************************/
+
+void
+VP6_ReadTokensPredictA_MB(PB_INSTANCE *pbi) 
+{
+    BLOCK_DX_INFO *bdi = pbi->mbi.blockDxInfo;
+    BLOCK_DX_INFO *bdiEnd = bdi + 6;
+
+    INT32 token;
+
+    // Local copies of the bool coder state; stored back at the end of the function
+    int count = pbi->mbi.br->count;
+    unsigned int range = pbi->mbi.br->range;
+    unsigned int value = pbi->mbi.br->value;
+
+    UINT8 *brBuffer = pbi->mbi.br->buffer;
+
+    UINT8 *MergedScanOrder = pbi->MergedScanOrder;
+    UINT8 *MergedScanOrderEnd = pbi->MergedScanOrder + BLOCK_SIZE;
+    UINT8 *MergedScanOrderPtr;
+
+    //bdi->br->buffer += bdi->br->pos;
+    // brBuffer now points at the next unread byte
+    brBuffer += pbi->mbi.br->pos;
+
+    //register __m64 m64_brCount;
+    //__m64 m64_brBuffer;
+
+    //{
+      //  BOOL_CODER *br = pbi->mbi.blockDxInfo[0].br;
+        //m64_brCount =  _mm_cvtsi32_si64((int)br->count);
+    //}
+
+// Shorthand for fields of the block currently being decoded (bdi)
+#define BaselineProbsPtr bdi->BaselineProbsPtr
+#define ContextProbsPtr bdi->ContextProbsPtr
+#define AcProbsPtr bdi->AcProbsBasePtr
+//#define token bdi->token
+
+    // One iteration per block of the macroblock
+    do
+    {
+        MergedScanOrderPtr = MergedScanOrder;
+
+        // The DC context is selected by the sum of the left and above token contexts
+        ContextProbsPtr = bdi->DcNodeContextsBasePtr + DcNodeOffset(0, (bdi->Left->Token + bdi->Above->Token), 0);
+        BaselineProbsPtr = bdi->DcProbsBasePtr;
+
+	    // Decode the dc token -- first test to see if it is zero
+        NDECODEBOOL_AND_BRANCH_IF_ONE(ContextProbsPtr[ZERO_CONTEXT_NODE], DC_NON_ZERO_);
+
+		// Zero is implicit for DC token
+        //*(bdi->PrecTokenIndexPtr) = 0;
+		bdi->Left->Token = 0;					// Update the above and left token contexts to indicate a zero
+		bdi->Above->Token = 0;
+
+        MergedScanOrderPtr++;
+        // The second ACProbOffset argument is 0 after a zero, 1 after a one and 2 after
+        // anything larger; the band is read from MergedScanOrder at offset +64
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+
+        goto AC_DO_WHILE;
+
+DC_NON_ZERO_:
+	    // A non zero DC value
+		bdi->Left->Token = 1;					// Update the above and left token contexts to indicate non zero
+		bdi->Above->Token = 1;
+
+		// Was the value a 1
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
+
+		//PrecTokenIndex = 2;		
+        //*(bdi->PrecTokenIndexPtr) = 2;
+
+		// Value token > 1
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
+								
+		// High value (value category) token
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
+
+		// Cat3,Cat4 or Cat5
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
+
+		token = DCT_VAL_CATEGORY5;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+        token += 1;
+
+        goto DC_EXTRA_BITS_;
+                
+CAT_THREEFOUR_CONTEXT_NODE_0_:
+		token = DCT_VAL_CATEGORY3;
+        
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+        token += 1;
+
+        goto DC_EXTRA_BITS_;
+
+HIGH_LOW_CONTEXT_NODE_0_:
+		// Either Cat1 or Cat2
+		token = DCT_VAL_CATEGORY1;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_ONE_CONTEXT_NODE], DC_EXTRA_BITS_);
+
+        token += 1;
+
+DC_EXTRA_BITS_:
+        {
+            INT32       tValue;
+            INT32		BitsCount;
+
+	        unsigned int split;
+	
+            // Start from the smallest value of the category and add the extra bits to it
+        	tValue = VP6_TokenExtraBits2[token].MinVal;	
+
+			// Read the extra bits
+			BitsCount = VP6_TokenExtraBits2[token].Length;
+			do
+			{
+				//value += (NDECODEBOOL(VP6_TokenExtraBits2[token].Probs[BitsCount]) << BitsCount );
+	            // perform the actual decoding
+	            split = 1 +  (((range-1) * VP6_TokenExtraBits2[token].Probs[BitsCount] ) >> 8);	
+
+	            // value at or above the split point: this extra bit is a one
+	            if ( value >= split<<24 )
+	            {
+		            value -= (split<<24);
+		            split = range - split;
+        
+            		tValue += (1 << BitsCount);
+
+	            }
+
+                // Renormalise: double until the range is at least 0x80,
+                // pulling in a new byte after every 8 shifts
+                while(split < 0x80 )
+	            {
+		            split += split;
+		            value += value;
+		            
+		            if ( !--count ) 
+		            {
+			            count  = 8;
+			            value |= *brBuffer;
+			            brBuffer++;
+    	            }
+                }
+	            range = split;
+
+			}
+			while(--BitsCount >= 0);
+
+
+			// apply the sign to the value
+            APPLYSIGN(bdi->coeffsPtr[0], tValue);
+
+		    MergedScanOrderPtr++;
+            BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+            goto AC_DO_WHILE;
+        }
+
+LOW_VAL_CONTEXT_NODE_0_:
+		// Low value token
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(ContextProbsPtr[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
+
+		// Either a 3 or a 4
+		token = THREE_TOKEN;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
+        
+        token += 1;
+
+THREE_CONTEXT_NODE_0_:
+		// apply the sign to the value
+        APPLYSIGN(bdi->coeffsPtr[0], token);
+
+		MergedScanOrderPtr++;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+        goto AC_DO_WHILE;
+
+TWO_CONTEXT_NODE_0_:
+		// Is it a  2
+		// apply the sign to the value
+        APPLYSIGN(bdi->coeffsPtr[0], TWO_TOKEN);
+
+		MergedScanOrderPtr++;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+        goto AC_DO_WHILE;
+
+ONE_CONTEXT_NODE_0_:
+        MergedScanOrderPtr++;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 1, *(MergedScanOrderPtr + 64), 0 );
+
+		// apply the sign to the value
+        APPLYSIGN(bdi->coeffsPtr[0], 1);
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// AC coefficients: every path below comes back to AC_DO_WHILE until the
+// block is finished
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+AC_DO_WHILE:
+		// calculate the context for the next token. 
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[ZERO_CONTEXT_NODE], NON_ZERO_RUN_);
+
+//ZERO_RUN_:
+		// Is the token a Zero or EOB
+		// (EOB leaves via BLOCK_FINISHED_1, i.e. without the pointer decrement)
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[EOB_CONTEXT_NODE], BLOCK_FINISHED_1);
+
+		// Select the appropriate Zero run context
+		BaselineProbsPtr = bdi->ZeroRunProbsBasePtr;
+		
+        if(MergedScanOrderPtr >= (pbi->MergedScanOrder + ZRL_BAND2))
+            BaselineProbsPtr += ZERO_RUN_PROB_CASES;
+
+		// Now decode the zero run length
+		// Run length 1-4
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[0], ZERO_RUN_5_8);
+
+//ZERO_RUN_1_4:
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[1], ZERO_RUN_1_4_a);
+
+		MergedScanOrderPtr += 1;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[2], ZERO_RUN_1_4_done);
+
+		MergedScanOrderPtr += 1;
+
+ZERO_RUN_1_4_done:
+    	BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+        // After a zero run the code goes straight to the non-zero token decode
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto NON_ZERO_RUN_;
+
+        goto BLOCK_FINISHED;
+    
+ZERO_RUN_1_4_a:
+		MergedScanOrderPtr += 3;
+        
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[3], ZERO_RUN_1_4_a_done);
+
+		MergedScanOrderPtr += 1;
+
+ZERO_RUN_1_4_a_done:
+    	BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto NON_ZERO_RUN_;
+
+        goto BLOCK_FINISHED;
+
+ZERO_RUN_5_8:
+		// Run length 5-8
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[4], ZERO_RUN_gt_8);
+
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[5], ZERO_RUN_5_8_a);
+
+		MergedScanOrderPtr += 5;
+        
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[6], ZERO_RUN_5_8_done);
+
+		MergedScanOrderPtr += 1;
+
+ZERO_RUN_5_8_done:
+    	BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto NON_ZERO_RUN_;
+
+        goto BLOCK_FINISHED;
+
+ZERO_RUN_5_8_a:
+		MergedScanOrderPtr += 7;
+        
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[7], ZERO_RUN_5_8_a_done);
+
+		MergedScanOrderPtr += 1;
+
+ZERO_RUN_5_8_a_done:
+    	BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto NON_ZERO_RUN_;
+
+        goto BLOCK_FINISHED;
+
+ZERO_RUN_gt_8:
+		// Run length > 8
+        {
+	        unsigned int decodeCount;
+	        unsigned int split;
+
+            // Six bools, least significant first, each using its own probability
+            // BaselineProbsPtr[8 + n]; the run length is 9 plus the 6 bit number they form
+            decodeCount = 0;
+            do
+            {
+	            // perform the actual decoding
+	            split = 1 +  (((range-1) * BaselineProbsPtr[8 + decodeCount]) >> 8);	
+
+	            if ( value >= split<<24 )
+	            {
+		            value -= (split<<24);
+		            split = range - split;
+        
+            		MergedScanOrderPtr += (1 << decodeCount);
+
+	            }
+
+                while(split < 0x80 )
+	            {
+		            split += split;
+		            value += value;
+		            
+		            if ( !--count ) 
+		            {
+			            count  = 8;
+			            value |= *brBuffer;
+			            brBuffer++;
+		            }
+	            }
+
+	            range = split;
+
+            } while (++decodeCount < 6);
+
+    		MergedScanOrderPtr += 9;
+
+        } 
+
+        // Here the end check comes before MergedScanOrder is read at offset +64
+        if( MergedScanOrderPtr >= MergedScanOrderEnd)
+            goto BLOCK_FINISHED;
+        
+    	BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 0, *(MergedScanOrderPtr + 64), 0 );
+
+
+NON_ZERO_RUN_:
+		// The token codes a non zero value
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[ONE_CONTEXT_NODE], AC_ONE_CONTEXT_0_);
+
+ 		// Value token > 1
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[LOW_VAL_CONTEXT_NODE], AC_LOW_VAL_CONTEXT_0_);
+                   								
+    	// High value (value category) token
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE], AC_HIGH_LOW_CONTEXT_0_);
+
+		// Cat3,Cat4
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE], AC_CAT_THREEFOUR_CONTEXT_0_);
+
+		token = DCT_VAL_CATEGORY5;
+
+		// Cat5,Cat6
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE], AC_EXTRA_BITS_);
+
+        //It is Cat6
+        token += 1;
+
+        goto AC_EXTRA_BITS_;
+
+AC_CAT_THREEFOUR_CONTEXT_0_:
+		token = DCT_VAL_CATEGORY3;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_THREE_CONTEXT_NODE], AC_EXTRA_BITS_);
+
+        //It is Cat4
+        token += 1;
+
+        goto AC_EXTRA_BITS_;
+
+AC_HIGH_LOW_CONTEXT_0_:
+		// Either Cat1 or Cat2
+		token = DCT_VAL_CATEGORY1;
+
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[CAT_ONE_CONTEXT_NODE], AC_EXTRA_BITS_);
+        
+        //It is Cat2
+        token += 1;
+
+AC_EXTRA_BITS_:
+		{
+            INT32 BitsCount;
+            INT32 tValue;
+
+	        unsigned int split;
+
+    		tValue = VP6_TokenExtraBits2[token].MinVal;	
+
+            // Read the extra bits
+            // (same inlined bool decode as in DC_EXTRA_BITS_)
+			BitsCount = VP6_TokenExtraBits2[token].Length;
+
+			do
+			{
+				//tValue += (NDECODEBOOL(VP6_TokenExtraBits2[token].Probs[BitsCount]) << BitsCount);
+	            split = 1 +  (((range-1) * VP6_TokenExtraBits2[token].Probs[BitsCount] ) >> 8);	
+
+	            if ( value >= split<<24 )
+	            {
+		            value -= (split<<24);
+		            split = range - split;
+        
+            		tValue += (1 << BitsCount);
+
+	            }
+
+                while(split < 0x80 )
+	            {
+		            split += split;
+		            value += value;
+		            
+		            if ( !--count ) 
+		            {
+			            count  = 8;
+			            value |= *brBuffer;
+			            brBuffer++;
+		            }
+                }
+
+	            range = split;
+			}
+			while(--BitsCount >= 0);
+
+
+        	// apply the sign to the value
+            APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], tValue);
+            MergedScanOrderPtr++;
+        }
+
+
+    	//*(bdi->PrecTokenIndexPtr) = 2;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto AC_DO_WHILE;
+        
+        goto BLOCK_FINISHED;
+
+
+AC_LOW_VAL_CONTEXT_0_:
+		// Low value token
+        NDECODEBOOL_AND_BRANCH_IF_ZERO(BaselineProbsPtr[TWO_CONTEXT_NODE], AC_TWO_CONTEXT_0_);
+
+		// Either a 3 or a 4
+		token = THREE_TOKEN + 1;
+        
+        NDECODEBOOL_AND_BRANCH_IF_ONE(BaselineProbsPtr[THREE_CONTEXT_NODE], AC_THREE_CONTEXT_1_);
+
+        //It is a 3
+        token = token - 1;
+
+AC_THREE_CONTEXT_1_:
+        // apply the sign to the value
+        APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], token);
+    	MergedScanOrderPtr++;
+
+    	//*(bdi->PrecTokenIndexPtr) = 2;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto AC_DO_WHILE;
+        
+        goto BLOCK_FINISHED;
+
+
+AC_TWO_CONTEXT_0_:
+		// Is it a  2
+        // apply the sign to the TWO_TOKEN
+        APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], TWO_TOKEN);
+    	MergedScanOrderPtr++;
+
+    	//*(bdi->PrecTokenIndexPtr) = 2;
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 2, *(MergedScanOrderPtr + 64), 0 );
+
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto AC_DO_WHILE;
+        
+        goto BLOCK_FINISHED;
+
+AC_ONE_CONTEXT_0_:
+		// apply the sign to the value
+        APPLYSIGN(bdi->coeffsPtr[*(MergedScanOrderPtr)], 1);
+
+        MergedScanOrderPtr++;
+
+		//*(bdi->PrecTokenIndexPtr) = 1;
+
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0, 1, *(MergedScanOrderPtr + 64), 0 );
+    
+        if( MergedScanOrderPtr < MergedScanOrderEnd)
+            goto AC_DO_WHILE;
+	
+// Reached after the pointer has been advanced past the last position handled:
+// step back one before recording the end position
+BLOCK_FINISHED:
+        MergedScanOrderPtr--;
+
+// Reached directly on an explicit EOB
+BLOCK_FINISHED_1:				    
+	    bdi->EobPos =  (unsigned int)(MergedScanOrderPtr - MergedScanOrder);
+    }while(++bdi < bdiEnd); 
+
+    //bdi = pbi->mbi.blockDxInfo;
+    // Turn the read pointer back into a byte count and store the coder state
+    brBuffer -= pbi->mbi.br->pos;
+    pbi->mbi.br->pos += (unsigned int)(brBuffer - pbi->mbi.br->buffer);
+    //bdi->br->buffer = brBuffer;
+
+    pbi->mbi.br->count = count;
+    pbi->mbi.br->value = value;
+    pbi->mbi.br->range = range;
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeMacroBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      UINT32 MBrow      : Row of MBs that block is in.
+ *                  	UINT32 MBcol      : Col of MBs that block is in.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Decodes a single MacroBlock: interlaced flag (when the
+ *                      stream is configured as interlaced), mode and motion
+ *                      vector (non base frames), tokens, DC prediction,
+ *                      inverse DCT of the six blocks and finally the
+ *                      reconstruction selected by the coding mode.
+ *
+ *  SPECIAL NOTES :     The coefficient buffers are cleared again after the
+ *                      inverse DCT. For an interlaced macroblock the stride
+ *                      and offsets of the four Y blocks are changed for the
+ *                      duration of the call and restored before returning.
+ *
+ ****************************************************************************/
+INLINE
+void 
+VP6_DecodeMacroBlock ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol )
+{
+    MACROBLOCK_INFO *mb = &pbi->mbi;
+    BLOCK_DX_INFO   *blk;
+    UINT32 reconOffset;
+    UINT32 b;
+
+    // Intra unless the mode decode below says otherwise
+    mb->Mode = CODE_INTRA;
+
+    // The interlaced decision is coded as one bool per macroblock
+    if ( pbi->Configuration.Interlaced )
+    {
+        UINT8 fieldProb = pbi->probInterlaced;
+
+        // Simple context: past the first coded column, bias the probability
+        // according to the decision made for the previous macroblock
+        if ( MBcol > BORDER_MBS )
+        {
+            if ( mb->Interlaced )
+                fieldProb = fieldProb-(fieldProb>>1);
+            else
+                fieldProb = fieldProb+((256-fieldProb)>>1);
+        }
+
+        mb->Interlaced = nDecodeBool( &pbi->br, fieldProb );
+
+        if ( mb->Interlaced == 1 )
+        {
+            // The Y blocks use twice the stride; blocks 2 and 3 are moved up
+            // by seven lines
+            for ( b = 0; b < 4; b++ )
+                mb->blockDxInfo[b].CurrentReconStride = pbi->Configuration.YStride * 2;
+
+            mb->blockDxInfo[2].thisRecon -= (pbi->Configuration.YStride * 7);
+            mb->blockDxInfo[3].thisRecon -= (pbi->Configuration.YStride * 7);
+        }
+    }
+
+    if ( pbi->FrameType != BASE_FRAME )
+        VP6_decodeModeAndMotionVector ( pbi, MBrow, MBcol );
+
+    // Read tokens from the bitstream and convert them to coefficients
+    if ( pbi->UseHuffman )
+        ReadHuffTokensPredictA_MB(pbi);
+    else
+        VP6_ReadTokensPredictA_MB(pbi);
+
+    VP6_PredictDC_MB(pbi);
+
+    // Inverse transform each block with the variant matching its end position,
+    // then zero the coefficients that variant covers
+    // note: maybe the offset table can contain a func ptr and the amount to
+    // memset, which would remove the if/else chain
+    for ( b = 0; b < 6; b++ )
+    {
+        UINT32 eob;
+
+        blk = &mb->blockDxInfo[b];
+        eob = pbi->EobOffsetTable[blk->EobPos];
+
+        if ( eob <= 1 )
+        {
+            idct[1]( blk->coeffsPtr, blk->dequantPtr, pbi->ReconDataBuffer[b] );
+            blk->coeffsPtr[0] = 0;
+        }
+        else if ( eob <= 10 )
+        {
+            UINT32 row;
+
+            idct[9]( blk->coeffsPtr, blk->dequantPtr, pbi->ReconDataBuffer[b] );
+
+            // All of row 0, the first four entries of rows 1-3, and entry 32
+            memset(blk->coeffsPtr, 0, 8*sizeof(Q_LIST_ENTRY));
+            for ( row = 8; row <= 24; row += 8 )
+                memset(blk->coeffsPtr+row, 0, 4*sizeof(Q_LIST_ENTRY));
+            blk->coeffsPtr[32] = 0;
+        }
+        else
+        {
+            idct[63]( blk->coeffsPtr, blk->dequantPtr, pbi->ReconDataBuffer[b] );
+            memset(blk->coeffsPtr, 0, 64*sizeof(Q_LIST_ENTRY));
+        }
+    }
+
+    // note: the recon functions should be written for MBs rather than blocks,
+    // and a function table indexed by mode could replace this chain
+
+    // Reconstruction depends on the decode mode
+    if ( mb->Mode == CODE_INTER_NO_MV )
+    {
+        // Inter with no motion vector: predict from the same place in the last frame
+        for ( b = 0; b < 6; b++ )
+        {
+            blk = &mb->blockDxInfo[b];
+            reconOffset = blk->thisRecon;
+            ReconInter( pbi->TmpDataBuffer, 
+                        (UINT8 *)&pbi->ThisFrameRecon[reconOffset], 
+                        (UINT8 *)&pbi->LastFrameRecon[reconOffset], 
+                        pbi->ReconDataBuffer[b], 
+                        blk->CurrentReconStride);
+        }
+    }
+    else if ( VP6_ModeUsesMC[mb->Mode] )
+    {
+        // The mode uses a motion vector
+        for ( b = 0; b < 6; b++ )
+        {
+            blk = &mb->blockDxInfo[b];
+            reconOffset = blk->thisRecon;
+
+            // For the compressor we did this already ( possible optimization).
+            VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,b);
+
+            ReconBlock( pbi->TmpDataBuffer,
+                        pbi->ReconDataBuffer[b],
+                        (UINT8 *)&pbi->ThisFrameRecon[reconOffset],
+                        blk->CurrentReconStride);
+        }
+    }
+    else if ( mb->Mode == CODE_USING_GOLDEN )
+    {
+        // Golden frame: same as above but predicting from the golden frame
+        for ( b = 0; b < 6; b++ )
+        {
+            blk = &mb->blockDxInfo[b];
+            reconOffset = blk->thisRecon;
+            ReconInter( pbi->TmpDataBuffer, 
+                        (UINT8 *)&pbi->ThisFrameRecon[reconOffset], 
+                        (UINT8 *)&pbi->GoldenFrame[reconOffset], 
+                        pbi->ReconDataBuffer[b], 
+                        blk->CurrentReconStride );
+        }
+    }
+    else
+    {
+        // Simple Intra coding
+        for ( b = 0; b < 6; b++ )
+        {
+            blk = &mb->blockDxInfo[b];
+            reconOffset = blk->thisRecon;
+            ReconIntra( pbi->TmpDataBuffer, 
+                        (UINT8 *)&pbi->ThisFrameRecon[reconOffset], 
+                        (UINT16 *)pbi->ReconDataBuffer[b], 
+                        blk->CurrentReconStride);
+        }
+    }
+
+    if ( mb->Interlaced == 1 )
+    {
+        /* reset to non interlaced */
+        for ( b = 0; b < 4; b++ )
+            mb->blockDxInfo[b].CurrentReconStride = pbi->Configuration.YStride;
+
+        mb->blockDxInfo[2].thisRecon += (pbi->Configuration.YStride * 7);
+        mb->blockDxInfo[3].thisRecon += (pbi->Configuration.YStride * 7);
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeFrameMbs
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Decodes all the MacroBlocks of a frame.
+ *
+ *  SPECIAL NOTES :     On a base frame the mode / motion vector probabilities
+ *                      and the scan order are reset to their defaults; on any
+ *                      other frame they are read from the bitstream. The
+ *                      macroblock loops run from BORDER_MBS up to (but not
+ *                      including) MBRows/MBCols minus BORDER_MBS.
+ *
+ ****************************************************************************/
+void VP6_DecodeFrameMbs ( PB_INSTANCE *pbi )
+{
+	if(pbi->FrameType != BASE_FRAME )
+	{
+		VP6_DecodeModeProbs(pbi);
+		VP6_ConfigureMvEntropyDecoder( pbi, pbi->FrameType );
+        pbi->LastMode = CODE_INTER_NO_MV;
+	}
+	else
+	{
+		memcpy ( pbi->probXmitted,VP6_BaselineXmittedProbs,sizeof(pbi->probXmitted));
+
+		memcpy ( pbi->IsMvShortProb, DefaultIsShortProbs, sizeof(pbi->IsMvShortProb) );
+		memcpy ( pbi->MvShortProbs, DefaultMvShortProbs, sizeof(pbi->MvShortProbs) );
+		memcpy ( pbi->MvSignProbs, DefaultSignProbs, sizeof(pbi->MvSignProbs) );
+		memcpy ( pbi->MvSizeProbs, DefaultMvLongProbs, sizeof(pbi->MvSizeProbs) );
+
+		memset ( pbi->MBModeProb,128,sizeof(pbi->MBModeProb));
+		// Size the fill from the array being filled (this used to be
+		// sizeof(pbi->MBModeProb), which is only right while both arrays
+		// happen to have the same size)
+		memset ( pbi->BModeProb,128,sizeof(pbi->BModeProb));
+		memset ( pbi->predictionMode,1,sizeof(char)*pbi->MacroBlocks );
+
+		// Set up default scan order banding
+		if( pbi->Configuration.Interlaced == 1 )
+			memcpy( pbi->ScanBands, DefaultInterlacedScanBands, sizeof(pbi->ScanBands) );
+		else
+			memcpy( pbi->ScanBands, DefaultNonInterlacedScanBands, sizeof(pbi->ScanBands) );
+
+		// Build the scan order
+		BuildScanOrder( pbi, pbi->ScanBands );
+
+
+	}
+
+	VP6_ConfigureEntropyDecoder( pbi, pbi->FrameType ); 
+
+    {
+        UINT32  i;
+
+        // Entries 0..63: scan order mapped through the quantizer's transIndex table
+        for(i=0;i<64;i++)
+        {
+            pbi->MergedScanOrder[i] = pbi->quantizer->transIndex[pbi->ModifiedScanOrder[i]];
+        }
+
+
+        // Entries 64..128: coefficient position to band table, so the token
+        // readers can fetch the band with *(MergedScanOrderPtr + 64)
+
+        // Create Huffman codes for tokens based on tree probabilities
+        if ( pbi->UseHuffman )
+        {
+            ConvertBoolTrees ( pbi );
+
+            for(i = 64; i < 64+65; i++)
+            {
+                pbi->MergedScanOrder[i] = VP6_CoeffToHuffBand[i - 64];
+            }
+
+            // Reset Dc zero & Ac EOB run counters
+            pbi->CurrentDcRunLen[0]  = 0;
+            pbi->CurrentDcRunLen[1]  = 0;
+            pbi->CurrentAc1RunLen[0] = 0;
+            pbi->CurrentAc1RunLen[1] = 0;
+        }
+        else
+        {
+            for(i = 64; i < 64+65; i++)
+            {
+                pbi->MergedScanOrder[i] = VP6_CoeffToBand[i - 64];
+            }
+        }
+    }
+
+	// Base probability of the per-macroblock interlaced flag
+	if(pbi->Configuration.Interlaced == 1)
+		pbi->probInterlaced = ((UINT8)VP6_bitread( &pbi->br,   8 ));  
+
+	// since we are on a new frame reset the above contexts 
+	VP6_ResetAboveContext(pbi);
+
+    {
+	    UINT32 MBrow;
+	    UINT32 MBRows = pbi->MBRows; 
+	    UINT32 MBCols = pbi->MBCols;
+
+        // Stop short of the border macroblocks at the far edges
+        MBCols -= BORDER_MBS;
+        MBRows -= BORDER_MBS;
+
+        // for each row of macroblocks 
+	    MBrow=BORDER_MBS;
+        do
+	    {
+            MACROBLOCK_INFO *mbi = &pbi->mbi;
+            UINT32 MBcol;
+
+		    VP6_ResetLeftContext(pbi);
+
+		    // Point the six blocks at the above contexts of the first coded
+		    // column; blocks 0/2 and 1/3 share an entry of AboveY
+	        mbi->blockDxInfo[0].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+	        mbi->blockDxInfo[1].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+	        mbi->blockDxInfo[2].Above = &pbi->fc.AboveY[BORDER_MBS*2];
+	        mbi->blockDxInfo[3].Above = &pbi->fc.AboveY[BORDER_MBS*2+1];
+	        mbi->blockDxInfo[4].Above = &pbi->fc.AboveU[BORDER_MBS];
+	        mbi->blockDxInfo[5].Above = &pbi->fc.AboveV[BORDER_MBS];
+
+
+	        // Offsets of the six blocks of the first coded macroblock of this
+	        // row in the reconstruction buffer (16x16 luma, 8x8 per chroma plane)
+	        mbi->blockDxInfo[0].thisRecon = pbi->ReconYDataOffset + ((MBrow * pbi->Configuration.YStride) << 4) + (BORDER_MBS * 16);
+	        mbi->blockDxInfo[1].thisRecon = mbi->blockDxInfo[0].thisRecon + 8;
+	        mbi->blockDxInfo[2].thisRecon = mbi->blockDxInfo[0].thisRecon + (pbi->Configuration.YStride << 3);
+	        mbi->blockDxInfo[3].thisRecon = mbi->blockDxInfo[1].thisRecon + (pbi->Configuration.YStride << 3);
+
+	        mbi->blockDxInfo[4].thisRecon = pbi->ReconUDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+	        mbi->blockDxInfo[5].thisRecon = pbi->ReconVDataOffset + ((MBrow * pbi->Configuration.UVStride) << 3) + (BORDER_MBS * 8);
+
+
+		    // for each macroblock within a row of macroblocks
+            MBcol=BORDER_MBS;
+            do
+            {
+			    // Decode the macroblock
+			    VP6_DecodeMacroBlock(pbi, MBrow, MBcol);   
+
+
+	            // Step the contexts and offsets one macroblock to the right
+	            mbi->blockDxInfo[0].Above += 2;
+	            mbi->blockDxInfo[1].Above += 2;
+	            mbi->blockDxInfo[2].Above += 2;
+	            mbi->blockDxInfo[3].Above += 2;
+	            mbi->blockDxInfo[4].Above += 1;
+	            mbi->blockDxInfo[5].Above += 1;
+
+                mbi->blockDxInfo[0].thisRecon += 16;
+                mbi->blockDxInfo[1].thisRecon += 16;
+                mbi->blockDxInfo[2].thisRecon += 16;
+                mbi->blockDxInfo[3].thisRecon += 16;
+                mbi->blockDxInfo[4].thisRecon += 8;
+                mbi->blockDxInfo[5].thisRecon += 8;
+
+		    } while(++MBcol < MBCols);
+
+
+	    } while(++MBrow < MBRows);
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c
new file mode 100644
index 00000000..4d47f4da
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemode.c
@@ -0,0 +1,656 @@
+/****************************************************************************
+*          
+*   Module Title :	   Decodemode.c     
+*
+*   Description  :     Functions for decoding modes and motionvectors 
+*
+****************************************************************************/ 
+
+//************************************************************************************
+// Decoding the Modes: 
+//
+//  Decode Mode Tree Looks like this:
+//
+//
+//
+//
+//                                            zz 
+//                                                             
+//                               0                        Mode Same As Last
+//                                                                
+//                    
+//              1                                       2
+//
+//       3             4                  5                          6
+//
+//  NoMV   +MV    Nest  Near        Intra   FourMV          7                 8
+//                                                      
+//                                                   00Gold   GoldMV    GNrst   GNear
+//
+//
+// 30 probability contexts are set up at each branch (in probMode) corresponding to
+//
+//   3 for what situation we are in at the mode level (all modes available, 
+//     no nearest mv found, and no near mv found) 
+//
+//  10 one for each possible last mode
+//
+// Note: if the last mode was near then the probability of getting near at position 4 
+// above is set to 0 (it would have been coded as same as last). Note also that the 
+// probability of getting near when no near mv is available is also always set to 0.
+//
+// These probs are created from the 20 that can be xmitted in the bitstream (probXmitted)
+//    For each mode 2 probabilities can be transmitted:
+//        probability that the mode will appear if the last mode was the same
+//        probability that the mode will appear if the last mode is not that mode
+//
+//************************************************************************************
+
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+/****************************************************************************
+*  Implicit Imports
+****************************************************************************/        
+#define STRICT              /* Strict type checking */
+
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+
+//*****************************************************************************
+// ModeVQ: table of candidate probability vectors for coding modes, with one set
+// of vectors per MODETYPE (all modes available, no nearest, and no near).
+// To build it, a number of clips were run through and allowed to select the
+// per-frame probabilities that were best for them; those choices were output and
+// a vector quantizer was used to optimize the selection of 16 vectors per MODETYPE.
+//*****************************************************************************
+const UINT8 VP6_ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2] =	// flat initializer; each row below lists 20 values (one vector, assuming MAX_MODES is 10)
+{
+  9, 15, 32, 25,  7, 19,  9, 21,  1, 12, 14, 12,  3, 18, 14, 23,  3, 10,  0,  4,
+ 48, 39,  1,  2, 11, 27, 29, 44,  7, 27,  1,  4,  0,  3,  1,  6,  1,  2,  0,  0,
+ 21, 32,  1,  2,  4, 10, 32, 43,  6, 23,  2,  3,  1, 19,  1,  6, 12, 21,  0,  7,
+ 69, 83,  0,  0,  0,  2, 10, 29,  3, 12,  0,  1,  0,  3,  0,  3,  2,  2,  0,  0,
+ 11, 20,  1,  4, 18, 36, 43, 48, 13, 35,  0,  2,  0,  5,  3, 12,  1,  2,  0,  0,
+ 70, 44,  0,  1,  2, 10, 37, 46,  8, 26,  0,  2,  0,  2,  0,  2,  0,  1,  0,  0,
+  8, 15,  0,  1,  8, 21, 74, 53, 22, 42,  0,  1,  0,  2,  0,  3,  1,  2,  0,  0,
+141, 42,  0,  0,  1,  4, 11, 24,  1, 11,  0,  1,  0,  1,  0,  2,  0,  0,  0,  0,
+  8, 19,  4, 10, 24, 45, 21, 37,  9, 29,  0,  3,  1,  7, 11, 25,  0,  2,  0,  1,
+ 46, 42,  0,  1,  2, 10, 54, 51, 10, 30,  0,  2,  0,  2,  0,  1,  0,  1,  0,  0,
+ 28, 32,  0,  0,  3, 10, 75, 51, 14, 33,  0,  1,  0,  2,  0,  1,  1,  2,  0,  0,
+100, 46,  0,  1,  3,  9, 21, 37,  5, 20,  0,  1,  0,  2,  1,  2,  0,  1,  0,  0,
+ 27, 29,  0,  1,  9, 25, 53, 51, 12, 34,  0,  1,  0,  3,  1,  5,  0,  2,  0,  0,
+ 80, 38,  0,  0,  1,  4, 69, 33,  5, 16,  0,  1,  0,  1,  0,  0,  0,  1,  0,  0,
+ 16, 20,  0,  0,  2,  8,104, 49, 15, 33,  0,  1,  0,  1,  0,  1,  1,  1,  0,  0,
+194, 16,  0,  0,  1,  1,  1,  9,  1,  3,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,
+
+ 41, 22,  1,  0,  1, 31,  0,  0,  0,  0,  0,  1,  1,  7,  0,  1, 98, 25,  4, 10,
+123, 37,  6,  4,  1, 27,  0,  0,  0,  0,  5,  8,  1,  7,  0,  1, 12, 10,  0,  2,
+ 26, 14, 14, 12,  0, 24,  0,  0,  0,  0, 55, 17,  1,  9,  0, 36,  5,  7,  1,  3,
+209,  5,  0,  0,  0, 27,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,
+  2,  5,  4,  5,  0,121,  0,  0,  0,  0,  0,  3,  2,  4,  1,  4,  2,  2,  0,  1,
+175,  5,  0,  1,  0, 48,  0,  0,  0,  0,  0,  2,  0,  1,  0,  2,  0,  1,  0,  0,
+ 83,  5,  2,  3,  0,102,  0,  0,  0,  0,  1,  3,  0,  2,  0,  1,  0,  0,  0,  0,
+233,  6,  0,  0,  0,  8,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  1,  0,  0,
+ 34, 16,112, 21,  1, 28,  0,  0,  0,  0,  6,  8,  1,  7,  0,  3,  2,  5,  0,  2,
+159, 35,  2,  2,  0, 25,  0,  0,  0,  0,  3,  6,  0,  5,  0,  1,  4,  4,  0,  1,
+ 75, 39,  5,  7,  2, 48,  0,  0,  0,  0,  3, 11,  2, 16,  1,  4,  7, 10,  0,  2,
+212, 21,  0,  1,  0,  9,  0,  0,  0,  0,  1,  2,  0,  2,  0,  0,  2,  2,  0,  0,
+  4,  2,  0,  0,  0,172,  0,  0,  0,  0,  0,  1,  0,  2,  0,  0,  2,  0,  0,  0,
+187, 22,  1,  1,  0, 17,  0,  0,  0,  0,  3,  6,  0,  4,  0,  1,  4,  4,  0,  1,
+133,  6,  1,  2,  1, 70,  0,  0,  0,  0,  0,  2,  0,  4,  0,  3,  1,  1,  0,  0,
+251,  1,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+
+  2,  3,  2,  3,  0,  2,  0,  2,  0,  0, 11,  4,  1,  4,  0,  2,  3,  2,  0,  4,
+ 49, 46,  3,  4,  7, 31, 42, 41,  0,  0,  2,  6,  1,  7,  1,  4,  2,  4,  0,  1,
+ 26, 25,  1,  1,  2, 10, 67, 39,  0,  0,  1,  1,  0, 14,  0,  2, 31, 26,  1,  6,
+103, 46,  1,  2,  2, 10, 33, 42,  0,  0,  1,  4,  0,  3,  0,  1,  1,  3,  0,  0,
+ 14, 31,  9, 13, 14, 54, 22, 29,  0,  0,  2,  6,  4, 18,  6, 13,  1,  5,  0,  1,
+ 85, 39,  0,  0,  1,  9, 69, 40,  0,  0,  0,  1,  0,  3,  0,  1,  2,  3,  0,  0,
+ 31, 28,  0,  0,  3, 14,130, 34,  0,  0,  0,  1,  0,  3,  0,  1,  3,  3,  0,  1,
+171, 25,  0,  0,  1,  5, 25, 21,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,
+ 17, 21, 68, 29,  6, 15, 13, 22,  0,  0,  6, 12,  3, 14,  4, 10,  1,  7,  0,  3,
+ 51, 39,  0,  1,  2, 12, 91, 44,  0,  0,  0,  2,  0,  3,  0,  1,  2,  3,  0,  1,
+ 81, 25,  0,  0,  2,  9,106, 26,  0,  0,  0,  1,  0,  1,  0,  1,  1,  1,  0,  0,
+140, 37,  0,  1,  1,  8, 24, 33,  0,  0,  1,  2,  0,  2,  0,  1,  1,  2,  0,  0,
+ 14, 23,  1,  3, 11, 53, 90, 31,  0,  0,  0,  3,  1,  5,  2,  6,  1,  2,  0,  0,
+123, 29,  0,  0,  1,  7, 57, 30,  0,  0,  0,  1,  0,  1,  0,  1,  0,  1,  0,  0,
+ 13, 14,  0,  0,  4, 20,175, 20,  0,  0,  0,  1,  0,  1,  0,  1,  1,  1,  0,  0,
+202, 23,  0,  0,  1,  3,  2,  9,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0
+};
+
+// These are the probabilities that we reset to after each keyframe.
+// They were created as the average probabilities of the trees.
+const UINT8 VP6_BaselineXmittedProbs[4][2][10] =
+{
+ 42,  2,  7, 42, 22,  3,  2,  5,  1,  0,   69,  1,  1, 44,  6,  1,  0,  1,  0,  0,
+  8,  1,  8,  0,  0,  2,  1,  0,  1,  0,  229,  1,  0,  0,  0,  1,  0,  0,  1,  0,
+ 35,  1,  6, 34,  0,  2,  1,  1,  1,  0,  122,  1,  1, 46,  0,  1,  0,  0,  1,  0,
+ 64,  0, 64, 64, 64,  0,  0,  0,  0,  0,   64,  0, 64, 64, 64,  0,  0,  0,  0,  0,
+};
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_BuildModeTree
+ *
+ *  INPUTS        : PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void    
+ *
+ *  FUNCTION      : Fills in probModeSame and the branch probabilities of the mode tree
+ *                  (probMode) based upon probXmitted, the frequencies transmitted in the bitstream.
+ *
+ ****************************************************************************/
+void VP6_BuildModeTree ( PB_INSTANCE *pbi )
+{
+	int i,j,k;
+
+	// build one set of branch probabilities for each possible last mode i
+    // Note: each of the trees is minus the node given by probModeSame
+	for ( i=0; i<10; i++ )	// literal 10: assumes MAX_MODES (the size of Counts) is 10
+	{
+		unsigned int Counts[MAX_MODES];
+		unsigned int total;
+
+		// set up the probabilities for each tree
+		for(k=0;k<MODETYPES;k++)
+		{
+			total=0;
+			for ( j=0; j<10; j++ )
+			{	
+				if ( i == j )
+				{
+					Counts[j]=0;	// mode i itself is left out of the tree for last mode i
+				}
+				else
+				{
+					Counts[j]=100*pbi->probXmitted[k][0][j];	// weight of mode j: its [0] entry scaled by 100
+				}
+
+				total+=Counts[j];
+			}
+
+			pbi->probModeSame[k][i] = 255-
+				255 * pbi->probXmitted[k][1][i] 
+				/
+				(	1 +	// the 1 keeps the divisor non-zero
+					pbi->probXmitted[k][1][i] +	
+					pbi->probXmitted[k][0][i]
+				);
+
+			// each branch is basically calculated via 
+			// summing all possibilities at that branch.
+			pbi->probMode[k][i][0]= 1 + 255 *	// node 0: the four plain inter modes vs. everything counted in total
+				(
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]+
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				) / 
+				(   1 +
+				    total
+				);
+
+			pbi->probMode[k][i][1]= 1 + 255 *	// node 1: {no MV, plus MV} within the four plain inter modes
+				(
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]
+				) / 
+				(
+					1 + 
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]+
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				);
+
+			pbi->probMode[k][i][2]= 1 + 255 *	// node 2: {intra, four MV} within the remaining six modes
+				(
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]
+				) / 
+				(
+					1 + 
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]+
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]+
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+			
+			pbi->probMode[k][i][3]= 1 + 255 *	// node 3: no MV within {no MV, plus MV}
+				(
+					Counts[CODE_INTER_NO_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]
+				);
+
+			pbi->probMode[k][i][4]= 1 + 255 *	// node 4: nearest within {nearest, near}
+				(
+					Counts[CODE_INTER_NEAREST_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				) ;
+
+			pbi->probMode[k][i][5]= 1 + 255 *	// node 5: intra within {intra, four MV}
+				(
+					Counts[CODE_INTRA]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]
+				);
+
+			pbi->probMode[k][i][6]= 1 + 255 *	// node 6: {golden, golden MV} within the four golden modes
+				(
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]+
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+
+			pbi->probMode[k][i][7]= 1 + 255 *	// node 7: golden (no MV) within {golden, golden MV}
+				(
+					Counts[CODE_USING_GOLDEN]
+				) / 
+				(
+					1 +
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]
+				);
+
+			pbi->probMode[k][i][8]= 1 + 255 *	// node 8: golden nearest within {golden nearest, golden near}
+				(
+					Counts[CODE_GOLD_NEAREST_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_decodeModeDiff
+ *
+ *  INPUTS        : PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : a probability difference value decoded from the bitstream.
+ *
+ *  FUNCTION      : this function returns a signed probability difference that is
+ *                  a multiple of 4 (0, +/-4 .. +/-24, or +/-4 * VP6_bitread(..,7)),
+ *                  decoded using a fixed tree with hardcoded probabilities.
+ *
+ *  SPECIAL NOTES : The hard coded probabilities for the difference tree
+ *                  were calculated by taking the average number of times a 
+ *                  branch was taken on some sample material ie 
+ *                  (bond,bike,beautifulmind)
+ *
+ ****************************************************************************/
+int VP6_decodeModeDiff ( PB_INSTANCE *pbi )
+{
+	int sign;
+
+    if ( VP6_DecodeBool(&pbi->br, 205) == 0 )	// first bool 0: no change
+		return 0;
+	
+	sign = 1 + -2 * VP6_DecodeBool128(&pbi->br);	// +1, or -1 when the decoded bool is 1
+	
+	if( !VP6_DecodeBool(&pbi->br,171) )
+	{
+        return sign * (1 << (3-VP6_DecodeBool(	&pbi->br,83)));	// +/-8 or +/-4; multiply rather than left-shift sign, since shifting -1 is undefined in C
+	}
+	else
+	{
+		if( !VP6_DecodeBool(	&pbi->br,199) ) 
+		{
+			if(VP6_DecodeBool(	&pbi->br,140))
+				return sign * 12;
+
+			if(VP6_DecodeBool(	&pbi->br,125))
+				return sign * 16;
+
+			if(VP6_DecodeBool(	&pbi->br,104))
+				return sign * 20;
+
+			return sign * 24;
+		}
+		else 
+		{
+			int diff = VP6_bitread(&pbi->br,7);	// escape: explicit magnitude, in units of 4
+			return sign * diff * 4;
+		}
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeModeProbs
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     This function parses the probabilities transmitted in 
+ *                      the bitstream. The bitstream may either use the 
+ *                      last frames' baselines, or transmit a pointer to a
+ *                      vector of new probabilities. It may then additionally
+ *                      contain updates to each of these probabilities.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_DecodeModeProbs ( PB_INSTANCE *pbi )
+{
+	int i,j;
+
+	// For each mode type (all modes available, no nearest, no near mode)
+	for ( j=0; j<MODETYPES; j++ )
+	{
+		// determine whether we are sending a vector for this mode type
+		if ( VP6_DecodeBool( &pbi->br, PROBVECTORXMIT ) )
+		{
+			// figure out which vector we have encoded
+			int whichVector = VP6_bitread(&pbi->br, 4);	// row of VP6_ModeVq[j]; presumably a 4-bit value (0..15) -- confirm against VP6_bitread
+
+			// replace both probabilities of every mode with the chosen vector's values
+			for ( i=0; i<MAX_MODES; i++ )
+			{
+				pbi->probXmitted[j][1][i] = VP6_ModeVq[j][whichVector][i*2];	// even entries feed [1]
+				pbi->probXmitted[j][0][i] = VP6_ModeVq[j][whichVector][i*2+1];	// odd entries feed [0]
+			}
+		} 
+
+		// decode whether updates to bring it closer to ideal 
+		if ( VP6_DecodeBool( &pbi->br, PROBIDEALXMIT) )
+		{
+			for ( i=0; i<10; i++ )	// literal 10 here vs. MAX_MODES above; assumes the two are equal
+			{
+				int diff;
+
+				// add the decoded difference to the [1] entry, clamped to 0..255
+				diff = VP6_decodeModeDiff(pbi);
+				diff += pbi->probXmitted[j][1][i];
+
+				pbi->probXmitted[j][1][i] = ( diff<0 ? 0 : (diff>255?255:diff) );
+
+				// add the next decoded difference to the [0] entry, clamped to 0..255
+				diff = VP6_decodeModeDiff(pbi);
+				diff += pbi->probXmitted[j][0][i];
+
+				pbi->probXmitted[j][0][i] = ( diff<0 ? 0 : (diff>255?255:diff) );
+
+			}
+		}
+	}
+	
+	VP6_BuildModeTree(pbi);	// rebuild probModeSame / probMode from the (possibly updated) probXmitted
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeBlockMode
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Decoded coding mode (as a CODING_MODE)
+ *
+ *  FUNCTION      :     Decodes a coding mode for a block from 2 bits in the bitstream.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+CODING_MODE VP6_DecodeBlockMode ( PB_INSTANCE *pbi )
+{
+	int choice = VP6_DecodeBool128(&pbi->br)<<1;	// first decoded bool becomes the high bit
+	
+    choice += VP6_DecodeBool128(&pbi->br);	// second decoded bool becomes the low bit
+
+	switch ( choice )
+	{
+	    case 0: return CODE_INTER_NO_MV;        // 0
+	    case 1: return CODE_INTER_PLUS_MV;      // 2
+	    case 2: return CODE_INTER_NEAREST_MV;   // 3
+	    case 3: return CODE_INTER_NEAR_MV;      // 4
+	}
+	return (CODING_MODE)0;	// fallback for any choice not handled by the switch
+
+}   
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeMode
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi     : Pointer to decoder instance.
+ *                      CODING_MODE lastmode : Mode of the last coded macroblock.
+ *						UINT32 type          : Mode type (all modes available,
+ *                                             nonearest macroblock, no near macroblock).
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Decoded coding mode (as a CODING_MODE)
+ *
+ *  FUNCTION      :     Decodes a MB mode from the bitstream. A first bool, decoded
+ *                      with probModeSame[type][lastmode], selects lastmode when
+ *                      set; otherwise the mode tree is walked using the branch
+ *                      probabilities stored in probMode[type][lastmode] (see
+ *                      VP6_BuildModeTree).
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+CODING_MODE VP6_DecodeMode ( PB_INSTANCE *pbi, CODING_MODE lastmode, UINT32 type )
+{
+	CODING_MODE	mode;
+
+    if ( VP6_DecodeBool(&pbi->br,pbi->probModeSame[type][lastmode]) )
+	{
+		mode = lastmode;
+	}
+	else
+    {   // 0
+		UINT8 *Stats =pbi->probMode[type][lastmode]; 	// the 9 branch probabilities for this type / last mode
+		
+        if ( VP6_DecodeBool(&pbi->br,Stats[0]) )
+        {   // 2
+			if ( VP6_DecodeBool(&pbi->br,Stats[2]) )
+            {   //6
+				if ( VP6_DecodeBool(&pbi->br,Stats[6]) )
+                {  // 8
+                    mode = CODE_GOLD_NEAREST_MV + VP6_DecodeBool(&pbi->br,Stats[8]);	// relies on the mode after GOLD_NEAREST in CODING_MODE being the "golden near" mode
+				}
+				else
+                {  // 7
+				    mode = CODE_USING_GOLDEN + VP6_DecodeBool(&pbi->br,Stats[7]);	// relies on the mode after USING_GOLDEN in CODING_MODE being the "golden MV" mode
+				}
+			}
+			else
+            {   //5
+				mode = CODE_INTRA;
+				if ( VP6_DecodeBool(&pbi->br,Stats[5]) )
+				{
+					mode = CODE_INTER_FOURMV;
+				}
+			}
+		}
+		else
+        {   // 1
+			if ( VP6_DecodeBool(&pbi->br,Stats[1]) )
+            {   // 4
+				mode = CODE_INTER_NEAREST_MV + VP6_DecodeBool(&pbi->br,Stats[4]);	// relies on the mode after NEAREST in CODING_MODE being the "near" mode
+			}
+			else
+            {   // 3
+				mode = CODE_INTER_NO_MV + 2 * VP6_DecodeBool(&pbi->br,Stats[3]);	// relies on "plus MV" being two above "no MV" in CODING_MODE
+			}
+		}
+	}
+	return mode;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_decodeModeAndMotionVector
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      UINT32 MBrow     : Row number for MB.  
+ *						UINT32 MBcol     : Col number for MB.
+ *						
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Decodes a macroblock's coding mode and any associated
+ *                      motion vectors from the bitstream .
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_decodeModeAndMotionVector ( PB_INSTANCE *pbi, UINT32 MBrow, UINT32 MBcol )
+{
+	int type;
+    int x, y;
+	UINT32 k;
+	CODING_MODE mode;   //lastmode;
+	MOTION_VECTOR mv;
+
+	VP6_FindNearestandNextNearest(pbi,MBrow,MBcol,1,&type);	// Frame 1: fills Nearest/NearInterMVect and sets type
+
+	mode = 	VP6_DecodeMode(pbi,pbi->LastMode,type);
+    pbi->LastMode = mode; 
+	
+	pbi->predictionMode[MBOffset(MBrow,MBcol)] = mode;
+	pbi->mbi.Mode = mode;
+    if ( mode == CODE_INTER_FOURMV )    
+    {
+		pbi->mbi.BlockMode[0] = VP6_DecodeBlockMode(pbi);	// blocks 0..3 each get their own mode
+		pbi->mbi.BlockMode[1] = VP6_DecodeBlockMode(pbi);
+		pbi->mbi.BlockMode[2] = VP6_DecodeBlockMode(pbi);
+		pbi->mbi.BlockMode[3] = VP6_DecodeBlockMode(pbi);
+
+		pbi->mbi.BlockMode[4] = CODE_INTER_FOURMV;
+		pbi->mbi.BlockMode[5] = CODE_INTER_FOURMV;
+		x=0;	// x,y accumulate the sum of the four block vectors
+		y=0;
+		for ( k=0; k<4; k++ )
+		{
+			if ( pbi->mbi.BlockMode[k] == CODE_INTER_NO_MV )
+            {
+				pbi->mbi.Mv[k].x = 0;	// zero vector: adds nothing to the sums
+                pbi->mbi.Mv[k].y = 0;
+            }
+			else if( pbi->mbi.BlockMode[k] == CODE_INTER_NEAREST_MV )
+            {
+				pbi->mbi.Mv[k].x = pbi->mbi.NearestInterMVect.x;
+                pbi->mbi.Mv[k].y = pbi->mbi.NearestInterMVect.y;                
+                x+=pbi->mbi.NearestInterMVect.x;
+				y+=pbi->mbi.NearestInterMVect.y;
+            }
+            else if ( pbi->mbi.BlockMode[k] == CODE_INTER_NEAR_MV )
+            {
+				pbi->mbi.Mv[k].x = pbi->mbi.NearInterMVect.x;
+                pbi->mbi.Mv[k].y = pbi->mbi.NearInterMVect.y;                
+                x+=pbi->mbi.NearInterMVect.x;
+				y+=pbi->mbi.NearInterMVect.y;
+            }
+            else if ( pbi->mbi.BlockMode[k] == CODE_INTER_PLUS_MV )
+            {
+				VP6_decodeMotionVector(pbi,&mv,CODE_INTER_PLUS_MV);	// explicit vector read from the bitstream
+				pbi->mbi.Mv[k].x = mv.x;
+                pbi->mbi.Mv[k].y = mv.y;
+                x+=mv.x;
+				y+=mv.y;
+            }
+		}
+        x = (x+1+(x>=0))>>2;	// sum/4 with a rounding offset of 2 (x>=0) or 1 (x<0); >> of a negative int is implementation-defined in C
+        y = (y+1+(y>=0))>>2;
+
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = pbi->mbi.Mv[3].x;	// the MB-level vector recorded is block 3's
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = pbi->mbi.Mv[3].y;
+        
+        pbi->mbi.Mv[4].x = x; 	// blocks 4 and 5 (presumably the chroma blocks) share the averaged vector
+        pbi->mbi.Mv[4].y = y;
+
+        pbi->mbi.Mv[5].x = x; 
+        pbi->mbi.Mv[5].y = y;
+    }
+    else
+    {
+        switch ( mode )
+        {
+        case CODE_INTER_NEAREST_MV:
+			x = pbi->mbi.NearestInterMVect.x;
+			y = pbi->mbi.NearestInterMVect.y;            
+			break;
+        case CODE_INTER_NEAR_MV:
+			x = pbi->mbi.NearInterMVect.x;
+			y = pbi->mbi.NearInterMVect.y;
+			break;
+        case CODE_GOLD_NEAREST_MV:
+            VP6_FindNearestandNextNearest(pbi, MBrow, MBcol, 2, &type);	// Frame 2: fills the golden candidates; type is not written for this Frame value
+            x = pbi->mbi.NearestGoldMVect.x;
+            y = pbi->mbi.NearestGoldMVect.y;
+            break;
+        case CODE_GOLD_NEAR_MV:
+            VP6_FindNearestandNextNearest(pbi, MBrow, MBcol, 2, &type);
+            x = pbi->mbi.NearGoldMVect.x;
+            y = pbi->mbi.NearGoldMVect.y;
+            break;
+        case CODE_INTER_PLUS_MV:
+            VP6_decodeMotionVector(pbi,&mv,CODE_INTER_PLUS_MV);
+            x = mv.x;
+            y = mv.y;
+			break;
+        case CODE_GOLDEN_MV:
+            VP6_FindNearestandNextNearest(pbi, MBrow, MBcol, 2, &type);	// sets NearestGMvIndex / NearestGoldMVect, which VP6_decodeMotionVector reads
+            VP6_decodeMotionVector(pbi,&mv,CODE_GOLDEN_MV);
+            x = mv.x;
+            y = mv.y;
+            break;
+        default:	// every other mode uses a zero vector
+            x =0;
+            y =0;
+			break; 
+        }
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = x;
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = y;
+		for ( k=0; k<6 ; k++ )	// all six blocks share the MB mode and vector
+		{
+            pbi->mbi.Mv[k].x = x;
+            pbi->mbi.Mv[k].y = y;
+			pbi->mbi.BlockMode[k] = mode;
+		}
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c
new file mode 100644
index 00000000..ff116871
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/decodemv.c
@@ -0,0 +1,339 @@
+/****************************************************************************
+*        
+*   Module Title :	   Decodemv.c     
+*
+*   Description  :     Functions for decoding modes and motion vectors.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"					
+#include "decodemode.h" 
+#include "decodemv.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+
+
+/****************************************************************************
+*  Exports
+****************************************************************************/        
+const UINT8 VP6_MvUpdateProbs[2][MV_NODES] = 	// probabilities for the "update follows" bools read in VP6_ConfigureMvEntropyDecoder
+{ 	// per row: [0] is-short prob, [1] sign prob, [2..8] short tree nodes, [9..] long vector bits
+	{ 237, 246, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 250, 250, 252 }, 
+	{ 231, 243, 245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 251, 251, 254 }
+};
+
+const UINT8 DefaultMvShortProbs[2][7] = 	// NOTE(review): presumably the initial pbi->MvShortProbs values -- the code using this table is not in view
+{ 
+    { 225, 146, 172, 147, 214,  39, 156 }, 
+    { 204, 170, 119, 235, 140, 230, 228 }
+};
+
+const UINT8 DefaultMvLongProbs[2][LONG_MV_BITS] =  	// NOTE(review): presumably the initial pbi->MvSizeProbs values -- confirm at the use site
+{ 
+    { 247, 210, 135,  68, 138, 220, 239, 246 }, 
+    { 244, 184, 201,  44, 173, 221, 239, 253 } 
+};
+
+const UINT8 DefaultIsShortProbs[2] = { 162, 164 };	// NOTE(review): presumably the initial pbi->IsMvShortProb values -- confirm
+const UINT8 DefaultSignProbs[2]    = { 128, 128 };	// NOTE(review): presumably the initial pbi->MvSignProbs values -- confirm
+
+/**************************************************************************** 
+ * 
+ *  ROUTINE       :     VP6_ConfigureMvEntropyDecoder
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      UINT8 FrameType  : Type of the frame.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Builds the MV entropy decoding tree.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+***************************************************************************/
+void VP6_ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )	// FrameType is not referenced in the body
+{
+	int i;
+    
+	// This function is not called at all for a BASE_FRAME
+	// Read any changes to mv probabilities.
+	for ( i = 0; i < 2; i++ )	// i is the same index VP6_decodeMotionVector uses for x (0) and y (1)
+	{
+		// Short vector probability
+		if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[i][0]) )
+		{
+			pbi->IsMvShortProb[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;	// stored value is the read value doubled
+			if ( pbi->IsMvShortProb[i] == 0 )	// a probability of 0 is replaced by 1
+				pbi->IsMvShortProb[i] = 1;
+		}
+
+		// Sign probability
+		if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[i][1]) )
+		{
+			pbi->MvSignProbs[i] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+			if ( pbi->MvSignProbs[i] == 0 )
+				pbi->MvSignProbs[i] = 1;
+		}
+	}
+
+	// Short vector tree node probabilities
+	for ( i = 0; i < 2; i++ )
+	{
+		UINT32 j;
+		UINT32 MvUpdateProbsOffset = 2;				// Offset into MvUpdateProbs[i][]
+
+		for ( j = 0; j < 7; j++ )	// seven short-tree nodes: update probs at offsets 2..8
+		{
+			if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[i][MvUpdateProbsOffset]) )
+			{
+				pbi->MvShortProbs[i][j] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvShortProbs[i][j] == 0 )
+					pbi->MvShortProbs[i][j] = 1;
+			}
+			MvUpdateProbsOffset++;
+		}
+	}
+
+	// Long vector tree node probabilities
+	for ( i = 0; i < 2; i++ )
+	{
+		UINT32 j;
+		UINT32 MvUpdateProbsOffset = 2 + 7;	// skip the two single probs and the seven short-tree nodes
+
+		for ( j = 0; j < LONG_MV_BITS; j++ )
+		{
+			if ( VP6_DecodeBool(&pbi->br, VP6_MvUpdateProbs[i][MvUpdateProbsOffset]) )
+			{
+				pbi->MvSizeProbs[i][j] = VP6_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][j] == 0 )
+					pbi->MvSizeProbs[i][j] = 1;
+			}
+			MvUpdateProbsOffset++;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_decodeMotionVector 
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						CODING_MODE Mode  : MV coding mode.
+ *						
+ *  OUTPUTS       :     MOTION_VECTOR *mv : Returned motion vector.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Decodes a motion vector from the bitstream.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_decodeMotionVector
+(
+	PB_INSTANCE *pbi,
+	MOTION_VECTOR *mv,
+	CODING_MODE Mode 
+)
+{
+	UINT32 i;
+	INT32  Vector = 0;
+	INT32  SignBit = 0;
+	INT32  MvOffsetX = 0;
+	INT32  MvOffsetY = 0;
+
+	// Work out how the MV was coded so that the appropriate origin offset can be applied
+	if ( Mode == CODE_INTER_PLUS_MV )
+	{	
+        // Normal Inter MV
+		if ( pbi->mbi.NearestMvIndex < MAX_NEAREST_ADJ_INDEX )	// otherwise the offset stays (0,0)
+		{
+			MvOffsetX = pbi->mbi.NearestInterMVect.x;
+			MvOffsetY = pbi->mbi.NearestInterMVect.y;
+		}
+	}
+	else
+	{	
+        // Golden Frame MV
+		if ( pbi->mbi.NearestGMvIndex < MAX_NEAREST_ADJ_INDEX )
+		{
+			MvOffsetX = pbi->mbi.NearestGoldMVect.x;
+			MvOffsetY = pbi->mbi.NearestGoldMVect.y;
+		}
+	}
+
+	for ( i = 0; i < 2; i++ )	// i == 0 decodes x, i == 1 decodes y
+	{
+		Vector = 0;
+
+		// Is the vector a small vector or a large vector
+		if ( !VP6_DecodeBool(&pbi->br, pbi->IsMvShortProb[i]) )
+		{
+			// Small magnitude vector: a 3-bit value built by walking the short tree
+			if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][0] ) )
+			{
+				Vector += (1 << 2);
+				if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][4]) )
+				{
+					Vector += (1 << 1);
+					Vector += VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][6]);
+				}
+				else
+				{
+					Vector += VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][5]);
+				}
+			}
+			else
+			{
+				if ( VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][1]) )
+				{
+					Vector += (1 << 1);
+					Vector += VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][3]);
+				}
+				else
+				{
+					Vector = VP6_DecodeBool(&pbi->br, pbi->MvShortProbs[i][2]);
+				}
+			}
+		}
+		else
+		{
+			// Large magnitude vector: bits are read in the order 0,1,2 then 7,6,5,4 then 3
+			Vector = VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][0] );
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][1] ) << 1);
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][2] ) << 2);
+
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][7] ) << 7);
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][6] ) << 6);
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][5] ) << 5);
+			Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][4] ) << 4);
+
+			// If none of the higher order bits are set then this bit is implicit
+			if ( Vector & 0xF0 )
+				Vector += (VP6_DecodeBool( &pbi->br, pbi->MvSizeProbs[i][3] ) << 3);
+			else
+				Vector += 0x08;
+		}
+
+		// Read the sign bit if needed.
+		if ( Vector != 0 )
+		{
+			SignBit = VP6_DecodeBool(&pbi->br, pbi->MvSignProbs[i]);
+
+			if ( SignBit )
+				Vector = -Vector;
+		}
+
+		if ( i )	// add the origin offset chosen above to the decoded delta
+			mv->y = Vector + MvOffsetY;
+		else
+			mv->x = Vector + MvOffsetX;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_FindNearestandNextNearest
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *						UINT32 MBrow      : Row of macroblock to check.
+ *						UINT32 MBcol      : Col of macroblock to check.
+ *						UINT8 Frame       : Frame type which MV should come 
+ *                                          from (Golden or Last).
+ *
+ *  OUTPUTS       :     int *type         : Type of the vector returned.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Find a Nearest and NextNearest MV in nearby MBs in
+ *                      frames having the same type (Golden or Last).
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_FindNearestandNextNearest
+(
+	PB_INSTANCE *pbi,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	UINT8 Frame,
+	int *type
+)
+{
+	int i;
+	UINT32 OffsetMB;
+	UINT32 BaseMB = MBOffset(MBrow,MBcol);
+	INT32 Nearest = 0;	// Nearest / NextNearest hold a whole motion vector packed into 32 bits
+    INT32 NextNearest = 0;
+    INT32 nearestIndex;
+    UINT32 thisMv;
+    INT32 typet;
+    
+    typet = NONEAREST_MACROBLOCK;	// result if no candidate vector is found at all
+
+	// BEWARE:
+    // The use of (unsigned int *) casting here is potentially dangerous 
+	// and will only work if the motion vector structure consists of 
+	// two 16 bit values and is 32 bit aligned.
+	for ( i=0; i<12 ; i++ )	// scan the 12 candidate positions listed in pbi->mvNearOffset
+	{ 
+		OffsetMB = pbi->mvNearOffset[i] + BaseMB;
+
+		if ( VP6_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame )	// skip candidates whose mode maps to a different frame
+			continue;
+
+		thisMv = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+
+		if ( thisMv ) 	// the first non-zero vector becomes Nearest
+        {
+	
+		    *((unsigned int *) &Nearest) = thisMv;
+		    typet = NONEAR_MACROBLOCK;
+    		break;
+        }		     
+   	}
+
+    nearestIndex = i;	// position at which Nearest was found; 12 if the scan found nothing
+
+    for ( i=i+1; i<12; i++ )	// continue after that position; does not run when nothing was found
+    {
+        OffsetMB = pbi->mvNearOffset[i] + BaseMB;
+        
+        if ( VP6_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame )
+            continue;
+        
+		thisMv = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+        if( thisMv == *((unsigned int *) &Nearest) )	// skip vectors equal to Nearest
+			continue;
+		
+		if( thisMv ) 	// the first different non-zero vector becomes NextNearest
+        {
+		    *((unsigned int *) &NextNearest) = thisMv;
+		    typet = MACROBLOCK;
+		    break;
+        }
+    }
+
+	// Only update type if normal frame
+	if ( Frame == 1 )
+	{
+        *type = typet;
+        pbi->mbi.NearestMvIndex = nearestIndex;
+		*((unsigned int *) &pbi->mbi.NearestInterMVect)  = *((unsigned int *) &Nearest);
+		*((unsigned int *) &pbi->mbi.NearInterMVect)  = *((unsigned int *) &NextNearest);
+	}
+	else	// any other Frame value: store the golden candidates and leave *type untouched
+	{
+        pbi->mbi.NearestGMvIndex = nearestIndex;
+		*((unsigned int *) &pbi->mbi.NearestGoldMVect)  = *((unsigned int *) &Nearest);
+		*((unsigned int *) &pbi->mbi.NearGoldMVect)  = *((unsigned int *) &NextNearest);
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c
new file mode 100644
index 00000000..12f4bd01
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/modestats.c
@@ -0,0 +1,330 @@
+#include "type_aliases.h"
+
+
+UINT8 Stats[9][4][4][4]=
+{
+  
+   32 , 56 , 78 ,161 ,
+  105 ,129 ,182 ,241 ,
+   78 ,132 ,189 ,212 ,
+  112 ,169 ,203 ,226 ,
+  
+  
+  100 ,122 ,178 ,217 ,
+  200 ,175 ,239 ,247 ,
+  183 ,153 ,239 ,237 ,
+  201 ,192 ,242 ,244 ,
+  
+  
+   75 ,127 ,181 ,205 ,
+  183 ,178 ,238 ,249 ,
+  192 ,226 ,243 ,241 ,
+  190 ,205 ,239 ,244 ,
+  
+  
+   98 ,150 ,195 ,224 ,
+  219 ,189 ,243 ,244 ,
+  177 ,215 ,240 ,248 ,
+  190 ,207 ,241 ,247 ,
+  
+  
+   19 , 26 , 14 , 33 ,
+  113 ,121 , 97 , 87 ,
+   11 , 14 ,  8 , 14 ,
+   21 , 14 , 14 , 20 ,
+  
+  
+  100 , 88 ,112 , 73 ,
+  188 ,169 ,158 ,140 ,
+   70 , 81 , 46 , 58 ,
+  100 , 82 , 49 , 55 ,
+  
+  
+   11 , 20 ,  9 , 21 ,
+   76 , 91 , 68 , 74 ,
+    6 ,  6 ,  5 , 11 ,
+   15 , 18 , 10 , 14 ,
+  
+  
+   17 , 24 , 16 , 16 ,
+   87 , 68 , 64 , 61 ,
+    9 ,  8 ,  9 , 15 ,
+   17 , 23 , 10 , 18 ,
+  
+  
+  186 ,157 ,180 ,162 ,
+  184 ,208 ,206 ,189 ,
+  176 ,128 ,203 ,179 ,
+  184 ,196 ,192 ,176 ,
+  
+  
+  164 ,149 ,155 ,141 ,
+  133 ,130 ,156 ,116 ,
+  145 ,154 ,175 ,144 ,
+  124 ,150 ,149 ,126 ,
+  
+  
+  194 ,113 ,173 ,160 ,
+  191 ,208 ,205 ,164 ,
+  210 ,179 ,197 ,174 ,
+  175 ,154 ,190 ,182 ,
+  
+  
+  144 ,111 ,183 ,165 ,
+  123 ,118 ,186 ,144 ,
+  189 ,128 ,168 ,141 ,
+   99 ,164 ,174 ,145 ,
+  
+  
+  196 ,160 ,197 ,102 ,
+  156 ,125 ,173 , 83 ,
+  219 ,208 ,226 ,137 ,
+  189 ,148 ,191 ,100 ,
+  
+  
+  173 ,122 ,146 , 76 ,
+  111 ,128 ,124 , 81 ,
+  177 ,176 ,203 ,116 ,
+  118 ,103 ,145 , 82 ,
+  
+  
+  223 ,201 ,228 ,129 ,
+  181 ,137 ,199 , 99 ,
+  236 ,227 ,236 ,159 ,
+  200 ,177 ,204 ,121 ,
+  
+  
+  184 ,157 ,194 ,103 ,
+  128 ,121 ,135 , 81 ,
+  204 ,177 ,207 ,121 ,
+  158 ,127 ,173 , 88 ,
+  
+  
+   81 , 46 , 70 , 84 ,
+  135 ,107 , 81 , 73 ,
+  128 ,128 ,113 , 94 ,
+  109 ,128 , 84 , 81 ,
+  
+  
+  122 ,128 , 62 ,111 ,
+  184 ,171 ,145 ,131 ,
+  172 ,174 ,151 ,180 ,
+  160 ,114 ,131 , 91 ,
+  
+  
+  111 ,128 , 81 , 84 ,
+  157 ,147 ,127 ,133 ,
+  113 ,148 ,107 ,135 ,
+  140 ,140 ,104 ,139 ,
+  
+  
+  112 ,128 , 92 , 86 ,
+  146 ,128 ,143 ,137 ,
+  110 ,128 , 83 ,133 ,
+  128 ,136 ,144 ,120 ,
+  
+  
+  108 ,117 ,109 ,108 ,
+  101 ,107 ,112 ,105 ,
+   71 , 67 , 49 , 82 ,
+   66 , 86 , 54 , 68 ,
+  
+  
+  117 , 93 ,122 ,111 ,
+   78 ,106 ,152 ,113 ,
+   49 , 64 , 54 , 96 ,
+   67 , 62 , 68 , 62 ,
+  
+  
+   59 , 59 , 43 , 85 ,
+   70 , 60 , 72 , 72 ,
+   55 , 65 , 62 , 92 ,
+   93 , 98 , 92 , 87 ,
+  
+  
+   54 , 65 , 51 , 64 ,
+   56 , 31 , 56 , 54 ,
+  137 ,144 ,147 ,161 ,
+  110 ,113 ,128 ,134 ,
+  
+  
+   39 , 46 , 29 , 61 ,
+   76 , 86 , 98 , 81 ,
+   47 , 53 , 39 , 63 ,
+   61 , 69 , 45 , 92 ,
+  
+  
+  104 , 89 , 76 , 63 ,
+   97 ,128 , 88 ,108 ,
+   64 , 56 , 34 , 82 ,
+  135 , 93 , 82 , 98 ,
+  
+  
+   43 , 51 , 36 , 56 ,
+   71 , 64 , 50 , 79 ,
+   40 , 32 , 33 , 58 ,
+   73 , 75 , 53 , 85 ,
+  
+  
+   59 , 64 , 47 , 71 ,
+  123 , 49 , 61 , 96 ,
+   70 , 58 , 47 , 78 ,
+  100 ,105 , 56 ,102 ,
+  
+  
+   18 ,128 ,128 , 16 ,
+  175 ,128 ,162 ,128 ,
+   53 ,128 , 40 ,128 ,
+   49 ,128 , 41 ,  1 ,
+  
+  
+  122 ,128 ,128 ,128 ,
+  181 ,189 ,193 ,100 ,
+  162 ,128 ,166 ,133 ,
+  110 ,128 , 89 ,128 ,
+  
+  
+   23 ,128 , 12 ,128 ,
+  171 ,149 ,182 ,135 ,
+   36 , 56 , 47 , 16 ,
+   32 ,128 , 39 , 26 ,
+  
+  
+  128 ,128 , 37 ,128 ,
+  106 ,128 ,131 ,134 ,
+  128 ,128 , 19 , 18 ,
+  128 ,128 , 12 ,  9 ,
+  
+  
+  128 ,128 ,128 ,128 ,
+    9 ,128 , 52 ,128 ,
+  128 ,128 ,128 ,128 ,
+  128 ,128 ,128 ,128 ,
+  
+  
+  128 ,128 ,128 ,128 ,
+   52 , 57 , 68 ,128 ,
+   13 ,128 ,  7 ,128 ,
+  128 ,128 ,128 ,128 ,
+  
+  
+  128 ,128 ,128 ,128 ,
+   11 ,128 , 12 , 25 ,
+  128 ,128 ,  1 ,128 ,
+  128 ,128 ,128 ,128 ,
+  
+  
+  128 ,128 ,128 ,128 ,
+  128 ,128 , 45 ,128 ,
+  128 ,128 ,128 ,128 ,
+  128 ,128 ,128 ,128 ,
+  
+};
+UINT32 MBBitCosts[4][4][4][10]=
+{
+  
+      11 ,   524 ,   351 ,   293 ,   324 ,   523 ,   644 ,   458 ,   815 ,   815 ,    19 ,   394 ,   275 ,   286 ,   286 ,   511 ,   575 ,   351 ,   639 ,   639 ,    32 ,   462 ,   266 ,   200 ,   236 ,   431 ,   571 ,   371 ,   635 ,   635 ,    96 ,   319 ,   131 ,   189 ,   249 ,   322 ,   449 ,   194 ,   513 ,   513 ,
+      48 ,   265 ,   258 ,   246 ,   287 ,   252 ,   360 ,   294 ,   296 ,   482 ,    62 ,   262 ,   178 ,   237 ,   288 ,   233 ,   330 ,   333 ,   394 ,   394 ,   116 ,   261 ,   189 ,   176 ,   236 ,   155 ,   362 ,   322 ,   261 ,   540 ,   227 ,   195 ,   137 ,   235 ,   235 ,   159 ,   310 ,   170 ,   374 ,   374 ,
+      31 ,   485 ,   311 ,   162 ,   267 ,   509 ,   548 ,   433 ,   671 ,   671 ,    62 ,   418 ,   254 ,   110 ,   236 ,   472 ,   536 ,   347 ,   600 ,   600 ,   129 ,   459 ,   258 ,    53 ,   223 ,   439 ,   499 ,   382 ,   746 ,   746 ,   163 ,   379 ,   121 ,   119 ,   197 ,   345 ,   399 ,   199 ,   653 ,   653 ,
+      51 ,   492 ,   237 ,   145 ,   250 ,   404 ,   458 ,   285 ,   573 ,   573 ,    82 ,   408 ,   181 ,   135 ,   244 ,   318 ,   498 ,   225 ,   562 ,   562 ,   140 ,   440 ,   184 ,    74 ,   202 ,   369 ,   391 ,   304 ,   693 ,   693 ,   201 ,   330 ,   114 ,   139 ,   267 ,   336 ,   381 ,   130 ,   618 ,   618 ,
+  
+  
+      38 ,   219 ,   270 ,   303 ,   311 ,   285 ,   392 ,   287 ,   456 ,   456 ,    53 ,   206 ,   209 ,   317 ,   317 ,   264 ,   328 ,   280 ,   392 ,   392 ,   106 ,   161 ,   196 ,   198 ,   238 ,   193 ,   377 ,   292 ,   441 ,   441 ,   198 ,   189 ,   119 ,   210 ,   236 ,   214 ,   369 ,   170 ,   433 ,   433 ,
+     155 ,   115 ,   211 ,   314 ,   302 ,   203 ,   250 ,   298 ,   214 ,   305 ,   101 ,   136 ,   243 ,   279 ,   279 ,   213 ,   285 ,   243 ,   349 ,   349 ,   268 ,   131 ,   206 ,   245 ,   277 ,   148 ,   291 ,   229 ,   210 ,   293 ,   363 ,   107 ,   201 ,   242 ,   335 ,   177 ,   267 ,   150 ,   350 ,   350 ,
+     109 ,   225 ,   200 ,   116 ,   254 ,   313 ,   311 ,   276 ,   430 ,   430 ,    81 ,   233 ,   249 ,   144 ,   261 ,   281 ,   345 ,   239 ,   409 ,   409 ,   283 ,   244 ,   199 ,    64 ,   223 ,   233 ,   376 ,   278 ,   310 ,   560 ,   268 ,   215 ,   122 ,   143 ,   230 ,   262 ,   279 ,   179 ,   374 ,   374 ,
+     144 ,   178 ,   183 ,   161 ,   254 ,   289 ,   294 ,   177 ,   379 ,   379 ,   105 ,   219 ,   198 ,   228 ,   218 ,   245 ,   309 ,   177 ,   373 ,   373 ,   250 ,   212 ,   138 ,   100 ,   240 ,   316 ,   299 ,   214 ,   392 ,   392 ,   291 ,   196 ,   115 ,   156 ,   315 ,   278 ,   382 ,   130 ,   446 ,   446 ,
+  
+  
+      27 ,   503 ,   341 ,   166 ,   290 ,   510 ,   622 ,   472 ,   686 ,   686 ,    61 ,   355 ,   242 ,   115 ,   258 ,   468 ,   532 ,   335 ,   596 ,   596 ,   118 ,   439 ,   262 ,    55 ,   235 ,   472 ,   490 ,   388 ,   824 ,   824 ,   149 ,   333 ,   128 ,   125 ,   206 ,   359 ,   474 ,   194 ,   538 ,   538 ,
+      93 ,   256 ,   234 ,   149 ,   237 ,   233 ,   302 ,   247 ,   392 ,   392 ,    89 ,   272 ,   180 ,   178 ,   254 ,   217 ,   362 ,   243 ,   426 ,   426 ,   283 ,   296 ,   183 ,    86 ,   186 ,   206 ,   336 ,   266 ,   261 ,   517 ,   363 ,   220 ,   120 ,   187 ,   236 ,   196 ,   294 ,   156 ,   278 ,   488 ,
+     147 ,   496 ,   297 ,    47 ,   193 ,   476 ,   516 ,   388 ,   667 ,   667 ,   215 ,   444 ,   231 ,    46 ,   162 ,   454 ,   453 ,   374 ,   643 ,   643 ,   309 ,   477 ,   261 ,    30 ,   166 ,   497 ,   501 ,   407 ,   784 ,   784 ,   275 ,   410 ,   125 ,    96 ,   153 ,   424 ,   419 ,   200 ,   746 ,   746 ,
+     125 ,   448 ,   198 ,    90 ,   162 ,   400 ,   483 ,   299 ,   547 ,   547 ,   157 ,   357 ,   181 ,   104 ,   148 ,   411 ,   418 ,   221 ,   482 ,   482 ,   275 ,   443 ,   185 ,    65 ,   134 ,   448 ,   430 ,   282 ,   773 ,   773 ,   283 ,   366 ,   109 ,   119 ,   181 ,   430 ,   384 ,   152 ,   765 ,   765 ,
+  
+  
+      41 ,   467 ,   241 ,   154 ,   271 ,   494 ,   566 ,   324 ,   630 ,   630 ,    75 ,   318 ,   179 ,   129 ,   284 ,   475 ,   539 ,   245 ,   603 ,   603 ,   140 ,   369 ,   178 ,    72 ,   212 ,   398 ,   417 ,   304 ,   993 ,   993 ,   192 ,   339 ,   100 ,   130 ,   247 ,   373 ,   506 ,   166 ,   570 ,   570 ,
+     165 ,   190 ,   178 ,   147 ,   280 ,   241 ,   259 ,   170 ,   463 ,   463 ,   136 ,   209 ,   164 ,   160 ,   273 ,   229 ,   329 ,   187 ,   393 ,   393 ,   275 ,   237 ,   135 ,   108 ,   234 ,   249 ,   287 ,   220 ,   314 ,   415 ,   384 ,   190 ,   103 ,   178 ,   269 ,   261 ,   365 ,   134 ,   429 ,   429 ,
+     115 ,   472 ,   218 ,   124 ,   114 ,   485 ,   549 ,   278 ,   613 ,   613 ,   176 ,   358 ,   170 ,   132 ,   103 ,   430 ,   494 ,   230 ,   558 ,   558 ,   291 ,   451 ,   185 ,   111 ,    73 ,   464 ,   446 ,   288 ,   827 ,   827 ,   332 ,   342 ,   103 ,   172 ,   131 ,   406 ,   419 ,   152 ,   656 ,   656 ,
+     128 ,   300 ,   165 ,   146 ,   162 ,   403 ,   446 ,   188 ,   510 ,   510 ,   161 ,   361 ,   135 ,   162 ,   158 ,   340 ,   404 ,   164 ,   468 ,   468 ,   275 ,   397 ,   154 ,   110 ,   108 ,   390 ,   500 ,   209 ,   564 ,   564 ,   320 ,   313 ,   106 ,   169 ,   185 ,   362 ,   420 ,   112 ,   747 ,   747 ,
+  
+};
+UINT32 NNMBBitCosts[4][4][4][10]=
+{
+  
+       2 ,   572 ,   404 ,     0 ,     0 ,   557 ,   648 ,   594 ,   871 ,   871 ,     4 ,   466 ,   332 ,     0 ,     0 ,   474 ,   757 ,   559 ,   821 ,   821 ,     6 ,   512 ,   293 ,     0 ,     0 ,   461 ,   613 ,   531 ,   677 ,   677 ,    49 ,   338 ,   124 ,     0 ,     0 ,   252 ,   405 ,   285 ,   775 ,   775 ,
+      35 ,   231 ,   289 ,     0 ,     0 ,   213 ,   346 ,   413 ,   283 ,   459 ,    46 ,   176 ,   268 ,     0 ,     0 ,   248 ,   312 ,   268 ,   376 ,   376 ,    65 ,   167 ,   195 ,     0 ,     0 ,   166 ,   342 ,   465 ,   406 ,   406 ,   184 ,   100 ,   131 ,     0 ,     0 ,   162 ,   319 ,   296 ,   383 ,   383 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      16 ,   297 ,   297 ,     0 ,     0 ,   361 ,   425 ,   297 ,   489 ,   489 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+  
+  
+      33 ,   218 ,   241 ,     0 ,     0 ,   244 ,   362 ,   421 ,   335 ,   483 ,    37 ,   171 ,   268 ,     0 ,     0 ,   259 ,   394 ,   323 ,   458 ,   458 ,   125 ,   156 ,   144 ,     0 ,     0 ,   119 ,   350 ,   357 ,   414 ,   414 ,   215 ,   156 ,    84 ,     0 ,     0 ,   144 ,   361 ,   245 ,   425 ,   425 ,
+     231 ,   110 ,   334 ,     0 ,     0 ,   146 ,   213 ,   334 ,   152 ,   272 ,   111 ,   121 ,   289 ,     0 ,     0 ,   107 ,   394 ,   289 ,   458 ,   458 ,   245 ,    76 ,   302 ,     0 ,     0 ,   115 ,   416 ,   302 ,   239 ,   308 ,   299 ,    78 ,   219 ,     0 ,     0 ,   129 ,   281 ,   219 ,   345 ,   345 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+  
+  
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+  
+  
+      14 ,   312 ,   312 ,     0 ,     0 ,   376 ,   440 ,   312 ,   504 ,   504 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+      64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,    64 ,   192 ,   192 ,     0 ,     0 ,   256 ,   320 ,   192 ,   384 ,   384 ,
+  
+};
+UINT32 NN2MBBitCosts[4][4][4][10]=
+{
+  
+       7 ,   556 ,   377 ,   291 ,     0 ,   551 ,   569 ,   523 ,   813 ,   813 ,    12 ,   411 ,   317 ,   243 ,     0 ,   562 ,   626 ,   497 ,   690 ,   690 ,    22 ,   478 ,   280 ,   178 ,     0 ,   490 ,   579 ,   447 ,   643 ,   643 ,    68 ,   325 ,   129 ,   183 ,     0 ,   298 ,   445 ,   275 ,   509 ,   509 ,
+      47 ,   249 ,   250 ,   221 ,     0 ,   272 ,   346 ,   415 ,   231 ,   501 ,    48 ,   198 ,   293 ,   229 ,     0 ,   262 ,   326 ,   293 ,   390 ,   390 ,    90 ,   210 ,   237 ,   193 ,     0 ,   130 ,   349 ,   338 ,   413 ,   413 ,   195 ,   193 ,   134 ,   225 ,     0 ,   139 ,   304 ,   185 ,   368 ,   368 ,
+      32 ,   542 ,   349 ,   128 ,     0 ,   493 ,   583 ,   539 ,   647 ,   647 ,   107 ,   462 ,   363 ,    43 ,     0 ,   526 ,   590 ,   363 ,   654 ,   654 ,   122 ,   520 ,   289 ,    37 ,     0 ,   461 ,   576 ,   508 ,   640 ,   640 ,   113 ,   364 ,   143 ,    87 ,     0 ,   428 ,   492 ,   250 ,   556 ,   556 ,
+      54 ,   431 ,   231 ,   106 ,     0 ,   495 ,   559 ,   364 ,   623 ,   623 ,    81 ,   477 ,   159 ,   103 ,     0 ,   541 ,   605 ,   255 ,   669 ,   669 ,   112 ,   417 ,   195 ,    63 ,     0 ,   481 ,   545 ,   278 ,   609 ,   609 ,   132 ,   372 ,   120 ,   104 ,     0 ,   436 ,   500 ,   186 ,   564 ,   564 ,
+  
+  
+      46 ,   228 ,   230 ,   218 ,     0 ,   247 ,   420 ,   410 ,   318 ,   508 ,    54 ,   161 ,   273 ,   250 ,     0 ,   285 ,   349 ,   273 ,   413 ,   413 ,   121 ,   203 ,   199 ,   180 ,     0 ,   129 ,   294 ,   323 ,   358 ,   358 ,   215 ,   187 ,   107 ,   172 ,     0 ,   163 ,   397 ,   208 ,   461 ,   461 ,
+     129 ,   126 ,   283 ,   341 ,     0 ,   201 ,   238 ,   283 ,   170 ,   329 ,    76 ,   112 ,   316 ,   252 ,     0 ,   255 ,   319 ,   316 ,   383 ,   383 ,   235 ,   125 ,   192 ,   255 ,     0 ,   181 ,   281 ,   232 ,   160 ,   346 ,   275 ,    98 ,   184 ,   214 ,     0 ,   183 ,   264 ,   203 ,   328 ,   328 ,
+     122 ,   208 ,   319 ,    75 ,     0 ,   262 ,   326 ,   319 ,   390 ,   390 ,    60 ,   196 ,   260 ,   196 ,     0 ,   260 ,   324 ,   260 ,   388 ,   388 ,   227 ,   178 ,   253 ,    56 ,     0 ,   285 ,   349 ,   253 ,   413 ,   413 ,   211 ,   168 ,   169 ,   126 ,     0 ,   232 ,   296 ,   169 ,   360 ,   360 ,
+     116 ,   184 ,   204 ,   140 ,     0 ,   248 ,   312 ,   204 ,   376 ,   376 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,   211 ,   373 ,   132 ,    89 ,     0 ,   437 ,   501 ,   132 ,   565 ,   565 ,   283 ,   205 ,   162 ,    86 ,     0 ,   269 ,   333 ,   162 ,   397 ,   397 ,
+  
+  
+      27 ,   531 ,   343 ,   144 ,     0 ,   495 ,   525 ,   498 ,   779 ,   779 ,    67 ,   445 ,   247 ,    80 ,     0 ,   509 ,   573 ,   433 ,   637 ,   637 ,   163 ,   430 ,   236 ,    29 ,     0 ,   449 ,   630 ,   492 ,   694 ,   694 ,   153 ,   393 ,   130 ,    72 ,     0 ,   313 ,   503 ,   291 ,   567 ,   567 ,
+     116 ,   299 ,   257 ,    81 ,     0 ,   247 ,   311 ,   257 ,   375 ,   375 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,   240 ,   246 ,   153 ,    70 ,     0 ,   204 ,   337 ,   318 ,   401 ,   401 ,   140 ,   160 ,   189 ,   166 ,     0 ,   224 ,   288 ,   189 ,   352 ,   352 ,
+     184 ,   500 ,   324 ,    19 ,     0 ,   499 ,   563 ,   525 ,   627 ,   627 ,   198 ,   588 ,   303 ,    17 ,     0 ,   652 ,   716 ,   458 ,   780 ,   780 ,   268 ,   496 ,   315 ,    10 ,     0 ,   535 ,   591 ,   513 ,   655 ,   655 ,   198 ,   375 ,   143 ,    51 ,     0 ,   439 ,   503 ,   267 ,   567 ,   567 ,
+     140 ,   434 ,   254 ,    42 ,     0 ,   498 ,   562 ,   254 ,   626 ,   626 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,   181 ,   489 ,   240 ,    33 ,     0 ,   553 ,   617 ,   240 ,   681 ,   681 ,   165 ,   356 ,    99 ,    98 ,     0 ,   420 ,   484 ,   212 ,   548 ,   548 ,
+  
+  
+      51 ,   427 ,   230 ,   114 ,     0 ,   394 ,   536 ,   368 ,   600 ,   600 ,    93 ,   362 ,   182 ,    82 ,     0 ,   426 ,   490 ,   289 ,   554 ,   554 ,   137 ,   408 ,   183 ,    49 ,     0 ,   472 ,   536 ,   363 ,   600 ,   600 ,   163 ,   302 ,    95 ,   110 ,     0 ,   329 ,   434 ,   226 ,   498 ,   498 ,
+     134 ,   149 ,   221 ,   157 ,     0 ,   213 ,   277 ,   221 ,   341 ,   341 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,   219 ,   207 ,   175 ,    86 ,     0 ,   271 ,   335 ,   175 ,   399 ,   399 ,   227 ,   212 ,   141 ,   118 ,     0 ,   276 ,   340 ,   141 ,   404 ,   404 ,
+      64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,
+      64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,    64 ,   192 ,   256 ,   192 ,     0 ,   256 ,   320 ,   256 ,   384 ,   384 ,
+  
+};
+UINT32 BBitCosts[4][4][4][10]=
+{
+  
+     144 ,     0 ,    25 ,   418 ,   418 ,     0 ,     0 ,     0 ,     0 ,     0 ,   121 ,     0 ,    38 ,   308 ,   308 ,     0 ,     0 ,     0 ,     0 ,     0 ,   144 ,     0 ,    32 ,   294 ,   294 ,     0 ,     0 ,     0 ,     0 ,     0 ,   151 ,     0 ,    26 ,   300 ,   387 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     142 ,     0 ,    34 ,   278 ,   278 ,     0 ,     0 ,     0 ,     0 ,     0 ,    97 ,     0 ,   104 ,   168 ,   168 ,     0 ,     0 ,     0 ,     0 ,     0 ,   129 ,     0 ,    43 ,   253 ,   253 ,     0 ,     0 ,     0 ,     0 ,     0 ,   144 ,     0 ,    34 ,   278 ,   278 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     186 ,     0 ,    39 ,   162 ,   302 ,     0 ,     0 ,     0 ,     0 ,     0 ,   179 ,     0 ,    47 ,   190 ,   190 ,     0 ,     0 ,     0 ,     0 ,     0 ,   195 ,     0 ,    35 ,   180 ,   272 ,     0 ,     0 ,     0 ,     0 ,     0 ,   195 ,     0 ,    31 ,   196 ,   288 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     184 ,     0 ,    21 ,   283 ,   375 ,     0 ,     0 ,     0 ,     0 ,     0 ,   195 ,     0 ,    21 ,   268 ,   323 ,     0 ,     0 ,     0 ,     0 ,     0 ,   181 ,     0 ,    29 ,   226 ,   295 ,     0 ,     0 ,     0 ,     0 ,     0 ,   201 ,     0 ,    23 ,   241 ,   303 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+  
+  
+     105 ,     0 ,    57 ,   247 ,   247 ,     0 ,     0 ,     0 ,     0 ,     0 ,   131 ,     0 ,    30 ,   373 ,   373 ,     0 ,     0 ,     0 ,     0 ,     0 ,    94 ,     0 ,    55 ,   284 ,   284 ,     0 ,     0 ,     0 ,     0 ,     0 ,   161 ,     0 ,    26 ,   313 ,   313 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     121 ,     0 ,    34 ,   368 ,   368 ,     0 ,     0 ,     0 ,     0 ,     0 ,    64 ,     0 ,   128 ,   192 ,   192 ,     0 ,     0 ,     0 ,     0 ,     0 ,   144 ,     0 ,    36 ,   265 ,   265 ,     0 ,     0 ,     0 ,     0 ,     0 ,   179 ,     0 ,    31 ,   245 ,   245 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     227 ,     0 ,    31 ,   212 ,   212 ,     0 ,     0 ,     0 ,     0 ,     0 ,   179 ,     0 ,    56 ,   171 ,   171 ,     0 ,     0 ,     0 ,     0 ,     0 ,   195 ,     0 ,    33 ,   221 ,   221 ,     0 ,     0 ,     0 ,     0 ,     0 ,   176 ,     0 ,    39 ,   215 ,   215 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     235 ,     0 ,    18 ,   280 ,   280 ,     0 ,     0 ,     0 ,     0 ,     0 ,   176 ,     0 ,    27 ,   274 ,   274 ,     0 ,     0 ,     0 ,     0 ,     0 ,   186 ,     0 ,    27 ,   235 ,   298 ,     0 ,     0 ,     0 ,     0 ,     0 ,   227 ,     0 ,    21 ,   239 ,   290 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+  
+  
+     192 ,     0 ,    30 ,   206 ,   285 ,     0 ,     0 ,     0 ,     0 ,     0 ,   134 ,     0 ,    52 ,   217 ,   217 ,     0 ,     0 ,     0 ,     0 ,     0 ,   157 ,     0 ,    34 ,   238 ,   282 ,     0 ,     0 ,     0 ,     0 ,     0 ,   181 ,     0 ,    31 ,   225 ,   260 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     176 ,     0 ,    38 ,   218 ,   218 ,     0 ,     0 ,     0 ,     0 ,     0 ,   134 ,     0 ,    89 ,   153 ,   153 ,     0 ,     0 ,     0 ,     0 ,     0 ,   222 ,     0 ,    48 ,   133 ,   242 ,     0 ,     0 ,     0 ,     0 ,     0 ,   256 ,     0 ,    34 ,   179 ,   214 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     245 ,     0 ,    39 ,   155 ,   233 ,     0 ,     0 ,     0 ,     0 ,     0 ,   320 ,     0 ,    44 ,   169 ,   157 ,     0 ,     0 ,     0 ,     0 ,     0 ,   250 ,     0 ,    39 ,   166 ,   201 ,     0 ,     0 ,     0 ,     0 ,     0 ,   291 ,     0 ,    39 ,   165 ,   187 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     186 ,     0 ,    31 ,   204 ,   292 ,     0 ,     0 ,     0 ,     0 ,     0 ,   250 ,     0 ,    29 ,   210 ,   210 ,     0 ,     0 ,     0 ,     0 ,     0 ,   283 ,     0 ,    25 ,   201 ,   226 ,     0 ,     0 ,     0 ,     0 ,     0 ,   275 ,     0 ,    29 ,   196 ,   215 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+  
+  
+     171 ,     0 ,    24 ,   288 ,   331 ,     0 ,     0 ,     0 ,     0 ,     0 ,   151 ,     0 ,    36 ,   249 ,   257 ,     0 ,     0 ,     0 ,     0 ,     0 ,   181 ,     0 ,    31 ,   238 ,   248 ,     0 ,     0 ,     0 ,     0 ,     0 ,   186 ,     0 ,    25 ,   262 ,   284 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     167 ,     0 ,    35 ,   244 ,   234 ,     0 ,     0 ,     0 ,     0 ,     0 ,   176 ,     0 ,    35 ,   232 ,   232 ,     0 ,     0 ,     0 ,     0 ,     0 ,   192 ,     0 ,    43 ,   169 ,   224 ,     0 ,     0 ,     0 ,     0 ,     0 ,   219 ,     0 ,    27 ,   216 ,   256 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     219 ,     0 ,    32 ,   170 ,   292 ,     0 ,     0 ,     0 ,     0 ,     0 ,   227 ,     0 ,    42 ,   151 ,   226 ,     0 ,     0 ,     0 ,     0 ,     0 ,   250 ,     0 ,    35 ,   162 ,   230 ,     0 ,     0 ,     0 ,     0 ,     0 ,   262 ,     0 ,    30 ,   184 ,   235 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+     204 ,     0 ,    23 ,   254 ,   290 ,     0 ,     0 ,     0 ,     0 ,     0 ,   201 ,     0 ,    28 ,   222 ,   268 ,     0 ,     0 ,     0 ,     0 ,     0 ,   235 ,     0 ,    28 ,   206 ,   250 ,     0 ,     0 ,     0 ,     0 ,     0 ,   256 ,     0 ,    22 ,   224 ,   256 ,     0 ,     0 ,     0 ,     0 ,     0 ,
+  
+};
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c
new file mode 100644
index 00000000..95b0a650
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/pb_globals.c
@@ -0,0 +1,248 @@
+/****************************************************************************
+*
+*   Module Title :     PB_Globals.c
+*
+*   Description  :     Video CODEC Demo: playback dll global declarations
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "pbdll.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/ 
+static UINT32 VP6_DCQuantScaleP[Q_TABLE_SIZE];	// VP6_DcQuant[i]/2 + 2 per index; filled by VP6_VPInitLibrary and passed to InitPostProcessing
+
+/****************************************************************************
+*  Imports
+****************************************************************************/ 
+extern unsigned long VP6_GetProcessorFrequency();	// result is stored in CPUFrequency by VP6_VPInitLibrary
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+unsigned int CPUFrequency;      // Processor frequency from VP6_GetProcessorFrequency (VP6_VPInitLibrary skips the assignment when __POWERPC__ is defined)
+
+// Truth table (MAX_MODES entries, 10 initialisers listed) to indicate if the given mode uses motion estimation. NOTE(review): the name says MC, presumably motion compensation - confirm wording.
+BOOL VP6_ModeUsesMC[MAX_MODES] = { FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE };
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DeleteTmpBuffers
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     The five buffer pointers released here are set to 0.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     De-allocate buffers used during decoding.
+ *
+ *  SPECIAL NOTES :     Each free is guarded by a non-null test and every
+ *                      pointer is zeroed, so a repeat call frees nothing twice.
+ ****************************************************************************/
+void VP6_DeleteTmpBuffers ( PB_INSTANCE *pbi )
+{ 
+	if ( pbi->ReconDataBuffer[0] )
+		duck_free(pbi->ReconDataBuffer[0]);
+	if ( pbi->LoopFilteredBlock )
+		duck_free(pbi->LoopFilteredBlock);
+	if ( pbi->TmpDataBuffer )
+		duck_free(pbi->TmpDataBuffer);
+	if ( pbi->TmpReconBuffer )
+		duck_free(pbi->TmpReconBuffer);
+	if ( pbi->ScaleBuffer )
+		duck_free(pbi->ScaleBuffer);
+
+	// Zero the pointers so that a later call skips the frees above.
+	pbi->ReconDataBuffer[0]     = 0;	// entries [1..5] are not touched here
+	pbi->LoopFilteredBlock      = 0;
+	pbi->TmpDataBuffer          = 0;
+	pbi->TmpReconBuffer         = 0;
+    pbi->ScaleBuffer            = 0;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_AllocateTmpBuffers
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     BOOL: TRUE on success, FALSE if any allocation fails.
+ * 
+ *  FUNCTION      :     Allocates buffers required during decoding.
+ *
+ *  SPECIAL NOTES :     Buffers come from duck_memalign(32, ...), presumably
+ *                      32-byte aligned to improve cache performance. On
+ *                      failure the buffers allocated so far are released.
+ ****************************************************************************/
+BOOL VP6_AllocateTmpBuffers ( PB_INSTANCE *pbi )
+{
+
+	// Release any buffers left over from a previous allocation first
+	VP6_DeleteTmpBuffers ( pbi ); 
+	// One block of 6*64 INT16 values, split below into six 64-entry buffers
+	pbi->ReconDataBuffer[0]      = (INT16 *)duck_memalign(32, 6*64*sizeof(INT16), DMEM_GENERAL);
+	if ( !pbi->ReconDataBuffer[0] )      { VP6_DeleteTmpBuffers(pbi); return FALSE;};
+    pbi->ReconDataBuffer[1] = pbi->ReconDataBuffer[0] + 64;
+    pbi->ReconDataBuffer[2] = pbi->ReconDataBuffer[1] + 64;
+    pbi->ReconDataBuffer[3] = pbi->ReconDataBuffer[2] + 64;
+    pbi->ReconDataBuffer[4] = pbi->ReconDataBuffer[3] + 64;
+    pbi->ReconDataBuffer[5] = pbi->ReconDataBuffer[4] + 64;
+
+    pbi->TmpDataBuffer        = (INT16 *)duck_memalign(32, 64 * sizeof(INT16), DMEM_GENERAL);
+    if ( !pbi->TmpDataBuffer )        { VP6_DeleteTmpBuffers(pbi); return FALSE;};
+
+	pbi->LoopFilteredBlock        = (UINT8 *)duck_memalign(32, 256 * sizeof(UINT8), DMEM_GENERAL);
+    if ( !pbi->LoopFilteredBlock )        { VP6_DeleteTmpBuffers(pbi); return FALSE;};
+
+    pbi->TmpReconBuffer       = (INT16 *)duck_memalign(32, 64 * sizeof(INT16), DMEM_GENERAL);
+    if ( !pbi->TmpReconBuffer )       { VP6_DeleteTmpBuffers(pbi); return FALSE;};
+	// NOTE: pbi->ScaleBuffer is not allocated in this routine
+    return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DeletePBInstance
+ *
+ *  INPUTS        :     PB_INSTANCE **pbi : Pointer to the pointer to the 
+ *                                          decoder instance.
+ *
+ *  OUTPUTS       :     PB_INSTANCE **pbi : Pointer to the pointer to the 
+ *                                          decoder instance. *pbi is 0 on exit.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     De-allocates the decoder instance data structure.
+ *
+ *  SPECIAL NOTES :     duck_free(*pbi) runs even when *pbi is 0; this relies
+ *                      on duck_free accepting a null pointer (TODO confirm).
+ ****************************************************************************/
+void VP6_DeletePBInstance ( PB_INSTANCE **pbi )
+{
+    if ( *pbi )
+    {
+        // Delete any other dynamically allocated temporary buffers, then the quantizer and postproc members
+		VP6_DeleteTmpBuffers(*pbi);
+		VP6_DeleteQuantizer(&(*pbi)->quantizer);
+        DeletePostProcInstance(&(*pbi)->postproc);
+    }
+
+    // deallocate the instance itself and reset the pointer to NULL
+	duck_free ( *pbi );
+	*pbi = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_CreatePBInstance
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     PB_INSTANCE *: Pointer to allocated decoder instance,
+ *                      or 0 if duck_malloc or VP6_AllocateTmpBuffers fails.
+ *  FUNCTION      :     Allocates space for and initializes decoder instance.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+PB_INSTANCE *VP6_CreatePBInstance ( void )
+{
+	PB_INSTANCE *pbi = 0;
+    CONFIG_TYPE ConfigurationInit = { 0,0,0,0,8,8,0,0,0,0,0,0,0,0 };	// template copied into pbi->Configuration below
+	int pbi_size = sizeof(PB_INSTANCE);
+
+    pbi = (PB_INSTANCE *) duck_malloc ( pbi_size, DMEM_GENERAL );
+    if ( !pbi )
+        return 0;
+
+	// initialize whole structure to 0, then copy in the Configuration template
+	memset ( (unsigned char *)pbi, 0, pbi_size );
+	memcpy ( (void *)&pbi->Configuration, (void *)&ConfigurationInit, sizeof(CONFIG_TYPE) );
+	// Temporary buffers: on failure free the instance and return 0
+	if ( !VP6_AllocateTmpBuffers(pbi) )
+    {
+        duck_free(pbi);
+        return 0;
+    }
+
+	pbi->CPUFree = 70;
+    pbi->idct    = idct;
+
+	// Initialise Entropy related data structures. NOTE(review): these look already zeroed by the memset above - confirm.
+	memset( pbi->DcProbs, 0, sizeof(pbi->DcProbs) );
+	memset( pbi->AcProbs, 0, sizeof(pbi->AcProbs) );
+
+	return pbi;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_VPInitLibrary
+ *
+ *  INPUTS        :     None.
+ *  
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Fully initializes the playback library.
+ *
+ *  SPECIAL NOTES :     Sets CPUFrequency and fills VP6_DCQuantScaleP before
+ *                      calling InitPostProcessing and InitVPUtil.
+ ****************************************************************************/
+void VP6_VPInitLibrary(void)
+{
+	int i;
+	// The processor frequency is not queried when __POWERPC__ is defined
+#if !defined(__POWERPC__)
+    CPUFrequency = VP6_GetProcessorFrequency();
+#endif
+
+
+    VP6_DMachineSpecificConfig();
+    // Build the DC scale table: VP6_DcQuant[i]/2 + 2 for every table index
+    for ( i=0 ; i<Q_TABLE_SIZE; i++ )
+    {
+		INT32 dcScale = VP6_DcQuant[i]/2 + 2;
+		VP6_DCQuantScaleP[i] = dcScale;
+    }
+    // The same table is supplied for all three table arguments
+    InitPostProcessing (
+		VP6_DCQuantScaleP,
+		VP6_DCQuantScaleP,
+		VP6_DCQuantScaleP,
+		CURRENT_DECODE_VERSION );
+
+	InitVPUtil(); 
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_VPDeInitLibrary
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     De-initializes the playback library.
+ *
+ *  SPECIAL NOTES :     Currently nothing to be done: the body is empty.
+ *
+ ****************************************************************************/
+void VP6_VPDeInitLibrary(void)
+{	// intentionally empty
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c
new file mode 100644
index 00000000..1916c69f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/quantize.c
@@ -0,0 +1,769 @@
+/****************************************************************************
+*
+*   Module Title :     Quantise
+*
+*   Description  :     Quantisation and dequantisation of an 8x8 dct block.
+*
+****************************************************************************/						
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "quantize.h"
+#include "duck_mem.h"
+#include <stddef.h>
+/****************************************************************************
+*  Module Statics
+****************************************************************************/ 
+#define MIN16 ((1<<16)-1)
+      
+// Scale factors used to improve precision of DCT/IDCT
+#define IDCT_SCALE_FACTOR       2       // Shift left bits to improve IDCT precision
+
+// AC Quantizer Tables
+static const UINT32 VP6_QThreshTable[Q_TABLE_SIZE] = 
+{   94, 92, 90, 88, 86, 82, 78, 74,
+    70, 66, 62, 58, 54, 53, 52, 51,
+	50, 49, 48, 47, 46, 45, 44, 43,
+	42,	40, 39, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 
+    24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  
+    8,   7,  6,  5,  4,  3,  2,  1
+};
+
+static const UINT32 VP6_UvQThreshTable[Q_TABLE_SIZE] = 
+{   94, 92, 90, 88, 86, 82, 78, 74,
+    70, 66, 62, 58, 54, 53, 52, 51,
+	50, 49, 48, 47, 46, 45, 44, 43,
+	42,	40, 39, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 
+    24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  
+    8,   7,  6,  5,  4,  3,  2,  1
+};
+
+// AC Zero Bin and Rounding Tables (include fdct normalisation)
+static const UINT32 VP6_ZBinTable[Q_TABLE_SIZE] = 
+{
+	330,314,298,284,264,246,228,213,
+	201,190,178,167,156,153,149,146,
+	144,141,138,135,132,130,127,124,
+	121,115,110,104, 99, 96, 94, 90,
+	 85, 82, 79, 76, 74, 71, 69, 66,
+	 63, 61, 58, 55, 53, 50, 47, 45,
+	 43, 40, 38, 36, 33, 31, 28, 24,
+	 21, 18, 16, 13, 10,  7,  4,  2
+};
+
+static const UINT32 VP6_UvZBinTable[Q_TABLE_SIZE] = 
+{
+	330,314,298,284,264,246,228,213,
+	201,190,178,167,156,153,149,146,
+	144,141,138,135,132,130,127,124,
+	121,115,110,104, 99, 96, 94, 90,
+     85, 82, 79, 76, 74, 71, 69, 66,
+	 63, 61, 58, 55, 53, 50, 47, 45,
+	 43, 40, 38, 36, 33, 31, 28, 24,
+	 21, 18, 16, 13, 10,  7,  4,  2
+};
+
+static const UINT32 VP6_RTable[Q_TABLE_SIZE] = 
+{
+	48, 56, 64, 70, 78, 82, 86, 88,
+	91, 92, 94, 94, 99,103,102,100,
+	99, 97,	95, 93, 91, 89, 87, 85, 
+	83, 79, 77, 73, 71, 69, 67, 65,
+	64, 62, 60, 58, 56, 54, 52, 50,
+	48, 46, 44, 42, 40, 38, 36, 34,
+	32, 30, 28, 26, 24, 22, 20, 18, 
+	16, 14, 12, 10,  8,  6,  4,  2
+};
+
+static const UINT32 VP6_UvRTable[Q_TABLE_SIZE] = 
+{
+	48, 56, 64, 70, 78, 82, 86, 88,
+	91, 92, 94, 94, 99,103,102,100,
+	99, 97,	95, 93, 91, 89, 87, 85, 
+	83, 79, 77, 73, 71, 69, 67, 65,
+	64, 62, 60, 58, 56, 54, 52, 50,
+	48, 46, 44, 42, 40, 38, 36, 34,
+	32, 30, 28, 26, 24, 22, 20, 18, 
+	16, 14, 12, 10,  8,  6,  4,  2
+};
+
+// DC Quantizer tables
+const Q_LIST_ENTRY VP6_DcQuant[ Q_TABLE_SIZE ] = 
+{
+	47, 47, 47, 47, 45, 43, 43, 43, 
+	43, 43,	42, 41, 41, 40, 40, 40, 
+	40, 35,	35, 35, 35, 33, 33, 33, 
+	33, 32, 32, 32, 27, 27, 26, 26,
+	25, 25, 24, 24, 23, 23,	19, 19,
+    19, 19, 18, 18, 17, 16, 16, 16, 
+    16, 16, 15, 11, 11, 11, 10, 10,
+     9,  8,  7,  5,  3,  3,  2,  2    
+};
+
+static const Q_LIST_ENTRY VP6_UvDcQuant[ Q_TABLE_SIZE ] = 
+{
+	47, 47, 47, 47, 45, 43, 43, 43, 
+	43, 43,	42, 41, 41, 40, 40, 40, 
+	40, 35,	35, 35, 35, 33, 33, 33, 
+	33,	32, 32, 32, 27, 27, 26, 26,
+	25, 25, 24, 24, 23, 23,	19, 19,
+    19, 19, 18, 18, 17, 16, 16, 16,
+    16, 16, 15, 11, 11, 11, 10, 10,
+     9,  8,  7,  5,  3,  3,  2,  2    
+};
+
+// DC Zero Bin and Rounding Tables (include fdct normalisation). Y and UV tables match except VP6_UvDcRTable[1] (30 vs 28).
+static const UINT32 VP6_DcZBinTable[Q_TABLE_SIZE] = 
+{
+	170,162,152,150,140,130,125,121,
+	121,118,113,111,110,108,108,106,
+	105,96, 93, 87, 86, 83, 83, 83, 
+	83, 78, 78, 78, 66, 66, 63, 63,
+	61, 61, 58, 58, 56, 56, 46, 46,
+	46, 46, 43, 43, 41, 38, 38, 38,
+    38, 38, 35, 24, 24, 24, 23, 23, 
+	20, 19, 16, 13,  6,  6,  4,  4
+};
+
+static const UINT32 VP6_UvDcZBinTable[Q_TABLE_SIZE] = 
+{
+	170,162,152,150,140,130,125,121,
+	121,118,113,111,110,108,108,106,
+	105,96, 93, 87, 86, 83, 83, 83, 
+	83, 78, 78, 78, 66, 66, 63, 63,
+	61, 61, 58, 58, 56, 56, 46, 46,
+	46, 46, 43, 43, 41, 38, 38, 38,
+    38, 38, 35, 24, 24, 24, 23, 23, 
+	20, 19, 16, 13,  6,  6,  4,  4
+};
+
+static const UINT32 VP6_DcRTable[Q_TABLE_SIZE] = 
+{
+	20, 28, 38, 40, 44, 46, 50, 50, 
+	51, 57,	59, 61, 62, 64, 66, 67, 
+	67, 62,	63, 64, 64, 62, 62, 62, 
+	62,	62, 62, 62, 54, 54, 52, 52,
+	50, 50, 48, 48, 46, 46, 38, 38,
+	38, 38, 36, 36, 34, 32, 32, 32,
+	32, 32, 30, 22, 22, 22, 20, 20, 
+	18, 16, 14, 10,  6,  6,  4,  4
+};
+
+static const UINT32 VP6_UvDcRTable[Q_TABLE_SIZE] = 
+{
+	20, 30, 38, 40, 44, 46, 50, 50, 
+	51, 57,	59, 61, 62, 64, 66, 67, 
+	67, 62,	63, 64, 64, 62, 62, 62, 
+	62,	62, 62, 62, 54, 54, 52, 52, 
+	50, 50, 48, 48, 46, 46, 38, 38,
+	38, 38, 36, 36, 34, 32, 32, 32,
+	32, 32, 30, 22, 22, 22, 20, 20, 
+	18, 16, 14, 10,  6,  6,  4,  4
+};
+
+
+// Correction factors for ZBin size, based upon the zero run length leading up to the current coef.
+// Each factor is a percentage of the bin width that is added to the existing zero bin.
+static const INT32 VP6_ZlrZbinCorrection[Q_TABLE_SIZE] = 
+{
+    -8,  0,  5, 10, 10, 10, 10, 10, 
+	15, 15, 15, 15, 20, 20, 20, 20, 
+	20, 20, 20, 20, 20, 20, 20, 20, 
+	20, 20, 20, 20, 20, 20, 20, 20, 
+	20, 20, 20, 20, 20, 20, 20, 20, 
+	25, 25, 25, 25, 25, 25, 25, 25,  
+	25, 25, 25, 25, 25, 25, 25, 25, 
+	30, 30, 30, 30, 30, 30, 30, 30, 
+};
+
+
+/****************************************************************************
+ *
+ *	Inverse fast DCT index:
+ *
+ *	This contains the offsets needed to convert zigzag order into x, y order
+ *  for decoding. It is generated from the input zigzag index at run time.
+ *
+ *	For maximum speed during both quantisation and dequantisation we maintain
+ *  separate quantisation and zigzag tables for each operation.														
+ *
+ *	qi->quant_index  :	zigzag index used during quantisation	
+ *	dequant_index    :	zigzag index used during dequantisation			
+ *					
+ *  qi->quant_index is the inverse of dequant_index and is calculated during
+ *  initialisation.
+ *
+ ****************************************************************************/
+static const UINT32 dequant_index[64] = 
+{	0,  1,  8,  16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static const UINT32 transIndexC[64] = 
+{
+	 0,	 1,	 2,	 3,	   4,  5,  6,  7,
+	 8,	 9, 10,	11,	  12, 13, 14, 15,
+	16, 17, 18, 19,   20, 21, 22, 23,
+	24, 25, 26, 27,   28, 29, 30, 31,
+
+	32, 33, 34, 35,   36, 37, 38, 39,
+	40, 41, 42, 43,   44, 45, 46, 47,
+	48, 49, 50, 51,   52, 53, 54, 55, 
+	56, 57, 58, 59,   60, 61, 62, 63
+};
+
+static const UINT32 quant_indexC[64] = 
+{
+    0,  1,   5,  6, 14, 15, 27, 28,
+    2,  4,   7, 13, 16, 26, 29, 42,
+    3,  8,  12, 17, 25, 30, 41, 43,
+    9,  11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54, 
+    20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61,
+    35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+void (*VP6_BuildQuantIndex)( QUANTIZER * qi);
+void (*VP6_quantize)( QUANTIZER *qi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+const UINT8 VP6_QTableSelect[6] = { 0,0,0,0,1,1 };	// Controls selection of Q Table,rounding,zero bin etc for Y, U & V blocks
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_InitQTables
+ *
+ *  INPUTS        :     QUANTIZER *qi     : Pointer to quantizer instance.
+ *                      UINT8 Vp3VersionNo : Decoder version number (NOT USED).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initialises Q table.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_InitQTables ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{  	// Vp3VersionNo is not used by this routine.
+	memcpy ( qi->QThreshTable, VP6_QThreshTable, sizeof(qi->QThreshTable) );	// Copies the static Y AC threshold table; the byte count comes from the destination array.
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_BuildQuantIndex_Generic
+ *
+ *  INPUTS        :     QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Builds the quant_index table.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_Generic ( QUANTIZER *qi )
+{
+    INT32 ZigZagPos;
+
+    // The generic C build points transIndex at the identity table transIndexC.
+    qi->transIndex = (UINT32 *)transIndexC;
+
+    // quant_index is the inverse permutation of dequant_index.
+    for ( ZigZagPos=0; ZigZagPos<BLOCK_SIZE; ZigZagPos++ )
+    {
+        qi->quant_index[dequant_index[ZigZagPos]] = ZigZagPos;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_init_dequantizer
+ *
+ *  INPUTS        :     QUANTIZER *qi      : Pointer to quantizer instance.
+ *                      UINT8 Vp3VersionNo : Decoder version number (NOT USED)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Performs initialization of the dequantizer.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_init_dequantizer ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    int	i, j;						 
+
+	// *************** Y (plane 0) ******************/
+	
+    // AC: every entry reached through quant_index[1..63] gets the same value,
+	for ( i=1; i<64; i++ )
+	{	
+        j = qi->quant_index[i];
+		qi->dequant_coeffs[0][j] = VP6_QThreshTable[qi->FrameQIndex] << IDCT_SCALE_FACTOR;	// the Y threshold for the current Q index, scaled up by IDCT_SCALE_FACTOR bits.
+    }
+	
+    // DC: written after the AC loop, always at index 0.
+    qi->dequant_coeffs[0][0] = VP6_DcQuant[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+
+	// *************** UV (plane 1) ******************/
+
+    // AC: same procedure as for Y, using the UV threshold table.
+	for ( i=1; i<64; i++ )
+	{	
+        j = qi->quant_index[i];
+		qi->dequant_coeffs[1][j] = VP6_UvQThreshTable[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+    }
+	
+    // DC: written after the AC loop, always at index 0.
+    qi->dequant_coeffs[1][0] = VP6_UvDcQuant[qi->FrameQIndex] << IDCT_SCALE_FACTOR;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_UpdateQ
+ *
+ *  INPUTS        :     QUANTIZER *qi      : Pointer to quantizer instance.
+ *                      UINT8 Vp3VersionNo : Decoder version number.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Updates the quantisation tables for a new Q.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_UpdateQ ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+	// Only rebuild when the frame Q index differs from the one last set up.
+	if ( qi->FrameQIndex != qi->LastFrameQIndex )
+	{
+		// Remember the Q index the tables are now being built for.
+		qi->LastFrameQIndex = qi->FrameQIndex;
+
+		// Build quant_index through the installed VP6_BuildQuantIndex pointer.
+		VP6_BuildQuantIndex(qi);
+
+		// Rebuild the Y and UV dequantisation coefficients (dequantizer only).
+		VP6_init_dequantizer ( qi, Vp3VersionNo );
+	}
+}
+
+/********************* COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_init_quantizer
+ *
+ *  INPUTS        :     QUANTIZER *qi      : Pointer to quantizer instance.
+ *                      UINT8 Vp3VersionNo : Decoder version number (NOT USED).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Updates the quantisation tables for a new Q.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+#define SHIFT16 (1<<16)	// 2^16: fixed-point scale applied to the reciprocal quantizers below.
+void VP6_init_quantizer ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+    int i;
+    double temp_fp_quant_coeffs;
+
+    // Notes on setup of quantisers:
+    // The "* 4" is a normalisation factor for the forward DCT transform.
+	// Vp3VersionNo is not used. Plane 0 holds the Y tables, plane 1 the UV tables.
+	// ******************* Y *********************
+
+    // Calculate DC quant values (Include a *4 for FDCT normalization)
+	temp_fp_quant_coeffs =  (double)( VP6_DcQuant[qi->FrameQIndex] * 4 ); 
+
+	// 1/X (Y): stored as round( 2^16 / (4 * quantizer) )
+	temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
+	qi->QuantCoeffs[0][0] = (INT32) (0.5 + SHIFT16 * temp_fp_quant_coeffs);
+
+	// DC rounding (Y)
+	qi->QuantRound[0][0] = VP6_DcRTable[qi->FrameQIndex];
+
+	// Set DC zero Bin (Y)
+	qi->ZeroBinSize[0][0] = VP6_DcZBinTable[qi->FrameQIndex];
+   
+	// AC for Y: all 63 AC entries receive identical values for the current Q index.
+	for ( i=1; i<64; i++ )
+	{
+		// Normalize the quantizer (* 4 for fdct normalisation)
+		temp_fp_quant_coeffs =  (double)(VP6_QThreshTable[qi->FrameQIndex] * 4);
+
+		// Convert to 1/x
+		temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
+		qi->QuantCoeffs[0][i] = (INT32) (0.5 + SHIFT16 * temp_fp_quant_coeffs);
+
+		// AC rounding
+		qi->QuantRound[0][i] = VP6_RTable[qi->FrameQIndex];
+
+		// Zero Bins
+		qi->ZeroBinSize[0][i] = VP6_ZBinTable[qi->FrameQIndex];
+	}
+
+	// ******************* UV *********************
+
+    // Calculate DC quant values (Include a *4 for FDCT normalization)
+	temp_fp_quant_coeffs =  (double)( VP6_UvDcQuant[qi->FrameQIndex] * 4 ); 
+
+	// 1/X (UV): stored as round( 2^16 / (4 * quantizer) )
+	temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
+	qi->QuantCoeffs[1][0] = (INT32) (0.5 + SHIFT16 * temp_fp_quant_coeffs);
+
+	// DC rounding (UV)
+	qi->QuantRound[1][0] = VP6_UvDcRTable[qi->FrameQIndex];
+
+	// Set DC zero Bin (UV)
+	qi->ZeroBinSize[1][0] = VP6_UvDcZBinTable[qi->FrameQIndex];
+   
+	// AC for UV: all 63 AC entries receive identical values for the current Q index.
+	for ( i=1; i<64; i++ )
+	{
+		// Normalize the quantizer (* 4 for fdct normalisation)
+		temp_fp_quant_coeffs =  (double)(VP6_UvQThreshTable[qi->FrameQIndex] * 4);
+
+		// 1/x
+		temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
+		qi->QuantCoeffs[1][i] = (INT32) (0.5 + SHIFT16 * temp_fp_quant_coeffs);
+
+		// AC rounding
+		qi->QuantRound[1][i] = VP6_UvRTable[qi->FrameQIndex];
+
+		// Zero Bins
+		qi->ZeroBinSize[1][i] = VP6_UvZBinTable[qi->FrameQIndex];
+	}
+
+	// Replicate the Y AC rounding, multiplier and (zero bin - 1) into 8-entry arrays.
+	for ( i=0; i<8; i++ )	// NOTE(review): presumably consumed by vectorised quantizers -- confirm.
+	{
+		qi->round[i] = qi->QuantRound[0][1];
+		qi->mult[i] = qi->QuantCoeffs[0][1];
+		qi->zbin[i] = qi->ZeroBinSize[0][1]-1;
+	}
+
+
+	// Work out the ZRL correction factors for ZBIN: (4 * AC quantizer) * percentage / 100,
+	for ( i = 0; i < 64; i++ )	// where i is the zero run length used to index VP6_ZlrZbinCorrection.
+	{
+		qi->ZlrZbinCorrections[0][i] = ((INT32)VP6_QThreshTable[qi->FrameQIndex] * 4 * VP6_ZlrZbinCorrection[i]) / 100;
+		qi->ZlrZbinCorrections[1][i] = ((INT32)VP6_UvQThreshTable[qi->FrameQIndex] * 4 * VP6_ZlrZbinCorrection[i]) / 100;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UpdateQC (compressor's update q)
+ *
+ *  INPUTS        :     QUANTIZER *qi      : Pointer to quantizer instance.
+ *                      UINT8 Vp3VersionNo : Decoder version number.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Updates the quantisation tables for a new Q
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_UpdateQC ( QUANTIZER *qi, UINT8 Vp3VersionNo )
+{
+	// Only rebuild when the frame Q index differs from the one last set up.
+	if ( qi->FrameQIndex != qi->LastFrameQIndex )
+	{
+		// Remember the Q index the tables are now being built for.
+		qi->LastFrameQIndex = qi->FrameQIndex;
+
+		// Calls the generic C index builder directly, not the function pointer.
+		VP6_BuildQuantIndex_Generic(qi);
+
+		// Rebuild the forward (quantizer) tables, then the dequantizer tables.
+		VP6_init_quantizer ( qi, Vp3VersionNo );
+		VP6_init_dequantizer ( qi, Vp3VersionNo );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_quantize_c
+ *
+ *  INPUTS        :     QUANTIZER *qi               : Pointer to quantizer instance.
+ *                      INT16 *DCT_block             : List of 64 DCT coefficients.
+ *                      UINT8 bp                     : Position of block within MB.
+ *
+ *  OUTPUTS       :     Q_LIST_ENTRY *quantized_list : List of 64 quantized DCT coefficients.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Quantizes the DCT coefficients wrt the current 
+ *                      quantization level.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+#define HIGHBITDUPPED(X) (((signed short) X)  >> 15)	// Not referenced anywhere in this file.
+
+void VP6_quantize_c( QUANTIZER *qi, INT16 *DCT_block, Q_LIST_ENTRY *quantized_list, UINT8 bp )
+{
+    UINT32  i, j;
+	INT32	temp;
+	UINT32	ColourPlane = VP6_QTableSelect[bp]; 	// 0 for block positions 0-3, 1 for positions 4-5.
+    
+    INT32 * QuantRoundPtr  = qi->QuantRound[ColourPlane];
+    INT32 * QuantCoeffsPtr = qi->QuantCoeffs[ColourPlane];
+    INT32 * ZBinPtr        = qi->ZeroBinSize[ColourPlane];
+	INT32 * ZrlCorrection = qi->ZlrZbinCorrections[ColourPlane];
+    INT16 * DCT_blockPtr   = DCT_block;
+
+	UINT8   Zrl = 0;	// Count of consecutive coefficients quantized to zero so far.
+
+    // Set the quantized_list to default to 0
+    memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
+
+	// DC quantization: (coef +/- rounding) * reciprocal quantizer, then drop the 16 fraction bits.
+	if ( DCT_blockPtr[0] >= ZBinPtr[0] )
+	{
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] + QuantRoundPtr[0] );
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+    else if ( DCT_blockPtr[0] <= -ZBinPtr[0] )
+	{
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] - QuantRoundPtr[0] ) + MIN16;	// MIN16 (0xFFFF) biases the shift of a negative product; assumes >> on negatives is arithmetic.
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+	else
+		Zrl++;	// Inside the zero bin: output stays 0 and the zero run grows.
+
+    // Quantize AC: output index i is the zig-zag position, j the matching position in DCT_block.
+    for( i=1; i<64; i++ )
+    {
+		// Zig Zag order... 
+		j = dequant_index[i];
+
+        if ( DCT_blockPtr[j] >= (ZBinPtr[j] + ZrlCorrection[Zrl]) )	// Zero bin is adjusted by the correction for the current zero run.
+        {
+			temp = QuantCoeffsPtr[j] * ( DCT_blockPtr[j] + QuantRoundPtr[j] );
+			quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+			Zrl = 0;
+        }
+        else if ( DCT_blockPtr[j] <= -(ZBinPtr[j] + ZrlCorrection[Zrl]) )
+        {
+			temp = QuantCoeffsPtr[j] * ( DCT_blockPtr[j] - QuantRoundPtr[j] ) + MIN16;
+			quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+			Zrl = 0;
+        }
+		else
+			Zrl++;
+    }
+
+}
+/**************************** END COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeleteQuantizerBuffers
+ *
+ *  INPUTS        :     QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ * 
+ *  FUNCTION      :     De-allocates buffers associated with the quantizer.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+static void DeleteQuantizerBuffers ( QUANTIZER *qi )
+{
+	int Plane;
+	// For each plane free the raw allocation and clear both the raw and the aligned pointer.
+	for ( Plane=0; Plane<2; Plane++ )
+	{
+		if ( qi->dequant_coeffsAlloc[Plane] )
+			duck_free(qi->dequant_coeffsAlloc[Plane]);
+		qi->dequant_coeffsAlloc[Plane]	= 0;
+		qi->dequant_coeffs[Plane]		= 0;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AllocateQuantizerBuffers
+ *
+ *  INPUTS        :     QUANTIZER *qi : Pointer to quantizer instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     INT32: TRUE on success, FALSE if either allocation fails.
+ *
+ *  FUNCTION      :     Allocates buffers associated with quantization.
+ *
+ *  SPECIAL NOTES :     Uses ROUNDUP32 to ensure that allocated buffers are
+ *                      aligned on 32-byte boundaries to improve cache performance. 
+ *
+ ****************************************************************************/
+
+// TODO: benski> need better checks for other compilers
+#if defined(_M_AMD64) || defined(_WIN64) || defined(__LP64__)
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 )&( 0xFFFFFFFFFFFFFFE0 ) )
+#else //#elif //defined(_M_IX86) 
+#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 )&( 0xFFFFFFE0 ) )
+#endif
+
+// Allocates both planes' dequant buffers; returns TRUE, or FALSE (with everything freed) on failure.
+static INT32 AllocateQuantizerBuffers ( QUANTIZER *qi )
+{
+	DeleteQuantizerBuffers(qi);	// Release any buffers left from an earlier allocation.
+
+	qi->dequant_coeffsAlloc[0]		    = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);	// 32 spare bytes leave room for the alignment round-up.
+    if ( !qi->dequant_coeffsAlloc[0] ) { DeleteQuantizerBuffers(qi); return FALSE; }
+	qi->dequant_coeffs[0]			    = (INT16 *)ROUNDUP32(qi->dequant_coeffsAlloc[0]);
+
+	qi->dequant_coeffsAlloc[1]		    = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+    if ( !qi->dequant_coeffsAlloc[1] ) { DeleteQuantizerBuffers(qi); return FALSE; }
+	qi->dequant_coeffs[1]			    = (INT16 *)ROUNDUP32(qi->dequant_coeffsAlloc[1]);
+
+	return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DeleteQuantizer
+ *
+ *  INPUTS        :     QUANTIZER **qi : Pointer to pointer to quantizer instance.
+ *
+ *  OUTPUTS       :     QUANTIZER **qi : Pointer to pointer to quantizer instance,
+ *                                       set to NULL on exit.
+ *
+ *  RETURNS       :     void.
+ *
+ *  FUNCTION      :     De-allocates memory associated with the quantizer.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_DeleteQuantizer ( QUANTIZER **qi )
+{
+    QUANTIZER *Instance = *qi;
+
+    // Nothing to release if the caller's pointer is already NULL.
+    if ( !Instance )
+        return;
+
+    DeleteQuantizerBuffers(Instance);	// Free the dynamically allocated coefficient buffers first...
+    duck_free(Instance);				// ...then the quantizer structure itself.
+    *qi = 0;							// Clear the caller's pointer.
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_CreateQuantizer
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Pointer to allocated quantizer instance.
+ *
+ *  FUNCTION      :     Allocated memory for and initializes a quantizer instance.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+QUANTIZER *VP6_CreateQuantizer ( void )
+{
+	int Bytes = sizeof(QUANTIZER);
+	QUANTIZER *NewQi;
+
+	NewQi = (QUANTIZER *) duck_malloc(Bytes, DMEM_GENERAL);
+	if ( NewQi )
+	{
+		// Start from an all-zero structure before the buffers are attached.
+		memset ( (unsigned char *)NewQi, 0, Bytes );
+		// If the buffers cannot be allocated, tear the instance down again;
+		// VP6_DeleteQuantizer also resets NewQi to 0, which is then returned.
+		if ( !AllocateQuantizerBuffers(NewQi) )
+			VP6_DeleteQuantizer(&NewQi);
+	}
+	return NewQi;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : GetQuantizedCoeffsMSE_RD
+ *
+ *  INPUTS        : INT16 * DctCodes        : Result of Forward DCT (read via dequant_index[]).
+ *                  INT16 * Coeffs          : Quantized Coeffs (zig-zag order).
+ *                  INT16 * DequantMatrix   : Dequantization Matrix (indexed like Coeffs).
+ *                  (No encoder instance is passed to this routine.)
+ *
+ *
+ *  OUTPUTS       : UINT32 *MSE             : Mean Square Error
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Computes the squared error in the transform domain.
+ *
+ *  SPECIAL NOTES : Based on the argument that the MSE in the frequency domain
+ *                  is the same as the MSE in the spatial domain, this routine
+ *                  calculates the error in the transform domain to save the
+ *                  IDCT and recon operations for distortion measurement.
+ *
+ ****************************************************************************/
+void GetQuantizedCoeffsMSE_RD
+( 
+    INT16 * DctCodes,
+    INT16 * Coeffs,
+    INT16 * DequantMatrix,
+    UINT32 *MSE
+)
+{
+    UINT32 Error=0;
+    INT32 i;    
+    INT32 diff;
+
+    // i walks the quantized coefficients; j is the matching position in DctCodes.
+    for(i=0;i<64;i++)
+    {
+        int j = dequant_index[i];
+        diff = Coeffs[i] * DequantMatrix [i] - DctCodes[j];        // Dequantized value minus the original DCT value.
+        Error += diff*diff;
+    }
+
+    *MSE = (Error<<2);	// Four times the summed squared error; it is not divided by the coefficient count.
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c
new file mode 100644
index 00000000..99f0318f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/recon.c
@@ -0,0 +1,603 @@
+/****************************************************************************
+*
+*   Module Title :     recon.c
+*
+*   Description  :     Frame reconstruction functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking. */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>
+#include "pbdll.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/              
+#define TMAX 6
+#define TMIN 1
+
+#define Mod8(a) ((a) & 7)
+
+/*************************************************************************** 
+ *
+ *  ROUTINE       :     Var16Point
+ *
+ *  INPUTS        :     UINT8 *DataPtr     : Pointer to data block.
+ *                      INT32 SourceStride : Block stride.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: Calculated 16-point variance (no scaling).
+ *
+ *  FUNCTION      :     Calculates variance for the 8x8 block *BUT* only samples
+ *                      every second pixel in every second row of the block. In
+ *                      other words for the 8x8 block only 16 sample points are used.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+UINT32 Var16Point ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32  Row;
+    UINT32  Col;
+    UINT32  Total=0;        // Sum of the sampled pixels.
+    UINT32  SqTotal=0;      // Sum of their squares.
+    UINT8   *RowPtr = DataPtr;
+
+    // Visit rows 0, 2, 4, ... of the block.
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row+=2 )
+    {
+        // Within the row sample columns 0, 2, 4 and 6.
+        for ( Col=0; Col<8; Col+=2 )
+        {
+            UINT32 Pixel = RowPtr[Col];
+
+            Total   += Pixel;
+            SqTotal += Pixel * Pixel;
+        }
+
+        // Step over the unsampled row as well: advance two strides.
+        RowPtr += (SourceStride << 1);
+    }
+
+    // With 16 samples, (16*SqTotal - Total*Total) / 256 corresponds to
+    // mean(x*x) - mean(x)^2, i.e. the population variance; the
+    // division is done by the final right shift.
+    return ( (SqTotal<<4) - Total*Total ) >> 8;
+}
+
+/*************************************************************************** 
+ *
+ *  ROUTINE       :     DiffVar16Point
+ *
+ *  INPUTS        :     UINT8 *DataPtr     : Pointer to data block.
+ *                      INT32 SourceStride : Block stride.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     UINT32: Calculated 16-point variance (no scaling).
+ *
+ *  FUNCTION      :     Calculates a variance for 16 data values.
+ *						Each data value is the absolute difference between a pair of samples
+ *                      one line and one column apart 
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+UINT32 DiffVar16Point ( UINT8 *DataPtr, INT32 SourceStride )
+{
+    UINT32  Row;
+    UINT32  Col;
+    INT32   AbsDiff;
+    UINT32  Total=0;        // Sum of the absolute differences.
+    UINT32  SqTotal=0;      // Sum of their squares.
+
+    // Every sample is |a - b|, where b is the pixel one row below and
+    // one column to the right of a.
+    UINT8   *UpperPtr = DataPtr;
+    UINT8   *LowerPtr = DataPtr + SourceStride + 1;
+
+    // Visit rows 0, 2, 4, ... of the block.
+    // NOTE(review): the column loop below is fixed at 8; the row bound
+    // BLOCK_HEIGHT_WIDTH is presumably 8 as well -- confirm.
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row+=2 )
+    {
+        // Within the row sample columns 0, 2, 4 and 6; the partner
+        // pixel therefore lies in columns 1, 3, 5 and 7 of the next row.
+        for ( Col=0; Col<8; Col+=2 )
+        {
+            AbsDiff = abs( UpperPtr[Col] - LowerPtr[Col] );
+
+            Total   += AbsDiff;
+            SqTotal += AbsDiff * AbsDiff;
+        }
+
+        // Advance both pointers by two rows.
+        UpperPtr += (SourceStride << 1);
+        LowerPtr += (SourceStride << 1);
+    }
+
+    // With 16 samples, (16*SqTotal - Total*Total) / 256 corresponds to
+    // mean(x*x) - mean(x)^2, i.e. the population variance of the
+    // differences; the division is done by the final right shift.
+    // No further scaling is applied to the result.
+
+    return ( (SqTotal<<4) - Total*Total ) >> 8;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InitLoopDeringThresholds
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initialise thresholds used in the prediction/loop 
+ *                      deringing filter.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *****************************************************************************/
+void InitLoopDeringThresholds ( PB_INSTANCE *pbi )
+{
+	UINT32 i;
+
+	pbi->DrCutOff = 64;
+	for ( i=0; i<pbi->DrCutOff; i++ )	// Entries 255 down to 192: linear ramp from TMAX (at 255) down to TMIN.
+		pbi->DrThresh[255 - i] = ((TMAX * pbi->DrCutOff) - ((TMAX - TMIN) * i)) / pbi->DrCutOff;
+
+	for ( i=pbi->DrCutOff; i<=255; i++ )	// Entries 191 down to 0: flat TMIN. Entry 0 is included because LoopDeringBlock reads it when Max==0 or Min==255.
+		pbi->DrThresh[255 - i] = TMIN;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LoopDeringBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      UINT8 *SrcPtr    : Pointer to block to be deringed.
+ *                      UINT32 Stride    : Stride for input block data.
+ *                      UINT32 Width     : Block width.
+ *                      UINT32 Height    : Block height.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Applies a thresholded dering/smoothing filter to a block
+ *						of data.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *****************************************************************************/
+void LoopDeringBlock 
+( 
+    PB_INSTANCE *pbi, 
+    UINT8 *SrcPtr, 
+    UINT32 Stride, 
+    UINT32 Width, 
+    UINT32 Height 
+)
+{
+	UINT32 i,j;
+
+	UINT8 *DataPtr0;
+	UINT8 *DataPtr1;
+	UINT8 *DataPtr2;
+	UINT8  TmpBuffer[16];		// Holds one filtered row or column before it is copied back.
+
+	INT32  ADiff1;
+	INT32  ADiff2;
+
+	INT32  Sum;
+	INT32  Thresh;
+
+	UINT8  Min = 255;
+	UINT8  Max = 0;
+	if ( Width > sizeof(TmpBuffer) || Height > sizeof(TmpBuffer) ) return;	// A row/column must fit in TmpBuffer; larger blocks are left unfiltered rather than overrunning it.
+	// Look for the min and max value in the block
+	DataPtr1 = SrcPtr;
+	for ( i=0; i<Height; i++ )
+	{
+		for ( j=0; j<Width; j++ )
+		{
+			if ( *DataPtr1 < Min )
+				Min = *DataPtr1;
+			if ( *DataPtr1 > Max )
+				Max = *DataPtr1;
+
+			DataPtr1++;
+		}
+		DataPtr1 = (DataPtr1 - Width) + Stride;
+	}
+
+	// Now choose the dering threshold: the larger of the entries selected by (255 - Min) and by Max
+	if ( pbi->DrThresh[255 - Min] > pbi->DrThresh[Max] )
+		Thresh = pbi->DrThresh[255 - Min];
+	else
+		Thresh = pbi->DrThresh[Max];
+
+	// Threshold bigger for bigger range
+	Thresh += ((Max - Min) >> 5);
+
+	// Horizontal dering. Reads one pixel to the left and right of the block, so the caller must make those readable.
+	DataPtr1 = SrcPtr; 
+	for ( i=0; i<Height; i++ )
+	{
+		for ( j=0; j<Width; j++ )
+		{
+			ADiff1 = abs( (INT32)DataPtr1[j] - (INT32)DataPtr1[j-1] );
+			ADiff2 = abs( (INT32)DataPtr1[j] - (INT32)DataPtr1[j+1] );
+
+			Sum = DataPtr1[j] + DataPtr1[j];	// Centre pixel has weight 2 in the 4-weight average.
+
+			if ( ADiff1 <= Thresh )
+				Sum += DataPtr1[j-1];	// Neighbour is close enough: include it.
+			else
+				Sum += DataPtr1[j];		// Otherwise substitute the centre pixel.
+
+			if ( ADiff2 <= Thresh )
+				Sum += DataPtr1[j+1];
+			else
+				Sum += DataPtr1[j];
+			
+			Sum = (Sum + 2) >> 2;	// Rounded divide by 4.
+
+			TmpBuffer[j] = Sum;		// Buffered so later pixels of this row still see unfiltered neighbours.
+		}
+
+		// Copy back the filtered line
+		memcpy ( DataPtr1, TmpBuffer, Width );
+
+		// Next line
+		DataPtr1 += Stride;
+	}
+
+	// Vertical dering. Operates on the horizontally filtered data and reads one row above and below the block.
+	for ( i=0; i<Width; i++ )
+	{
+		DataPtr1 = SrcPtr + i;
+		DataPtr0 = DataPtr1 - Stride;
+		DataPtr2 = DataPtr1 + Stride;
+
+		for ( j=0; j<Height; j++ )
+		{
+			ADiff1 = abs( (INT32)*DataPtr1 - (INT32)*DataPtr0 );
+			ADiff2 = abs( (INT32)*DataPtr1 - (INT32)*DataPtr2 );
+
+			Sum = *DataPtr1 + *DataPtr1;
+
+			if ( ADiff1 <= Thresh ) 
+				Sum += *DataPtr0;
+			else
+				Sum += *DataPtr1;
+
+			if ( ADiff2 <= Thresh )
+				Sum += *DataPtr2;
+			else
+				Sum += *DataPtr1;
+			
+			Sum = (Sum + 2) >> 2;
+
+			TmpBuffer[j] = Sum;
+
+			DataPtr0 += Stride;
+			DataPtr1 += Stride;
+			DataPtr2 += Stride;
+		}
+
+		// Copy back the filtered data
+		DataPtr1 = SrcPtr + i;
+		for ( j=0; j<Height; j++ )
+		{
+			*DataPtr1 = TmpBuffer[j];
+			DataPtr1 += Stride;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_PredictFiltered
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      UINT8 *SrcPtr    : Pointer to block to be filtered.
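+ *	                    INT32 mx         : Motion vector x component (fractional-pel units).
+ *	                    INT32 my         : Motion vector y component (fractional-pel units).
+ *                      UINT32 bp        : Index of the block within the macroblock.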
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Build an 8x8 motion prediction block. If the block is 
+ *                      copied across a block boundary, attempt to eliminate 
+ *                      the internal border by applying the loop filter internally.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *****************************************************************************/
+void VP6_PredictFiltered
+(
+	PB_INSTANCE *pbi,
+	UINT8 *SrcPtr,
+	INT32 mx,
+	INT32 my,
+    UINT32 bp
+) 
+{
+	// Copies the 12x12 region addressed by the whole-pel part of (mx,my),
+	// including a 2 pixel margin on every side, into pbi->LoopFilteredBlock
+	// (row stride 16) and loop filters any 8-pixel block edge that falls
+	// inside the copied region.
+	UINT8           *Filtered = pbi->LoopFilteredBlock;
+	MACROBLOCK_INFO *MbInfo   = &pbi->mbi;
+	UINT32           Shift    = MbInfo->blockDxInfo[bp].MvShift;
+
+	INT32  WholeX, WholeY;
+	INT32  Offset;
+	INT32  EdgeX, EdgeY;
+
+	// Whole-pel part of the vector. The magnitude is shifted and the sign
+	// re-applied, so negative vectors truncate toward zero.
+	WholeX = ( mx > 0 ) ? ( mx >> Shift ) : -( (-mx) >> Shift );
+	WholeY = ( my > 0 ) ? ( my >> Shift ) : -( (-my) >> Shift );
+
+	// Offset of the referenced block in the reference frame...
+	Offset  = MbInfo->blockDxInfo[bp].FrameReconStride * WholeY + WholeX;
+
+	// ...moved up two rows and left two columns to take in the margin
+	// needed by the loop filter and the fractional-pel filters.
+	Offset -= 2 * MbInfo->blockDxInfo[bp].CurrentReconStride;
+	Offset -= 2;
+
+	Copy12x12( SrcPtr + Offset, Filtered, MbInfo->blockDxInfo[bp].CurrentReconStride, 16);
+
+	// Dering loop filter is mandated to OFF in the current bitstream, so
+	// this branch is compiled but never taken.
+	//if ( pbi->UseLoopFilter == LOOP_FILTER_DERING )
+	if ( FALSE )
+	{
+		LoopDeringBlock( pbi, &Filtered[16+1], 16, 10, 10 );
+		return;
+	}
+
+	// Distance from the referenced block's first column / row to the next
+	// 8-pixel boundary; zero means the block is aligned in that direction.
+	EdgeX = (8 - Mod8(WholeX))&7;
+	EdgeY = (8 - Mod8(WholeY))&7;
+
+	// Filter across the boundary at column EdgeX of the block
+	if ( EdgeX )
+	{
+		FilteringHoriz_12( pbi->quantizer->FrameQIndex, Filtered + 2 + EdgeX, 16 );
+	}
+
+	// Filter across the boundary at row EdgeY of the block
+	if ( EdgeY )
+	{
+		FilteringVert_12( pbi->quantizer->FrameQIndex, Filtered + 2 * 16 + EdgeY * 16, 16 );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_PredictFilteredBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      UINT32 bp         : Position of block within MB.
+ *                      
+ *  OUTPUTS       :     INT16 *OutputPtr  : Pointer to output data.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Build an 8x8 motion prediction block. If the block is 
+ *                      copied across a block boundary, attempt to eliminate 
+ *                      the internal border by applying the loop filter internally.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *****************************************************************************/
+void VP6_PredictFilteredBlock 
+(
+	PB_INSTANCE *pbi,
+	INT16 *OutputPtr,
+	UINT32 bp  
+) 
+{
+	// Builds the motion-compensated prediction for block bp into OutputPtr.
+	// The reference is the golden frame when VP6_Mode2Frame[] maps the
+	// current mode to 2, and the last reconstructed frame otherwise.
+	//
+	// Two pixel positions are tracked as offsets from TempBuffer:
+	//   TempPtr1 - the whole-pel position of the referenced block;
+	//   TempPtr2 - the neighbouring position, one pixel and/or one row away
+	//              in the direction of the vector, used when the vector has
+	//              a fractional part.
+	// When both are equal the block is copied, otherwise FilterBlock()
+	// interpolates between them.
+    UINT8 *SrcPtr;
+	UINT8 *TempBuffer;
+
+	// Signed on purpose. On the unfiltered path both offsets start at 0 and
+	// a negative fractional step has to yield -1 or -Stride. With UINT32
+	// offsets that wrapped to roughly 4G, which only addresses the intended
+	// pixel where pointers are 32 bits wide.
+	INT32  TempPtr1;
+	INT32  TempPtr2;
+	INT32  ModX, ModY;
+	UINT32 IVar;
+	UINT32 BicMvSizeLimit;
+	UINT32 Stride;
+
+    UINT32 MvShift = pbi->mbi.blockDxInfo[bp].MvShift; //pbi->mbi.MvShift;
+    UINT32 MvModMask = pbi->mbi.blockDxInfo[bp].MvModMask; //pbi->mbi.MvModMask;
+
+    // Which buffer are we working on?
+    SrcPtr = pbi->LastFrameRecon;
+    if ( VP6_Mode2Frame[pbi->mbi.Mode] == 2 ) 
+    {
+        SrcPtr = pbi->GoldenFrame;
+    }
+
+    // No loop filtering in simple profile
+	if ( pbi->VpProfile == SIMPLE_PROFILE || (pbi->UseLoopFilter == NO_LOOP_FILTER) )
+	{
+	    INT32  mVx, mVy;
+		INT32  mx = pbi->mbi.Mv[bp].x;
+		INT32  my = pbi->mbi.Mv[bp].y;
+
+        // Mask off fractional pel bits.
+	    ModX = (mx & MvModMask);
+	    ModY = (my & MvModMask); 
+
+		// Calculate full pixel motion vector position. Adding the mask to
+		// negative components first makes the shift truncate toward zero.
+        mx += (MvModMask&(mx>>31));
+        my += (MvModMask&(my>>31));
+        
+		mVx = (mx >> MvShift);
+		mVy = (my >> MvShift);
+
+		// Read straight from the reference frame
+		TempBuffer = SrcPtr + pbi->mbi.blockDxInfo[bp].thisRecon + (pbi->mbi.blockDxInfo[bp].FrameReconStride * mVy + mVx);
+		Stride = pbi->mbi.blockDxInfo[bp].CurrentReconStride;
+		TempPtr1 = TempPtr2 = 0;
+	}
+	else
+	{
+		// Loop filter the block into pbi->LoopFilteredBlock and read from there
+		VP6_PredictFiltered( pbi, SrcPtr + pbi->mbi.blockDxInfo[bp].thisRecon, pbi->mbi.Mv[bp].x, pbi->mbi.Mv[bp].y, bp );
+		TempBuffer = pbi->LoopFilteredBlock;
+		Stride = 16;
+		TempPtr1 = 2*16+2;		// Offset into the 12x12 loop filtered buffer
+		TempPtr2 = TempPtr1;
+        
+        // Mask off fractional pel bits.
+	    ModX = (pbi->mbi.Mv[bp].x & MvModMask);
+	    ModY = (pbi->mbi.Mv[bp].y & MvModMask); 
+	}
+
+    // Fractional move in x: second position is one pixel right (+) or left (-)
+	if ( ModX )
+	{
+		TempPtr2 += ( pbi->mbi.Mv[bp].x > 0 )*2 -1;        
+	}
+
+	// Fractional move in y: second position is one row down (+) or up (-)
+	if ( ModY )
+	{
+        TempPtr2 += (( pbi->mbi.Mv[bp].y > 0 ) * 2 - 1) * (INT32)Stride;
+	}
+ 
+	// Fractional vector: interpolate between the two positions
+    if ( TempPtr1 != TempPtr2 ) 
+	{
+		// The FilterBlock selects a filter based upon a ModX and ModY value that are at 1/8 point 
+		// precision. Because U and V are subsampled the vector is already at the right precision 
+		// for U and V but for Y we have to multiply by 2.
+		if ( bp < 4 )
+		{
+			// Filterblock expects input at 1/8 pel resolution (hence << 1 for Y)
+			ModX = ModX << 1;
+			ModY = ModY << 1; 
+
+			// Select the filtering mode
+			if ( pbi->VpProfile == SIMPLE_PROFILE )
+			{
+				// Simple profile always uses bilinear filtering for speed
+				FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, 14 );
+			}
+			else if ( pbi->PredictionFilterMode == AUTO_SELECT_PM )
+			{
+				//  Work out the Mv size limit for selecting bicubic
+				if ( pbi->PredictionFilterMvSizeThresh > 0 )
+					BicMvSizeLimit = (1 << (pbi->PredictionFilterMvSizeThresh - 1)) << 2;			 // Convert to a value in 1/4 pel units
+				else
+					BicMvSizeLimit = ((MAX_MV_EXTENT >> 1) + 1) << 2;								 // Unrestricted
+
+				// Only use bicubic on shortish vectors
+				if ( ( pbi->PredictionFilterMvSizeThresh != 0 ) &&
+					 ( ( (UINT32)abs(pbi->mbi.Mv[bp].x) > BicMvSizeLimit ) || ( (UINT32)abs(pbi->mbi.Mv[bp].y) > BicMvSizeLimit ) ) )
+				{
+					FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, pbi->PredictionFilterAlpha);
+				}
+			    // Should we use a variance test for bicubic as well
+				else if ( pbi->PredictionFilterVarThresh != 0 )
+				{
+					IVar = Var16Point( &TempBuffer[TempPtr1], Stride );
+					FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, (IVar >= pbi->PredictionFilterVarThresh), pbi->PredictionFilterAlpha );
+				}
+				else
+				{
+					FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, TRUE, pbi->PredictionFilterAlpha );
+				}
+			}
+			else  
+				FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, (pbi->PredictionFilterMode == BICUBIC_ONLY_PM), pbi->PredictionFilterAlpha );
+		}
+		else
+		{
+			// Blocks 4 and up never request the bicubic filter
+			FilterBlock( &TempBuffer[TempPtr1], &TempBuffer[TempPtr2], (unsigned short *)OutputPtr, Stride, ModX, ModY, FALSE, pbi->PredictionFilterAlpha );
+		}
+	}
+	// No fractional pels: plain copy of the referenced block
+    else
+        UnpackBlock(&TempBuffer[TempPtr1], OutputPtr, Stride );
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ReconstructBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      BLOCK_POSITION bp : Position of block within MB.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Reconstructs the coded block depending on coding mode.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void VP6_ReconstructBlock ( PB_INSTANCE *pbi, BLOCK_POSITION bp )
+{
+    // Offset of this block in the reconstruction buffers, and where the
+    // reconstructed pixels are written in the frame being built.
+    UINT32 Offset = pbi->mbi.blockDxInfo[bp].thisRecon;
+    UINT8 *Dest   = (UINT8 *)&pbi->ThisFrameRecon[Offset];
+
+	// Inter with no motion vector: predict from the co-located block of
+	// the last frame.
+	if ( pbi->mbi.Mode == CODE_INTER_NO_MV )
+	{
+		ReconInter( pbi->TmpDataBuffer, 
+                    Dest, 
+			        (UINT8 *)&pbi->LastFrameRecon[Offset], 
+			        (INT16 *)pbi->ReconDataBuffer[bp], 
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride);
+		return;
+	}
+
+	// The mode uses a motion vector: build the prediction into
+	// TmpDataBuffer first, then add the decoded block data.
+	if ( VP6_ModeUsesMC[pbi->mbi.Mode] )
+	{
+		// For the compressor we did this already ( possible optimization).
+		VP6_PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+		ReconBlock( pbi->TmpDataBuffer,
+			        (INT16 *)pbi->ReconDataBuffer[bp],
+			        Dest,
+			        pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+		return;
+	}
+
+	// Golden frame: predict from the co-located block of the golden frame.
+	if ( pbi->mbi.Mode == CODE_USING_GOLDEN )
+	{
+		ReconInter( pbi->TmpDataBuffer, 
+                    Dest, 
+			        (UINT8 *)&pbi->GoldenFrame[Offset], 
+			        (INT16 *)pbi->ReconDataBuffer[bp], 
+                    pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+		return;
+	}
+
+	// Anything else is simple intra coding: no reference block is used.
+	ReconIntra( pbi->TmpDataBuffer, 
+                Dest, 
+                (UINT16 *)pbi->ReconDataBuffer[bp], 
+                pbi->mbi.blockDxInfo[bp].CurrentReconStride );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c
new file mode 100644
index 00000000..b0aa83f3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vfwpbdll_if.c
@@ -0,0 +1,605 @@
+/****************************************************************************
+*        
+*   Module Title :     vfwpbdll_if.c
+*
+*   Description  :     Video codec playback dll interface
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>
+#include "pbdll.h"
+#include "vp60dversion.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+// Compilers other than MSVC: "__try" expands to nothing, so every
+// "__try { ... }" in this file becomes an ordinary block. The matching
+// __except handlers are removed by their own _MSC_VER guards.
+#ifndef _MSC_VER
+#define __try  
+// Version banner string built from VP60DVERSION.
+#define CommentString "\nON2.COM VERSION VP60D " VP60DVERSION "\n"
+// NOTE(review): "#pragma comment(exestr, ...)" is an MSVC-style pragma, yet
+// it sits in the non-MSVC branch - confirm whether an #else was intended.
+#pragma comment(exestr,CommentString)
+
+#endif
+
+
+/****************************************************************************
+* Imports
+****************************************************************************/ 
+// Copied into pbi->ProcessorFrequency by VP6_StartDecoder; that field is the
+// divisor applied to time-stamp-counter deltas in this file.
+extern unsigned int CPUFrequency;
+// Called by VP6_DecodeFrameToYUV to decode and reconstruct the frame.
+extern void VP6_DecodeFrameMbs(PB_INSTANCE *pbi);
+// Called at the end of VP6_StartDecoder.
+extern void VP6_InitialiseConfiguration(PB_INSTANCE *pbi);
+// Called by VP6_DecodeFrameToYUV before the frame header is loaded.
+extern void InitHeaderBuffer ( FRAME_HEADER *Header, unsigned char *Buffer );
+extern void SetAddNoiseMode(POSTPROC_INST , int);
+
+#include <stdio.h>
+/****************************************************************************
+*  Module Statics
+****************************************************************************/        
+// Version string returned by VP60D_GetVersionNumber().
+static const char vp31dVersion[] = VP60DVERSION;
+
+/****************************************************************************
+* Exports
+****************************************************************************/
+#ifdef PBSTATS1
+// Running totals updated once per frame by VP6_DecodeFrameToYUV.
+static INT32  TotQ          = 0;    // TEMP diagnostic variables
+static INT32  PBFrameNumber = 0;
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP60D_GetVersionNumber
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     const char *: Pointer to decoder version string.
+ *
+ *  FUNCTION      :     Returns a pointer to the decoder version string.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+const char * CCONV VP60D_GetVersionNumber ( void ) 
+{
+    // The string has static storage duration, so the pointer stays valid
+    // after the call returns.
+    const char *Version = &vp31dVersion[0];
+
+    return Version;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StartDecoder
+ *
+ *  INPUTS        :     PB_INSTANCE **pbi  : Pointer to pointer to decoder instance.
+ *                      UINT32 ImageWidth  : Width of the image.
+ *                      UINT32 ImageHeight : Height of the image.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     TRUE if succeeds, FALSE otherwise.
+ *
+ *  FUNCTION      :     Creates and initializes the decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+BOOL CCONV VP6_StartDecoder( PB_INSTANCE **pbi, UINT32 ImageWidth, UINT32 ImageHeight )
+{ 
+    // Creates a decoder instance for ImageWidth x ImageHeight video and
+    // stores it in *pbi. Returns TRUE on success and FALSE on any failure.
+    // When the instance itself cannot be created, *pbi holds whatever
+    // VP6_CreatePBInstance() returned.
+    __try
+    {
+        PB_INSTANCE *inst;
+
+        // set up our structure holding all formerly global information about a playback instance
+        *pbi = VP6_CreatePBInstance();
+        inst = *pbi;
+
+        // Nothing below can run without an instance.
+        if ( !inst )
+            return FALSE;
+
+        // Output and scaling dimensions default to the coded image size.
+        inst->ScaleWidth = ImageWidth;
+        inst->ScaleHeight = ImageHeight;
+        inst->OutputWidth = ImageWidth;
+        inst->OutputHeight = ImageHeight;
+
+        inst->Configuration.VideoFrameWidth = ImageWidth;
+        inst->Configuration.VideoFrameHeight = ImageHeight;
+
+        inst->postproc = CreatePostProcInstance(&inst->Configuration);
+        inst->quantizer = VP6_CreateQuantizer();
+        inst->ProcessorFrequency = CPUFrequency;
+
+        // The quantizer is dereferenced below, so a failed creation has to
+        // be caught here. VP6_InitFrameDetails fills in fragment counts as
+        // well.
+        if ( !inst->quantizer || !VP6_InitFrameDetails(inst) )
+        {
+            // Release what was created above, in the order VP6_StopDecoder
+            // uses, before dropping the instance.
+            // NOTE(review): assumes DeletePostProcInstance tolerates a
+            // post-processor that failed to create - confirm.
+            if ( inst->quantizer )
+                VP6_DeleteQuantizer(&inst->quantizer);
+            DeletePostProcInstance(&inst->postproc);
+            VP6_DeletePBInstance(pbi);
+            return FALSE;
+        }
+
+        // Set last_dct_thresh to an illegal value to make sure the
+        // Q tables are initialised for the new video sequence. 
+        inst->quantizer->LastFrameQIndex = 0xFFFFFFFF;
+
+        // Set up various configuration parameters.
+        VP6_InitialiseConfiguration(inst);
+        
+        return TRUE;
+    }
+#if defined(_MSC_VER)
+    __except( TRUE )
+    {
+        VP6_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_GetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi        : Pointer to decoder instance.
+ *                      PB_COMMAND_TYPE Command : Command action specifier.
+ *                      
+ *  OUTPUTS       :     UINT32 *Parameter       : Command dependent value requested.
+ *
+ *  RETURNS       :     void
+ *  
+ *  FUNCTION      :     Generalised command interface to decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void CCONV VP6_GetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 *Parameter )
+{
+    // Writes the value selected by Command to *Parameter. Commands that are
+    // not recognised leave *Parameter untouched.
+    switch ( Command )
+    {
+#if defined(POSTPROCESS)
+    case PBC_SET_POSTPROC:
+        // Current post-processing level.
+        *Parameter = pbi->PostProcessingLevel;
+        // Explicit break: the case used to fall through into "default",
+        // which only worked because "default" does nothing.
+        break;
+#endif
+
+    default:
+        break;
+    }
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_PickPostProcessingLevel
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Pointer to decoder instance.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     int: Selected post-processing level.
+ *  
+ *  FUNCTION      :     Select the post-processing level to be used based
+ *                      on how fast we're decoding.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+#define CRITICALWATERMARK   (int) (31000 * pbi->CPUFree / 100)
+#define DOWNWATERMARK       (int) (30000 * pbi->CPUFree / 100)
+#define UPWATERMARK         (int) (28000 * pbi->CPUFree / 100)
+
+// Returns the first of levels 6, 5, 4, 8 whose average post-processing time
+// added to baseTime stays below budget, or 0 when none of them fits.
+static int VP6_FirstLevelWithin ( PB_INSTANCE *pbi, int baseTime, int budget )
+{
+	static const int Candidates[] = { 6, 5, 4, 8 };
+	unsigned int k;
+
+	for ( k = 0; k < sizeof(Candidates) / sizeof(Candidates[0]); k++ )
+	{
+		if ( pbi->avgPPTime[Candidates[k]] + baseTime < budget )
+			return Candidates[k];
+	}
+
+	return 0;
+}
+
+int VP6_PickPostProcessingLevel ( PB_INSTANCE *pbi )
+{
+	// Chooses a post-processing level from the measured decode, blit and
+	// post-processing times. The three watermarks scale with pbi->CPUFree.
+	// When CPUFree is 0 the current level is returned unchanged.
+	//
+	// Side effects: fills in estimates for avgPPTime[4], [5], [6] and [8]
+	// that are still 0, and resets avgDecodeTime to thisDecodeTime when the
+	// critical watermark is exceeded.
+	int minimumTime = pbi->thisDecodeTime + pbi->avgBlitTime + pbi->avgPPTime[8];
+
+	// avgPPTime[] is written with a "% 10" index in VP6_GetYUVConfig, which
+	// also accepts levels above 100 and 200. Use the same slot here rather
+	// than indexing the table with the raw level.
+	int thisTime = minimumTime + pbi->avgPPTime[pbi->PostProcessingLevel % 10];
+	int avgTime = pbi->avgDecodeTime + pbi->avgBlitTime;
+	
+	// estimate the times of all of our unknown postprocessors
+	if(pbi->avgPPTime[6]==0)
+		pbi->avgPPTime[6] = avgTime>>1;
+	
+	if(pbi->avgPPTime[5]==0)
+		pbi->avgPPTime[5] = avgTime>>1;
+
+	if(pbi->avgPPTime[4]==0)
+		pbi->avgPPTime[4] = (avgTime ) >> 2;
+
+	if(pbi->avgPPTime[8]==0)
+		pbi->avgPPTime[8] = avgTime>>3;
+
+	if(pbi->CPUFree == 0 )
+		return pbi->PostProcessingLevel;
+
+	// automatically select a postprocessing level based on the amount 
+	// of time taken to decode blit and postprocess etc
+	
+	// more than 1/30 of a second no postprocessing at all (its better to show an 
+	// ugly frame than none at all). We use 1/30th of a second because nothing 
+	// tells us the actual framerate
+	if ( thisTime > (int)(CRITICALWATERMARK) )
+	{
+		// this frame's taking too long try to make up time on the subsequent frames
+		pbi->avgDecodeTime = pbi->thisDecodeTime; 
+
+		// budget against this frame's own decode time
+		return VP6_FirstLevelWithin( pbi, minimumTime, CRITICALWATERMARK );
+	}
+
+	// Between the up and down watermarks: keep the current level
+	if(thisTime < DOWNWATERMARK && thisTime > UPWATERMARK)
+		return pbi->PostProcessingLevel;
+
+	// otherwise budget against the average decode time
+	return VP6_FirstLevelWithin( pbi, avgTime, UPWATERMARK );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_GetYUVConfig
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi             : Pointer to decoder instance.
+ *                      YUV_BUFFER_CONFIG *YuvConfig : Pointer to configuration
+ *                                                     data-structure.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *  
+ *  FUNCTION      :     Gets details of the reconstruction buffer
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void CCONV VP6_GetYUVConfig ( PB_INSTANCE *pbi, YUV_BUFFER_CONFIG *YuvConfig )
+{
+    // Runs the post-processor over the last reconstructed frame when one is
+    // selected (or de-interlacing is required), then fills *YuvConfig with
+    // the dimensions, strides and plane pointers of the frame to display.
+    // On MSVC builds the post-processing level is re-picked first and the
+    // time spent post-processing is folded into pbi->avgPPTime[].
+    __try 
+    {
+#ifdef _MSC_VER
+		unsigned int duration;
+		unsigned int starttsc,endtsc;
+		VP6_readTSC(&starttsc);
+
+		// The end stamp is only taken inside the post-processing branch.
+		// Start it at the begin stamp so a frame that needs no
+		// post-processing is timed as 0 instead of reading an
+		// uninitialised value.
+		endtsc = starttsc;
+
+		pbi->PostProcessingLevel = VP6_PickPostProcessingLevel(pbi);
+#endif
+        if( pbi->PostProcessingLevel || (pbi->Configuration.Interlaced && pbi->DeInterlaceMode) )
+        {
+#ifdef _MSC_VER
+            extern void vp6_showinfo2(PB_INSTANCE *pbi);
+            extern void vp6_showinfo(PB_INSTANCE *pbi);
+		
+            // Levels above 200: post-process at (level - 200), then vp6_showinfo
+            if ( pbi->PostProcessingLevel > 200 )
+            {
+                PostProcess (
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel-200,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,        
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+				VP6_readTSC(&endtsc);
+                vp6_showinfo(pbi);
+            }
+            // Levels above 100: post-process at (level - 100), then vp6_showinfo2
+            else if ( pbi->PostProcessingLevel > 100 )
+            {
+				PostProcess (
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel-100,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,                
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+				VP6_readTSC(&endtsc);
+				vp6_showinfo2(pbi);
+            }
+            else
+#endif
+			{
+//				pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+				
+                PostProcess (
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,                
+                    sizeof(FRAG_INFO),
+                    0x0001 );
+#ifdef _MSC_VER
+                VP6_readTSC(&endtsc);
+#endif
+			}
+        }
+
+        if(pbi->BlackClamp)
+            ClampLevels( pbi->postproc,pbi->BlackClamp,pbi->WhiteClamp,pbi->PostProcessBuffer,	pbi->PostProcessBuffer);
+
+        if( pbi->Configuration.VideoFrameWidth < pbi->OutputWidth &&
+            pbi->Configuration.VideoFrameHeight == pbi->OutputHeight )
+        {
+            // Output is wider than the coded frame: hand the frame to
+            // ScaleOrCenter() with pbi->ScaleBuffer as the destination,
+            // described as an (Output + 32) sized image.
+            YuvConfig->YWidth = pbi->OutputWidth+32; 
+            YuvConfig->YHeight = pbi->OutputHeight+32;
+            YuvConfig->YStride = YuvConfig->YWidth;
+            
+            YuvConfig->UVWidth = YuvConfig->YWidth / 2;
+            YuvConfig->UVHeight = YuvConfig->YHeight / 2;
+            YuvConfig->UVStride = YuvConfig->YStride / 2;
+			
+            YuvConfig->YBuffer = (char *)pbi->ScaleBuffer;
+            YuvConfig->UBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight;
+            YuvConfig->VBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight+YuvConfig->UVWidth*YuvConfig->UVHeight;
+
+			// NOTE(review): the source is chosen on PostProcessingLevel
+			// alone, whereas the unscaled path below also selects the
+			// post-process buffer for de-interlaced frames - confirm.
+			if(pbi->PostProcessingLevel)
+	            ScaleOrCenter( pbi->postproc, pbi->PostProcessBuffer, YuvConfig  );
+			else
+	            ScaleOrCenter( pbi->postproc, pbi->LastFrameRecon, YuvConfig  );
+
+			// Move each plane pointer to the centred Output-sized window
+			YuvConfig->YBuffer += 
+				(YuvConfig->YHeight - pbi->OutputHeight ) / 2 * YuvConfig->YStride 
+				+(YuvConfig->YWidth - pbi->OutputWidth) / 2;
+            YuvConfig->YWidth = pbi->OutputWidth; 
+            YuvConfig->YHeight = pbi->OutputHeight;
+            
+			YuvConfig->UBuffer += 
+				(YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride 
+				+(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+			YuvConfig->VBuffer += 
+				(YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride 
+				+(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->UVWidth = pbi->OutputWidth / 2; 
+            YuvConfig->UVHeight = pbi->OutputHeight / 2;
+        }
+        else
+        {
+            // No scaling: point straight into the frame buffer, skipping
+            // the UMV border around each plane.
+            YuvConfig->YWidth = pbi->Configuration.VideoFrameWidth;
+            YuvConfig->YHeight = pbi->Configuration.VideoFrameHeight;
+            YuvConfig->YStride = pbi->Configuration.YStride;
+            
+            YuvConfig->UVWidth = pbi->Configuration.VideoFrameWidth / 2;
+            YuvConfig->UVHeight = pbi->Configuration.VideoFrameHeight / 2;
+            YuvConfig->UVStride = pbi->Configuration.UVStride;
+
+            if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+            { 
+                YuvConfig->YBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconYDataOffset+(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+            else
+            {
+                YuvConfig->YBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconYDataOffset+ (pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+        }
+
+#if defined(_MSC_VER)   
+		// A zero frequency would divide by zero; record no time at all
+		// in that case.
+		if ( pbi->ProcessorFrequency )
+			duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ;
+		else
+			duration = 0;
+
+		// First sample seeds the average, later ones are weighted 1/8
+		if( pbi->avgPPTime[pbi->PostProcessingLevel%10] == 0)
+			pbi->avgPPTime[pbi->PostProcessingLevel%10] = duration;
+		else
+			pbi->avgPPTime[pbi->PostProcessingLevel%10] = ( 7 * pbi->avgPPTime[pbi->PostProcessingLevel%10] + duration ) >> 3;
+#endif
+    }
+#if defined(_MSC_VER)   
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( pbi, GEN_EXCEPTIONS );
+    }    
+#endif
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_DecodeFrameToYUV
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi       : Pointer to decoder instance.
+ *                      char *VideoBufferPtr   : Pointer to compressed data buffer.
+ *                      unsigned int ByteCount : Size in bytes of compressed data buffer.
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     int: 0 for success, negative value for error.
+ *
+ *  FUNCTION      :     Decodes a frame into the internal YUV reconstruction buffer.
+ *                      Details of this buffer can be obtained by calling GetYUVConfig().
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+// Number of frames decoded successfully so far, across all instances.
+int CurrentFrame = 0;
+int CCONV VP6_DecodeFrameToYUV(PB_INSTANCE *pbi, char *VideoBufferPtr, unsigned int ByteCount)
+{
+    // Decodes one compressed frame of ByteCount bytes from VideoBufferPtr
+    // into the instance's reconstruction buffer.
+    // Returns 0 on success, -1 when the frame header is rejected or points
+    // outside the buffer, and -2 when an exception is trapped (MSVC only).
+    unsigned char *tmp;
+
+    __try
+    {
+#ifdef _MSC_VER
+		unsigned int duration;
+		unsigned int starttsc,endtsc;
+        VP6_readTSC(&starttsc);
+#endif
+		pbi->CurrentFrameSize = ByteCount;
+
+		// Initialise the bit reader used to read the fixed raw part of the header
+        InitHeaderBuffer ( &pbi->Header, (unsigned char*)VideoBufferPtr );
+
+		// decode the frame header
+        if ( !VP6_LoadFrame(pbi) )
+            return -1;
+
+		//  Start the second boolean decoder
+		if ( pbi->MultiStream || (pbi->VpProfile == SIMPLE_PROFILE) )
+		{
+			// Buff2Offset comes from the bitstream. Refuse a frame whose
+			// second partition would start beyond the supplied data rather
+			// than read outside the caller's buffer.
+			if ( (unsigned int)pbi->Buff2Offset > ByteCount )
+				return -1;
+
+		    pbi->mbi.br = &pbi->br2;
+
+			if ( pbi->UseHuffman )
+			{
+				// Initialise BITREADER for second bitstream partition
+				pbi->br3.bitsinremainder = 0;
+				pbi->br3.remainder = 0;
+				pbi->br3.position = ((unsigned char*)VideoBufferPtr)+pbi->Buff2Offset;
+			}
+			else
+				VP6_StartDecode(&pbi->br2,((unsigned char*)VideoBufferPtr)+pbi->Buff2Offset);
+		}
+        else
+        {
+        	pbi->mbi.br = &pbi->br;
+        }
+
+        // decode and reconstruct frame
+        VP6_DecodeFrameMbs(pbi);
+
+		// switch pointers so lastframe recon is this frame
+        tmp = pbi->LastFrameRecon;
+        pbi->LastFrameRecon = pbi->ThisFrameRecon;
+        pbi->ThisFrameRecon = tmp;
+
+        // update the border 
+        UpdateUMVBorder(pbi->postproc, pbi->LastFrameRecon);
+
+        // Update the golden frame buffer
+		if( (pbi->FrameType == BASE_FRAME) || pbi->RefreshGoldenFrame )
+            memcpy(pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize); 
+
+#if defined(_MSC_VER)
+	    ClearSysState();
+#endif
+
+#ifdef PBSTATS1
+        // Update PB stats
+        TotQ += pbi->quantizer->ThisFrameQualityValue;
+        PBFrameNumber += 1;
+#endif
+
+		// Key frames restart the running quantizer average; other frames
+		// move it a quarter of the way toward the new value, with rounding.
+	    if(pbi->FrameType == BASE_FRAME )
+			pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+		else
+			pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;
+
+#ifdef _MSC_VER
+        VP6_readTSC(&endtsc);
+
+		// A zero frequency would divide by zero, and the resulting fault
+		// would turn a successfully decoded frame into a -2 return.
+		if ( pbi->ProcessorFrequency )
+			duration = (endtsc-starttsc)/ (pbi->ProcessorFrequency);
+		else
+			duration = 0;
+		pbi->thisDecodeTime = duration;
+
+		// First sample seeds the average, later ones are weighted 1/8
+		if( pbi->avgDecodeTime == 0)
+			pbi->avgDecodeTime = duration;
+		else
+			pbi->avgDecodeTime = (7*pbi->avgDecodeTime + duration)>>3;
+#endif
+
+
+#if 0
+        if (pbi->br.pos>pbi->CurrentFrameSize)
+        {
+            FILE *f = fopen("badframes.stt","a");
+            fprintf(f,"%8d %8d %8d \n", CurrentFrame,pbi->br.pos,pbi->CurrentFrameSize);
+            fclose(f);
+        }
+#endif
+
+        CurrentFrame++;
+    }
+#if defined(_MSC_VER) 
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( pbi, GEN_EXCEPTIONS );
+        return -2;
+    }
+#endif    
+    return 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_StopDecoder
+ *
+ *  INPUTS        :     PB_INSTANCE **pbi : Pointer to pointer to decoder instance.
+ *
+ *  OUTPUTS       :     PB_INSTANCE **pbi : Pointer to pointer to decoder instance,
+ *                                          set to NULL on return.
+ *
+ *  RETURNS       :     int: TRUE on success, FALSE otherwise.
+ *
+ *  FUNCTION      :     Destroys the decoder instance.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+int CCONV VP6_StopDecoder ( PB_INSTANCE **pbi )
+{
+    // Tears down the decoder referenced by *pbi, if there is one.
+    // Returns TRUE both after a teardown and when *pbi was already empty;
+    // FALSE is only returned from the MSVC exception handler.
+    __try
+    {
+        PB_INSTANCE *inst = *pbi;
+
+        if ( inst )
+        {
+            // Owned sub-objects first, the instance itself last.
+            VP6_DeleteQuantizer(&inst->quantizer);
+            DeletePostProcInstance(&inst->postproc);
+            VP6_DeleteFragmentInfo(inst);
+            VP6_DeleteFrameInfo(inst);
+            VP6_DeletePBInstance(pbi);
+        }
+    }
+
+#if defined(_MSC_VER)        
+    __except ( TRUE )
+    {
+        VP6_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif    
+    return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_ErrorTrap
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Pointer to decoder instance.
+ *                      int ErrorCode    : Error code to report.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Called when a fatal error is detected.
+ *
+ *  SPECIAL NOTES :     Currently does nothing. 
+ *
+ ****************************************************************************/
+void VP6_ErrorTrap ( PB_INSTANCE *pbi, int ErrorCode )
+{
+    // Intentionally empty: nothing is reported or recorded. The casts only
+    // mark both arguments as deliberately unused.
+    (void)pbi;
+    (void)ErrorCode;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c
new file mode 100644
index 00000000..1e460f3e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Generic/vp60dxv.c
@@ -0,0 +1,454 @@
+/****************************************************************************
+*
+*   Module Title :     vp60dxv.c
+*
+*   Description  :     VP60 interface to DXV.
+*
+*    AUTHOR      :     SJL
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.03 SJL 17/10/02  Up the version to 1.0.0.3, added new dxv interface
+*   1.02 YWX 30/09/02  Up the version to 1.0.0.2, added support of scaling
+*   1.01 YWX 19/09/02  Fixed bug in blit and up the version to 1.0.0.1
+*	1.00 SJL 17/06/02  Base
+*
+*****************************************************************************
+*/
+//#include <stdlib.h> 
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+
+#include "pbdll.h"
+
+
+const char* VP6LIBVERSION="ON2 VP6 Decode Library for MAC Version 1.0.0.3";
+
+typedef unsigned int FourCC;
+ 
+#define VP60_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '0')
+#define VP61_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '1')
+
+
+static dxvBitDepth bitDepths[] = 
+{
+	DXYV12,DXRGBNULL
+};
+
+
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src, int Command, uintptr_t Parameter); /* registered as the SET_PARAMETER_FUNC in vp60_xImageCreate */
+
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+
+#include "duck_dxl.h"
+
+#if 0
+typedef struct tFrameInfo
+{
+    int KeyFrame;
+    int Version;
+    int Quality;
+    int vp30Flag;
+} FrameInfo;
+
+void 
+vp60_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+
+    // Is the frame and inter frame or a key frame 
+    frameInfo->KeyFrame = !(source[0] > 0x7f);
+    frameInfo->Quality = source[0] >> 2;
+    if(frameInfo->KeyFrame) 
+        frameInfo->Version = ((source[2]>>3) & 0x1f );
+    else
+        frameInfo->Version = 0;
+
+    frameInfo->vp30Flag = (int)source[1];
+
+}
+#endif
+
+// YUV buffer configuration structure (cast to YUV_BUFFER_CONFIG * in vp60_decompress)
+typedef struct
+{
+    int     YWidth;
+    int     YHeight;
+    int     YStride;
+
+    int     UVWidth;
+    int     UVHeight;
+    int     UVStride;
+
+    char *  YBuffer;
+    char *  UBuffer;
+    char *  VBuffer;
+
+	char *  uvStart;    // computed from the vScreen address and pitch for DXYV12/DXI420 blits
+    int uvDstArea;      // |pitch * h| / 4, set by vp60_decompress
+    int uvUsedArea;     // 0 for a negative pitch, otherwise pitch * UVHeight / 2
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an algorithm base container */
+typedef struct tXImageCODEC
+{
+	FourCC myFourCC;                    /* fourcc this xImage was created / recreated for */
+	DXV_YUV_BUFFER_CONFIG FrameBuffer;  /* refilled by VP6_GetYUVConfig before each blit */
+	PB_INSTANCE *myPBI;                 /* decoder instance filled in by VP6_StartDecoder */
+	int owned;                          /* set to 1 once VP6_StartDecoder has succeeded */
+} vp60_XIMAGE, *vp60_XIMAGE_HANDLE;
+
+
+typedef void ((*VP6BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp6_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_decompress
+ *
+ *  INPUTS        :     src : xImage with the compressed frame; vScreen : blit target or NULL
+ *
+ *  OUTPUTS       :     The vScreen surface is written when it reports a non-NULL address.
+ *
+ *  RETURNS       :     DXL_OK, DXL_VERSION_CONFLICT, DXL_BAD_DATA or DXL_INVALID_BLIT.
+ *
+ *  FUNCTION      :     Decodes the attached frame (if any) and blits the result to vScreen.
+ *
+ *  SPECIAL NOTES :     Decoding is skipped if there is no data, cSize is 0, or bytes 0..2 are all 0.
+ *
+ ****************************************************************************/
+#include "huffman.h"
+
+static int 
+vp60_decompress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+
+	int retVal;
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+	unsigned char *cAddr;
+	int cSize;		
+	int w, h;
+
+	
+	DXL_GetXImageXYWH(src, NULL, NULL, &w, &h);
+			
+
+
+	// if we have a compressed frame decompress it ( otherwise we'll just redo
+	// the scaling and postprocessing from the last frame )
+    cAddr = DXL_GetXImageCDataAddr(src);
+	cSize = DXL_GetXImageCSize(src);
+	
+    if(cAddr)
+    {
+		if((cSize != 0) && (cAddr[0]>=1 || cAddr[1]>=1 || cAddr[2] >=1)) /* NOTE(review): may read cAddr[1..2] even when cSize < 3 */
+		{
+			// decode the frame 
+			retVal = VP6_DecodeFrameToYUV(thisAlgorithmBase->myPBI, (char *)cAddr, cSize, w, h);
+			if(retVal != 0 )
+			{
+	            if(retVal == -1)
+		            return DXL_VERSION_CONFLICT;
+			    else
+				    return DXL_BAD_DATA;
+			}
+		}
+    }
+
+
+	if (vScreen) /* if there is a vScreen, blit to it */
+	{
+        unsigned char * ptrScrn;
+        short thisPitch, vsHeight;
+        dxvBlitQuality bq; 
+        dxvBitDepth bd;
+        VP6BLIT_FUNC blitter;
+        
+        DXL_GetVScreenAttributes(vScreen, (void **)&ptrScrn, &bq, &bd, &thisPitch, &vsHeight);
+
+		if(ptrScrn)
+        { 
+    		int x, y, pSize;
+            int viewX, viewY;
+
+			DXL_GetVScreenView(vScreen, &viewX, &viewY, NULL, NULL);
+
+			/* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+		    VP6_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &(thisAlgorithmBase->FrameBuffer));
+			
+          	pSize = VPX_GetSizeOfPixel(bd);
+
+			DXL_GetXImageXYWH(src, &x, &y, NULL, NULL);
+
+		    /* remember to offset if requested */
+		    y += viewY;
+		    x += viewX; 
+
+	        ptrScrn += (x * pSize) + (y * thisPitch);
+
+            /* advance each plane pointer to its last row so the frame buffers can be walked backwards */
+            #if 1
+            thisAlgorithmBase->FrameBuffer.YBuffer = thisAlgorithmBase->FrameBuffer.YBuffer + 
+                    ((thisAlgorithmBase->FrameBuffer.YHeight - 1) * 
+                     (thisAlgorithmBase->FrameBuffer.YStride));
+
+			thisAlgorithmBase->FrameBuffer.UBuffer = thisAlgorithmBase->FrameBuffer.UBuffer +
+                    ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) * 
+                     (thisAlgorithmBase->FrameBuffer.UVStride));
+			
+            thisAlgorithmBase->FrameBuffer.VBuffer = thisAlgorithmBase->FrameBuffer.VBuffer +
+                    ((thisAlgorithmBase->FrameBuffer.UVHeight - 1) * 
+                     (thisAlgorithmBase->FrameBuffer.UVStride));
+            #endif
+            
+            if((bd != DXYUY2) && (bd != DXYV12))
+            {
+                if(bq == DXBLIT_STRETCH)
+                {
+                    thisPitch *= 2; /* NOTE(review): thisPitch is a short; confirm doubling cannot overflow */
+                }
+            }
+
+            if(bd == DXYV12 || bd == DXI420)
+            {
+				if(thisPitch < 0)
+				{
+					thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 ); /* NOTE(review): abs() needs <stdlib.h>; its include is commented out in this file */
+					thisAlgorithmBase->FrameBuffer.uvDstArea = abs((thisPitch * h)/4);
+					thisAlgorithmBase->FrameBuffer.uvUsedArea = 0;
+				}
+				else
+				{
+					thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + (thisPitch * h));
+					thisAlgorithmBase->FrameBuffer.uvDstArea = ((thisPitch * h)/4);
+					thisAlgorithmBase->FrameBuffer.uvUsedArea = ((thisPitch * thisAlgorithmBase->FrameBuffer.UVHeight)/2);
+				}
+
+            }
+
+			blitter = (VP6BLIT_FUNC)VPX_GetBlitter(bq, bd);
+			
+			if ((void *)blitter != (void *)-1) /* (void *)-1 is treated as "no blitter for this quality/depth" */
+			{
+            	blitter(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(&thisAlgorithmBase->FrameBuffer));
+            }
+            else
+            {
+            	return DXL_INVALID_BLIT;
+            }
+
+
+        }
+	}
+
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_xImageDestroy
+ *
+ *  INPUTS        :     DXL_XIMAGE_HANDLE src : xImage whose algorithm base is released
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     Stops the decoder if this xImage owns it, then frees the
+ *                      algorithm base with duck_free. A NULL algorithm base is a no-op.
+ *
+ *  SPECIAL NOTES :     The decoder is stopped only when the "owned" flag is set.
+ *
+ ****************************************************************************/
+static int 
+vp60_xImageDestroy(DXL_XIMAGE_HANDLE src)
+{
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+	if(thisAlgorithmBase)
+	{
+		if ( thisAlgorithmBase->owned )
+	        VP6_StopDecoder ( &(thisAlgorithmBase->myPBI) );
+		duck_free ( thisAlgorithmBase );
+	}
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_xImageReCreate
+ *
+ *  INPUTS        :     src, data (unused), type (fourcc), bitDepth (unused), w, h
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     The algorithm base, or NULL on unknown type / decoder start failure.
+ *
+ *  FUNCTION      :     Records the fourcc and starts a decoder instance of size w x h.
+ *
+ *  SPECIAL NOTES : 
+ *                  called during initialization and/or when xImage (decompressor)
+ *                  attributes change, note that nImage and src are actually
+ *                  synonymous and should be cleared out a bit (to say the least!)
+ *
+ *
+ *                  !!!!!!
+ *                  This function should be prepared to get data that is NOT of the 
+ *                  type native to the decoder,  It should do its best to verify it 
+ *                  as valid data and should clean up after itself and return NULL
+ *                  if it doesn't recognize the format of the data
+ *
+ ****************************************************************************/
+static void * 
+vp60_xImageReCreate(DXL_XIMAGE_HANDLE src, unsigned char *data, int type, enum BITDEPTH bitDepth, int w, int h)
+{  
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+
+	(void) bitDepth;
+   
+
+    if ( 
+    		(type != VP60_FOURCC) && 
+    		(type != VP61_FOURCC) && 
+    		(type !=  DXL_MKFOURCC( 'V', 'P', '6', '2'))  
+    	) 
+		return NULL;
+
+	thisAlgorithmBase->myFourCC = type; /* NOTE(review): thisAlgorithmBase is not NULL-checked here */
+
+    /* create new PBI -- NOTE(review): an existing myPBI is not stopped first; confirm no leak on re-create */
+    if ( !VP6_StartDecoder( &(thisAlgorithmBase->myPBI), w, h ) )
+    {
+		vp60_xImageDestroy ( src ); /* frees thisAlgorithmBase */
+        thisAlgorithmBase = NULL;
+    }
+    else
+    {
+	    thisAlgorithmBase->owned = 1;
+    }
+
+
+    return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_xImageCreate
+ *
+ *  INPUTS        :     src : xImage to attach callbacks to; data : unused
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     The newly allocated algorithm base, or NULL if allocation fails.
+ *
+ *  FUNCTION      :     Allocates a vp60_XIMAGE and registers the recreate/destroy/dx/set-parameter callbacks.
+ *
+ *  SPECIAL NOTES :     The decoder is not started here; vp60_xImageReCreate does that.
+ *
+ ****************************************************************************/
+static DXL_HANDLE 
+vp60_xImageCreate (DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+//	return vp60_xImageReCreate(src, data, VP60_FOURCC, (enum BITDEPTH ) 0, 320, 240);
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); /* value is overwritten below */
+
+	/* create a new xImage, specific to this type of decoder, 
+        (see "vp60_XIMAGE" struct above and dxl_main.h) */
+	thisAlgorithmBase = (vp60_XIMAGE_HANDLE)duck_calloc ( 1, sizeof(vp60_XIMAGE), DMEM_GENERAL );
+	if (thisAlgorithmBase == NULL) 
+        return NULL;
+
+	DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp60_xImageReCreate);
+
+	DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp60_xImageDestroy);
+
+	DXL_RegisterXImageDx(src, (DX_FUNC) vp60_decompress);
+
+	DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp60_SetParameter);
+
+	thisAlgorithmBase->myFourCC = VP60_FOURCC;
+
+	return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_Init
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     Registers vp60_xImageCreate for the VP60, VP61 and VP62 fourccs.
+ *
+ *  SPECIAL NOTES :     Also calls vp3SetBlit() and VP6_VPInitLibrary(). 
+ *
+ ****************************************************************************/
+int 
+vp60_Init(void)
+{
+    DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP60_FOURCC);
+
+    DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP61_FOURCC);
+    
+    DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate,  DXL_MKFOURCC( 'V', 'P', '6', '2'));
+    
+
+    vp3SetBlit(); /* NOTE(review): no declaration visible in this file -- presumably from an included header */
+
+	/* initialize all the global variables */
+	VP6_VPInitLibrary();
+	
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_Exit
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     main exit routine, called during DXL_ExitVideo() 
+ *                      clean up any global information if necessary
+ *
+ *  SPECIAL NOTES :     Only calls VP6_VPDeInitLibrary(). 
+ *
+ ****************************************************************************/
+int 
+vp60_Exit(void)
+{
+	VP6_VPDeInitLibrary();
+
+	return DXL_OK;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp60_SetParameter
+ *
+ *  INPUTS        :     src : xImage handle; Command : PB_COMMAND_TYPE value; Parameter : its argument
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Forwards a set-parameter request to VP6_SetPbParam on this xImage's decoder.
+ *
+ *  SPECIAL NOTES :     Parameter is uintptr_t, the type VP6_SetPbParam takes. 
+ *
+ ****************************************************************************/
+void 
+vp60_SetParameter(DXL_XIMAGE_HANDLE src, int Command, uintptr_t Parameter)
+{
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+
+	VP6_SetPbParam(thisAlgorithmBase->myPBI, (PB_COMMAND_TYPE) Command, Parameter );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile
new file mode 100644
index 00000000..7cf2436d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Makefile
@@ -0,0 +1,69 @@
+## Target to build
+
+TARGET 		=libvp6d
+
+## TOOLS
+CC      	= ecc
+LD      	= ecc
+AR      	= ar
+OBJDUMP 	= objdump
+RM      	= rm -f
+
+## Directories
+TOPDIR  		=C:\DuckSoft
+PRIVATEINCLUDE  =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include 
+VP6INCLUDE      =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\include
+CXGENERIC       =${TOPDIR}\private\corelibs\cdxv\vp60\vp60\cx\generic
+OBJDIR          =${TOPDIR}\ObjectCode\bspvp6e
+CURRENTDIR 		=${TOPDIR}\private\corelibs\cdxv\vp60\vp60 
+LIBDIR			=${TOPDIR}\private\corelibs\lib\mapca 
+
+## Compile Flags
+ALLINCLUDES     =-I${VP6INCLUDE} -I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2} 
+VP6DEFINES		=-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES      =-DMAPCA
+ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
+DEBUG			=-O2
+CFLAGS 			=-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+				-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+				-magen_interroutine_padding
+ALLFLAGS 		= $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS		= 	bsp\boolhuff.o				\
+				generic\decodembs.o			\
+				generic\decodemode.o		\
+				generic\decodemv.o			\
+				generic\DFrameR.o			\
+				generic\FrameIni.o			\
+				generic\Huffman.o			\
+				generic\pb_globals.o		\
+				generic\quantize.o			\
+				generic\recon.o				\
+				generic\TokenEntropy.o		\
+				bsp\bspQuantize.o			\
+				bsp\DSystemDependant.o		\
+				bsp\duck_mem.o			\
+				generic\vfwpbdll_if.o   
+
+SRCS		= $(OBJS:.o=.c)
+
+ARTARGET	= ${TARGET}.a
+
+# archive (first rule, so the default goal). NOTE(review): the target is the literal word ARTARGET, not ${ARTARGET} -- confirm intent
+
+ARTARGET:${OBJS}
+	${AR} -cr ${ARTARGET} ${OBJS}
+	mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+	$(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+	${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c
new file mode 100644
index 00000000..0ac90ff1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/OptFunctions.c
@@ -0,0 +1,315 @@
+/****************************************************************************
+*
+*   Module Title :     OptFunctions.c
+*
+*   Description  :     MMX or otherwise processor specific 
+*                      optimised versions of functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+ *  1.08 JBB 13 Jun 01 VP4 Code Clean Out
+*   1.07 JBB 26/01/01  Removed unused function
+*	1.06 YWX 23/05/00  Remove the clamping in MmxReconPostProcess()
+*	1.05 YWX 15/05/00  Added MmxReconPostProcess()
+*   1.04 SJL 03/14/00  Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2. 
+*   1.03 PGW 12/10/99  Changes to reduce uneccessary dependancies. 
+*   1.02 PGW 30/08/99  Minor changes to MmxReconInterHalfPixel2().
+*   1.01 PGW 13/07/99  Changes to keep reconstruction data to 16 bit
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/* 
+    Use Tim's optimized version.
+*/
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              // Strict type checking. 
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Imports.
+*****************************************************************************
+*/   
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Functions 
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/  
+
+INT16 Ones[4]               = {1,1,1,1};
+INT16 OneTwoEight[4]        = {128,128,128,128};
+UINT8 Eight128s[8]          = {128,128,128,128,128,128,128,128};  // 0x80 in every byte; loaded into mm0 by MMXReconIntra
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+                                      
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearSysStateC()
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ * 
+ *
+ *  FUNCTION      :     Does nothing (empty body)
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearSysStateC(void)
+{
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearMmx()
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ * 
+ *
+ *  FUNCTION      :     Clears down the MMX state (executes a single emms)
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+	__asm
+	{
+		emms									; Clear the MMX state.
+	}
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MMXReconIntra
+ *
+ *  INPUTS        :     INT16 *  idct
+ *                               Pointer to the output from the idct for this block
+ *
+ *                      INT32    stride
+ *                               Byte step added to dest after each 8-byte row is stored
+ *                               
+ *                      PB_INSTANCE *pbi
+ *                               Not referenced by this routine
+ *
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - MMX version
+ *
+ *  SPECIAL NOTES :     Tim Murphy's optimized version 
+ *                      8 rows: 128 bytes of idct input are packed with signed saturation,
+ *                      then XORed with 0x80 per byte; no emms is issued here.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MMXReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    __asm
+    {
+        // u    pipe
+        //   v  pipe
+        mov         eax,[idct]              ; Signed 16 bit inputs
+          mov         edx,[dest]            ; Signed 8 bit outputs
+        movq        mm0,[Eight128s]         ; Set mm0 to 0x8080808080808080
+          ;
+        mov         ebx,[stride]            ; Line stride in output buffer
+          lea         ecx,[eax+128]         ; Endpoint in input buffer
+loop_label:                                 ;
+        movq        mm2,[eax]               ; First four input values
+          ;
+        packsswb    mm2,[eax+8]             ; pack with next(high) four values
+          por         mm0,mm0               ; stall
+        pxor        mm2,mm0                 ; Convert result to unsigned (same as add 128)
+          lea         eax,[eax + 16]        ; Step source buffer
+        cmp         eax,ecx                 ; are we done
+          ;
+        movq        [edx],mm2               ; store results
+          ;
+        lea         edx,[edx+ebx]           ; Step output buffer
+          jc          loop_label            ; Loop back if we are not done
+    }
+    // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MmxReconInter
+ *
+ *  INPUTS        :     UINT8 *  RefPtr
+ *                               The last frame reference
+ *
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data
+ *
+ *                      UINT32   LineStep
+ *                               Byte step applied to both RefPtr and ReconPtr per row
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change
+ *
+ *  SPECIAL NOTES :     8 rows of 8 pixels (128 bytes of change data); pbi is unused.
+ *                      Sums use signed saturation and are packed back to unsigned bytes.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MmxReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) pbi;
+
+ _asm {
+	push	edi
+;;	 mov	ebx, [ref]
+;;	mov		ecx, [diff]
+;;	 mov	eax, [dest]
+;;	mov		edx, [stride]
+	 mov	ebx, [RefPtr]
+	mov		ecx, [ChangePtr]
+	 mov	eax, [ReconPtr]
+	mov		edx, [LineStep]
+	 pxor	mm0, mm0
+	lea		edi, [ecx + 128]
+	 ;
+  L:
+	movq	mm2, [ebx]			; (+3 misaligned) 8 reference pixels
+	 ;
+	movq	mm4, [ecx]			; first 4 changes
+	 movq	mm3, mm2
+	movq	mm5, [ecx + 8]		; last 4 changes
+	 punpcklbw mm2, mm0			; turn first 4 refs into positive 16-bit #s
+	paddsw	mm2, mm4			; add in first 4 changes
+	 punpckhbw mm3, mm0			; turn last 4 refs into positive 16-bit #s
+	paddsw	mm3, mm5			; add in last 4 changes
+	 add	ebx, edx			; next row of reference pixels
+	packuswb mm2, mm3			; pack result to unsigned 8-bit values
+	 lea	ecx, [ecx + 16]		; next row of changes
+	cmp		ecx, edi			; are we done?
+	 ;
+	movq	[eax], mm2			; store result
+	 ;
+	lea		eax, [eax+edx]		; next row of output
+	 jc		L					; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+	pop		edi
+ }
+}
+
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CopyBlockMMX
+ *
+ *  INPUTS        :     src : first source row; srcstride : byte step between rows
+ *
+ *  OUTPUTS       :     dest : receives 8 rows of 8 bytes
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies a block from source to destination
+ *
+ *  SPECIAL NOTES :     srcstride is applied to BOTH src and dest; no emms is issued here. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+	unsigned char *s = src;
+	unsigned char *d = dest;
+	unsigned int stride = srcstride;
+	// recon copy: two passes of four rows each (rows 0-3, then rows 4-7)
+	_asm
+	{
+			mov		ecx, [stride]
+			mov		eax, [s]
+			mov		ebx, [d]
+			lea		edx, [ecx + ecx * 2]
+
+			movq	mm0, [eax]
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			lea		eax, [eax + ecx*4]
+
+			movq	[ebx], mm0
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+
+			lea		ebx, [ebx + ecx * 4]
+
+			movq	mm0, [eax]
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			movq	[ebx], mm0
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+	}
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c
new file mode 100644
index 00000000..5ad15136
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/WmtOptFunctions.c
@@ -0,0 +1,208 @@
+ /****************************************************************************
+ *
+ *   Module Title :     WmtOptFunctions.c
+ *
+ *   Description  :     willamette processor specific 
+ *                      optimised versions of functions
+ *
+ *   AUTHOR      :		Yaowu Xu
+ *
+ *	 Special Note:		
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *
+ *  1.04 JBB 13 Jun 01 VP4 Code Clean Out
+ *   1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * 			Added push and pop ebx in WmtReconIntra
+ *   1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ *   1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ *   1.00 YWX 14/06/00  Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+ 
+/* 
+    Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+#define STRICT              // Strict type checking. 
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+/**************************************************************************** 
+ *  Imports.
+ *****************************************************************************
+ */   
+
+
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions 
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */  
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+static  UINT8 Eight128s[8] =  {128,128,128,128,128,128,128,128};
+#pragma pack()
+#else
+_declspec(align(16)) static  UINT8 Eight128s[8] =  {128,128,128,128,128,128,128,128};
+#endif
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+                                      
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconIntra
+ *
+ *  INPUTS        :     INT16 *  idct
+ *                               idct output for this block; read with movdqa (16-byte aligned)
+ *
+ *                      INT32    stride
+ *                               Byte step added to dest after each 8-byte row is stored
+ *                               
+ *                      PB_INSTANCE *pbi
+ *                               Not referenced by this routine
+ *
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - wmt version
+ *                      8 rows: pack to bytes with signed saturation, then XOR 0x80 per byte
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    __asm
+    {
+	
+		push		ebx
+
+        mov         eax,[idct]						; Signed 16 bit inputs
+        mov         edx,[dest]						; Unsigned 8 bit outputs
+
+        movq		xmm0,QWORD PTR [Eight128s]		; Set xmm0 to 0x000000000000008080808080808080
+		pxor		xmm3, xmm3						; set xmm3 to 0
+													;
+        mov         ebx,[stride]					; Line stride in output buffer
+        lea         ecx,[eax+128]					; Endpoint in input buffer
+
+loop_label:                                 
+
+        movdqa		xmm2,XMMWORD PTR [eax]			; Read the eight inputs
+		packsswb	xmm2,xmm3						;		
+		
+		pxor        xmm2,xmm0						; Convert result to unsigned (same as add 128)
+        lea         eax,[eax + 16]					; Step source buffer
+
+        cmp         eax,ecx							; are we done
+        movq		QWORD PTR [edx],xmm2			; store results
+
+        lea         edx,[edx+ebx]					; Step output buffer
+        jc          loop_label						; Loop back if we are not done
+
+		pop			ebx
+    }
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconInter
+ *
+ *  INPUTS        :     UINT8 *  RefPtr
+ *                               The last frame reference
+ *
+ *                      INT16 *  ChangePtr
+ *                               Change data; read with movdqa (16-byte aligned)
+ *
+ *                      UINT32   LineStep
+ *                               Byte step applied to both RefPtr and ReconPtr per row
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change
+ *
+ *  SPECIAL NOTES :     8 rows of 8 pixels; all 8 changes of a row are added by one paddsw.
+ *                      pbi is unused.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) pbi;
+
+ _asm {
+		push	edi
+		
+		mov		ebx, [RefPtr]
+		mov		ecx, [ChangePtr]
+
+		mov		eax, [ReconPtr]
+		mov		edx, [LineStep]
+
+		pxor	xmm0, xmm0
+		lea		edi, [ecx + 128]
+  L:
+		movq	xmm2, QWORD ptr [ebx]		; (+3 misaligned) 8 reference pixels
+		movdqa	xmm4, XMMWORD ptr [ecx]		; 8 changes
+		
+		punpcklbw xmm2, xmm0				; 
+
+		add	ebx, edx						; next row of reference pixels
+		paddsw	xmm2, xmm4					; add in first 4 changes
+
+		lea		ecx, [ecx + 16]				; next row of changes
+		packuswb xmm2, xmm0					; pack result to unsigned 8-bit values
+
+		cmp		ecx, edi					; are we done?
+		movq	QWORD PTR [eax], xmm2		; store result
+
+		lea		eax, [eax+edx]				; next row of output
+		jc		L							; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+		pop		edi
+ }
+
+}
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c
new file mode 100644
index 00000000..3ec9dcb0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/dsystemdependant.c
@@ -0,0 +1,334 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <windows.h>
+#include "pbdll.h"
+#include "math.h"
+
+#include "vp60dversion.h"
+
+#include "quantize.h" 
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning(disable:4115)
+
+#define MMX_ENABLED 1
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_SetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi        : Pointer to decoder instance.
+ *                      PB_COMMAND_TYPE Command : Command action specifier.
+ *                      uintptr_t Parameter     : Command dependent value.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *  
+ *  FUNCTION      :     Generalised command interface to decoder.
+ *
+ *  SPECIAL NOTES :     Does nothing unless POSTPROCESS is defined; unknown
+ *                      commands are ignored.
+ ****************************************************************************/
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+    {
+#if defined(_MSC_VER)
+        double Pixels = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+        double FreeMhz = pbi->ProcessorFrequency * Parameter / 100;   // Parameter acts as a percentage of ProcessorFrequency
+        double PixelsPerMhz = 100 * sqrt(1.0*Pixels) / FreeMhz;
+#else
+        double PixelsPerMhz = 100 *10;   // fixed 1000: always selects PostProcessingLevel 0 below
+#endif
+        pbi->CPUFree = Parameter; 
+
+        if( PixelsPerMhz > 150 )
+            pbi->PostProcessingLevel = 0;
+        else if( PixelsPerMhz > 100 )
+            pbi->PostProcessingLevel = 8;
+        else if( PixelsPerMhz > 90 )
+            pbi->PostProcessingLevel = 4;
+        else if( PixelsPerMhz > 80 )
+            pbi->PostProcessingLevel = 5;
+        else
+            pbi->PostProcessingLevel = 6;
+        break;
+    }
+
+    case PBC_SET_ADDNOISE:
+        pbi->AddNoiseMode = Parameter;
+        //SetAddNoiseMode(pbi->postproc, Parameter);
+        break;
+
+	case PBC_SET_REFERENCEFRAME:   // Parameter is interpreted as a YUV_BUFFER_CONFIG pointer
+		CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+		CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+		break;
+
+	case PBC_SET_POSTPROC:
+        if( Parameter == 9 )                // 9 is special-cased: derive the level from a CPUFREE setting of 70
+            VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        else
+        {
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        break;
+    }
+#endif
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_readTSC
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : unsigned long *tsc : Receives the low 32 bits of the counter.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads the cpu time stamp counter.
+ *
+ *  SPECIAL NOTES : Since this function uses RDTSC instruction, which is 
+ *					introduced in Pentium processor, this routine is only
+ *					expected to work on Pentium and above.
+ *
+ ****************************************************************************/
+void VP6_readTSC ( unsigned long *tsc )
+{
+	int time;
+	
+	__asm 
+	{
+        pushad			// save all general registers (cpuid and rdtsc overwrite several)
+        cpuid			// presumably issued to serialize execution before rdtsc - TODO confirm
+		rdtsc
+		mov time,eax	// keep EAX (low 32 bits); EDX (high 32 bits) is discarded
+        popad
+	}
+	*tsc = time;
+	return;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_GetProcessorFrequency
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : unsigned long: The processors' frequency (in MHz), snapped
+ *                  to the nearest multiple of 50 or 66.67 MHz; 0 on failure.
+ *  FUNCTION      : Measures TSC ticks against the performance counter over a
+ *                  short busy-wait to estimate the processor's frequency.
+ *  SPECIAL NOTES : This function should only be used here. Limited tests 
+ *					have verified it works till 166MHz Pentium with MMX. 
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency()
+{
+	LARGE_INTEGER pf;						// Performance Counter Frequency
+	LARGE_INTEGER startcount, endcount;		// Performance counter before/after the wait
+	unsigned long tsc1, tsc2, counterDelta;	// TSC samples; counter ticks elapsed
+    unsigned long time1, time2;
+	unsigned long cpufreq = 0;				// Stays 0 if no measurement can be made
+    unsigned long Nearest66Mhz, Nearest50Mhz;
+    unsigned long Delta66, Delta50;
+
+	// If the cpu does not support the high resolution counter, return 0
+	if ( QueryPerformanceFrequency( &pf ) )
+	{
+		// read the counter and TSC at start
+		QueryPerformanceCounter ( &startcount );
+		VP6_readTSC ( &tsc1 );
+
+		// busy-wait 5 ms; comparing elapsed time stays correct if timeGetTime() wraps
+        time1 = timeGetTime();
+        time2 = time1;
+        while ( (time2 - time1) < 5 )
+            time2 = timeGetTime();
+
+		// read the counter and TSC at end
+		QueryPerformanceCounter ( &endcount );
+		VP6_readTSC ( &tsc2 );
+
+		// calculate the frequency; skipped when no counter tick elapsed (avoids /0)
+		counterDelta = endcount.LowPart - startcount.LowPart;
+		if ( counterDelta != 0 )
+			cpufreq = (unsigned long )( (double)(tsc2-tsc1) 
+			            * (double)pf.LowPart 
+			            / (double) counterDelta 
+			            / 1000000 );
+	}
+   
+    Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+    Delta66      = ( Nearest66Mhz > cpufreq ) ? Nearest66Mhz - cpufreq : cpufreq - Nearest66Mhz;
+    Nearest50Mhz = ((cpufreq + 25)/50 *50);
+    Delta50      = ( Nearest50Mhz > cpufreq ) ? Nearest50Mhz - cpufreq : cpufreq - Nearest50Mhz;
+
+    if ( Delta50 < Delta66 )
+        cpufreq = Nearest50Mhz;
+    else
+    {
+        cpufreq = Nearest66Mhz;
+        if ( cpufreq == 666 )
+            cpufreq = 667;
+    }
+    return cpufreq;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_DMachineSpecificConfig
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Queries the processor feature flags and points
+ *                  VP6_BuildQuantIndex at the matching implementation.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+	INT32 HasMmx;
+	INT32 HasXmm;      // reported by GetProcessorFlags but not consulted here
+	INT32 HasWmt;
+
+	GetProcessorFlags ( &HasMmx, &HasXmm, &HasWmt );
+
+	// Selection order:
+	//   1. Willamette routine when WMT support is reported
+	//   2. MMX routine when only MMX support is reported
+	//   3. generic C routine when neither is reported
+	VP6_BuildQuantIndex = HasWmt ? VP6_BuildQuantIndex_ForWMT
+	                    : HasMmx ? VP6_BuildQuantIndex_ForMMX
+	                    :          VP6_BuildQuantIndex_Generic;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_IssueWarning
+ *
+ *  INPUTS        : char *WarningMessage : Pointer to warning message text.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Issues a warning message.
+ *
+ *  SPECIAL NOTES : Shows a task-modal message box with an exclamation icon,
+ *                  no owner window (NULL) and no caption text (NULL).
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+    MessageBox ( NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_PauseProcess
+ *
+ *  INPUTS        : unsigned int SleepMs : Time (in milli-seconds) to wait.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Pause/Sleep for specified time (in milli-seconds).
+ *
+ *  SPECIAL NOTES : Thin wrapper: passes SleepMs straight to Sleep(). 
+ *
+ ****************************************************************************/
+void VP6_PauseProcess ( unsigned int SleepMs )
+{
+    Sleep ( SleepMs );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_SytemGlobalAlloc
+ *
+ *  INPUTS        : unsigned int Size : Size of block of memory (in bytes).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : char *: Pointer to allocated block of memory.
+ *
+ *  FUNCTION      : Allocates a block of memory of specified size.
+ *
+ *  SPECIAL NOTES : Allocates with the GPTR flag and returns the GlobalAlloc
+ *                  result unchecked, so callers must handle a NULL return.
+ ****************************************************************************/
+char *VP6_SytemGlobalAlloc ( unsigned int Size )  
+{
+    return GlobalAlloc( GPTR, Size );  
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_SystemGlobalFree
+ *
+ *  INPUTS        : char *MemPtr : Pointer to block of memory.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-allocates a block of memory.
+ *
+ *  SPECIAL NOTES : Passes MemPtr to GlobalFree(); the GlobalFree result is
+ *                  ignored. Presumably pairs with VP6_SytemGlobalAlloc.
+ ****************************************************************************/
+void VP6_SystemGlobalFree ( char* MemPtr )
+{
+    GlobalFree ( (HGLOBAL)MemPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c
new file mode 100644
index 00000000..0bc11412
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/quantindexmmx.c
@@ -0,0 +1,381 @@
+/****************************************************************************
+*
+*   Module Title :     quantindexmmx.c
+*
+*   Description  :     
+*
+****************************************************************************/						
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "quantize.h"
+#include "math.h"
+/****************************************************************************
+*  Macros
+****************************************************************************/ 
+#define MIN16 ((1<<16)-1)
+       
+/****************************************************************************
+*  Module Statics
+****************************************************************************/
+static UINT32 dequant_index[64] =    // zig-zag scan position -> raster (row-major) coefficient index
+{	
+     0,  1,  8, 16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+ 
+static UINT32 dequant_indexMMX[64] =    // inverse of dequant_index: raster index -> zig-zag scan position
+{
+     0,  1,  5,  6, 14, 15, 27, 28,
+     2,  4,  7, 13, 16, 26, 29, 42,
+     3,  8, 12, 17, 25, 30, 41, 43,
+     9, 11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54, 
+    20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61,
+    35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/* Used to unravel the coeffs in the proper order required */
+/* by MMX_idct (see mmxidct.cxx)                           */
+static UINT32 transIndexMMX[64] = 
+{
+     0,  8,  1,  2,    9, 16, 24, 17,
+    10,  3, 32, 11,   18, 25,  4, 12,
+     5, 26, 19, 40,   33, 34, 41, 48,
+    27,  6, 13, 20,   28, 21, 14,  7,
+
+    56, 49, 42, 35,   43, 50, 57, 36, 
+    15, 22, 29, 30,   23, 44, 37, 58,
+    51, 59, 38, 45,   52, 31, 60, 53,
+    46, 39, 47, 54,   61, 62, 55, 63
+};
+
+static UINT32 transIndexWMT[64] =    // same values as dequant_index with row and column swapped (transposed)
+{	
+	 0,  8,  1,  2,   9, 16, 24, 17,
+	10,  3,  4, 11,	 18, 25, 32, 40,
+    33, 26, 19, 12,   5,  6, 13, 20,
+    27, 34, 41, 48,  56, 49, 42, 35,
+    28, 21, 14,  7,  15, 22, 29, 36, 
+    43, 50, 57, 58,  51, 44, 37, 30,
+    23, 31, 38, 45,  52, 59, 60, 53,
+    46, 39, 47, 54,  61, 62, 55, 63
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_BuildQuantIndex_ForMMX
+ *
+ *  INPUTS        : QUANTIZER *pbi : Pointer to quantizer instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Selects transIndexMMX as the instance's transIndex and
+ *                  fills quant_index through that transposed ordering.
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi )
+{
+    INT32 Coeff = 0;
+
+    pbi->transIndex = transIndexMMX;
+
+    // quant_index[ transposed slot of entry ] = entry number
+    while ( Coeff < BLOCK_SIZE )
+    {
+        pbi->quant_index[ transIndexMMX[ dequant_indexMMX[Coeff] ] ] = Coeff;
+        Coeff++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_BuildQuantIndex_ForWMT
+ *
+ *  INPUTS        : QUANTIZER *pbi : Pointer to quantizer instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Selects transIndexWMT as the instance's transIndex and
+ *                  fills quant_index through that transposed ordering.
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi )
+{
+    INT32 Coeff = 0;
+
+    pbi->transIndex = transIndexWMT;
+
+    // quant_index[ transposed slot of entry ] = entry number
+    while ( Coeff < BLOCK_SIZE )
+    {
+        pbi->quant_index[ transIndexWMT[ dequant_indexMMX[Coeff] ] ] = Coeff;
+        Coeff++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_quantize_wmt
+ *
+ *  INPUTS        : QUANTIZER *qi                : Pointer to quantizer instance.
+ *                  INT16 *DCT_block             : Pointer to block of DCT coeffs.
+ *                  UINT8 bp                     : Position of block in MB.
+ *
+ *  OUTPUTS       : Q_LIST_ENTRY *quantized_list : Pointer to block of quantized DCT coeffs.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Quantizes an 8x8 block of DCT coefficients.  
+ *
+ *  SPECIAL NOTES : DCT_block and quantized_list are accessed with movdqa and
+ *                  so must be 16-byte aligned. DCT_block is not modified.
+ ****************************************************************************/
+void VP6_quantize_wmt
+( 
+    QUANTIZER *qi, 
+    INT16 *DCT_block, 
+    Q_LIST_ENTRY *quantized_list, 
+    UINT8 bp 
+)
+{
+    UINT32  i, j;
+	INT32   temp;
+    INT32 ThisDC;
+    
+    UINT32	ColourPlane   = VP6_QTableSelect[bp]; 
+    INT32 *QuantRoundPtr  = qi->QuantRound[ColourPlane];
+    INT32 QuantCoeffsDC   = qi->QuantCoeffs[ColourPlane][0];
+    INT32 *ZBinPtr        = qi->ZeroBinSize[ColourPlane];
+	INT16 *round          = &qi->round[0];
+	INT16 *mult           = &qi->mult[0];
+    INT32  Zrl            = 0;
+    INT32 * ZrlCorrection = qi->ZlrZbinCorrections[ColourPlane];
+__declspec(align(16)) unsigned short xyw[64];  
+  
+
+	// quantize every coefficient into xyw[] and clear quantized_list[] (128 bytes each)
+	__asm
+	{
+		// setup and collect registers
+		mov			esi, DCT_block
+		xor         ecx, ecx        // index ptr
+		mov			edi, round
+		movdqu      xmm2, [edi]		// get the round values (8 words, reused for every row)
+		mov         edi, mult
+		movdqu      xmm3, [edi]     // get the quantizer values (8 words, reused for every row)
+        lea         edi,  xyw      
+        
+        mov         eax,  quantized_list
+        pxor        xmm7, xmm7
+	
+		// 8 coefficients at a time loop 
+next8:
+		movdqa      xmm0, [esi+ecx]	// get source values
+		movdqa      xmm1, xmm0		// sign bits of the abs values 
+		psraw		xmm1, 15		// negative all 1's postive all 0's
+
+        // get the absolute value of the input values
+		pxor        xmm0, xmm1      // one's complement of negatives 
+		psubw       xmm0, xmm1      // xmm0 = abs coeffs
+
+		// calculate & round quantizer
+		paddw		xmm0, xmm2      // Coeff + Quant Round
+        pmulhuw     xmm0, xmm3      // *QuantCoeffs >> 16
+
+
+		// get back the sign bit
+        pxor        xmm0, xmm1      // ones complement of negatives
+        psubw       xmm0, xmm1      // negatives are back as negative
+
+		// output the results
+		movdqa      [edi+ecx], xmm0 // quantized values go to the local xyw[] scratch array
+        movdqa      [eax+ecx], xmm7 // the output list is zeroed here and filled in below
+
+		// loop back to the next set
+		add         ecx, 16			
+		cmp			ecx, 128
+		jl          next8
+	}
+
+
+	// DC quantization 
+    ThisDC = DCT_block[0];
+	if ( ThisDC >= ZBinPtr[0] )
+	{
+		temp = QuantCoeffsDC * ( ThisDC + QuantRoundPtr[0] );
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+    else if ( ThisDC <= -ZBinPtr[0] )
+	{
+		temp = QuantCoeffsDC * ( ThisDC - QuantRoundPtr[0] ) + MIN16;
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+	else
+    {    
+		Zrl++;      // DC fell inside the zero bin: it stays 0 and starts the zero run
+    }
+
+
+	// walk the AC coefficients in zig-zag order, keeping those outside the zero bin
+    for ( i=1; i<64; i++ )
+    {
+		INT32 x;
+        INT32 y;
+        // Zig Zag order
+		j = dequant_index[i];
+        x = xyw[j];     // NOTE(review): zero-extended from unsigned short; sign presumably restored by a 16-bit Q_LIST_ENTRY - confirm
+        y= abs( DCT_block[j]);
+        if(y<ZBinPtr[j] + ZrlCorrection[Zrl])     // zero-bin threshold is adjusted by the current zero-run length
+        {
+            Zrl ++;
+        }
+        else
+        {
+            Zrl = 0;
+            quantized_list[i] = x;     // stored at scan position i, value taken from raster position j
+        }
+    }
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_quantize_mmx
+ *
+ *  INPUTS        : QUANTIZER *qi                : Pointer to quantizer instance.
+ *                  INT16 *DCT_block             : Pointer to block of DCT coeffs.
+ *                  UINT8 bp                     : Position of block in MB.
+ *  OUTPUTS       : Q_LIST_ENTRY *quantized_list : Pointer to block of quantized DCT coeffs.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Quantizes an 8x8 block of DCT coefficients.  
+ *
+ *  SPECIAL NOTES : NOTE(review): no emms follows the MMX code; presumably the
+ *                  caller clears MMX state before x87 floating point - confirm.
+ *                  NOTE(review): pmulhuw on mm registers is not baseline MMX
+ *                  (it arrived with SSE) - confirm the CPU dispatch covers this.
+ ****************************************************************************/
+void VP6_quantize_mmx
+( 
+    QUANTIZER *qi, 
+    INT16 *DCT_block, 
+    Q_LIST_ENTRY *quantized_list, 
+    UINT8 bp 
+)
+{
+    UINT32  i, j;
+	INT32   temp;
+    INT32 ThisDC;
+    
+    UINT32	ColourPlane   = VP6_QTableSelect[bp]; 
+    INT32 *QuantRoundPtr  = qi->QuantRound[ColourPlane];
+    INT32 QuantCoeffsDC   = qi->QuantCoeffs[ColourPlane][0];
+    INT32 *ZBinPtr        = qi->ZeroBinSize[ColourPlane];
+	INT16 *round          = &qi->round[0];
+	INT16 *mult           = &qi->mult[0];
+    INT32  Zrl            = 0;
+    INT32 * ZrlCorrection = qi->ZlrZbinCorrections[ColourPlane];
+__declspec(align(16)) unsigned short xyw[64];  
+  
+
+	// quantize every coefficient into xyw[] and clear quantized_list[] (128 bytes each)
+	__asm
+	{
+		// setup and collect registers
+		mov			esi, DCT_block
+		xor         ecx, ecx        // index ptr
+		mov			edi, round
+		movq        mm2, [edi]		// get the round values (4 words, reused for every group)
+		mov         edi,  mult
+		movq        mm3, [edi]     // get the quantizer values (4 words, reused for every group)
+        lea         edi,  xyw      
+        
+        mov         eax,  quantized_list
+        pxor        mm7, mm7
+	
+		// 4 coefficients at a time loop (one 64-bit MMX register per pass)
+next8:
+		movq        mm0, [esi+ecx]	// get source values
+		movq        mm1, mm0		// sign bits of the abs values 
+		psraw		mm1, 15		// negative all 1's postive all 0's
+
+        // get the absolute value of the input values
+		pxor        mm0, mm1      // one's complement of negatives 
+		psubw       mm0, mm1      // mm0 = abs coeffs
+
+		// calculate & round quantizer
+		paddw		mm0, mm2      // Coeff + Quant Round
+        pmulhuw     mm0, mm3      // *QuantCoeffs >> 16
+
+
+		// get back the sign bit
+        pxor        mm0, mm1      // ones complement of negatives
+        psubw       mm0, mm1      // negatives are back as negative
+
+		// output the results
+		movq        [edi+ecx], mm0 // quantized values go to the local xyw[] scratch array
+        movq        [eax+ecx], mm7 // the output list is zeroed here and filled in below
+
+		// loop back to the next set
+		add         ecx, 8			
+		cmp			ecx, 128
+		jl          next8
+	}
+
+
+	// DC quantization 
+    ThisDC = DCT_block[0];
+	if ( ThisDC >= ZBinPtr[0] )
+	{
+		temp = QuantCoeffsDC * ( ThisDC + QuantRoundPtr[0] );
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+    else if ( ThisDC <= -ZBinPtr[0] )
+	{
+		temp = QuantCoeffsDC * ( ThisDC - QuantRoundPtr[0] ) + MIN16;
+		quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+	}
+	else
+    {    
+		Zrl++;      // DC fell inside the zero bin: it stays 0 and starts the zero run
+    }
+
+
+	// walk the AC coefficients in zig-zag order, keeping those outside the zero bin
+    for ( i=1; i<64; i++ )
+    {
+		INT32 x;
+        INT32 y;
+        // Zig Zag order
+		j = dequant_index[i];
+        x = xyw[j];     // NOTE(review): zero-extended from unsigned short; sign presumably restored by a 16-bit Q_LIST_ENTRY - confirm
+        y= abs( DCT_block[j]);
+        if(y<ZBinPtr[j] + ZrlCorrection[Zrl])     // zero-bin threshold is adjusted by the current zero-run length
+        {
+            Zrl ++;
+        }
+        else
+        {
+            Zrl = 0;
+            quantized_list[i] = x;     // stored at scan position i, value taken from raster position j
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c
new file mode 100644
index 00000000..8df2b37f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/timer.c
@@ -0,0 +1,147 @@
+/****************************************************************************
+*
+*   Module Title :     Timer.C
+*
+*   Description  :     Video CODEC timer module
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*   
+*   1.01 PGW 09/07/99  Added code to support profile timing
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/					  
+
+#define STRICT              /* Strict type checking. */
+#define INC_WIN_HEADER      1
+#include <windows.h>
+
+#include "type_aliases.h"
+#include <mmsystem.h> 
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+                
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/              
+
+// Previous 32-bit CPU time sample, kept between calls to compute elapsed time
+UINT32 LastCPUTime;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyInitTimer
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Initialises the timer mechanism.
+ *
+ *  SPECIAL NOTES :     Currently a no-op: the function body is empty. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MyInitTimer( void )
+{
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyGetTime
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The value of timeGetTime() (a millisecond count).
+ *
+ *  FUNCTION      :     Provides a model independent interface for getting times.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 MyGetTime( void )	  
+{
+/* Use different timing mechanisms for win32 and win16. 
+*  The win16 method is accurate to 1ms whilst the Win32 is not guaranteed to better than 16ms
+*/
+    return timeGetTime();
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyGetElapsedCpuTime
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None (updates the global LastCPUTime).
+ *
+ *  RETURNS       :     Elapsed time since the last call, in units of 256
+ *                      CPU cycles (bits 8..39 of the time stamp counter).
+ *  FUNCTION      :     Calculate the CPU time elapsed since the last call
+ *
+ *  SPECIAL NOTES :     The first call measures from 0 because LastCPUTime
+ *                      starts out zero-initialised. Wrap-around of the 32 bit
+ *                      value is handled by modulo 2^32 subtraction.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 MyGetElapsedCpuTime( void )	  
+{
+    UINT32 CurrCPUTime[2];                          // Full 64 bit CPU time
+    UINT32 CurrentCpuTime;                          // modified 32 bit current time
+    UINT32 ElapsedTime;
+
+__asm
+	{
+        rdtsc                                       ; Get CPU time into EDX:EAX
+
+        mov         dword ptr [CurrCPUTime], eax    ; Save low half to the local array
+        mov         dword ptr [CurrCPUTime+4], edx  ; Save high half to the local array
+    }
+
+    // Build a 32 bit sample from bits 8..39 of the 64 bit counter:
+    // the low word supplies bits 8..31 and the low byte of the high
+    // word supplies the top 8 bits.
+    CurrCPUTime[0] = (CurrCPUTime[0] >> 8);
+    CurrCPUTime[1] = (CurrCPUTime[1] & 0x000000FF) << 24;
+    CurrentCpuTime = CurrCPUTime[0] | CurrCPUTime[1];
+
+    // Both values are 32 bit samples of a free running counter, so
+    // unsigned subtraction (modulo 2^32) yields the elapsed count
+    // whether or not the sample wrapped since the last call; no
+    // separate wrap-around branch is needed.
+    ElapsedTime = CurrentCpuTime - LastCPUTime;
+
+    // Remember this sample for the next call
+    LastCPUTime = CurrentCpuTime;
+
+    return ElapsedTime;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c
new file mode 100644
index 00000000..99130f88
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win32/vp60dxv.c
@@ -0,0 +1,420 @@
+/****************************************************************************
+*
+*   Module Title :     vp60dxv.c
+*
+*   Description  :     Defines the entry point for the console application.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>   // For Abs()
+#include "pbdll.h"
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+#include "duck_dxl.h"
+
+#include <stddef.h>
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#ifdef _MSC_VER 
+#pragma warning(disable:4055)
+#endif 
+
+#define VP60_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '0')
+#define VP61_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '1')
+#define VP62_FOURCC DXL_MKFOURCC( 'V', 'P', '6', '2')
+extern int VPX_GetSizeOfPixel(dxvBitDepth bd);
+extern void *VPX_GetBlitter(dxvBlitQuality bq, dxvBitDepth bd);
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/
+typedef unsigned long FourCC;
+
+typedef struct  // YUV buffer configuration structure
+{
+	int   YWidth;
+	int   YHeight;
+	int   YStride;
+
+	int   UVWidth;
+	int   UVHeight;
+	int   UVStride;
+
+	char *YBuffer;
+	char *UBuffer;
+	char *VBuffer;
+
+	char *uvStart;
+	int   uvDstArea;
+	int   uvUsedArea;
+} DXV_YUV_BUFFER_CONFIG;	// passed to VP6_GetYUVConfig via a cast to YUV_BUFFER_CONFIG *; layouts presumably must match - verify
+
+/* define an xImage structure based on the core xImage struct */
+typedef struct tXImageCODEC
+{
+	FourCC myFourCC;
+	DXV_YUV_BUFFER_CONFIG FrameBuffer;	// refreshed by vp60_decompress through VP6_GetYUVConfig
+	PB_INSTANCE *myPBI;					// decoder instance handed to VP6_StartDecoder / VP6_StopDecoder
+	int owned;							// set to 1 once VP6_StartDecoder succeeds; gates VP6_StopDecoder on destroy
+	int decompressedOnce;				// set to 1 after a frame has been decoded successfully
+
+} vp60_XIMAGE, *vp60_XIMAGE_HANDLE;
+
+typedef void ((*vp6BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp6_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+*  Modul Statics
+****************************************************************************/
+
+/****************************************************************************
+*  Forward declarationss
+****************************************************************************/
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src,int Command, uintptr_t Parameter );
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern void VP6_VPInitLibrary(void);
+extern void VP6_VPDeInitLibrary(void);
+extern void VP6_readTSC(unsigned long *tsc);
+
+int vp60_getWH(DXL_XIMAGE_HANDLE src, int *w, int *h)
+{
+	/* report the frame size recorded in the decoder instance's configuration */
+	PB_INSTANCE *pbi = ((vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src))->myPBI;
+	*w = pbi->Configuration.VideoFrameWidth;
+	*h = pbi->Configuration.VideoFrameHeight; return DXL_OK;
+}
+#if 0
+/****************************************************************************
+* 
+*  ROUTINE       :     vp60_GetInfo
+*
+*  INPUTS        :     unsigned char *source : Start of a compressed frame.
+*                      
+*  OUTPUTS       :     FrameInfo *frameInfo  : Receives the parsed fields.
+*
+*  RETURNS       :     void
+*
+*  FUNCTION      :     Reads frame type, quality and version from source[0..2].
+*
+*  SPECIAL NOTES :     Compiled out by the enclosing #if 0. 
+*
+****************************************************************************/
+void vp60_GetInfo ( unsigned char *source, FrameInfo *frameInfo )
+{
+	// Is the frame an inter frame or a key frame (key frame when the top bit of byte 0 is clear)
+	frameInfo->KeyFrame = !(source[0] > 0x7f);
+	frameInfo->Quality = source[0] >> 2;
+	if ( frameInfo->KeyFrame )
+		frameInfo->Version = ((source[2]>>3) & 0x1f );
+	else
+		frameInfo->Version = 0;
+
+	frameInfo->vp30Flag = (int)source[1];
+}
+#endif
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_decompress
+*
+*  INPUTS        :  DXL_XIMAGE_HANDLE src : xImage whose compressed data is decoded.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  int: DXL_OK, DXL_VERSION_CONFLICT (decoder returned -1) or
+*                   DXL_BAD_DATA (any other non-zero decoder result).
+*  FUNCTION      :  Decodes the pending compressed frame, if any, and refreshes
+*                   the YUV buffer description held in the xImage.
+*  SPECIAL NOTES :  A missing, empty (size <= 0) or all-zero (first three
+*                   bytes) frame is skipped without error.
+*
+****************************************************************************/
+int vp60_decompress ( DXL_XIMAGE_HANDLE src)
+{
+	vp60_XIMAGE_HANDLE thisAlgorithmBase = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+	unsigned char *cAddr;
+	int cSize;
+	int hasData = 0, k;
+
+	cAddr = DXL_GetXImageCDataAddr(src);
+	cSize = DXL_GetXImageCSize(src);
+
+	// A frame counts as present when any of its first three bytes is non-zero.
+	// Only bytes that lie inside the buffer are examined, so a frame shorter
+	// than three bytes is never read past its end.
+	for ( k = 0; cAddr && k < 3 && k < cSize; k++ )
+		hasData |= ( cAddr[k] >= 1 );
+
+	// if we have a compressed frame decompress it ( otherwise we'll just redo
+	// the scaling and postprocessing from the last frame )
+	if ( hasData )
+	{
+		// decode the frame 
+		int retVal = VP6_DecodeFrameToYUV (
+			thisAlgorithmBase->myPBI,
+			(char *)cAddr, 
+			cSize);
+
+		if ( retVal != 0 )
+		{
+			if ( retVal == -1)
+				return DXL_VERSION_CONFLICT;
+			else
+				return DXL_BAD_DATA;
+		}
+		thisAlgorithmBase->decompressedOnce = 1;
+	}
+
+	//CT>removed blit for size 
+	VP6_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &thisAlgorithmBase->FrameBuffer);
+
+	return DXL_OK;   
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_xImageDestroy
+*
+*  INPUTS        :  DXL_XIMAGE_HANDLE src : xImage to tear down.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  int: always DXL_OK.
+*
+*  FUNCTION      :  Closes decoder and releases resources.
+*
+*  SPECIAL NOTES :  The decoder is stopped only when the xImage owns it. 
+*
+****************************************************************************/
+static int vp60_xImageDestroy ( DXL_XIMAGE_HANDLE src )
+{
+	vp60_XIMAGE_HANDLE xImage = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+	if ( !xImage )
+		return DXL_OK;
+
+	if ( xImage->owned )
+		VP6_StopDecoder ( &(xImage->myPBI) );
+	duck_free ( xImage );
+	return DXL_OK;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_xImageReCreate
+*
+*  INPUTS        :  DXL_XIMAGE_HANDLE src : xImage being (re)initialised.
+*                   unsigned char *data   : Not used by this function.
+*                   int type              : Stream FOURCC; must be VP60, VP61 or VP62.
+*                   enum BITDEPTH bitDepth: Ignored.
+*                   int w, int h          : Dimensions passed to VP6_StartDecoder.
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  DXL_HANDLE: the algorithm base on success, NULL for an
+*                   unsupported type or when the decoder cannot be started.
+*  FUNCTION      :  Starts a VP6 decoder instance for the xImage.
+*
+*  SPECIAL NOTES :  Called during initialization and/or when xImage
+*                   (decompressor) attributes change.
+*
+*                   NOTE:
+*                   This function should be prepared to get data that is
+*                   NOT of the type native to the decoder; it returns NULL
+*                   if it doesn't recognize the format. When the decoder
+*                   fails to start the xImage is destroyed before returning.
+*
+****************************************************************************/
+static DXL_HANDLE vp60_xImageReCreate
+(
+ DXL_XIMAGE_HANDLE src,
+ unsigned char *data,
+ int type,
+ enum BITDEPTH bitDepth,
+ int w,
+ int h
+ )
+{  
+	vp60_XIMAGE_HANDLE xImage = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+	int knownFourCC;
+
+	(void) bitDepth;
+
+	/* only the VP60 / VP61 / VP62 stream types are handled here */
+	knownFourCC = (type == VP60_FOURCC) || (type == VP61_FOURCC) || (type == VP62_FOURCC);
+	if ( !knownFourCC ) 
+		return NULL;
+
+	/* create new PBI; on success this xImage owns the decoder */
+	if ( VP6_StartDecoder( &(xImage->myPBI), w, h ) )
+	{
+		xImage->owned = 1;
+		xImage->decompressedOnce = 0;
+		return (DXL_HANDLE)xImage;
+	}
+
+	/* start-up failed: release the xImage and report failure */
+	vp60_xImageDestroy ( src );
+	return (DXL_HANDLE)NULL;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_xImageCreate
+*
+*  INPUTS        :  src : generic xImage handle; data : unused.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  DXL_HANDLE: new vp60 xImage, or NULL if allocation fails.
+*
+*  FUNCTION      :  Allocates the vp60 xImage and registers its callbacks.
+*
+*  SPECIAL NOTES :  No decoder is started here; vp60_xImageReCreate calls
+*                   VP6_StartDecoder once the frame size is supplied.
+*
+****************************************************************************/
+static DXL_HANDLE vp60_xImageCreate (DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+	/* the base pointer already stored in src is not consulted: a fresh
+	xImage is allocated below */
+	vp60_XIMAGE_HANDLE thisAlgorithmBase;
+
+	(void) data;
+
+	/* create a new xImage, specific to this type of decoder, 
+	(see "vp60_XIMAGE" struct above and dxl_main.h) */
+	thisAlgorithmBase = (vp60_XIMAGE_HANDLE)duck_calloc ( 1, sizeof(vp60_XIMAGE), DMEM_GENERAL );
+	if (thisAlgorithmBase == NULL) 
+		return NULL;
+
+	/* hook this decoder's entry points into the generic xImage */
+	DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp60_xImageReCreate);
+	DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp60_xImageDestroy);
+	DXL_RegisterXImageDx(src, (DX_FUNC) vp60_decompress);
+	DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp60_SetParameter);
+
+	/* NOTE(review): always VP60_FOURCC although vp60_Init registers this
+	function for VP61_FOURCC and VP62_FOURCC as well -- confirm intended */
+	thisAlgorithmBase->myFourCC = VP60_FOURCC;
+	thisAlgorithmBase->decompressedOnce = 0;
+	return (DXL_HANDLE)thisAlgorithmBase;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_Init
+*
+*  INPUTS        :  None.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  int: Always DXL_OK.
+*
+*  FUNCTION      :  Registers vp60_xImageCreate for the VP60, VP61 and VP62
+*                   fourccs, then calls VP6_VPInitLibrary.
+*  SPECIAL NOTES :  None.
+*
+****************************************************************************/
+int vp60_Init ( void )
+{
+	DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP60_FOURCC);
+	DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP61_FOURCC);
+	DXL_RegisterXImage((CREATE_FUNC) vp60_xImageCreate, VP62_FOURCC);
+
+	/* initialize all the global variables */
+	VP6_VPInitLibrary();
+
+	return DXL_OK;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_Exit
+*
+*  INPUTS        :  None.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  int: Always DXL_OK.
+*
+*  FUNCTION      :  Main exit routine, called during DXL_ExitVideo()
+*                   clean up any global information if necessary.
+*                   Currently this only calls VP6_VPDeInitLibrary.
+*  SPECIAL NOTES :  None. 
+*
+****************************************************************************/
+int vp60_Exit(void)
+{
+	VP6_VPDeInitLibrary();
+
+	return DXL_OK;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       :  vp60_SetParameter
+*
+*  INPUTS        :  DXL_XIMAGE_HANDLE src   : xImage whose decoder is addressed.
+*                   int Command             : PBC_SET_PBSTRUCT or a PB_COMMAND_TYPE.
+*                   uintptr_t Parameter     : Command dependent value.
+*
+*  OUTPUTS       :  None.
+*
+*  RETURNS       :  void
+*
+*  FUNCTION      :  PBC_SET_PBSTRUCT installs Parameter as the decoder
+*                   instance (stopping an owned one first); every other
+*                   command is forwarded to VP6_SetPbParam.
+*  SPECIAL NOTES :  None. 
+*
+****************************************************************************/
+void vp60_SetParameter(DXL_XIMAGE_HANDLE src, int Command, uintptr_t Parameter)
+{
+	vp60_XIMAGE_HANDLE xImage = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+	// anything but PBC_SET_PBSTRUCT goes straight to the decoder
+	if ( Command != PBC_SET_PBSTRUCT )
+	{
+		VP6_SetPbParam( xImage->myPBI, (PB_COMMAND_TYPE)Command, Parameter );
+		return;
+	}
+	// adopt the caller's decoder instance, stopping our own first if owned
+	if ( xImage->owned )
+		VP6_StopDecoder ( &(xImage->myPBI) );
+	xImage->owned = 0;
+	xImage->myPBI = (PB_INSTANCE *) Parameter;
+}
+
+//CT: plane descriptors filled in by GetImageBufs
+typedef	struct {
+	unsigned char*	baseAddr;	// start of the plane (set from Y/U/VBuffer)
+	long			rowBytes;	// row stride (set from YStride / UVStride)
+} YV12_PLANE;
+
+typedef	struct {
+	YV12_PLANE	y;
+	YV12_PLANE	u;
+	YV12_PLANE	v;
+} YV12_PLANES;
+
+void GetImageBufs(DXL_XIMAGE_HANDLE x, YV12_PLANES *p)
+{
+	// copy the plane addresses and strides out of the xImage's FrameBuffer
+	vp60_XIMAGE_HANDLE vpImage = (vp60_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(x);
+
+	p->y.baseAddr = (unsigned char *)vpImage->FrameBuffer.YBuffer;
+	p->y.rowBytes = vpImage->FrameBuffer.YStride;
+	p->u.baseAddr = (unsigned char *)vpImage->FrameBuffer.UBuffer;
+	p->u.rowBytes = vpImage->FrameBuffer.UVStride;
+	p->v.baseAddr = (unsigned char *)vpImage->FrameBuffer.VBuffer;
+	p->v.rowBytes = vpImage->FrameBuffer.UVStride;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c
new file mode 100644
index 00000000..d4c0dfe2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/dx/Win64/dsystemdependant.c
@@ -0,0 +1,348 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <windows.h>
+#include "pbdll.h"
+#include "math.h"
+
+#include "vp60dversion.h"
+
+#include "quantize.h" 
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning(disable:4115)
+
+#define MMX_ENABLED 1
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern unsigned int CPUFrequency;
+
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void VP6_BuildQuantIndex_Generic ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForMMX ( QUANTIZER *pbi );
+extern void VP6_BuildQuantIndex_ForWMT ( QUANTIZER *pbi );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP6_SetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi        : Pointer to decoder instance.
+ *                      PB_COMMAND_TYPE Command : Command action specifier.
+ *                      uintptr_t Parameter     : Command dependent value.
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *  
+ *  FUNCTION      :     Generalised command interface to decoder.
+ *
+ *  SPECIAL NOTES :     Does nothing if pbi is NULL or POSTPROCESS is undefined.
+ *
+ ****************************************************************************/
+void CCONV VP6_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, uintptr_t Parameter )
+{
+#if defined(POSTPROCESS)
+    /* nothing to configure without a decoder instance (was a NULL dereference) */
+    if ( pbi == NULL ) return;
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+    {
+#if defined(_MSC_VER)
+        double Pixels = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+        double FreeMhz = pbi->ProcessorFrequency * Parameter / 100;
+        double PixelsPerMhz = 100 * sqrt(1.0*Pixels) / FreeMhz;
+#else
+        double PixelsPerMhz = 100 *10;
+#endif
+        pbi->CPUFree = Parameter; 
+        if( PixelsPerMhz > 150 )
+            pbi->PostProcessingLevel = 0;
+        else if( PixelsPerMhz > 100 )
+            pbi->PostProcessingLevel = 8;
+        else if( PixelsPerMhz > 90 )
+            pbi->PostProcessingLevel = 4;
+        else if( PixelsPerMhz > 80 )
+            pbi->PostProcessingLevel = 5;
+        else
+            pbi->PostProcessingLevel = 6;
+        break;
+    }
+
+    case PBC_SET_ADDNOISE:
+        pbi->AddNoiseMode = Parameter;
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+        break;
+
+    case PBC_SET_POSTPROC:
+        if( Parameter == 9 )                
+            VP6_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        else
+        {
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        break;
+    }
+#endif
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_readTSC
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : unsigned long *tsc : Pointer to returned counter value.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads the cpu time stamp counter (LowPart only is kept).
+ *
+ *  SPECIAL NOTES : Since this function uses RDTSC instruction, which is 
+ *					introduced in Pentium processor, this routine is only
+ *					expected to work on Pentium and above.
+ *
+ ****************************************************************************/
+#ifdef _M_AMD64 // For 64-bit apps
+unsigned __int64 __rdtsc(void);
+#pragma intrinsic(__rdtsc)
+#define _RDTSC __rdtsc
+#else // For 32-bit apps
+// inline assembly: RDTSC, then EAX -> first dword and EDX -> second dword of ts
+#define _RDTSC_STACK(ts) \
+	__asm rdtsc \
+	__asm mov DWORD PTR [ts], eax \
+	__asm mov DWORD PTR [ts+4], edx
+
+__inline unsigned __int64 _inl_rdtsc32() {
+	unsigned __int64 t;
+	_RDTSC_STACK(t);
+	return t;
+}
+#define _RDTSC _inl_rdtsc32
+#endif
+
+// Stores t.LowPart of the 64-bit counter in *tsc; the upper half is dropped.
+void VP6_readTSC(unsigned long *tsc)
+{
+	LARGE_INTEGER t;
+	t.QuadPart = _RDTSC();
+	*tsc = t.LowPart;
+
+	return;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_GetProcessorFrequency
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : unsigned long: The processor's frequency (in MHz), or 0
+ *                  if it could not be measured.
+ *  FUNCTION      : Check the Processor's working frequency.
+ *
+ *  SPECIAL NOTES : This function should only be used here. Limited tests 
+ *					have verified it works till 166MHz Pentium with MMX. 
+ *
+ ****************************************************************************/
+unsigned long VP6_GetProcessorFrequency()
+{
+	LARGE_INTEGER pf;						// Performance Counter Frequency
+	LARGE_INTEGER startcount, endcount;
+	unsigned long tsc1, tsc2;
+	unsigned long time1;
+	unsigned long cpufreq = 0;				// stays 0 if nothing can be measured
+	unsigned long Nearest66Mhz, Nearest50Mhz;
+	unsigned long Delta66, Delta50;
+
+	// If the cpu does not support the high resolution counter, return 0
+	if ( QueryPerformanceFrequency( &pf ) )
+	{
+		// read the counter and TSC at start
+		QueryPerformanceCounter ( &startcount );
+		VP6_readTSC ( &tsc1 );
+
+		// busy-wait for 5 ms to get enough accuracy; comparing the elapsed
+		// time (unsigned subtraction) stays correct when timeGetTime wraps
+		time1 = timeGetTime();
+		while ( (unsigned long)(timeGetTime() - time1) < 5 )
+			;
+
+		// read the counter and TSC at end
+		QueryPerformanceCounter ( &endcount );
+		VP6_readTSC ( &tsc2 );
+
+		// calculate the frequency (skipped if the counter did not advance: /0)
+		if ( endcount.QuadPart > startcount.QuadPart )
+			cpufreq = (unsigned long )( (double)(tsc2-tsc1)
+			            * (double)pf.QuadPart
+			            / (double)(endcount.QuadPart - startcount.QuadPart)
+			            / 1000000 );
+	}
+
+	// snap to the nearest multiple of 66.67 MHz or 50 MHz, whichever is closer
+	Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+	Delta66      = ( Nearest66Mhz > cpufreq ) ? Nearest66Mhz - cpufreq : cpufreq - Nearest66Mhz;
+	Nearest50Mhz = ((cpufreq + 25)/50 *50);
+	Delta50      = ( Nearest50Mhz > cpufreq ) ? Nearest50Mhz - cpufreq : cpufreq - Nearest50Mhz;
+
+	if ( Delta50 < Delta66 )
+		cpufreq = Nearest50Mhz;
+	else
+	{
+		cpufreq = Nearest66Mhz;
+		if ( cpufreq == 666 )
+			cpufreq = 667;
+	}
+	return cpufreq;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_DMachineSpecificConfig
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Checks for machine specific features such as MMX support;
+ *                  sets appropriate flags and function pointers.
+ *
+ *  SPECIAL NOTES : The CPU detection is compiled out with #if 0, so
+ *                  VP6_BuildQuantIndex is always the generic version.
+ ****************************************************************************/
+void VP6_DMachineSpecificConfig ( void )
+{
+#if 0
+	INT32 MmxEnabled;
+	INT32 XmmEnabled; 
+	INT32 WmtEnabled;
+
+	GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+
+	// If MMX supported use MMX version of functions, else use C versions
+	if ( WmtEnabled )		// Willamette
+		VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForWMT;
+	else if ( MmxEnabled )  // MMX
+		VP6_BuildQuantIndex = VP6_BuildQuantIndex_ForMMX;
+    else                    // No instruction set support
+#endif
+		VP6_BuildQuantIndex = VP6_BuildQuantIndex_Generic;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_IssueWarning
+ *
+ *  INPUTS        : char *WarningMessage : Pointer to warning message text.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Issues a warning message in a task-modal message box.
+ *
+ *  SPECIAL NOTES : Calls MessageBoxA directly because the text is a char
+ *                  string; the MessageBox macro depends on UNICODE.
+ ****************************************************************************/
+void VP6_IssueWarning ( char *WarningMessage )
+{
+    MessageBoxA ( NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_PauseProcess
+ *
+ *  INPUTS        : unsigned int SleepMs : Time (in milli-seconds) to wait.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Pause/Sleep for specified time(in milli-seconds).
+ *
+ *  SPECIAL NOTES : Passes SleepMs straight to Sleep(). 
+ *
+ ****************************************************************************/
+void VP6_PauseProcess ( unsigned int SleepMs )
+{
+    Sleep ( SleepMs );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_SytemGlobalAlloc
+ *
+ *  INPUTS        : unsigned int Size : Size of block of memory (in bytes).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : char *: Pointer to allocated block of memory.
+ *
+ *  FUNCTION      : Allocates a block of memory of specified size.
+ *
+ *  SPECIAL NOTES : Returns the result of GlobalAlloc( GPTR, Size ) unchecked.
+ *                  The routine name is spelled "Sytem" (sic) in the code.
+ ****************************************************************************/
+char *VP6_SytemGlobalAlloc ( unsigned int Size )  
+{
+    return GlobalAlloc( GPTR, Size );  
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VP6_SystemGlobalFree
+ *
+ *  INPUTS        : char *MemPtr : Pointer to block of memory.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-allocates a block of memory.
+ *
+ *  SPECIAL NOTES : Passes MemPtr to GlobalFree; its result is ignored. 
+ *
+ ****************************************************************************/
+void VP6_SystemGlobalFree ( char* MemPtr )
+{
+    GlobalFree ( (HGLOBAL)MemPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj
new file mode 100644
index 00000000..73a81a5a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj
@@ -0,0 +1,385 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{8666A681-2E07-49A5-B23E-EC28D165C63B}</ProjectGuid>
+    <RootNamespace>vp6d</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\obj\vp6d\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_DEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_DEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>NDEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>Full</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\include;..\..\..\..\..\libvp6\corelibs\include;..\..\..\..\..\libvp6\include;..\..\..\..\..\libvp6\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>NDEBUG;WIN32;_WINDOWS;_USRDLL;vp6D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="dx\Generic\boolhuff.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\debug.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodembs.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemode.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemv.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DFrameR.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DSystemDependant.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\FrameIni.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\Huffman.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\pb_globals.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\quantize.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\recon.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\TokenEntropy.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\dsystemdependant.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\quantindexmmx.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\vp60dxv.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win64\dsystemdependant.c">
+      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename)1.obj</ObjectFileName>
+    </ClCompile>
+    <ClCompile Include="xprintf\xprintf.cpp">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Full</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters
new file mode 100644
index 00000000..e46cdc7b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.vcxproj.filters
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Decompress">
+      <UniqueIdentifier>{95dad006-2a54-48a1-baf0-500b01d2a44e}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Win32">
+      <UniqueIdentifier>{ca73ff2b-c2ff-4b2d-8dcf-b2fe83464e0f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Win64">
+      <UniqueIdentifier>{a92da48f-df62-48b1-9ba1-ffaa42119481}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dx\Generic\boolhuff.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\debug.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodembs.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemode.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemv.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DFrameR.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DSystemDependant.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\FrameIni.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\Huffman.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\pb_globals.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\quantize.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\recon.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\TokenEntropy.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\dsystemdependant.c">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\quantindexmmx.c">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\vp60dxv.c">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="xprintf\xprintf.cpp">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win64\dsystemdependant.c">
+      <Filter>Win64</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..ee05bb81
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6d.xcodeproj/project.pbxproj
@@ -0,0 +1,257 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0C02406A0BB7912C00AE885C /* boolhuff.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240690BB7912C00AE885C /* boolhuff.c */; };
+		0C02406D0BB7913500AE885C /* debug.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02406C0BB7913500AE885C /* debug.c */; };
+		0C0240840BB7916D00AE885C /* DSystemDependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240760BB7916D00AE885C /* DSystemDependant.c */; };
+		0C0240850BB7916D00AE885C /* FrameIni.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240770BB7916D00AE885C /* FrameIni.c */; };
+		0C0240880BB7916D00AE885C /* modestats.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407A0BB7916D00AE885C /* modestats.c */; };
+		0C0240890BB7916D00AE885C /* pb_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407B0BB7916D00AE885C /* pb_globals.c */; };
+		0C02408A0BB7916D00AE885C /* Huffman.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407C0BB7916D00AE885C /* Huffman.c */; };
+		0C02408B0BB7916D00AE885C /* quantize.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407D0BB7916D00AE885C /* quantize.c */; };
+		0C02408C0BB7916D00AE885C /* recon.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407E0BB7916D00AE885C /* recon.c */; };
+		0C02408D0BB7916D00AE885C /* TokenEntropy.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C02407F0BB7916D00AE885C /* TokenEntropy.c */; };
+		0C0240B30BB791FF00AE885C /* vp60dxv.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C0240B20BB791FF00AE885C /* vp60dxv.c */; };
+		0C1423C30BB819EB00FDDAB7 /* vfwpbdll_if.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */; };
+		0C1423D90BB81A1200FDDAB7 /* decodembs.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423D80BB81A1200FDDAB7 /* decodembs.c */; };
+		0C1423E00BB81A3000FDDAB7 /* decodemode.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DD0BB81A3000FDDAB7 /* decodemode.c */; };
+		0C1423E10BB81A3000FDDAB7 /* decodemv.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DE0BB81A3000FDDAB7 /* decodemv.c */; };
+		0C1423E20BB81A3000FDDAB7 /* DFrameR.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0C0240690BB7912C00AE885C /* boolhuff.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = boolhuff.c; path = dx/Generic/boolhuff.c; sourceTree = "<group>"; };
+		0C02406C0BB7913500AE885C /* debug.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = debug.c; path = dx/Generic/debug.c; sourceTree = "<group>"; };
+		0C0240760BB7916D00AE885C /* DSystemDependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DSystemDependant.c; path = dx/Generic/DSystemDependant.c; sourceTree = "<group>"; };
+		0C0240770BB7916D00AE885C /* FrameIni.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = FrameIni.c; path = dx/Generic/FrameIni.c; sourceTree = "<group>"; };
+		0C02407A0BB7916D00AE885C /* modestats.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = modestats.c; path = dx/Generic/modestats.c; sourceTree = "<group>"; };
+		0C02407B0BB7916D00AE885C /* pb_globals.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = pb_globals.c; path = dx/Generic/pb_globals.c; sourceTree = "<group>"; };
+		0C02407C0BB7916D00AE885C /* Huffman.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = Huffman.c; path = dx/Generic/Huffman.c; sourceTree = "<group>"; };
+		0C02407D0BB7916D00AE885C /* quantize.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = quantize.c; path = dx/Generic/quantize.c; sourceTree = "<group>"; };
+		0C02407E0BB7916D00AE885C /* recon.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = recon.c; path = dx/Generic/recon.c; sourceTree = "<group>"; };
+		0C02407F0BB7916D00AE885C /* TokenEntropy.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = TokenEntropy.c; path = dx/Generic/TokenEntropy.c; sourceTree = "<group>"; };
+		0C0240B20BB791FF00AE885C /* vp60dxv.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vp60dxv.c; path = dx/Win32/vp60dxv.c; sourceTree = "<group>"; };
+		0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vfwpbdll_if.c; path = dx/Generic/vfwpbdll_if.c; sourceTree = "<group>"; };
+		0C1423D80BB81A1200FDDAB7 /* decodembs.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodembs.c; path = dx/Generic/decodembs.c; sourceTree = "<group>"; };
+		0C1423DD0BB81A3000FDDAB7 /* decodemode.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodemode.c; path = dx/Generic/decodemode.c; sourceTree = "<group>"; };
+		0C1423DE0BB81A3000FDDAB7 /* decodemv.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = decodemv.c; path = dx/Generic/decodemv.c; sourceTree = "<group>"; };
+		0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DFrameR.c; path = dx/Generic/DFrameR.c; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libvp6d.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvp6d.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* vp60 */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = vp60;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				0C0240B20BB791FF00AE885C /* vp60dxv.c */,
+				0C1423C20BB819EB00FDDAB7 /* vfwpbdll_if.c */,
+				0C1423D80BB81A1200FDDAB7 /* decodembs.c */,
+				0C1423DD0BB81A3000FDDAB7 /* decodemode.c */,
+				0C1423DE0BB81A3000FDDAB7 /* decodemv.c */,
+				0C1423DF0BB81A3000FDDAB7 /* DFrameR.c */,
+				0C0240690BB7912C00AE885C /* boolhuff.c */,
+				0C02406C0BB7913500AE885C /* debug.c */,
+				0C0240760BB7916D00AE885C /* DSystemDependant.c */,
+				0C0240770BB7916D00AE885C /* FrameIni.c */,
+				0C02407A0BB7916D00AE885C /* modestats.c */,
+				0C02407B0BB7916D00AE885C /* pb_globals.c */,
+				0C02407C0BB7916D00AE885C /* Huffman.c */,
+				0C02407D0BB7916D00AE885C /* quantize.c */,
+				0C02407E0BB7916D00AE885C /* recon.c */,
+				0C02407F0BB7916D00AE885C /* TokenEntropy.c */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libvp6d.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		D2AAC045055464E500DB518D /* vp60 */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vp60" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = vp60;
+			productName = vp60;
+			productReference = D2AAC046055464E500DB518D /* libvp6d.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vp6d" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* vp60 */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* vp60 */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0C02406A0BB7912C00AE885C /* boolhuff.c in Sources */,
+				0C02406D0BB7913500AE885C /* debug.c in Sources */,
+				0C0240840BB7916D00AE885C /* DSystemDependant.c in Sources */,
+				0C0240850BB7916D00AE885C /* FrameIni.c in Sources */,
+				0C0240880BB7916D00AE885C /* modestats.c in Sources */,
+				0C0240890BB7916D00AE885C /* pb_globals.c in Sources */,
+				0C02408A0BB7916D00AE885C /* Huffman.c in Sources */,
+				0C02408B0BB7916D00AE885C /* quantize.c in Sources */,
+				0C02408C0BB7916D00AE885C /* recon.c in Sources */,
+				0C02408D0BB7916D00AE885C /* TokenEntropy.c in Sources */,
+				0C0240B30BB791FF00AE885C /* vp60dxv.c in Sources */,
+				0C1423C30BB819EB00FDDAB7 /* vfwpbdll_if.c in Sources */,
+				0C1423D90BB81A1200FDDAB7 /* decodembs.c in Sources */,
+				0C1423E00BB81A3000FDDAB7 /* decodemode.c in Sources */,
+				0C1423E10BB81A3000FDDAB7 /* decodemv.c in Sources */,
+				0C1423E20BB81A3000FDDAB7 /* DFrameR.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = YES;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vp6d;
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = (
+					ppc,
+					i386,
+				);
+				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+				GCC_MODEL_TUNING = G5;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vp6d;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../../include ../../../include ../../../../include ../../../../include/vp60";
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../../include ../../../include ../../../../include ../../../../include/vp60";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vp60" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vp6d" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln
new file mode 100644
index 00000000..3e63ab8f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.sln
@@ -0,0 +1,23 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vp6e", "vp6e.vcproj", "{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectDependencies) = postSolution
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Debug.ActiveCfg = Debug|Win32
+		{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Debug.Build.0 = Debug|Win32
+		{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Release.ActiveCfg = Release|Win32
+		{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj
new file mode 100644
index 00000000..91ff6d1a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/vp6e.vcproj
@@ -0,0 +1,626 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="vp6e"
+	ProjectGUID="{9BB38682-B7F7-44E8-BE04-BD9D1F5512CE}"
+	SccProjectName=""
+	SccLocalPath="">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\..\..\Lib\Win32\Debug"
+			IntermediateDirectory=".\..\..\..\..\ObjectCode\vp6e\debug"
+			ConfigurationType="4"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				OptimizeForProcessor="2"
+				AdditionalIncludeDirectories=".\include,..\include,..\..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\Include\vp60,..\..\..\..\include\vp60"
+				PreprocessorDefinitions="vp6E_EXPORTS;_DEBUG;WIN32;_WINDOWS;_USRDLL;PREDICT_2D;VFW_COMP;COMPDLL;POSTPROCESS;CPUISLITTLEENDIAN;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				PrecompiledHeaderFile=".\..\..\..\..\ObjectCode\vp6e\debug/vp6e.pch"
+				AssemblerListingLocation=".\..\..\..\..\ObjectCode\vp6e\debug/"
+				ObjectFile=".\..\..\..\..\ObjectCode\vp6e\debug/"
+				ProgramDataBaseFileName=".\..\..\..\..\ObjectCode\vp6e\debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"
+				DebugInformationFormat="3"
+				CompileAs="0"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\Lib\Win32\Debug\s_vp60e.lib"
+				SuppressStartupBanner="TRUE"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory=".\..\..\Lib\Win32\Release"
+			IntermediateDirectory=".\..\..\..\..\ObjectCode\vp6e\Release"
+			ConfigurationType="4"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE"
+			CharacterSet="2">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				OptimizeForProcessor="2"
+				AdditionalIncludeDirectories=".\include,..\include,..\..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\Include\vp60"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;vp6E_EXPORTS;PREDICT_2D;VFW_COMP;COMPDLL;POSTPROCESS;CPUISLITTLEENDIAN;NORMALIZED;INLINE=__forceinline;FORCEINLINE=__forceinline"
+				StringPooling="TRUE"
+				RuntimeLibrary="0"
+				EnableFunctionLevelLinking="TRUE"
+				PrecompiledHeaderFile=".\..\..\..\..\ObjectCode\vp6e\Release/vp6e.pch"
+				AssemblerListingLocation=".\..\..\..\..\ObjectCode\vp6e\Release/"
+				ObjectFile=".\..\..\..\..\ObjectCode\vp6e\Release/"
+				ProgramDataBaseFileName=".\..\..\..\..\ObjectCode\vp6e\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"
+				CompileAs="0"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\Lib\Win32\Release\s_vp60e.lib"
+				SuppressStartupBanner="TRUE"/>
+			<Tool
+				Name="VCMIDLTool"/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Compress"
+			Filter="">
+			<File
+				RelativePath="CX\Generic\Comp_Globals.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\CSystemDependant.c">
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="TRUE">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					ExcludedFromBuild="TRUE">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\Encode.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\encodembs.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\encodemode.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\encodemv.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\fullframefdct.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="CX\Generic\mcomp.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="CX\Generic\misc_common.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\PackVideo.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\PickModes.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\RawBuffer.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\Tokenize.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\Transform.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Generic\twopass.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="CX\Generic\vfwcomp.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="CX\Generic\vfwcomp_if.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Win32"
+			Filter="">
+			<File
+				RelativePath="cx\Win32\COptFunctions.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\csystemdependant.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\CWmtFunctions.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\MmxEncodeMath.asm">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+						Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)/$(InputName).obj"/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\WmtTransform.c">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\XmmGetError.asm">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+						Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)/$(InputName).obj"/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="CX\Win32\XmmGetSAD8.asm">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+						Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)/$(InputName).obj"/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="cx\Win32\XmmSAD.asm">
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.lst /Fo &quot;$(IntDir)&quot;\&quot;$(InputName)&quot;.obj &quot;$(InputPath)&quot;
+"
+						Outputs=".\&quot;$(IntDir)&quot;\$(InputName).obj"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /Zi /Zm /Cx /c /coff /Fl&quot;$(IntDir)/$(InputName)&quot;.lst /Fo &quot;$(IntDir)/$(InputName)&quot;.obj &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)/$(InputName).obj"/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp b/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp
new file mode 100644
index 00000000..d775bf72
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/VP60/vp60/xprintf/xprintf.cpp
@@ -0,0 +1,139 @@
+/****************************************************************************
+*
+*   Module Title :     xprintf.cpp
+*
+*   Description  :     Display a printf style message on the current video frame.    
+*
+****************************************************************************/						
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdarg.h>
+#include <windows.h>
+#include "xprintf.h"
+
+/****************************************************************************
+ *
+ *  ROUTINE       : vp6_xprintf
+ *
+ *  INPUTS        : const PB_INSTANCE *ppbi : Pointer to decoder instance.
+ *                  long nPixel             : Offset into buffer to write text.
+ *                  const char *format      : Format string for print.
+ *                  ...                     : Variable length argument list.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : int: Length (in bytes) of the formatted text, or 0 if any
+ *                  GDI call failed (always 0 on Windows CE builds).
+ *
+ *  FUNCTION      : Display a printf style message on the current video frame.
+ *
+ *  SPECIAL NOTES : Text is truncated to 255 characters. It is rendered into
+ *                  an (8 * length) x 8 monochrome bitmap; set pixels are stored
+ *                  as 255 in ppbi->PostProcessBuffer with no bounds checking.
+ *
+ ****************************************************************************/
+int vp6_xprintf ( const PB_INSTANCE *ppbi, long nPixel, const char *format, ... )
+{
+    int rc = 0;
+    va_list arglist;
+    char szFormatted[256] = "";
+
+    //  Format text; _vsnprintf does not terminate on truncation, so do it here
+    va_start ( arglist, format );
+    _vsnprintf ( szFormatted, sizeof(szFormatted) - 1, format, arglist );
+    va_end ( arglist );
+    szFormatted[sizeof(szFormatted) - 1] = '\0';
+
+#if defined (_WIN32_WCE)
+#else
+    //  All handles start out NULL so the Exit path can tell what was acquired
+    BOOL bRC;
+    long x;
+    long y;
+    HDC hdcMemory   = NULL;
+    HBITMAP hbmTemp = NULL;
+    HBITMAP hbmOrig = NULL;
+    HFONT hfont     = NULL;
+    HFONT hfonto    = NULL;
+    const int nChars   = static_cast<int>(strlen(szFormatted));
+    const long nStride = ppbi->Configuration.YStride;
+    UINT8 *pDest       = &ppbi->PostProcessBuffer[nPixel];
+
+    //  Set up temporary bitmap: 8 pixels wide per character, 8 rows high
+    RECT rect;
+    rect.left   = 0;
+    rect.top    = 0;
+    rect.right  = 8 * nChars;
+    rect.bottom = 8;
+
+    hdcMemory = CreateCompatibleDC ( NULL );
+    if ( hdcMemory == NULL )
+        goto Exit;
+
+    hbmTemp = CreateBitmap ( rect.right, rect.bottom, 1, 1, NULL );
+    if ( hbmTemp == NULL )
+        goto Exit;
+
+    hbmOrig = static_cast<HBITMAP>(SelectObject(hdcMemory, hbmTemp));
+    if ( !hbmOrig )
+        goto Exit;
+
+    //  Write text into bitmap
+    hfont = CreateFont ( 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "" );
+    if ( hfont == NULL )
+        goto Exit;
+
+    //  Select the font itself (not the bitmap) and keep the previous font
+    hfonto = static_cast<HFONT>(SelectObject(hdcMemory, hfont));
+    if ( !hfonto )
+        goto Exit;
+
+    //  NOTE(review): COLORREF 1 is RGB(1,0,0); confirm it yields set pixels
+    SetTextColor ( hdcMemory, 1 );
+    SetBkColor ( hdcMemory, 0 );
+    SetBkMode ( hdcMemory, TRANSPARENT );
+
+    bRC = BitBlt ( hdcMemory, rect.left, rect.top, rect.right, rect.bottom, hdcMemory, rect.left, rect.top, BLACKNESS );
+    if ( !bRC )
+        goto Exit;
+
+    bRC = ExtTextOut ( hdcMemory, 0, 0, ETO_CLIPPED, &rect, szFormatted, nChars, NULL );
+    if ( !bRC )
+        goto Exit;
+
+    //  Copy bitmap to video frame; rows are taken from the bitmap bottom-up
+    for ( y=rect.top; y<rect.bottom; ++y )
+    {
+        for ( x=rect.left; x<rect.right; ++x )
+        {
+            if ( GetPixel( hdcMemory, x, rect.bottom - 1 - y ) )
+                pDest[x] = 255;
+        }
+        pDest += nStride;
+    }
+
+    rc = nChars;
+
+Exit:
+    //  Put the DC's original objects back before deleting the ones made here
+    if ( hfont != NULL )
+    {
+        if ( hfonto != NULL )
+            SelectObject ( hdcMemory, hfonto );
+        DeleteObject ( hfont );
+    }
+    if ( hbmTemp != NULL )
+    {
+        if ( hbmOrig != NULL )
+            SelectObject ( hdcMemory, hbmOrig );
+        DeleteObject ( hbmTemp );
+    }
+    if ( hdcMemory != NULL )
+        DeleteDC ( hdcMemory );
+#endif
+
+    return rc;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj
new file mode 100644
index 00000000..413368c7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj
@@ -0,0 +1,308 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9E8FD088-3571-4BCD-896D-8DBFEC3042FC}</ProjectGuid>
+    <RootNamespace>dxv</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>16.0.32002.118</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Vcpkg">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Vcpkg">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4013;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4013;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <WarningLevel>Level4</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\dxlvinfd.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\dxl_attr.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\dxl_main.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\dxl_reg.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\dxv_init.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\vscreen.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\ximage.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\dxAccurateTime.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\icmdxv.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters
new file mode 100644
index 00000000..e5519660
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/dxv.vcxproj.filters
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="generic">
+      <UniqueIdentifier>{12c2bb0c-53fa-442f-812f-1e57762a8b14}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{16f6450f-7013-49fe-a82b-a2a13821ec8c}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\dxl_attr.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\dxl_main.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\dxl_reg.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\dxlvinfd.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\dxv_init.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\icmdxv.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\vscreen.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\ximage.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\dxAccurateTime.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c
new file mode 100644
index 00000000..59cf2d61
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_attr.c
@@ -0,0 +1,33 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "dxl_main.h"                                            
+
+int DXL_SetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst, enum BLITQUALITY blitquality)
+{
+	/* Install a new blit quality on dst and hand back the one it replaces. */
+	int previous;
+
+	validate(dst);	/* project macro; its expansion is not visible in this file */
+	previous = dst->bq;
+
+	dst->bq = blitquality;
+	return previous;
+}
+
+enum BLITQUALITY DXL_GetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst)
+{
+	/* A NULL screen reports DXBLIT_SAME; otherwise return the stored quality. */
+	if (dst == NULL)
+		return DXBLIT_SAME;
+	return dst->bq;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c
new file mode 100644
index 00000000..14adb534
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_main.c
@@ -0,0 +1,69 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"       
+
+static DXL_VSCREEN_HANDLE vScreens = NULL;	/* screen pool; only assigned when PRE_ALLOCATE is set */
+static int maxScreens;	/* entry count of vScreens; written by preallocVScreens under PRE_ALLOCATE */
+
+int preallocVScreens(int lmaxScreens)
+{	// Reserve a pool of lmaxScreens screens when built with PRE_ALLOCATE; otherwise just report DXL_OK.
+	(void) lmaxScreens;  // silences the unused-parameter warning when PRE_ALLOCATE is off
+#if PRE_ALLOCATE
+    vScreens = (DXL_VSCREEN_HANDLE)duck_calloc(maxScreens = lmaxScreens,sizeof(DXL_VSCREEN),DMEM_GENERAL);
+    // maxScreens was assigned inside the call above, so it is set even when the allocation fails
+	if (vScreens == NULL) 
+		return DXL_ALLOC_FAILED;
+#endif
+	return DXL_OK;
+}
+
+void freeVScreens(void)
+{	/* Release the preallocated screen pool, if there is one (PRE_ALLOCATE builds only). */
+#if PRE_ALLOCATE
+    if (vScreens)
+	{
+        int i;
+        for(i = 0; i < maxScreens; i++)
+            DXL_DestroyVScreen(&vScreens[i]);
+        duck_free(vScreens);
+        vScreens = NULL;	/* a repeated call or a later vScreenCreate must not touch the freed pool */
+    }
+#endif
+}
+        
+DXL_VSCREEN_HANDLE vScreenCreate(void)
+{
+	/* Hand out a free pool slot when a pool exists; otherwise heap-allocate one. */
+	DXL_VSCREEN_HANDLE screen;
+#if PRE_ALLOCATE
+	if (vScreens != NULL)
+	{
+		int slot = 0;
+
+		/* first entry whose inUse flag is clear */
+		while (slot < maxScreens && vScreens[slot].dkFlags.inUse)
+			slot++;
+
+		if (slot < maxScreens)
+			return &vScreens[slot];
+	}
+#endif
+	screen = (DXL_VSCREEN_HANDLE)duck_calloc(1,sizeof(DXL_VSCREEN),DMEM_GENERAL);
+	if (screen == NULL)
+		return NULL;
+
+	/* flag the screen as individually allocated */
+	screen->dkFlags.allocated = 1;
+	return screen;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c
new file mode 100644
index 00000000..38d1c28c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxl_reg.c
@@ -0,0 +1,236 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/********
+
+  DXL_REG.C - functions for registration of "Blit" functions
+  (C)1996 The Duck Corporation
+
+********/
+#include <assert.h>
+#include <dxl_main.h>
+#include <stdio.h>
+#include <string.h>
+#include "duck_mem.h"
+
+typedef struct tBlitStruct {
+	blitFunc setup, blit, exit;	/* the three entry points stored by DXL_RegisterBlitter */
+} DXL_BLITTER, DXL_BLITTER_HANDLE;	/* NOTE(review): DXL_BLITTER_HANDLE names the struct itself, not a pointer */
+
+static int nextBlitter = 1;	/* next blitTable row DXL_ReserveBlitter hands out; resetBlitters sets it to 0 */
+                                  /**************/
+static DXL_BLITTER blitTable[32][DXL_MAX_IFORMATS];	/* indexed [blit format][internal format] */
+static signed char blitTranslateTable[MAX_BQUALITIES][MAX_CDEPTHS];	/* [blit quality][bit depth] -> blit format */
+
+static void nullBlitter(void){}	/* do-nothing blitter; DXL_GetBlitFunc returns it for DXL_NULL_IFORMAT images */
+
+static DXL_INTERNAL_FORMAT iFormats[] = {	/* internal format per algorithm handle; eight slots, all empty at start */
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT,
+	DXL_NULL_IFORMAT
+};
+DXL_INTERNAL_FORMAT dxl_GetFOURCCInternalFormat(unsigned long fourcc)
+{
+	/* Map a FOURCC to the internal format registered for it, or DXL_NULL_IFORMAT if none. */
+	int aHandle = dxl_GetAlgHandle(fourcc);
+	/* reject -1 (the lookup's failure value) and any other handle outside iFormats[] */
+	if (aHandle < 0 || aHandle >= (int)(sizeof(iFormats) / sizeof(iFormats[0])))
+		return DXL_NULL_IFORMAT;
+	return iFormats[aHandle];
+}
+
+int dxl_RegisterInternalFormat(int xHandle, DXL_INTERNAL_FORMAT xFormat)
+{	/* Record xFormat for handle xHandle. Returns DXL_OK, or -1 (table untouched) when xHandle is outside iFormats[]. */
+	if (xHandle < 0 || xHandle >= (int)(sizeof(iFormats) / sizeof(iFormats[0]))) return -1;
+	iFormats[xHandle] = xFormat;
+	return DXL_OK;
+}
+
+DXL_BLIT_FORMAT DXL_ReserveBlitter(void)
+{
+	/* Hand out the next unused blit format number, or -1 once the counter reaches 32. */
+	if (nextBlitter < 32)
+		return nextBlitter++;
+	return -1; /*DXL_EXCEEDED_MAX_BLITTERS;*/
+}
+
+DXL_BLIT_FORMAT DXL_OverrideBlitter(enum BLITQUALITY bq,enum BITDEPTH bd)
+{	/* Return the blit format mapped to (bq, bd), reserving a new one first if the entry still holds -1. */
+	signed char *entry = &blitTranslateTable[bq][bd];
+	if (*entry == (signed char)-1)
+		*entry = (char)DXL_ReserveBlitter();
+	return *entry;
+}
+
+int DXL_RegisterBlitter(DXL_BLIT_FORMAT dFormat, DXL_INTERNAL_FORMAT sFormat, 
+						blitFunc blit, blitFunc setup, blitFunc exit)
+{
+	/* Store the blit/setup/exit entry points for one (destination, source) format pair.
+	 * Returns 0, or -1 when either index is negative, dFormat is unreserved, or sFormat is too large. */
+	if ((int)dFormat < 0 || (int)sFormat < 0 || (dFormat >= nextBlitter) || (sFormat >= DXL_MAX_IFORMATS))
+		return -1; /*DXL_INVALID_BLIT_FORMAT;*/
+	blitTable[dFormat][sFormat].setup = setup;
+	blitTable[dFormat][sFormat].exit = exit;
+	blitTable[dFormat][sFormat].blit = blit;
+	return 0; /*DXL_OK;*/
+}
+
+DXL_INTERNAL_FORMAT DXL_GetXImageInternalFormat(DXL_XIMAGE_HANDLE xImage,
+												DXL_VSCREEN_HANDLE vScreen)
+{
+	/*
+	 * Ask the image's own internalFormat callback first.  When it answers
+	 * DXL_NULL_IFORMAT, fall back to the format registered for the image's FOURCC.
+	 */
+	int fromImage = xImage->internalFormat(xImage,vScreen);
+
+	if (fromImage != DXL_NULL_IFORMAT)
+		return (DXL_INTERNAL_FORMAT) fromImage;
+
+	return (DXL_INTERNAL_FORMAT) dxl_GetFOURCCInternalFormat(DXL_GetXImageFOURCC(xImage));
+}
+
+DXL_INTERNAL_FORMAT DXL_GetVScreenInternalFormat(DXL_VSCREEN_HANDLE vScreen)
+{	/* Pick the line format that matches the screen's bit depth; depths not listed yield -1. */
+	switch (vScreen->bd) {
+		case DXRGB16:     return DXL_LINE16;
+		case DXRGB8:
+		case DXHALFTONE8: return DXL_LINE8;
+		default:          return (DXL_INTERNAL_FORMAT) -1;
+	}
+}
+
+blitFunc DXL_GetVBlitFunc(DXL_VSCREEN_HANDLE src,DXL_VSCREEN_HANDLE dst)
+{	/* blit entry for copying screen src onto screen dst; neither table index is range-checked */
+	DXL_BLIT_FORMAT row = DXL_GetVScreenBlitFormat(dst);
+	return blitTable[row][DXL_GetVScreenInternalFormat(src)].blit;
+}
+
+blitFunc DXL_GetVBlitSetupFunc(DXL_VSCREEN_HANDLE src,DXL_VSCREEN_HANDLE dst)
+{	/* setup entry for copying screen src onto screen dst; neither table index is range-checked */
+	DXL_BLIT_FORMAT row = DXL_GetVScreenBlitFormat(dst);
+	return blitTable[row][DXL_GetVScreenInternalFormat(src)].setup;
+}
+
+blitFunc DXL_GetBlitFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{   // Look up the blit routine for drawing xImage onto vScreen.
+	DXL_BLIT_FORMAT i = DXL_GetVScreenBlitFormat(vScreen);	// first blitTable index (destination)
+	DXL_INTERNAL_FORMAT j = DXL_GetXImageInternalFormat(xImage,vScreen);	// second blitTable index (source)
+	// (blitFunc)-1 is returned when the screen resolves to blit format -1
+	if(i == -1)
+		return (blitFunc)-1;
+	// an image whose internal format is DXL_NULL_IFORMAT gets the do-nothing blitter
+	if(j == DXL_NULL_IFORMAT) 
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+		return (blitFunc)nullBlitter;
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+	else
+		return blitTable[i][j].blit;
+}
+
+void *DXL_GetBlitSetupFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{	/* setup entry for drawing xImage onto vScreen; neither table index is range-checked */
+	DXL_BLIT_FORMAT row = DXL_GetVScreenBlitFormat(vScreen);
+	return blitTable[row][DXL_GetXImageInternalFormat(xImage,vScreen)].setup;
+}
+
+void *DXL_GetBlitExitFunc(DXL_XIMAGE_HANDLE xImage,DXL_VSCREEN_HANDLE vScreen)
+{	/* exit entry for drawing xImage onto vScreen; neither table index is range-checked */
+	DXL_BLIT_FORMAT row = DXL_GetVScreenBlitFormat(vScreen);
+	return blitTable[row][DXL_GetXImageInternalFormat(xImage,vScreen)].exit;
+}
+ 
+DXL_BLIT_FORMAT DXL_GetVScreenBlitFormat(DXL_VSCREEN_HANDLE vScreen)
+{
+	/*
+	 * A screen whose blitFormat is not -1 carries an explicit format, which wins.
+	 * Otherwise the format is looked up from the (blit quality, bit depth) pair.
+	 */
+	if (vScreen->blitFormat == (signed char)-1)
+		return blitTranslateTable[DXL_GetVScreenBlitQuality(vScreen)][vScreen->bd];
+
+	return vScreen->blitFormat;
+}
+
+void resetBlitters(void)
+{    // Return the blitter registry to its empty state.
+	nextBlitter = 0;	// NOTE(review): the static initializer is 1, not 0 - confirm which start value is intended
+	// presumably memset semantics: every byte becomes 0xFF, so translate-table entries compare equal to (signed char)-1
+	duck_memset(blitTable,-1,sizeof(blitTable));
+	duck_memset(blitTranslateTable,-1,sizeof(blitTranslateTable));
+}
+
+
+
+int DXL_CheckFCCToVScreenFormat(unsigned long FCC,enum BITDEPTH format, enum BLITQUALITY bq)
+{
+	/* Probe whether an image of type FCC can be blitted to a screen of the given depth
+	 * and quality, using a throwaway image and screen.  The screen address is a
+	 * placeholder that this function itself never dereferences. */
+	DXL_XIMAGE_HANDLE probeImage;
+	DXL_VSCREEN_HANDLE probeScreen;
+	int verdict;
+
+	probeImage = DXL_CreateXImageOfType(NULL,FCC);
+	assert(probeImage != NULL);
+	if (probeImage == NULL)
+		return DXL_INVALID_BLIT;
+
+	verdict = DXL_INVALID_BLIT;
+	probeScreen = DXL_CreateVScreen((unsigned char *)0xDEADBEEF, format, 1280,480);
+	assert(probeScreen != NULL);
+	if (probeScreen != NULL) {
+		probeScreen->bq = bq;
+		verdict = DXL_CheckdxImageToVScreen(probeImage, probeScreen);
+		DXL_DestroyVScreen(probeScreen);
+	}
+	DXL_DestroyXImage(probeImage);
+	return verdict;
+}
+
+int DXL_CheckVScreenXImageBlit(DXL_VSCREEN_HANDLE dst,DXL_XIMAGE_HANDLE src)
+{	// Decide whether src can be blitted to dst: DXL_OK, DXL_INVALID_BLIT, -1, or the verify callback's result.
+    validate(src);
+
+    if (!src->dx)
+        return -1;
+	// a NULL destination is rejected with the same -1
+	if (!dst) return -1;
+	// an image that supplies its own verify callback decides the outcome
+	if (src->verify != NULL)
+		return(src->verify(src,dst));
+
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+    if((void *)(src->internalFormat) != NULL){
+        dst->blitter = DXL_GetBlitFunc(src, dst); 	// side effect: the lookup result is stored on dst
+
+        if ((dst->blitter !=  (void *) -1) && (dst->blitter !=  nullBlitter))
+			return DXL_OK;
+    }
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+	return DXL_INVALID_BLIT;
+}
+
+int DXL_CheckVScreenBlit(DXL_VSCREEN_HANDLE dst,unsigned long fourcc)
+{	// Runs the FOURCC probe with dst's own bit depth and blit quality; dst is dereferenced without a NULL check.
+	return DXL_CheckFCCToVScreenFormat(fourcc,dst->bd, dst->bq);
+}
+
+int DXL_CheckdxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{	// Same check as DXL_CheckVScreenXImageBlit, with the two arguments in the opposite order.
+	return DXL_CheckVScreenXImageBlit( dst, src);
+}
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c
new file mode 100644
index 00000000..3e004911
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxlvinfd.c
@@ -0,0 +1,76 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+/*/////////////////////////////////////////////////////////////////////////
+//
+// dxlvinfd.c
+//
+// Purpose: A list of helper functions to the quick time codec code
+//
+///////////////////////////////////////////////////////////////////////*/
+
+//#include <stdio.h>
+//#include <math.h>
+//#include <string.h>
+#include "dxl_main.h"
+
+struct DisplaySetting {	/* five 32-bit-style pattern words; NOTE(review): their meaning is not documented in this file */
+	long dotOne;
+	long dotTwo;
+	long dotThree;
+	long dotFour;
+	long dotFive;
+};
+/* Preset patterns, apparently one per pixel format. None of them is referenced elsewhere in this file. */
+static struct DisplaySetting id_RGB24 ={0x00000000,0x00000000,0xffffffff,0x00000000,0xffffffff}; 
+static struct DisplaySetting id_RGB32 ={0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff}; 
+static struct DisplaySetting id_RGB555={0xffffffff,0x00000000,0xffffffff,0x00000000,0xffffffff}; 
+static struct DisplaySetting id_RGB565={0xffffffff,0x00000000,0x00000000,0x00000000,0xffffffff}; 
+static struct DisplaySetting id_UYVY  ={0xff80ff80,0x00800080,0xff80ff80,0x00800080,0x00800080}; 
+static struct DisplaySetting id_YUY2  ={0x80ff80ff,0x80008000,0x80008000,0x80008000,0x80008000}; 
+static struct DisplaySetting id_YVU9  ={0x80008000,0x80008000,0xff80ff80,0xff80ff80,0xff80ff80}; 
+static struct DisplaySetting id_RGB8  ={0x00000000,0xffffffff,0x00000000,0xffffffff,0x00000000}; 
+
+/* Patterns named after stretch modes (by name only; usage is not visible here). */
+static struct DisplaySetting id_STRETCH 		={0x00000000,0xffffffff,0x00000000,0x00000000,0x00000000}; 
+static struct DisplaySetting id_STRETCH_BRIGHT ={0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000}; 
+static struct DisplaySetting id_STRETCH_SAME   ={0xffffffff,0x00000000,0x00000000,0x00000000,0x00000000}; 
+/* id_KEY differs from id_NOTKEY only in dotThree. */
+static struct DisplaySetting id_KEY 	= 	{0x00000000,0x00000000,0xffffffff,0x00000000,0x00000000}; 
+static struct DisplaySetting id_NOTKEY 	=	{0x00000000,0x00000000,0x00000000,0x00000000,0x00000000}; 
+/* All-zero pattern. */
+static struct DisplaySetting id_CLEAR_ME 	=	{0x00000000,0x00000000,0x00000000,0x00000000,0x00000000}; 
+
+
+static void OrSettings(struct DisplaySetting *src1,struct DisplaySetting *src2, struct DisplaySetting *dst)
+{	/* Write the field-by-field bitwise OR of *src1 and *src2 into *dst; a NULL dst makes this a no-op. */
+	if (dst == NULL)
+		return;
+	dst->dotOne   = src1->dotOne   | src2->dotOne;
+	dst->dotTwo   = src1->dotTwo   | src2->dotTwo;
+	dst->dotThree = src1->dotThree | src2->dotThree;
+	dst->dotFour  = src1->dotFour  | src2->dotFour;
+	dst->dotFive  = src1->dotFive  | src2->dotFive;
+}
+
+
+static void SetSettings(struct DisplaySetting *dst,struct DisplaySetting *src)
+{
+	/*
+	 * Copy every field of *src into *dst.  A NULL dst makes this a no-op;
+	 * src itself is not checked.
+	 */
+	if (dst == NULL)
+		return;
+	*dst = *src;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c
new file mode 100644
index 00000000..4126daf8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/dxv_init.c
@@ -0,0 +1,43 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"       
+
+
+#if defined(DISPLAYDIB)	/* the globals below exist only when DISPLAYDIB is defined and non-zero */
+#if DISPLAYDIB
+int globalIsDIB,globalDIBWidth,globalDIBHeight;
+#endif
+#endif    
+
+extern int preallocVScreens(int lmaxScreens);	/* called from DXL_InitVideo below */
+extern void freeVScreens(void);	/* called from DXL_ExitVideo below */
+
+
+int DXL_InitVideo(int lmaxScreens,int lmaxImages)
+{
+	/* Reset the blitter registry, then set up the optional screen pool.
+	 * Returns the result of preallocVScreens, so a pool allocation failure
+	 * reaches the caller instead of being reported as DXL_OK. */
+	(void)lmaxImages; // Not Used;
+/*    registerDuckBlitters(); */
+	resetBlitters();
+//	DXL_RegisterXImage(NULL,0L,(DXL_INTERNAL_FORMAT ) 0);
+	return preallocVScreens(lmaxScreens);
+}
+
+
+void DXL_ExitVideo(void)
+{	// Shutdown counterpart of DXL_InitVideo: its only action is to call freeVScreens.
+    freeVScreens();
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c
new file mode 100644
index 00000000..704040b7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/vscreen.c
@@ -0,0 +1,175 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h"                                            
+#include <assert.h>
+/***********************************************/
+
+int DXL_GetVScreenSizeOfPixel(DXL_VSCREEN_HANDLE vSc)
+{
+	/*
+	 * Bytes occupied by one pixel at the screen's bit depth.
+	 * Depths that are not listed below produce -1.
+	 */
+	int bytes;
+
+	switch (vSc->bd) {
+		case DXRGB8: case DXHALFTONE8: case DXRGB8VESA:
+			bytes = 1; break;
+		case DXRGB16_555: case DXRGB16_565: case DXRGB16VESA:
+		case DXYUY2: case DXUYVY:
+			bytes = 2; break;
+		case DXRGB24: bytes = 3; break;
+		case DXRGB32: bytes = 4; break;
+		default:      bytes = -1; break;
+	}
+
+	return bytes;
+}
+
+void DXL_DestroyVScreen(DXL_VSCREEN_HANDLE dst)
+{	/* Mark dst unused and clear its address; free it when its allocated flag is set. NULL is ignored. */
+	if (dst == NULL)
+		return;
+
+	dst->dkFlags.inUse = 0;
+	dst->addr = NULL;
+	if (dst->dkFlags.allocated) duck_free(dst);
+}
+
+int DXL_AlterVScreen(DXL_VSCREEN_HANDLE dst, unsigned char *addr,enum BITDEPTH bd, int p,int h) 
+{
+	/* Update only the properties whose argument is not its "keep current" sentinel. */
+	validate(dst);
+	if (addr)
+		dst->addr = addr;		/* NULL keeps the current address */
+	if (bd != DXRGBNULL)
+		dst->bd = bd;			/* DXRGBNULL keeps the current depth */
+	if (p != -1)
+		dst->pitch = (short) p;		/* -1 keeps the current pitch */
+	if (h != -1)
+		dst->height = (short) h;	/* -1 keeps the current height */
+	return DXL_OK;
+}
+
+int DXL_AlterVScreenView(DXL_VSCREEN_HANDLE dst,int x,int y,int w,int h)
+{
+	/* Change the view rectangle; a negative argument leaves that field as it is. */
+	validate(dst);
+
+	if (x >= 0) dst->viewX = (short)x;
+	if (y >= 0) dst->viewY = (short)y;
+	if (w >= 0) dst->viewW = (short)w;
+	if (h >= 0) dst->viewH = (short)h;
+	return DXL_OK;
+}
+        
+DXL_VSCREEN_HANDLE DXL_CreateVScreen(unsigned char *addr, enum BITDEPTH bd, short p,short h)
+{	// Obtain a screen from vScreenCreate and initialise it; returns NULL when none is available.
+#pragma warning(disable: 4210) // nonstandard extension used : function given file scope
+	DXL_VSCREEN_HANDLE vScreenCreate(void);
+#pragma warning(default: 4210) // nonstandard extension used : function given file scope
+
+	DXL_VSCREEN_HANDLE nScreen = vScreenCreate();
+
+	if (!nScreen) return NULL;
+
+	nScreen->dkFlags.inUse = 1;
+	nScreen->blitFormat = -1;	// -1 = no explicit blit format set on this screen
+	// addr, bd, p and h are applied through DXL_AlterVScreen; its return value is not checked
+	DXL_AlterVScreen(nScreen, addr, bd, p, h);
+
+	nScreen->bx = nScreen->by = 0;
+	nScreen->bAddr = NULL;
+	nScreen->bq = DXBLIT_SAME;
+	// NOTE(review): viewX/viewY/viewW/viewH are not assigned here
+	return nScreen;
+}
+
+int DXL_GetVScreenView(DXL_VSCREEN_HANDLE dst,int *x,int *y,int *w,int *h)
+{	// Copy the screen's view rectangle out through x, y, w and h; returns DXL_OK.
+	validate(dst);
+	// the four out-pointers are written unconditionally, so none of them may be NULL
+	*x = dst->viewX;
+	*y = dst->viewY;
+	*w = dst->viewW;
+	*h = dst->viewH;
+
+	return DXL_OK;
+}
+
+
+
+int DXL_GetVScreenAttributes(
+    DXL_VSCREEN_HANDLE vScreen,
+    void **addr,
+    dxvBlitQuality *bq,
+    dxvBitDepth *bd,
+    short *pitch,
+    short *height
+	)
+{
+	/* Copy each screen attribute into its out-parameter.  A NULL out-parameter
+	 * is skipped, and trips assert(0) in builds where asserts are enabled. */
+	/* buffer address */
+	if (addr == NULL)
+	{
+		assert(0);
+	}
+	else
+	{
+		*addr = (void *) (vScreen->addr);
+	}
+
+	/* blit quality */
+	if (bq == NULL)
+	{
+		assert(0);
+	}
+	else
+	{
+		*bq = vScreen->bq;
+	}
+
+	/* bit depth */
+	if (bd == NULL)
+	{
+		assert(0);
+	}
+	else
+	{
+		*bd = vScreen->bd;
+	}
+
+	/* pitch */
+	if (pitch == NULL)
+	{
+		assert(0);
+	}
+	else
+	{
+		*pitch = vScreen->pitch;
+	}
+
+	/* height */
+	if (height == NULL)
+	{
+		assert(0);
+	}
+	else
+	{
+		*height = vScreen->height;
+	}
+
+	return 0;
+}  /* end get attributes */
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c b/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c
new file mode 100644
index 00000000..c4e9cffc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/generic/ximage.c
@@ -0,0 +1,353 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "duck_mem.h"
+#include "dxl_main.h" 
+
+
+
+/*
+ * Record the compressed size of the data attached to an ximage.
+ * Stores temp in src->fSize and returns temp.  src is not checked.
+ */
+int DXL_SetXImageCSize(DXL_XIMAGE_HANDLE src, int temp)
+{
+	src->fSize = temp;
+	return temp;
+}
+
+/*
+ * Destroy an ximage.
+ *
+ * A NULL handle is ignored.  Otherwise the end of the profiling window is
+ * stamped into src->prof.profileEnd and, if the image is marked in use, its
+ * own destroy() callback is invoked.
+ *
+ * The original body dereferenced src before the NULL check and computed two
+ * profiling ratios that were never used; one of them divided by
+ * prof.frameCount, which is zero for an image that never decoded a frame.
+ * Those dead computations are removed.
+ */
+void DXL_DestroyXImage(DXL_XIMAGE_HANDLE src)
+{
+#pragma warning(disable:4210) //nonstandard extension used : function given file scope
+	void DXL_AccurateTime(UINT64* time);
+#pragma warning(default:4210) //nonstandard extension used : function given file scope
+
+	/* must come first: everything below dereferences src */
+	if (src == NULL)
+		return;
+
+	DXL_AccurateTime(&src->prof.profileEnd);
+
+	if (src->dkFlags.inUse)
+	{
+		src->destroy(src);
+	}
+}
+
+/*
+ * Reposition an ximage.
+ *
+ * With mode == DXL_RELATIVE, (x, y) is added to the current position; with
+ * any other mode the position becomes exactly (x, y).  Coordinates are
+ * narrowed to short.  Returns DXL_OK.
+ */
+int DXL_MoveXImage(DXL_XIMAGE_HANDLE src,enum OFFSETXY mode, int x,int y)
+{
+	validate(src);
+
+	if (mode == DXL_RELATIVE)
+	{
+		src->x = (short)(src->x + x);
+		src->y = (short)(src->y + y);
+	}
+	else
+	{
+		/* absolute placement: offset from the origin */
+		src->x = (short)(0 + x);
+		src->y = (short)(0 + y);
+	}
+
+	return DXL_OK;
+}
+
+/*
+ * Point an ximage at a new block of compressed data.
+ *
+ * The DXed flag is cleared.  When data is NULL the function returns DXL_OK
+ * right away; otherwise the result of the image's seedData() callback is
+ * returned, or 0 when no such callback is installed.
+ */
+int DXL_AlterXImageData(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+	validate(src);
+
+	src->addr = data;
+	src->dkFlags.DXed = 0;
+
+	if (data == NULL)
+	{
+		return DXL_OK;
+	}
+
+	return src->seedData ? src->seedData(src) : 0;
+}
+
+/*
+ * Report the position and size of an ximage.
+ *
+ * x, y, w, h receive src->x, src->y, src->w and src->h.  Any of the output
+ * pointers may be NULL, in which case that value is not reported
+ * (previously a NULL output pointer was written through).
+ * Returns DXL_OK.
+ */
+int DXL_GetXImageXYWH(DXL_XIMAGE_HANDLE src,int *x,int *y,int *w, int *h)
+{
+	validate(src);
+
+	if (x) *x = src->x;
+	if (y) *y = src->y;
+	if (w) *w = src->w;
+	if (h) *h = src->h;
+
+	return DXL_OK;
+}
+
+/*
+ * Return the keyFrame flag of an ximage (src->dkFlags.keyFrame).
+ */
+int DXL_IsXImageKeyFrame(DXL_XIMAGE_HANDLE src)
+{
+	validate(src);
+	return src->dkFlags.keyFrame;
+}
+
+/*
+ * Decompressor registry: two parallel tables indexed by slot number.
+ * creator[i] is the factory registered for fourCC[i]; a slot whose fourCC
+ * entry is zero is treated as free by DXL_RegisterXImage.
+ */
+/* typedef DXL_XIMAGE_HANDLE (*createFunc)(unsigned char *data);   */
+#define NUM_ALG 16
+static createFunc creator[NUM_ALG];
+static unsigned long fourCC[NUM_ALG];
+
+/*
+ * Create an ximage "blind": offer the data to each registered creator in
+ * slot order until one of them returns an image.
+ *
+ * The scan stops at the first free slot (fourCC entry of zero).  On success
+ * the image is marked in use, pointed at data, and remembers the creator
+ * that produced it.  Returns NULL when no creator accepted the data.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImage(unsigned char *data)
+{
+	int slot = 0;
+	DXL_XIMAGE_HANDLE image = NULL;
+
+	while (slot < NUM_ALG && fourCC[slot] != 0)
+	{
+		image = creator[slot](data);
+		if (image != NULL)
+			break;
+		slot++;
+	}
+
+	if (image == NULL)
+		return NULL;
+
+	image->dkFlags.inUse = 1;
+	image->addr = data;
+	image->create = (struct tXImage *(__cdecl *)(void *))creator[slot];
+
+	return image;
+}
+
+
+
+
+/*
+ * Create an ximage using the creator(s) registered under a given fourcc.
+ *
+ * data  compressed data handed to the creator and stored in the image
+ * type  fourcc to look up in the registry
+ *
+ * Every slot whose fourcc equals type is tried until a creator returns an
+ * image.  Slots without a creator are skipped: a type of 0 equals the
+ * fourcc of every free slot, and the original code would then have called
+ * a NULL function pointer.
+ * On success the image is marked in use and its profiling counters are
+ * reset.  Returns NULL when nothing matched.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(unsigned char *data,unsigned long type)
+{
+	int i;
+	DXL_XIMAGE_HANDLE nImage = NULL;
+
+	for(i = 0; i < NUM_ALG; i++){
+		if (fourCC[i] != type || creator[i] == NULL)
+			continue;
+
+		nImage = creator[i](data);
+		if ( nImage )
+			break;
+	}
+
+	if (nImage) 
+	{
+		nImage->dkFlags.inUse = 1;
+		nImage->addr = data;
+		nImage->prof.profileStart = 0;
+		nImage->prof.dxClocks = 0;
+		nImage->prof.frameCount = 0;
+	}
+
+	return nImage;
+}
+
+
+
+
+/*
+ * Create an ximage for a given fourcc, handing the creator a bitmap header
+ * block instead of the compressed data.
+ *
+ * data        compressed data stored in the resulting image
+ * fcc         fourcc to look up in the registry
+ * srcAndDest  passed (cast to unsigned char *) to the creator
+ *
+ * Slots without a creator are skipped: an fcc of 0 equals the fourcc of
+ * every free slot, and the original code would then have called a NULL
+ * function pointer.
+ * On success the image is marked in use and its profile pack is zeroed.
+ * Returns NULL when nothing matched.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageFromBMI(
+	unsigned char *data, 
+	unsigned long fcc, 
+	DK_BITMAPINFOHEADER *srcAndDest  /* There will always be two Obiwan */
+	)
+{
+	int i;
+	DXL_XIMAGE_HANDLE nImage = NULL;
+
+	for(i = 0; i < NUM_ALG; i++){
+		if (fourCC[i] != fcc || creator[i] == NULL)
+			continue;
+
+		nImage = creator[i]((unsigned char *) srcAndDest);
+		if ( nImage )
+			break;
+	}
+
+	if (nImage) 
+	{
+		nImage->dkFlags.inUse = 1;
+		nImage->addr = data;
+		duck_memset(&nImage->prof,0,sizeof(DXL_PROFILEPACK)); /* probably redundant */
+	}
+
+	return nImage;
+}
+
+
+
+
+
+
+/*
+ * Register a creator function under a fourcc.
+ *
+ * A fourcc of 0 is a reset request: both registry tables are cleared and 0
+ * is returned.  Otherwise the first free slot is claimed, the internal
+ * format is registered for that slot, and the slot index is returned.
+ * Returns -1 when all NUM_ALG slots are occupied.
+ */
+int DXL_RegisterXImage(createFunc myCreator,unsigned long fourcc, DXL_INTERNAL_FORMAT xFormat)
+{
+	int slot;
+
+	if (fourcc == 0)
+	{
+		duck_memset(creator,0,sizeof(creator));
+		duck_memset(fourCC,0,sizeof(fourCC));
+		return 0;
+	}
+
+	for (slot = 0; slot < NUM_ALG; slot++)
+	{
+		if (fourCC[slot] != 0)
+			continue;	/* occupied */
+
+		creator[slot] = myCreator;
+		fourCC[slot] = fourcc;
+		dxl_RegisterInternalFormat(slot, xFormat);
+		return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Expose the registry's fourcc table.
+ * The returned pointer addresses the internal array of NUM_ALG entries
+ * itself (not a copy); unused entries hold zero.
+ */
+unsigned long *DXL_GetFourCCList(void)
+{
+	return fourCC;
+}
+
+
+/*
+ * Look up the registry slot that holds a given fourcc.
+ * Returns the index of the first matching slot, or -1 if there is none.
+ */
+int dxl_GetAlgHandle(unsigned long fourcc)
+{
+	int slot = 0;
+
+	while (slot < NUM_ALG)
+	{
+		if (fourCC[slot] == fourcc)
+		{
+			return slot;
+		}
+		slot++;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Find the fourcc of an ximage by matching its create pointer against the
+ * registered creator functions.
+ * Returns the fourcc of the first slot whose creator matches, or 0.
+ */
+unsigned long DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src)
+{
+	int slot = 0;
+
+	while (slot < NUM_ALG)
+	{
+		if (creator[slot] == (createFunc)src->create)
+		{
+			return fourCC[slot];
+		}
+		slot++;
+	}
+
+	return 0L;
+}
+
+/*
+ * Compute the address inside a vscreen at which an ximage starts writing.
+ *
+ * The image position is offset by the vscreen view origin; the column is
+ * scaled by DXL_GetVScreenSizeOfPixel(dst) and the row by the pitch.
+ * Neither handle is checked.
+ */
+unsigned char *DXL_GetDestAddress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+	int row = dst->viewY + src->y;
+	int col = dst->viewX + src->x;
+	unsigned char *base = (unsigned char *) dst->addr;
+
+	return base + ((col * DXL_GetVScreenSizeOfPixel(dst)) + (row * dst->pitch));
+}
+
+/*
+ * Decompress an ximage onto a vscreen via the image's dx() callback.
+ *
+ * Returns -1 when the image has no dx callback, DXL_INVALID_BLIT when the
+ * blitter lookup yields (void *)-1, and otherwise whatever dx() returns.
+ * The call to dx() is timed with DXL_AccurateTime and accumulated in
+ * src->prof (dxClocks, frameCount; profileStart is set on the first call).
+ */
+int DXL_dxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+	int dxvCode; 
+	validate(src);
+
+	if (!src->dx)
+		return -1;
+
+#pragma warning(disable:4054) // typecast from function pointer to data pointer
+	/* blitters are only looked up when a destination is given and the
+	   image has an internal format */
+	if(dst && ((void *)(src->internalFormat) != NULL)) {
+		/* get your hamdy damdy((c)1997 Duck North) registered blitter setup */
+		dst->blitSetup = DXL_GetBlitSetupFunc(src,dst);
+		dst->blitExit = DXL_GetBlitExitFunc(src,dst);
+		dst->blitter = DXL_GetBlitFunc(src, dst); 
+
+		if (dst->blitter ==  (void *) -1)
+			return DXL_INVALID_BLIT;
+	}
+#pragma warning(default:4054) // typecast from function pointer to data pointer
+
+	//	if (!src->addr)
+	//		return 1;
+
+#if 1  /* we want to profile ... this should constitute no performance hit to profile */
+	{
+		UINT64 timerStart;
+		UINT64 timerEnd;
+
+		void DXL_AccurateTime(UINT64* time);
+		DXL_AccurateTime(&timerStart);
+
+		/* a profileStart of 0 means "no frame timed yet" */
+		if (src->prof.profileStart == 0)
+			src->prof.profileStart = timerStart;	
+		dxvCode = src->dx(src,dst);
+		DXL_AccurateTime(&timerEnd);
+		src->prof.dxClocks += (timerEnd - timerStart);
+		src->prof.frameCount += 1;
+	}
+#else
+	dxvCode = src->dx(src,dst);
+#endif
+
+
+	return dxvCode;
+}
+
+
+/*
+ * Ask an ximage for its compressed size through its GetXImageCSize
+ * callback.
+ * Returns -1 for a NULL handle, -2 when the image has no such callback,
+ * and the callback's result otherwise.
+ */
+long DXL_GetXImageCSize(DXL_XIMAGE_HANDLE src)
+{
+	if (src == NULL)
+	{
+		return -1;
+	}
+
+	if (src->GetXImageCSize)
+	{
+		return(src->GetXImageCSize(src));
+	}
+
+	return -2;
+}
+
+/***********************************************/
+
+/*
+ * Create-or-reconfigure entry point for ximages.
+ *
+ * When src is NULL an image is created first: by fourcc if type is
+ * non-zero, then blind from the data if that produced nothing.  NULL is
+ * returned if both attempts fail.
+ * The image's recreate() callback is then invoked with the remaining
+ * arguments and its result returned; an image without a recreate callback
+ * is returned unchanged.
+ */
+DXL_XIMAGE_HANDLE DXL_AlterXImage(DXL_XIMAGE_HANDLE src,
+	unsigned char *data,int type,
+	enum BITDEPTH bitDepth,int width,int height)
+{
+	if (src == NULL)
+	{
+		/* a non-zero type is tried as the fourcc first */
+		if (type)
+			src = DXL_CreateXImageOfType(data,type);
+
+		/* fall back to probing the data with every registered creator */
+		if (src == NULL)
+			src = DXL_CreateXImage(data);
+
+		if (src == NULL)
+			return NULL;
+	}
+
+	/* no way to recreate: assume create was good enough */
+	return src->recreate
+		? src->recreate(src,data,type,bitDepth,width,height)
+		: src;
+}
+
+
+/*
+ * Forward a command/parameter pair to an ximage's setParameter callback.
+ *
+ * A NULL handle, or an image without a setParameter callback, is ignored
+ * (the original code called through the pointer unconditionally), in line
+ * with the callback check done by DXL_GetXImageCSize.
+ */
+void DXL_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned long Parameter )
+{
+	if (src == NULL || !src->setParameter)
+		return;
+
+	src->setParameter(src,Command,Parameter);
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c b/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c
new file mode 100644
index 00000000..a4bf4098
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/utils/dxv_utils.c
@@ -0,0 +1,40 @@
+#include "dxl_main.h"
+#include "duck_dxl.h"
+
+
+/*-------------------------------------------------------------------
+
+File : dxv_util.c  
+
+Any extra functions whose lifespan/utility might be "questionable".
+Functions that are not part of the "core", but yet are not really 
+anything but Dxv specific.
+
+-------------------------------------------------------------------*/
+
+
+/* This function used during the development of ICM wrapper */
+/*----------------------------------------------------------*/
+/*
+ * Format the registered fourccs as text, one "fourCC[i] = ABCD" line per
+ * entry, into buf.
+ *
+ * buf  caller-supplied buffer; each line is at most 18 characters, so
+ *      16 * 18 + 1 = 289 bytes always suffice.
+ *
+ * Returns buf (NULL if buf is NULL).  An empty registry yields "".
+ *
+ * The original loop tested the list pointer itself (never NULL), never
+ * advanced i, and restarted at buf on every pass, so it never terminated
+ * and walked off the end of the table.
+ */
+char *DXL_DumpRegistry(char *buf);
+char *DXL_DumpRegistry(char *buf)
+{
+        int sprintf( char *buffer, const char *format, ...);
+
+        /* must match NUM_ALG, the size of the table in ximage.c */
+        enum { DUMP_MAX_FOURCC = 16 };
+
+        unsigned long *g = DXL_GetFourCCList();
+        char *out = buf;
+        int i;
+
+        if (buf == 0)
+            return buf;
+
+        buf[0] = '\0';
+
+        if (g == 0)
+            return buf;
+
+        /* a zero entry marks the first unused slot */
+        for (i = 0; i < DUMP_MAX_FOURCC && g[i] != 0; i++)
+        {
+            /* %c expects int, so narrow each byte explicitly */
+            out += sprintf(out,"fourCC[%d] = %c%c%c%c\n",i,
+                (int)((g[i] >> 24) & 0xFF),
+                (int)((g[i] >> 16) & 0xFF),
+                (int)((g[i] >> 8) & 0xFF),
+                (int)(g[i] & 0xFF)
+            );
+        }
+
+        return buf;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c
new file mode 100644
index 00000000..7cb5b11f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dkprof.c
@@ -0,0 +1,104 @@
+/***********************************************\
+??? dkprof.c
+? profiling functions
+? also see perf.asm and pentium.asm
+\***********************************************/
+#include "duck_mem.h"
+#include "dkprof.h"
+
+/* number of entries in pSectionArray */
+#define MAX_PROFILE 15
+
+/* non-zero between tscInit() and tscUninit(); the tsc* calls are no-ops
+   otherwise */
+int profStarted = 0;
+ 
+/* identifies a profiled section; used directly as an index into
+   pSectionArray */
+enum PROFILESECTION { 
+    LOSSLESSDX = 0, 
+    PLANARDX, 
+    BLITME,
+    RD_FRAME_DESC,
+    RASTER_CONFIG,
+    DELTA_TABLES,
+    HANDLER_CONFIG, 
+    STRING_DECODER,
+    STRING_DATA,
+    TSC0,
+    TSC1,
+    TSC2,
+    TSC3
+};
+
+/* per-section accumulators, one per PROFILESECTION value */
+PSECTION pSectionArray[MAX_PROFILE];
+
+/* defined elsewhere (the file header points at perf.asm / pentium.asm) */
+unsigned long pentiumKiloCycles(void);
+
+#if 1
+/***********************************************/
+/*
+ * Start timing a section: store the current pentiumKiloCycles() reading
+ * in the section's pkc1.  Does nothing unless profiling has been started.
+ */
+void tscStart(enum PROFILESECTION sel) 
+{
+    if (!profStarted)
+        return;
+
+    pSectionArray[sel].pkc1 = pentiumKiloCycles();
+}
+
+/***********************************************/
+/*
+ * Stop timing a section and fold the elapsed count into its statistics:
+ * pkc2 becomes the elapsed value, which is added to avgKc (a running sum
+ * at this point), counted in numTimes and tracked in minKc / maxKc.
+ * Does nothing unless profiling has been started.
+ */
+void tscEnd(enum PROFILESECTION sel) 
+{
+    PSECTION *sec;
+
+    if (!profStarted)
+        return;
+
+    sec = &pSectionArray[sel];
+
+    sec->pkc2 = pentiumKiloCycles();
+    sec->pkc2 = (sec->pkc2 - sec->pkc1);
+    sec->avgKc += sec->pkc2;
+    sec->numTimes += 1;
+
+    if (sec->pkc2 < sec->minKc)
+    {
+        sec->minKc = sec->pkc2;
+    }
+
+    if (sec->pkc2 > sec->maxKc)
+    {
+        sec->maxKc = sec->pkc2;
+    }
+}
+
+/***********************************************/
+/*
+ * Reset every profiling section (all fields zero, minKc at 0xffffffff so
+ * the first sample becomes the minimum) and switch profiling on.
+ */
+void tscInit() 
+{
+    int i = 0;
+
+    while (i < MAX_PROFILE)
+    {
+        PSECTION *sec = &pSectionArray[i];
+
+        duck_memset(sec,0,sizeof(PSECTION));
+        sec->minKc = 0xffffffff;
+        i++;
+    }
+
+    profStarted = 1;
+}
+
+/***********************************************/
+/*
+ * Switch profiling off; collected statistics are left in place.
+ */
+void tscUninit() 
+{
+    profStarted = 0;
+}
+
+/***********************************************/
+/*
+ * Harvest the statistics of one section.
+ *
+ * cnt  receives the number of samples taken (0 when profiling is off or
+ *      the section has no samples)
+ *
+ * Returns the average (accumulated sum divided by the sample count), or 0
+ * when there is nothing to report.  A section that had samples is reset
+ * afterwards.
+ */
+unsigned long tscProcessCounts(unsigned long *cnt, enum PROFILESECTION sel) 
+{
+    unsigned long rv = 0;
+    PSECTION *sec;
+
+    *cnt = 0;
+
+    if (!profStarted)
+        return (rv);
+
+    sec = &pSectionArray[sel];
+
+    if (!sec->numTimes)
+        return (rv);
+
+    /* turn the running sum into the average, in place */
+    sec->avgKc /= sec->numTimes;
+    rv = sec->avgKc;
+    *cnt = sec->numTimes;
+
+    /* reset all vars */
+    duck_memset(sec,0,sizeof(PSECTION));
+    sec->minKc = 0xffffffff;
+
+    return (rv);
+}
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c
new file mode 100644
index 00000000..aa9e42a1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxAccurateTime.c
@@ -0,0 +1,30 @@
+#include "dkpltfrm.h"
+
+
+/*
+ * Read the CPU time-stamp counter into *bigOne.
+ * Uses MSVC-style inline assembly: rdtsc leaves the low half in EAX and
+ * the high half in EDX, which are stored and then combined into 64 bits.
+ */
+static void readTSC(UINT64 *bigOne)
+{
+	unsigned long xhigh;
+	unsigned long xlow;
+	
+	__asm 
+	{
+		
+		rdtsc
+			
+		mov [xlow],EAX;
+		mov [xhigh],edx;
+		
+	}
+	
+	/* assemble (xhigh << 32) | xlow step by step in the 64-bit target */
+	*bigOne =  xhigh ;
+	*bigOne <<= 32;
+	*bigOne |= xlow;
+	
+	return;
+}
+
+
+/*
+ * Store the current time-stamp counter value in *temp.
+ * Thin public wrapper around the file-local readTSC().
+ */
+void DXL_AccurateTime(UINT64 *temp)
+{
+	readTSC(temp);
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c
new file mode 100644
index 00000000..1bda2ef4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxl_feat.c
@@ -0,0 +1,20 @@
+#include "dkpltfrm.h"
+#include "dxl_main.h"                                            
+
+unsigned long cpuFeatures;
+
+#define CHECK_MMX       0x800000
+#define CHECK_TSC       0x10
+#define CHECK_CMOV      0x8000
+#define CHECK_FCMOV     0x10000
+
+/*
+ * Translate the global cpuFeatures bit mask into CPU_FEATURES flags.
+ * Only the CHECK_MMX bit is examined; it maps to MMX_SUPPORTED.
+ */
+CPU_FEATURES DXL_GetCPUFeatures(void)
+{
+    enum CPU_FEATURES currentFeatures = NO_FEATURES;
+
+    if ((cpuFeatures & CHECK_MMX) != 0)
+    {
+        currentFeatures |= MMX_SUPPORTED;
+    }
+
+    return currentFeatures;
+}              
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp
new file mode 100644
index 00000000..342717ce
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.cpp
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <windows.h>
+#include <windowsx.h>
+
+
+// ************************************************************************
+// FUNCTION : DllMain( HINSTANCE, DWORD, LPVOID )
+// PURPOSE  : DllMain is called by the C run-time library from the
+//            _DllMainCRTStartup entry point.  The DLL entry point gets
+//            called (entered) on the following events: "Process Attach",
+//            "Thread Attach", "Thread Detach" or "Process Detach".
+// COMMENTS : No initialization is needed here so this entry point simply
+//            returns TRUE.
+// ************************************************************************
+BOOL WINAPI
+DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpvReserved )
+{
+  // Nothing to set up or tear down: every notification is accepted.
+  (void)hInstDLL;
+  (void)fdwReason;
+  (void)lpvReserved;
+
+  return TRUE;
+}
+
+// Returns the library version constant, 0x0365.
+int DXV_GetVersion()
+{
+	const int version = 0x0365;
+	return version;
+}
+
+extern "C" {
+
+// Announcement buffer state, installed by ErrorBuffer():
+//   announcestart - start of the caller-supplied buffer
+//   pannounce     - position at which the next message is written
+char* pannounce;
+char *announcestart;
+// Size Announcement() assumes for that buffer; nothing here verifies it.
+#define ANNBUFSIZE 2048
+
+// Debug-only log file ("Announce.txt"), opened lazily by Announcement().
+FILE* hf = NULL;
+
+// Append a message to the announcement buffer installed by ErrorBuffer()
+// and, in debug builds, to Announce.txt.
+//
+// The buffer is assumed to hold ANNBUFSIZE bytes.  As before, a message is
+// placed at the current write position while twice its length still fits;
+// once it does not, the write position "locks up" near the end of the
+// buffer so that later messages overwrite each other there.
+//
+// Fixes over the original:
+//  - the message is written with fputs() rather than being used as the
+//    fprintf() format string;
+//  - a NULL message, or a call before ErrorBuffer(), is ignored instead of
+//    dereferencing NULL;
+//  - a message with 2*len > ANNBUFSIZE no longer moves the write position
+//    in front of the buffer; it is written from the start and truncated to
+//    fit.
+void Announcement(const char* lpszString)
+{
+	size_t len;
+	size_t used;
+
+	if (!lpszString)
+		return;
+
+#if _DEBUG
+	if (!hf)	{
+		hf = fopen("Announce.txt","w");
+	}
+	if (hf) {
+		fputs(lpszString,hf);
+		fflush(hf);
+	}
+#endif
+
+	if (!announcestart || !pannounce)
+		return;		// ErrorBuffer() has not been called yet
+
+	len = strlen(lpszString);
+	used = (size_t)(pannounce - announcestart);
+
+	if (used + 2 * len > ANNBUFSIZE)	{
+		// lock up at end (or at the start for an oversized message)
+		used = (2 * len <= ANNBUFSIZE) ? (ANNBUFSIZE - 2 * len) : 0;
+	}
+
+	// never write past the buffer, terminator included
+	if (len > ANNBUFSIZE - 1 - used)
+		len = ANNBUFSIZE - 1 - used;
+
+	memcpy(announcestart + used,lpszString,len);  // copy and bump
+	announcestart[used + len] = '\0';
+	pannounce = announcestart + used + len;
+}
+
+// Debug builds: log a closing message and close the log file.
+// Release builds: no-op.
+void AnnDone()
+{
+#if _DEBUG
+	Announcement("Closing Announcements");
+	if (hf != NULL)
+	{
+		fclose(hf);
+	}
+//	hf = 0;			// don't reset handle or file will re-open on next call
+#endif
+}
+
+// Install the buffer that Announcement() writes into and rewind the write
+// position to its start.  Announcement() assumes ANNBUFSIZE bytes.
+void ErrorBuffer(char *errorbuf)
+{
+	announcestart = errorbuf;
+	pannounce = errorbuf;
+}
+
+}
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def
new file mode 100644
index 00000000..a67094b2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.def
@@ -0,0 +1,97 @@
+; -------------------------------------------------------------------------
+;
+;                       Duck Developer Support
+;              Copyright (c) 1995 Duck Corporation
+; -------------------------------------------------------------------------
+; MODULE   : Dukdll.DEF
+; PURPOSE  : module-definition file for the Duck Player
+; COMMENTS :
+; -------------------------------------------------------------------------
+;LIBRARY      DXV
+DESCRIPTION  'Duck Windows Player'
+
+EXPORTS
+	DXL_InitVideo @2
+	DXL_ExitVideo @3
+	DXL_CreateVScreen @4
+	DXL_DestroyVScreen @5
+	DXL_AlterVScreen @6
+	DXL_AlterVScreenClip @7
+	DXL_AlterVScreenView @8
+	DXL_AlterVScreenBackground @9
+	DXL_CreateXImage @10
+	DXL_DestroyXImage @11
+	DXL_AlterXImageData @12
+	DXL_MoveXImage @13
+
+;	DXL_AlterXImageSpriteModes @14
+
+	DXL_GetXImageColorDepth @15
+;	DXL_GetXImageDirtyXYWH @16
+	DXL_GetXImageXYWH @17
+	DXL_dxImageToVScreen @19
+	DXL_IsXImageKeyFrame @20
+	DXL_GetVScreenView   @21
+
+;	DXL_CreateDirtyBuffer @22
+;	DXL_DeriveVScreenFromXImage @23
+;	DXL_DisplayVScreen @24
+	DXL_SetVScreenBlitQuality @25
+	DXL_GetVScreenBlitQuality @26
+	DXL_GetVScreenAddr @27
+	DXL_CreateGenericXImage @28
+	DXL_EraseVScreen		@29
+
+	DXL_BlitXImageToVScreen @30
+
+	DXL_BlitVScreenToVScreen @31
+
+	DXL_GetXImageFrameBuffer @32
+	DXL_AlterXImage		@33
+	DXL_SetVScreenCLUTs @34
+	DXL_ResetVScreenCLUTs @35
+
+
+;	BlackBox     		@81
+
+	AnnDone				@86
+	Announcement		@88
+
+;	RestoreDirty		@90
+;	GetScreenAddr		@91
+;	resetDisplay		@92
+;	SetClutPtrs			@93
+;	MarkSkips		  	@94
+
+	DXL_Decompress16BitAs565 @107
+	
+	ErrorBuffer			@108
+	DXL_SetXImageBlitAll @109
+
+	DXV_Setmalloc		@111
+	DXV_Setcalloc		@112
+	DXV_Setfree			@113
+
+;    DUCK_rdtsc_Start    @114
+;    DUCK_rdtsc_End      @115
+;    DUCK_InitProfile    @116
+;    DUCK_ProcessCounts  @117
+;    DUCK_UninitProfile  @118
+;    DUCK_ClearI         @119
+;    DUCK_SetI           @120
+
+	DXL_CheckdxImageToVScreen @121
+	DXL_GetFourCCList @122
+	DXL_CreateXImageOfType @123
+	DXL_GetXImageCSize @124
+	DXL_GetBitDepthPalette @125
+
+	DXL_CheckVScreenBlit @126
+	DXL_CheckVScreenXImageBlit @127
+
+    DXL_BlackLineVScreen @128
+    DXL_GetCPUFeatures @129
+    DXL_GetXImageFOURCC @130
+
+	DXL_SetXImageCSize @131
+	DXL_InitVideoEx @132
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc
new file mode 100644
index 00000000..8175d7dc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv.rc
@@ -0,0 +1,126 @@
+//Microsoft Developer Studio generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifndef MAJ 
+#define MAJ 0 
+#endif
+#ifndef MIN 
+#define MIN 0 
+#endif
+#ifndef PVER 
+#define PVER 0 
+#endif
+#ifndef BVER
+#define BVER ".b1"
+#endif
+
+#ifndef PROJ
+#define PROJ UNKNOWN!!
+#endif
+
+#define DT(x) #x
+#define VTEXT(x) DT(x)
+#define VERSTRING(w,x,y,z) DT(w) "." DT(x) "." DT(y) DT(z) "\0"
+
+#ifndef _MAC
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 1,0,0,1
+ PRODUCTVERSION 1,0,0,1
+ FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x10004L
+ FILETYPE 0x1L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName", "The Duck Corporation\0"
+            VALUE "FileDescription", "TruePlay SDK Library - " VTEXT(PROJ) ".dll\0"
+            VALUE "FileVersion", VERSTRING(MAJ,MIN,PVER,BVER)
+            VALUE "InternalName", "MPVE - " VTEXT(PROJ) "\0"
+            VALUE "LegalCopyright", "Copyright �1998 The Duck Corp.\0"
+            VALUE "ProductName", "TruePlay SDK - " VTEXT(PROJ) " Library \0"
+            VALUE "ProductVersion", VERSTRING(MAJ,MIN,PVER,BVER)
+            VALUE "Providing", VTEXT(ALG)
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+
+#endif    // !_MAC
+//resource
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE DISCARDABLE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE DISCARDABLE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE DISCARDABLE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c
new file mode 100644
index 00000000..a86d620b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/dxv_mem.c
@@ -0,0 +1,103 @@
+/*
+    Windows 95 Memory allocation abstraction functions
+ */
+
+#include <stdio.h>
+#include <windows.h>
+
+#include "dkpltfrm.h"
+#include "duck_mem.h"
+
+#define CHECK_FOR_MEMORY_LEAK 0
+
+/* Default allocator: a fixed block of size bytes from LocalAlloc(). */
+void *malloc_32b(size_t size)
+{
+	return LocalAlloc( LMEM_FIXED, size );
+}
+/*
+ * Default zeroing allocator: count * size bytes of zero-initialised,
+ * fixed memory from LocalAlloc().
+ * Returns NULL when count * size does not fit in size_t (the original
+ * let the product wrap and allocated a too-small block) or when
+ * LocalAlloc() fails.
+ */
+void *calloc_32b(size_t count,size_t size)
+{
+	/* reject requests whose byte count would wrap around */
+	if (size != 0 && count > ((size_t)-1) / size)
+		return NULL;
+
+	/* LMEM_FIXED is spelled out to match malloc_32b */
+	return LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, size*count );
+}
+
+/* Default deallocator: hands the block to LocalFree(). */
+void free_32b(void * hMem)
+{
+	LocalFree(hMem);
+}
+
+/* Allocator hooks used by duck_malloc / duck_calloc / duck_free.  They
+   start out as the LocalAlloc-based defaults and can be replaced through
+   DXV_Setmalloc / DXV_Setcalloc / DXV_Setfree. */
+static void *(*ptr_malloc)(size_t size) = malloc_32b;
+static void *(*ptr_calloc)(size_t, size_t size) = calloc_32b;
+static void (*ptr_free)(void *) = free_32b;
+ 
+/*
+ * Allocate size bytes through the current malloc hook.
+ * fred (the memory type) is not used by this implementation.
+ * With CHECK_FOR_MEMORY_LEAK enabled every call is appended to
+ * c:\sjl.log.
+ */
+void *duck_malloc(size_t size, enum tmemtype fred)
+{   
+	void *block = (*ptr_malloc)(size);
+
+#if CHECK_FOR_MEMORY_LEAK
+{
+	FILE *log = fopen("c:\\sjl.log","a");
+
+	if (log != NULL) {
+        fprintf(log,"DXV duck_malloc:%x %d\n", block, size);
+        fclose(log);
+    }
+}
+#endif
+
+	return block;
+}
+
+/*
+ * Allocate an array of n elements of size bytes through the current
+ * calloc hook.
+ * fred (the memory type) is not used by this implementation.
+ * With CHECK_FOR_MEMORY_LEAK enabled every call is appended to
+ * c:\sjl.log.
+ */
+void *duck_calloc(size_t n,size_t size, enum tmemtype  fred)
+{   
+	void *block;
+
+	block = (*ptr_calloc) (n, size);
+
+#if CHECK_FOR_MEMORY_LEAK
+{
+	FILE *log = fopen("c:\\sjl.log","a");
+
+	if (log != NULL) {
+        fprintf(log,"DXV duck_calloc:%x %d %d \n", block, n, size);
+        fclose(log);
+    }
+}
+#endif
+
+	return block;
+}
+
+/*
+ * Release a block through the current free hook.
+ * With CHECK_FOR_MEMORY_LEAK enabled the call is first appended to
+ * c:\sjl.log.
+ */
+void duck_free(void *old_blk) 
+{  
+#if CHECK_FOR_MEMORY_LEAK
+{
+	FILE *log = fopen("c:\\sjl.log","a");
+
+	if (log != NULL) {
+        fprintf(log,"DXV duck_free:%x\n", old_blk);
+        fclose(log);
+    }
+}
+#endif
+
+	(*ptr_free) (old_blk);
+}
+
+/*
+ * Install the function duck_malloc() allocates through.
+ * Passing NULL restores the built-in malloc_32b; storing NULL would make
+ * the next duck_malloc() call through a null pointer.
+ */
+void DXV_Setmalloc(void *(*ptr)(size_t))
+{
+	ptr_malloc = ptr ? ptr : malloc_32b;
+}
+ 
+/*
+ * Install the function duck_calloc() allocates through.
+ * Passing NULL restores the built-in calloc_32b; storing NULL would make
+ * the next duck_calloc() call through a null pointer.
+ */
+void DXV_Setcalloc(void *(*ptr)(size_t, size_t))
+{
+	ptr_calloc = ptr ? ptr : calloc_32b;
+}
+
+/*
+ * Install the function duck_free() releases memory through.
+ * Passing NULL restores the built-in free_32b; storing NULL would make
+ * the next duck_free() call through a null pointer.
+ */
+void DXV_Setfree(void (*ptr)(void *))
+{
+	ptr_free = ptr ? ptr : free_32b;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk b/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk
new file mode 100644
index 00000000..e549d598
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/goals.mk
@@ -0,0 +1,9 @@
+
+$(OBJDIR)sc_$(PROJECT).lib: $(LIBDIR)\sc_tm20.lib $(LIBDIR)\sc_torq.lib
+    LIB $(OBJS) /OUT:$@
+    LIB $@ $(LIBDIR)\s_tm1.lib /OUT:$@
+    LIB $@ $(LIBDIR)\sc_tm20.lib /OUT:$@
+    LIB $@ $(LIBDIR)\s_tmrt.lib /OUT:$@
+    LIB $@ $(LIBDIR)\sc_torq.lib /OUT:$@
+    copy $@ $(LIBDIR)
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c b/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c
new file mode 100644
index 00000000..85d9d04b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/icmdxv.c
@@ -0,0 +1,717 @@
+// dxvmpg.cpp : Defines the entry point for the console application.
+//
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "dkpltfrm.h" /* platform specifics */
+#include "duktypes.h" /* predefined general types used at duck */
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_main.h" /* interface to dxv */
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <vfw.h>
+
+
+
+/* formats that might be supported by a codec and dxv */
+/* call me crazy ... but I want to make this public ... ie in the HEADER ! */
+/*
+ * Table of output formats tried against a codec, in order of preference
+ * (DXL_ReportBestBMIMatch walks it front to back).
+ * Columns appear to be: biCompression fourcc (0 for plain RGB),
+ * biBitCount, biPlanes, dxv bit depth -- the order is inferred from the
+ * values; confirm against the BMIMapping declaration.
+ */
+BMIMapping DXL_BMIMap[] = 
+{
+	{ DXL_MKFOURCC('Y','V','1','2'),	12, 3, DXYV12 },
+
+	{ DXL_MKFOURCC('I','Y','U','V'),	12, 1, DXI420 },
+
+	{ DXL_MKFOURCC('Y','U','Y','2'),	16, 1, DXYUY2 },
+
+	{ DXL_MKFOURCC('Y','V','Y','U'),	16, 1, DXYVYU },
+
+	{ DXL_MKFOURCC('U','Y','V','Y'),	16, 1, DXUYVY },
+
+	{ 0,								24, 1, DXRGB24 },
+
+	{ 0,								32, 1, DXRGB32 }
+
+};	
+
+
+
+/*
+ * Render a fourcc as a 4-character string, least significant byte first.
+ *
+ * fcc   the fourcc value
+ * buff  output buffer of at least 5 bytes (4 characters plus terminator)
+ *
+ * Returns buff.  Each byte is narrowed to int before being handed to %c;
+ * the original passed unsigned long values, which does not match the
+ * conversion and misbehaves wherever long is wider than int.
+ */
+static char *MakeFourCCString(unsigned long fcc, char *buff)
+{
+	sprintf(buff,"%c%c%c%c",
+		(int)((fcc >> 0) & 0xFF),
+		(int)((fcc >> 8) & 0xFF),
+		(int)((fcc >> 16) & 0xFF),
+		(int)((fcc >> 24) & 0xFF)
+		); 
+	return buff;
+}
+
+
+/* Report to a little HTML file */
+/*
+ * Write the contents of DXL_BMIMap to a file as a small HTML table, one
+ * row per entry.  Nothing is written when the file cannot be opened.
+ */
+void DXL_ReportBMIMapping(char *filename)
+{
+	const int entries = sizeof(DXL_BMIMap) / sizeof(BMIMapping);
+	char fccText[5];
+	int row;
+	FILE *out = fopen(filename,"w");
+
+	if (!out)
+		return;
+
+	fprintf(out, "<table BORDER=1>");
+	fprintf(out, "<tr><td>FOURCC</td><td>BitDepth</td><td>Planes</td><td>dxvBitDepth</td></tr>");
+
+	for (row = 0; row < entries; row++)
+	{
+		fprintf(out, "<tr><td>%s<br>%x</td> <td>%ld</td> <td>%ld</td>  <td>%ld</td></tr>",
+			MakeFourCCString(DXL_BMIMap[row].biCompression,fccText),
+			DXL_BMIMap[row].biCompression,
+			DXL_BMIMap[row].biBitCount,
+			DXL_BMIMap[row].biPlanes,
+			DXL_BMIMap[row].bd 
+			);
+	}
+
+	fprintf(out,"</table>");
+	fclose(out);
+}
+
+
+
+
+/*********  USAGE
+
+In addition to regular DXV services, this library will invoke VFW 
+codecs for decompression services.
+
+Because of a bug in the frame parsing, the library is compiled to 
+decompress TM2X via its codec as well. So be certain to have a
+TM2X VFW codec installed.
+
+The library has only been tested with TM2X(2.5.1.8), 
+Indeo 5.2 and MPEG4.2.  Other codecs may work, but only if 
+they support RGB32, RGB24, and RGB16 (555 and/or 565).
+MS-CRAM and Cinepak crashed painfully in tests so far.
+
+The library assumes support for all 4 RGB truecolor modes 
+mentioned above, (NOTE: TM2X doesn't support RGB24 yet!)
+
+- 5/19/99 - 
+We added black-lining blitters for stretched modes.  Note that
+24 bit display uses a 32bit offscreen buffer which is blitted
+down to 24bit when stretched, this makes the asm code that much
+simpler.
+
+To use:
+
+in addition to the regular DXV, 
+link s_icm.lib to your application and do the following
+
+substitute:
+DXL_InitVideoEx(int lmaxScreens,int lmaxImages);
+in place of: 
+DXL_InitVideo(int lmaxScreens,int lmaxImages)
+
+use:
+movie->xImage = 
+DXL_AlterXImage(movie->xImage,movie->vData,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.ulHandler,
+0,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.usWidth,
+HFB_GetStreamInfo(movie->vStream)->a.BitmapInfo.usHeight);
+
+in place of:
+movie->xImage = DXL_CreateXImage(movie->vData);
+
+and, prior to any calls to:
+DXL_dxImageToVScreen(movie->xImage, movie->vScreen);
+
+you must call:
+DXL_SetXImageCSize(movie->xImage, movie->vLength);
+
+********/
+
+
+/* RGB bit depths, ending with DXRGBNULL (presumably a list terminator).
+   NOTE(review): no use of this table is visible in this part of the
+   file -- confirm it is still needed. */
+static dxvBitDepth bitDepths[] = 
+{
+	DXRGB32,DXRGB24,DXRGB16,DXRGBNULL
+};
+
+/* define an xImage structure based on the core xImage struct */
+
+
+
+/*
+ * ximage variant that decodes through a VfW codec.  Handles of this type
+ * are obtained by casting a DXL_XIMAGE_HANDLE, so the layout must begin
+ * with the common ximage fields (xImageBaseStruct is defined elsewhere;
+ * presumably a macro that expands to them).
+ */
+typedef struct tXImageCODEC{  
+	xImageBaseStruct;
+
+	DK_BITMAPINFOHEADER bihIn;        /* format of the compressed input */
+	unsigned long bihInFields[3];     /* NOTE(review): looks like room for bitfield masks after bihIn -- confirm */
+	DK_BITMAPINFOHEADER bihOut;       /* format of the decompressed output */
+	unsigned long bihOutFields[3];    /* NOTE(review): as above, for bihOut */
+	HIC hic;                          /* codec handle passed to the IC* calls */
+	int changeVScreen;                /* non-zero: output format must be (re)negotiated before decoding */
+
+	BMIMapping* maps[20];             /* output formats the codec accepted, best first */
+	int maxMaps;                      /* number of valid entries in maps */
+
+} DXL_CODEC,*DXL_CODEC_HANDLE;
+
+
+
+
+
+/*
+ * Translate an ICERR_* code into its symbolic name.
+ *
+ * err      the error code
+ * storage  receives the text; must be large enough for the longest
+ *          message ("Defaulted to ICERR_ERROR", 25 bytes with terminator)
+ * length   ignored
+ *
+ * Returns storage.  Codes other than the four handled here yield
+ * "Defaulted to ICERR_ERROR".
+ */
+char* DXL_DecodeICERR(int err, char *storage, int length)
+{
+	const char *name;
+
+	(void)length; // not used
+
+	if (err == ICERR_UNSUPPORTED)
+		name = "ICERR_UNSUPPORTED";
+	else if (err == ICERR_BADFORMAT)
+		name = "ICERR_BADFORMAT";
+	else if (err == ICERR_MEMORY)
+		name = "ICERR_MEMORY";
+	else if (err == ICERR_ERROR)
+		name = "ICERR_ERROR";
+	else
+		name = "Defaulted to ICERR_ERROR";
+
+	strcpy(storage,name);
+
+	return storage;
+}
+
+
+
+
+/*
+ * Build an output bitmap header describing a vscreen.
+ *
+ * bmih is first overwritten with a copy of the codec's input header, then
+ * the vscreen's bit depth is looked up among the formats the codec
+ * accepted (src->maps).  On a match the format and geometry fields are
+ * filled in, a short report goes to stderr, and bmih is returned.
+ * Without a match, 0 is returned and the vscreen's bit depth is stored in
+ * *bd1 (bd1 is written only on this path).
+ */
+DK_BITMAPINFOHEADER* DXL_VSCREEN_2_BMI
+(
+ DXL_XIMAGE_HANDLE xImage, 
+ DXL_VSCREEN_HANDLE vScreen, 
+ DK_BITMAPINFOHEADER *bmih,
+ dxvBitDepth* bd1
+ )
+{
+	unsigned char *addr;
+	dxvBlitQuality bq;
+	dxvBitDepth bd;
+	short pitch;
+	short height;
+	int t;
+
+	DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+
+
+	duck_memcpy(bmih,&((DXL_CODEC_HANDLE ) xImage)->bihIn,sizeof(DK_BITMAPINFOHEADER));
+
+	DXL_GetVScreenAttributes(vScreen, (void **) &addr, &bq, &bd, &pitch, &height );
+
+	for(t = 0; t < src->maxMaps; t++)
+	{
+		if (src->maps[t]->bd == bd)
+		{
+			bmih->biBitCount		= src->maps[t]->biBitCount;
+			bmih->biCompression		= src->maps[t]->biCompression;
+			bmih->biPlanes			= src->maps[t]->biPlanes;
+
+			/* width is derived from the pitch using whole bytes per
+			   pixel (integer division, so 12 bits counts as 1 byte) */
+			bmih->biWidth = pitch / (bmih->biBitCount / 8);
+			bmih->biHeight = height;
+			bmih->biSizeImage = pitch * bmih->biHeight;
+
+			fprintf(stderr,"\nBMI from VScreen attributes ...\n");
+			fprintf(stderr,"\t pitch = %ld\n", pitch);
+			fprintf(stderr,"\t width = %ld\n", bmih->biWidth);
+			fprintf(stderr,"\t height = %ld\n", bmih->biHeight);
+			fprintf(stderr,"\t biCompression = %c%c%c%c\n", 
+				((char *) &bmih->biCompression)[0],
+				((char *) &bmih->biCompression)[1],
+				((char *) &bmih->biCompression)[2],
+				((char *) &bmih->biCompression)[3]
+			);
+
+			fflush(stderr);
+
+			return bmih;
+		}
+	}
+
+	/* no accepted format has this bit depth: report it to the caller */
+	*bd1 = bd;
+
+	return 0;
+}
+
+
+
+
+/*
+ * Ask the codec which of the DXL_BMIMap output formats it can decompress
+ * to, and record the accepted ones in table order.
+ *
+ * map              receives pointers to the accepted DXL_BMIMap entries
+ * maxMaps          in: capacity of map; out: number of entries stored
+ * doConsoleReport  unused; a report is always written to stderr
+ *
+ * The accepted entries are also stored in src->maps / src->maxMaps, and
+ * src->bihOut is zeroed.  Returns the number of entries stored.
+ */
+int DXL_ReportBestBMIMatch(DXL_XIMAGE_HANDLE xImage, BMIMapping** map, int *maxMaps, int doConsoleReport)
+{
+	int t;
+	int ret;
+	char buff[5];
+	int len = sizeof(DXL_BMIMap)/sizeof(BMIMapping);
+	int matches = 0;
+	DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+	DK_BITMAPINFOHEADER temp;
+
+	(void)doConsoleReport; //unused
+
+	src->bihIn.dxFlavor = 2;  /* use the extended ICM functions */
+
+	/* candidate output header: start from the input header and vary only
+	   the format fields below */
+	duck_memcpy(&temp,&src->bihIn,sizeof(DK_BITMAPINFOHEADER));
+
+	for(t = 0; t < len; t++)  /* for each one we support with out mapping */
+	{
+
+		temp.biBitCount =	DXL_BMIMap[t].biBitCount;
+		temp.biCompression = DXL_BMIMap[t].biCompression;
+		temp.biPlanes =		DXL_BMIMap[t].biPlanes;
+		temp.biSizeImage = temp.biBitCount * temp.biWidth * temp.biHeight / 8;
+		ret =  ICDecompressQuery(src->hic, &(src->bihIn), &temp );
+
+		if (ret == ICERR_OK)
+		{
+			fprintf(stderr,"format of %s supported, planes = %ld, rank = %ld\n",
+				MakeFourCCString(temp.biCompression, buff ), temp.biPlanes, matches + 1);
+			fflush(stderr);
+
+
+			/* accepted formats beyond the caller's capacity are reported
+			   above but not recorded */
+			if (matches < *maxMaps)
+			{
+				src->maps[matches] = map[matches] = &DXL_BMIMap[t];
+				matches += 1;
+			}
+		}
+		else 
+		{
+			fprintf(stderr,"format of %s NOT supported, planes = %ld\n",
+				MakeFourCCString(temp.biCompression, buff ), temp.biPlanes);
+			fflush(stderr);
+
+		}
+	} 
+
+
+	src->maxMaps = *maxMaps = matches;
+
+	/* This could be done somewhere else ! */
+	duck_memset(&src->bihOut,0,sizeof(DK_BITMAPINFOHEADER));
+
+	return matches;
+}			
+
+
+
+/*
+ * Decode one frame through the basic ICDecompress interface.
+ *
+ * When src->changeVScreen is set, the output format is renegotiated first
+ * (ICDecompressEnd on any earlier session, then ICDecompressBegin with a
+ * header derived from the vscreen).
+ * Returns ICERR_BADFORMAT when the vscreen's bit depth is not among the
+ * accepted formats, the ICDecompressBegin error code if that call fails,
+ * and ICERR_OK on success.
+ * NOTE(review): a failing ICDecompress call ends the whole process via
+ * exit(0) (after assert(0)) rather than returning an error.
+ */
+static int decompress1(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen)
+{
+	/* Keep the warnings away ! */
+	DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+	//	DWORD dwFlags = 0;
+	DWORD ret;
+	dxvBitDepth bd;
+
+
+	int changeOutput = src->changeVScreen;
+
+	if (changeOutput)
+	{
+
+		/* should be cleared first time in so width zero ! */
+		if (src->bihOut.biWidth != 0)
+			ICDecompressEnd(src->hic);
+
+
+		if ( DXL_VSCREEN_2_BMI(xImage, vScreen, (DK_BITMAPINFOHEADER *) &(src->bihOut), &bd ) == 0)
+		{
+			/* user asks for unsupported surface FOURCC */
+			fprintf(stderr, "User asks for unsupported dxvBitDepth = %ld\n", bd  );
+			fflush(stderr);
+
+			return ICERR_BADFORMAT;
+		}
+
+
+		ret = ICDecompressBegin(src->hic, &src->bihIn,  &src->bihOut);
+
+
+		if (ret != ICERR_OK)
+		{
+			return ret;
+		}	
+
+	}
+
+
+	/* the codec is told the compressed frame size through the input header */
+	src->bihIn.biSizeImage = src->fSize;
+
+	ret = ICDecompress( src->hic, 0,
+		(BITMAPINFOHEADER *) &src->bihIn, src->addr, 
+		(BITMAPINFOHEADER *) &src->bihOut, 
+		(char *) vScreen->addr);
+
+
+	if (ret != ICERR_OK)
+	{
+		fprintf(stderr,"Oh boy decompress may have failed !\n");
+		assert(0);
+		exit(0);
+	}
+
+	/* renegotiation is only marked done after a successful decode */
+	src->changeVScreen = 0;
+
+	return ICERR_OK;
+}
+
+
+/*
+ * Decode one frame through the extended ICDecompressEx interface.
+ *
+ * When src->changeVScreen is set, the output format is renegotiated first
+ * (ICDecompressExEnd on any earlier session, then ICDecompressExBegin with
+ * a header derived from the vscreen).
+ * Returns ICERR_BADFORMAT when the vscreen's bit depth is not among the
+ * accepted formats, ICERR_UNSUPPORTED when the codec rejects the extended
+ * interface, and 0 on success.
+ *
+ * Fix: the error report for a failed ICDecompressExBegin used a heap
+ * buffer that was never checked for NULL and whose "size" was taken from
+ * the pointer (sizeof(storage) on a char *).  A local array is used
+ * instead, so sizeof now yields the real capacity and nothing can leak.
+ *
+ * NOTE(review): as before, any other ICDecompressExBegin failure only
+ * asserts and then carries on, and a failing ICDecompressEx ends the
+ * process via exit(0).
+ */
+static int decompress2(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen)
+{
+	/* Keep the warnings away ! */
+	DXL_CODEC_HANDLE src = (DXL_CODEC_HANDLE ) xImage;
+	DWORD dwFlags = 0;
+	DWORD ret;
+	dxvBitDepth bd;
+
+
+	if (src->changeVScreen)
+	{
+
+		/* should be cleared first time in so width zero ! */
+		if (src->bihOut.biWidth != 0)
+			ICDecompressExEnd(src->hic);
+
+
+		if ( DXL_VSCREEN_2_BMI(xImage, vScreen, (DK_BITMAPINFOHEADER *) &(src->bihOut), &bd ) == 0)
+		{
+			/* user asks for unsupported surface FOURCC */
+			fprintf(stderr, "User asks for unsupported dxvBitDepth = %ld\n", bd  );
+			fflush(stderr);
+
+			return ICERR_BADFORMAT;
+		}
+
+
+
+		ret = ICDecompressExBegin(
+			src->hic, 
+			dwFlags,
+			(BITMAPINFOHEADER *) &(src->bihIn), 
+			src->addr,
+			0,                    
+			0,                    
+			src->bihIn.biWidth,                   
+			src->bihIn.biHeight,        
+			(BITMAPINFOHEADER *) &(src->bihOut),
+			(char *) vScreen->addr,
+			0,
+			0,
+			src->bihIn.biWidth,                   
+			src->bihIn.biHeight
+			);
+
+
+		if (ret == ICERR_UNSUPPORTED)
+		{
+			return ICERR_UNSUPPORTED;
+		}
+		if (ret != ICERR_OK)
+		{
+			/* large enough for every message DXL_DecodeICERR produces */
+			char storage[256];
+
+			fprintf(stderr,"ICDecompressExBegin returns error code = %ld\n", ret);
+			fprintf(stderr,"Decoded as ... %s\n", DXL_DecodeICERR(ret, storage, sizeof(storage) - 1));
+			fflush(stderr);
+
+			assert(0);
+		}	
+
+
+		src->changeVScreen = 0;
+	}
+
+
+	/* the codec is told the compressed frame size through the input header */
+	src->bihIn.biSizeImage = src->fSize;
+
+
+	ret = ICDecompressEx(
+		src->hic,                     
+		dwFlags,               
+		(BITMAPINFOHEADER *) &src->bihIn,
+		src->addr,          
+		0,                    
+		0,                    
+		src->bihIn.biWidth,                   
+		src->bihIn.biHeight,                      
+		(BITMAPINFOHEADER *) &src->bihOut,
+		(char *) vScreen->addr,          
+		0,                    
+		0,                    
+		src->bihIn.biWidth,                   
+		src->bihIn.biHeight          
+		);
+
+
+
+	if (ret != ICERR_OK)
+	{
+		fprintf(stderr,"Oh boy decompress may have failed !\n");
+		assert(0);
+		exit(0);
+	}
+
+
+	return 0;
+}
+
+
+
+static int decompress(DXL_XIMAGE_HANDLE xImage, DXL_VSCREEN_HANDLE vScreen2)
+{
+	/*
+	 * Entry point installed as the xImage "dx" callback.
+	 * dxFlavor 2 selects the ICDecompressEx path (decompress2); if that path
+	 * reports ICERR_UNSUPPORTED the codec is switched to dxFlavor 1 and the
+	 * frame is decoded with plain ICDecompress (decompress1) instead.
+	 * Any other dxFlavor value decodes nothing and yields ICERR_OK.
+	 */
+	DXL_CODEC_HANDLE codec = (DXL_CODEC_HANDLE) xImage;
+	int status = ICERR_OK;
+
+	if (codec->bihIn.dxFlavor == 2)
+	{
+		status = decompress2(xImage, vScreen2);
+		if (status != ICERR_UNSUPPORTED)
+			return status;
+		codec->bihIn.dxFlavor = 1;	/* remember the fallback for later frames */
+	}
+	if (codec->bihIn.dxFlavor == 1)
+		status = decompress1(xImage, vScreen2);
+
+	return status;
+}
+
+
+
+
+
+/* 
+close down a decompressor, releasing the icm decompressor, 
+the xImage (decompressor), and the intermediate vScreen (surface)
+*/
+
+static int destroyCodec(DXL_XIMAGE_HANDLE xImage)
+{
+	/*
+	 * Tear down a codec xImage: end decompression and close the ICM handle
+	 * when one is open, then free the codec structure.  A NULL handle is
+	 * accepted.  Always returns DXL_OK.
+	 */
+	DXL_CODEC_HANDLE codec = (DXL_CODEC_HANDLE ) xImage;
+
+	if (!codec)
+		return DXL_OK;
+	if (codec->hic)
+	{
+		ICDecompressEnd(codec->hic);
+		ICClose(codec->hic);
+	}
+	duck_free(codec);
+	return DXL_OK;
+}
+
+
+
+
+static char* duckToNarrow(char *s)
+{
+	/*
+	 * Narrow a 16-bit character string in place.
+	 * s is read as a sequence of two-byte characters ended by a two-byte
+	 * zero; the first byte of each character is kept and the result is
+	 * NUL terminated inside the same buffer.  Returns s, or 0 when s is 0.
+	 *
+	 * The terminator test looks at the character about to be copied
+	 * (bytes 2*t and 2*t+1), byte by byte, so it neither reads past the end
+	 * of the wide string nor performs an unaligned 16-bit load.  Writing
+	 * straight into s is safe because byte t is never ahead of byte 2*t,
+	 * and it removes the old 256-byte scratch buffer and its length limit.
+	 */
+	int t = 0;
+
+	if (!s)
+		return 0;
+
+	while (s[2*t] != '\0' || s[2*t + 1] != '\0')
+	{
+		s[t] = s[2*t];
+		t = t + 1;
+	}
+	s[t] = '\0';
+	return s;
+}  /* end duckToNarrow */
+
+
+
+
+
+/* 
+called during initialization and/or when xImage (decompressor)
+attributes change, note that nImage and src are actually
+synonymous and should be cleared out a bit (to say the least!)
+
+
+!!!!!!
+This function should be prepared to get data that is NOT of the 
+type native to the decoder,  It should do it's best to verify it 
+as valid data and should clean up after itself and return NULL
+if it doesn't recognize the format of the data
+*/
+
+static DXL_XIMAGE_HANDLE reCreateCodec(DXL_CODEC_HANDLE src,unsigned char *data,
+																			 int type,enum BITDEPTH bitDepth,int w,int h)
+{  
+#pragma warning(disable: 4210) // nonstandard extension used : function given file scope
+	DXL_XIMAGE_HANDLE createCodec(unsigned char *data);
+#pragma warning(default: 4210) // nonstandard extension used : function given file scope
+
+	/*
+	 * Build a codec xImage around the bitmap header found at data.
+	 * An existing src is destroyed first.  type, bitDepth, w and h are unused.
+	 * Returns the new xImage, or NULL when the allocation fails or no ICM
+	 * decompressor handle could be obtained.
+	 */
+	DXL_XIMAGE_HANDLE nImage;
+	DK_BITMAPINFOHEADER *bmiHeader = (DK_BITMAPINFOHEADER *) data;
+	unsigned long fccHandler;
+
+	(void)h; // unused
+	(void)w; // unused
+	(void)bitDepth; //unused
+	(void)type; //unused
+
+	if (src != NULL)	/* if an xImage/decompressor already exists, destroy it */
+		destroyCodec((DXL_XIMAGE_HANDLE ) src);
+
+	/* create a new xImage, specific to this type of decoder, (
+	see "DXL_CODEC" struct above and dxl_main.h) */
+	nImage = (DXL_XIMAGE_HANDLE)duck_calloc(1,sizeof(DXL_CODEC),DMEM_GENERAL);
+	src = (DXL_CODEC_HANDLE ) nImage;
+
+	if (!nImage) return NULL;
+
+	duck_memset(nImage,0,sizeof(DXL_CODEC));
+
+	src->changeVScreen = 1; /* True ... inform decompresss the dest has changed */
+
+	/* set up the "vtable" of interface calls */
+	src->create =  		(DXL_XIMAGE_HANDLE (*)(void *)) createCodec;
+	src->recreate =  	(DXL_XIMAGE_HANDLE (*)(DXL_XIMAGE_HANDLE,void *,int,int,int,int)) reCreateCodec;
+
+	nImage->destroy = destroyCodec;
+	nImage->dx = 		decompress;
+	nImage->blit = 		NULL; /* there is no interleaved blitter for codecs */
+
+	src->bdPrefs = 		bitDepths; /* plug in the list of prefered bit depths */
+
+	nImage->addr = data;
+	nImage->dkFlags.inUse = 1;
+
+	duck_memcpy(&src->bihIn, bmiHeader,sizeof(DK_BITMAPINFOHEADER));
+	duck_memset(&src->bihOut, 0, sizeof(DK_BITMAPINFOHEADER));
+
+	src->w = (short ) (src->bihIn.biWidth);
+	src->h = (short ) (src->bihIn.biHeight);
+
+	src->imWidth = (short) src->w;
+	src->imHeight = (short) src->h;
+
+	fccHandler = src->bihIn.fccHandler;
+
+	/* no handler named in the header: let ICM locate one for the format */
+	if (fccHandler == 0)
+	{
+		src->hic=ICLocate(ICTYPE_VIDEO, fccHandler, (BITMAPINFOHEADER *) &src->bihIn, 0, ICMODE_DECOMPRESS); 
+	}
+	else
+	{
+		src->hic=ICOpen(ICTYPE_VIDEO, fccHandler, ICMODE_DECOMPRESS); 
+	}
+
+	/* log what the driver reports about itself */
+	{
+		ICINFO i;
+
+		memset(&i,0,sizeof(ICINFO));
+		if (ICGetInfo( src->hic, &i, sizeof(ICINFO) ))
+		{
+			/*
+			 * Separate buffers: both FOURCC strings feed one fprintf call, so a
+			 * shared buffer would (assuming MakeFourCCString returns the buffer
+			 * it is given) show the same text for both fields.
+			 */
+			char compressionText[5];
+			char handlerText[5];
+			unsigned long biCompression = src->bihIn.biCompression;
+
+			fccHandler = src->bihIn.fccHandler = i.fccHandler;
+			fprintf(stderr,	"Short Name : %s\n"
+				"Driver : %s\n"
+				"driver version = %d %d  or as hex = %x\n"
+				"Description : %s\n"
+				"Codec biCompression = %s\n"
+				"Codec fccHandler = %s\n", 
+				duckToNarrow( (char *) i.szName),
+				duckToNarrow( (char *) i.szDriver), 
+				((i.dwVersion & 0x0000FFFF) >> 0 ),  
+				((i.dwVersion & 0xFFFF0000) >> 16 ), 
+				i.dwVersion,
+				duckToNarrow( (char *) i.szDescription), 
+				MakeFourCCString(biCompression, compressionText),
+				MakeFourCCString(fccHandler, handlerText)
+				);
+			fflush(stderr);
+		}
+	}
+
+	if (src->hic == 0)
+	{
+		destroyCodec((DXL_XIMAGE_HANDLE ) src);
+
+		fprintf(stderr, "codec for fourCC = %c%c%c%c, %x won't open\n",
+			(fccHandler & 0xFF000000) >> 24,
+			(fccHandler & 0xFF0000) >> 16,
+			(fccHandler & 0xFF00) >> 8,
+			(fccHandler & 0xFF) >> 0,
+			fccHandler
+			);
+		fflush(stderr);
+
+		return 0;
+	}
+
+	return nImage;
+}
+
+/* "glue" creation entry: forwards to reCreateCodec with no previous codec to replace */
+#pragma warning(disable:4211) //nonstandard extension used : redefined extern to static
+static DXL_XIMAGE_HANDLE createCodec(unsigned char *bmih)
+{
+	DXL_CODEC_HANDLE noPrevious = NULL;	/* nothing to destroy on first creation */
+	return reCreateCodec(noPrevious, bmih, 0, (enum BITDEPTH) 0, 0, 0);
+}
+#pragma warning(default:4211) //nonstandard extension used : redefined extern to static
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile b/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile
new file mode 100644
index 00000000..8547cbbc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/makefile
@@ -0,0 +1,339 @@
+everything : all 
+###### dxv standard project specs
+     
+PROJECT = dxv
+# project name
+
+# standard modules for this library
+
+OBJS = \
+    $(OBJDIR)dxlcsize.$(OBJ) \
+    $(OBJDIR)dxv_init.$(OBJ) \
+    $(OBJDIR)dxv_initex.$(OBJ) \
+    $(OBJDIR)dxl_main.$(OBJ) \
+    $(OBJDIR)vscreen.$(OBJ) \
+    $(OBJDIR)ximage.$(OBJ) \
+    $(OBJDIR)ximgblit.$(OBJ) \
+    $(OBJDIR)getprefs.$(OBJ) \
+    $(OBJDIR)dxl_clip.$(OBJ) \
+	$(OBJDIR)dxl_bkgr.$(OBJ) \
+    $(OBJDIR)dxl_view.$(OBJ) \
+	$(OBJDIR)vgetview.$(OBJ) \
+	$(OBJDIR)vscerase.$(OBJ) \
+    $(OBJDIR)dxl_attr.$(OBJ) \
+    $(OBJDIR)dxl_reg.$(OBJ) \
+    $(OBJDIR)dxl_feat.$(OBJ) \
+	$(OBJDIR)genximg.$(OBJ) \
+    $(OBJDIR)svsbitdp.$(OBJ) \
+	$(OBJDIR)vscraddr.$(OBJ) \
+    $(OBJDIR)blitall.$(OBJ) \
+    $(OBJDIR)dxlvinfd.$(OBJ) \
+    $(OBJDIR)cpuid.$(OBJ) \
+	$(OBJDIR)count_tm.$(OBJ)
+
+!ifndef NOCLUT
+
+OBJS = $(OBJS) \
+	$(OBJDIR)dxl_clut.$(OBJ)
+    
+!endif
+	
+!ifndef NOLOGO
+
+OBJS = $(OBJS) \
+	$(OBJDIR)on2logo.$(OBJ) \
+	$(OBJDIR)tmlogo55.$(OBJ) \
+	$(OBJDIR)tmlogo56.$(OBJ) \
+	$(OBJDIR)tmlogo24.$(OBJ) \
+	$(OBJDIR)ducklogo.$(OBJ) 
+
+!endif
+	
+
+MAJREV = 6		# major library revision
+MINREV = 0		# minor library revision
+PVER = 1		# minor.minor library revision
+BVER = ".b1"	# build version
+
+# libraries on which this should be built
+
+!if 0
+    $(LIBDIR)$(LIBPREFIX)tmrt$(LIBSUFFIX) \
+	$(LIBDIR)$(LIBPREFIX)tm1$(LIBSUFFIX) \
+	$(LIBDIR)$(LIBPREFIX)tm20$(LIBSUFFIX) \
+
+	$(LIBDIR)$(LIBPREFIX)vp3$(LIBSUFFIX) \
+	$(LIBDIR)$(LIBPREFIX)vp31d$(LIBSUFFIX)
+
+
+!endif
+
+!if 0
+USER_LIBS = \
+	$(LIBDIR)$(LIBPREFIX)torq$(LIBSUFFIX)
+!endif
+
+#
+# project sub options sent to compilers
+#
+!ifndef CENV
+CENV = -DTORQB -DTM1B -DTM2B -DTMRTB
+!endif
+
+# add sub-options to c options (allows for environment override)
+C_FLAGS = $(CENV)
+c_flags = $(C_FLAGS)
+
+# this proj library is to be built as a shared or master library
+SHAREDLIB = 0
+
+M_INCLUDES = -I$(INCDIR)/$(OS) -I$(INCDIR) -I. -I../generic $(M_INCLUDES)
+# win95 specific modules
+
+OBJS = $(OBJS) \
+	$(OBJDIR)dxv.$(OBJ) \
+    $(OBJDIR)dkprof.$(OBJ) \
+	$(OBJDIR)perf.$(OBJ) \
+    $(OBJDIR)pentium.$(OBJ) \
+	$(OBJDIR)$(PROJECT).res
+
+# modules specifically sent to linker,
+# others are provided in libraries
+
+DLLOBJS = $(OBJDIR)dxl_main.obj \
+	$(OBJDIR)dxv_mem.$(OBJ) \
+	$(OBJDIR)dxv_mems.$(OBJ) \
+	$(OBJDIR)$(PROJECT).res \
+	$(OBJDIR)$(PROJECT).$(OBJ)
+
+PROJLIBS = $(PROJLIBS) 
+
+#$(OBJDIR)sc_$(PROJECT).lib
+
+THISFILE = makefile
+
+SRCDIR    = .
+GENDIR    = ..\generic
+OBJDIR    = obj\  
+LIBDIR    = obj\ 
+INCDIR    = $(CORELIBS_INC)
+
+ASMDIR    = ..\X86\ 
+MMXDIR    = ..\mmx\ 
+WASMDIR   = ..\wX86\ 
+
+OBJRELDIR = $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Release\ 
+OBJDEBDIR = $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Debug\ 
+LIBRELDIR = $(ROOT)Private\CoreLibs\Lib\$(OS)\Release 
+LIBDEBDIR = $(ROOT)Private\CoreLibs\Lib\$(OS)\Debug 
+
+OBJ = obj
+
+
+MKDIR = 
+
+# tools 
+CC = cl
+ASM = tasm /ml /m8 /dWINP /zd /I$(ASMDIR) /dWIN95
+WASM = wasm /dWINP /dWIN95 /I=$(ASMDIR) /I=$(MMXDIR) -q
+RC = rc
+
+LIBNAME = $(OBJDIR)$(PROJECT).lib
+SLIBNAME = $(OBJDIR)s_$(PROJECT).lib
+EXENAME = $(OBJDIR)$(PROJECT).exe
+###DEFNAME = $(SRCDIR)$(PROJECT).DEF
+DEFNAME = $(PROJECT).DEF
+
+LIBPREFIX = \s_
+LIBSUFFIX = .lib
+
+# Include Path Variable Extensible
+include = $(SDK)/inc;$(include)
+
+
+############################################################
+# Global Flags (compiler / linker)
+C_FLAGS = $(C_FLAGS) -Gy -W3 -nologo -DWIN32 -D_MBCS -D_AFXDLL -D_WINDOWS -DHIRES\
+ -DDOS32 -c  $(M_INCLUDES:/=\) 
+#-WX
+
+L_FLAGS = $(L_FLAGS) /SUBSYSTEM:windows /INCREMENTAL:no /MACHINE:I386 
+
+############################################################
+# Locally Overridable Flags
+
+!ifndef DLLNAME
+DLLNAME = $(OBJDIR)$(PROJECT).dll
+!endif
+
+!ifndef c_thread
+
+## SJL - NEED TO USE LIBCMT NOW
+## c_thread = -MD
+c_thread = -MT
+!endif 
+
+!ifndef c_ndebugflags
+c_ndebugflags = -DNDEBUG -Ob2 -Ot -O2 -G5
+!endif 
+
+!ifndef c_debugflags
+c_debugflags = -D_DEBUG -GX -Od -Z7 
+!endif 
+
+!ifndef l_ndebugflags
+l_ndebugflags = 
+!endif 
+
+!ifndef l_debugflags
+l_debugflags = -DEBUG -DEBUGTYPE:BOTH
+!endif 
+
+# exe flags / objects
+!ifndef l_exe_flags
+l_exe_flags = 
+!endif
+
+
+# DLL flags / objects
+!ifndef l_dll_flags
+l_dll_flags = -DLL -DEF:$(DEFNAME)
+!endif
+
+!ifndef DLLOBJS
+DLLOBJS = $(OBJS)
+!endif
+
+# targets we will build; macro names are case-sensitive, so the DLL is appended to $(targets)
+!ifndef targets
+targets = $(SLIBNAME) 
+!ifdef SHAREDLIB
+!if $(SHAREDLIB) == 1
+targets = $(targets) $(DLLNAME) 
+!endif
+!endif 
+!endif 
+
+
+# versions we will build
+!ifndef VERSIONS
+VERSIONS = release.ver debug.ver
+!endif 
+
+############################################################
+# inference rules
+#    -mkdir $(OBJDIR:\= )
+#    -mkdir $(MKOBJDIR:\= ) 
+
+# Target Directories
+#$(OBJDIR:\= ) :
+!if 0
+$(OBJDIR:\= ) :
+	mkdir $*
+	cd $* 
+!endif
+
+$(LIBDIR) :
+#    -mkdir $(LIBDIR)\..
+	-mkdir $(MKLIBDIR)
+
+# Object files 
+
+{$(GENDIR)}.c{$(OBJDIR)}.obj:
+	$(CC)  $(C_FLAGS)  $<  -Fo$@
+
+{$(GENDIR)}.cpp{$(OBJDIR)}.obj:
+	$(CC)  $(C_FLAGS)  $<  -Fo$@
+
+{$(SRCDIR)}.c{$(OBJDIR)}.obj:
+	$(CC)  $(C_FLAGS)  $<  -Fo$@
+
+{$(SRCDIR)}.cpp{$(OBJDIR)}.obj:
+	$(CC)  $(C_FLAGS)  $<  -Fo$@
+
+{$(WASMDIR)}.asm{$(OBJDIR)}.obj:
+    $(WASM) $< -fo=$@ 
+    editbin /NOLOGO /RELEASE $@
+
+{$(ASMDIR)}.asm{$(OBJDIR)}.obj:
+    $(ASM) $< $@ 
+    editbin /NOLOGO /RELEASE $@
+
+{$(SRCDIR)}.rc{$(OBJDIR)}.res:
+    $(RC) /fo$@ /dALG="$(CENV:-D= )"  /dPROJ=$(PROJECT) /dMAJ=$(MAJREV) /dMIN=$(MINREV) /dPVER=$(PVER) /dBVER=$(BVER) $< 
+
+#
+# building of targets 
+#
+
+###$(SLIBNAME): $(OBJDIR:\= ) $(LIBDIR) $(OBJS) 
+$(SLIBNAME): $(OBJS) 
+	LIB $(OBJS) /OUT:$@
+!ifdef USER_LIBS
+	for %i in ($(USER_LIBS)) do LIB $(SLIBNAME) %i /OUT:$@
+!endif
+	copy $(SLIBNAME) $(LIBDIR)
+
+##$(DLLNAME): $(SLIBNAME) $(OBJDIR:\= ) $(LIBDIR) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(SLIBNAME) $(DLLOBJS)
+$(DLLNAME): $(SLIBNAME) $(LIBDIR) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(SLIBNAME) $(DLLOBJS)
+	link $(L_FLAGS) $(l_dll_flags) /OUT:$(DLLNAME) $(DLLOBJS) $(SLIBNAME) /MAP:$(OBJDIR)$(PROJECT).map 
+	copy $@ $(LIBDIR)
+	copy $(LIBNAME) $(LIBDIR)
+
+$(EXENAME): $(OBJDIR:\= ) $(DEFNAME) $(OBJDIR)$(PROJECT).res $(OBJS) 
+	link $(L_FLAGS) $(l_exe_flags) /OUT:$(EXENAME) $(OBJS) /MAP:$(OBJDIR)$(PROJECT).map 
+
+#
+# Goals
+#
+build_me: $(targets) $(PROJLIBS)
+    @echo Build Completed
+
+#
+# there should be a more elegant way to build the following directories
+#
+build_dirs:
+	@echo BUILDING DIRECTORIES
+# the leading '-' lets nmake carry on when mkdir reports that a directory already exists
+	-mkdir $(ROOT)Object
+	-mkdir $(ROOT)Object\CoreLibs
+	-mkdir $(ROOT)Object\CoreLibs\$(PROJECT)
+	-mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx
+	-mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)
+	-mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Release
+	-mkdir $(ROOT)Object\CoreLibs\$(PROJECT)\Dx\$(OS)\Debug
+
+release.ver:
+    set C_FLAGS=$(C_FLAGS) $(c_ndebugflags) $(c_thread) $(c_local)
+    set L_FLAGS=$(L_FLAGS) $(l_ndebugflags)
+    set OBJDIR=$(OBJRELDIR) 
+    set LIBDIR=$(LIBRELDIR) 
+	nmake /NOLOGO /C /S build_dirs
+    nmake /NOLOGO -f $(THISFILE) build_me
+
+debug.ver:
+	@echo NOW DOING DEBUG VERSION
+    set C_FLAGS=$(C_FLAGS) $(c_debugflags) $(c_thread) $(c_local)
+    set L_FLAGS=$(L_FLAGS) $(l_debugflags)
+    set OBJDIR=$(OBJDEBDIR) 
+    set LIBDIR=$(LIBDEBDIR) 
+	nmake /NOLOGO /C /S build_dirs
+    nmake /NOLOGO -f $(THISFILE) build_me
+
+all:: $(VERSIONS)
+
+clean:
+    deltree /Y obj
+    deltree /Y obj.*
+
+
+
+
+$(OBJDIR)sc_$(PROJECT).lib: $(LIBDIR)\sc_tm20.lib $(LIBDIR)\sc_torq.lib
+    LIB $(OBJS) /OUT:$@
+    LIB $@ $(LIBDIR)\s_tm1.lib /OUT:$@
+    LIB $@ $(LIBDIR)\sc_tm20.lib /OUT:$@
+    LIB $@ $(LIBDIR)\s_tmrt.lib /OUT:$@
+    LIB $@ $(LIBDIR)\sc_torq.lib /OUT:$@
+    copy $@ $(LIBDIR)
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h b/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h
new file mode 100644
index 00000000..6ed72fa3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/resource.h
@@ -0,0 +1,15 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by dxv.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk b/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk
new file mode 100644
index 00000000..6fc2687a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/win32/template.mk
@@ -0,0 +1,22 @@
+# win95 specific modules
+
+OBJS = $(OBJS) \
+	$(OBJDIR)dxv.$(OBJ) \
+    $(OBJDIR)dkprof.$(OBJ) \
+	$(OBJDIR)perf.$(OBJ) \
+    $(OBJDIR)pentium.$(OBJ) \
+	$(OBJDIR)$(PROJECT).res
+
+# modules specifically sent to linker,
+# others are provided in libraries
+
+DLLOBJS = $(OBJDIR)dxl_main.obj \
+	$(OBJDIR)dxv_mem.$(OBJ) \
+	$(OBJDIR)dxv_mems.$(OBJ) \
+	$(OBJDIR)$(PROJECT).res \
+	$(OBJDIR)$(PROJECT).$(OBJ)
+
+PROJLIBS = $(PROJLIBS) 
+
+#$(OBJDIR)sc_$(PROJECT).lib
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm
new file mode 100644
index 00000000..5f3b61ca
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/cpuid.asm
@@ -0,0 +1,229 @@
+;/***********************************************\
+;??? cpuid.asm   
+; checks for cpuid
+; if an id is not found, the program assumes a x86
+;\***********************************************/ 
+
+; parts taken from intel's AP-485 
+
+
+
+;put checks for cmov and mmx support ????
+
+
+
+        .486
+        .MODEL  flat, SYSCALL, os_dos
+        .CODE
+
+IDEAL
+NAME x86cpuid
+MASM
+
+PUBLIC getCPUID_
+PUBLIC _getCPUID
+
+INCLUDE proc.ash
+
+EXTRN c cpuFeatures:DWORD
+
+
+_486        EQU 4h
+PENT        EQU 50h
+PENTMMX     EQU 54h
+PENTPRO     EQU 61h
+PENTII      EQU 63h
+
+AMD_K63D    EQU 58h
+AMD_K6      EQU 56h
+AMD_K5      EQU 50h             ; K5 has models 0 - 6
+
+_6X86       EQU 52h
+_6X86MX     EQU 60h
+
+.DATA 
+
+_vendor_id      db "------------" 
+intel_id        db "GenuineIntel" 
+amd_id          db "AuthenticAMD" 
+cyrix_id        db "CyrixInstead" 
+
+        .CODE                               ; instructions belong in the code segment, not in .DATA
+getCPUID_:
+_getCPUID:
+    push    esi                             ; save every register this routine changes
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;------------------------------------------------
+; Intel486 processor check 
+; Checking for ability to set/clear ID flag (Bit 21) in EFLAGS 
+; which indicates the presence of a processor with the CPUID 
+; instruction.
+;------------------------------------------------
+.486 
+check_80486: 
+    pushfd                                  ; push original EFLAGS
+    pop     eax                             ; get original EFLAGS 
+    mov     ebp,X86                         ; default return value
+    mov     ecx, eax                        ; save original EFLAGS 
+    xor     eax, 200000h                    ; flip ID bit in EFLAGS 
+    push    eax                             ; save new EFLAGS value on stack 
+    popfd                                   ; replace current EFLAGS value 
+    pushfd                                  ; get new EFLAGS 
+    pop     eax                             ; store new EFLAGS in EAX 
+    xor     eax, ecx                        ; can not toggle ID bit, 
+    je      end_cpu_type486                 ; processor=80486
+
+;------------------------------------------------
+; Execute CPUID instruction to determine vendor, family, 
+; model, stepping and features. For the purpose of this 
+; code, only the initial set of CPUID information is saved.
+;
+; Until CPUID function 1 has been executed EDX holds part of
+; the vendor string, not the feature flags.  Every exit taken
+; before that point therefore uses end_cpu_type486, which
+; returns EBP (still X86) and leaves cpuFeatures untouched.
+;------------------------------------------------
+
+    mov     eax, 0                          ; set up for CPUID instruction 
+    CPU_ID                                  ; get and save vendor ID
+
+    mov     DWORD PTR _vendor_id, ebx 
+    mov     DWORD PTR _vendor_id[+4], edx 
+    mov     DWORD PTR _vendor_id[+8], ecx
+
+    cmp     DWORD PTR intel_id, ebx 
+    jne     IsProc_AMD
+    cmp     DWORD PTR intel_id[+4], edx 
+    jne     end_cpu_type486                 ; EDX is vendor text here, not features
+    cmp     DWORD PTR intel_id[+8], ecx 
+    jne     end_cpu_type486                 ; if not equal, not an Intel processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpu_type486                 ; if not, leave without touching cpuFeatures
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,PII                         ; assume PII
+
+    and     eax,0ffh                        ;mask out type and reserved
+nop
+
+    cmp     eax,PENTII
+    jge     end_cpuid_type
+
+    mov     ebp,PPRO
+    
+    cmp     eax,PENTPRO
+    je      end_cpuid_type
+
+    mov     ebp,PMMX
+    
+    cmp     eax,PENTMMX
+    je      end_cpuid_type
+
+    mov     ebp,X86
+    
+;    cmp     eax,PENT
+;    jge     end_cpuid_type
+
+end_cpuid_type:                             ; reached only after CPUID function 1: EDX = feature flags
+    mov     eax,ebp
+    mov     [cpuFeatures],edx
+
+    pop     edx                             ; restore in reverse push order
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+end_cpu_type486:                            ; no CPUID, unknown vendor, or function 1 unavailable
+    mov     eax,ebp
+    pop     edx                             ; restore in reverse push order
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;------------------------------------------------
+IsProc_AMD:
+    cmp     DWORD PTR amd_id, ebx 
+    jne     IsProc_CYRIX
+
+    cmp     DWORD PTR amd_id[+4], edx 
+    jne     end_cpu_type486 
+
+    cmp     DWORD PTR amd_id[+8], ecx 
+    jne     end_cpu_type486                 ; if not equal, not an AMD processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpu_type486                 ; if not, leave without touching cpuFeatures
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,AMDK63D    
+
+    and     eax,0ffh                        ;mask out type and reserved
+    nop
+
+    cmp     eax,AMD_K63D
+    jge     end_cpuid_type
+
+    mov     ebp,AMDK6    
+    nop
+
+    cmp     eax,AMD_K6
+    jge     end_cpuid_type
+
+    mov     ebp,X86
+    nop
+
+    cmp     eax,AMD_K5
+    jge     end_cpuid_type
+
+    mov     ebp,X86
+    jmp     end_cpuid_type
+
+;------------------------------------------------
+IsProc_CYRIX:
+    cmp     DWORD PTR cyrix_id, ebx 
+    jne     end_cpu_type486                 ; vendor not recognised at all
+
+    cmp     DWORD PTR cyrix_id[+4], edx 
+    jne     end_cpu_type486 
+
+    cmp     DWORD PTR cyrix_id[+8], ecx 
+    jne     end_cpu_type486                 ; if not equal, not a CYRIX processor
+
+    cmp     eax, 1                          ; make sure 1 is valid input for CPUID 
+    jl      end_cpu_type486                 ; if not, leave without touching cpuFeatures
+
+    mov     eax, 1 
+    CPU_ID                                  ; get family/model/stepping/features 
+
+    shr     eax, 4                          ; isolate family and model
+    mov     ebp,C6X86MX
+
+    and     eax,0ffh                        ;mask out type and reserved
+    nop
+
+    cmp     eax,_6X86MX
+    je      end_cpuid_type
+
+    mov     ebp,X86
+    jmp     end_cpuid_type
+;************************************************
+         END
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm
new file mode 100644
index 00000000..31dfe1f4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/pentium.asm
@@ -0,0 +1,77 @@
+
+; Pentium utilities.  Timothy S. Murphy 1/11/97.
+; This is a Borland i586 TASM source file.
+; Works (at least) with Watcom C++ and Visual C++ using "cdecl" linkage.
+
+	.586
+	.MODEL	flat, c, os_dos
+	.CODE
+
+;------------------------------------------------
+PUBLIC c pentiumKiloCycles, pentiumTime
+
+pentiumKiloCycles:				; returns the time-stamp counter shifted right by 10 (low 32 bits) in eax
+		push	edx				; rdtsc overwrites edx, keep the caller's value
+		; rdtsc					; get 64-bit cycle count in edx:eax
+		db		0Fh, 31h		; (tasm 4.0 doesn't have rdtsc opcode)
+		shrd	eax, edx, 10	; divide by 1024
+		pop		edx
+		ret						; value in eax
+
+pentiumTime:					; times a countdown loop; result in eax is in units of 1024 counter ticks
+		push	ebx
+			push	edx
+
+		; rdtsc					; get 64-bit cycle count in edx:eax
+		db		0Fh, 31h		; (tasm 4.0 doesn't have rdtsc opcode)
+		shrd	eax, edx, 10	; divide by 1024
+		mov		ebx, eax		; remember the starting count
+	
+		mov		eax, 12[esp]	; first stack argument (two pushes + return address = 12 bytes)
+		shr		eax, 1			; loop counter starts at half the argument
+
+lup:	shr		edx, 16			; loop body; edx is reloaded by the rdtsc below
+			dec		eax
+		nop
+			jns		lup			; repeat until eax becomes negative
+
+		; rdtsc					; get 64-bit cycle count in edx:eax
+		db		0Fh, 31h		; (tasm 4.0 doesn't have rdtsc opcode)
+		shrd	eax, edx, 10	; divide by 1024
+
+		sub		eax, ebx		; elapsed = end count - start count
+
+		pop		edx
+		pop		ebx
+		ret						; value in eax
+;------------------------------------------------
+; void Get_scc(&preciseU32,&lessPreciseU32);  stores the low counter dword at the first pointer, the high dword at the second
+x86_Get_sccParams    STRUC 
+                        dd  3 dup (?)   ;3 pushed regs
+                        dd  ?           ;return address
+        preciseU32      dd  ? 
+        lessPreciseU32  dd  ?
+x86_Get_sccParams    ENDS
+; the structure above maps the stack as seen after the three pushes below
+PUBLIC c Get_scc
+
+Get_scc:
+    push	edx
+    push    esi
+    push    edi
+
+    mov     esi,[esp].preciseU32        ; esi = pointer for the low dword
+    mov     edi,[esp].lessPreciseU32    ; edi = pointer for the high dword
+
+    ; rdtsc					; get 64-bit cycle count in edx:eax
+    db		0Fh, 31h		; (tasm 4.0 doesn't have rdtsc opcode)
+
+    mov     [edi],edx
+    mov     [esi],eax
+
+    pop     edi
+    pop     esi
+    pop		edx
+    ret						; results were stored through the pointers; eax still holds the low dword
+
+END
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm b/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm
new file mode 100644
index 00000000..478aa101
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/perf.asm
@@ -0,0 +1,183 @@
+;/***********************************************\
+;??? perf.asm   
+;\***********************************************/ 
+        .586
+        .MODEL  flat, SYSCALL, os_dos
+        .CODE
+
+IDEAL
+NAME tsc
+MASM
+
+PUBLIC DUCK_sti_   
+PUBLIC _DUCK_sti
+
+PUBLIC DUCK_cli_   
+PUBLIC _DUCK_cli
+
+PUBLIC rdtsc_Start_   
+PUBLIC _rdtsc_Start
+
+PUBLIC rdtsc_End_   
+PUBLIC _rdtsc_End
+
+PUBLIC addTSC_   
+PUBLIC _addTSC
+
+; typedef struct tsc_cnt {
+;     unsigned long low;
+;     unsigned long high;
+; } *TSC_HANDLE, TSC;
+
+DUCK_sti_:                      ; one routine, made PUBLIC under both spellings of the name
+_DUCK_sti:
+    sti                         ; set the interrupt-enable flag
+    ret
+
+DUCK_cli_:                      ; one routine, made PUBLIC under both spellings of the name
+_DUCK_cli:
+    cli                         ; clear the interrupt-enable flag
+    ret
+
+;------------------------------------------------
+; void rdtsc_Start(low, high)
+; low, high: pointers that receive the low and high dwords of the time-stamp counter
+rdtsc_StartParams    STRUC
+                dd  3 dup (?)   ;3 pushed regs
+                dd  ?           ;return address
+    low         dd  ?       
+    high        dd  ?       
+rdtsc_StartParams    ENDS
+;------------------------------------------------
+rdtsc_Start_:
+_rdtsc_Start:
+    push    ebx 
+    push    ecx
+    push    edx
+nop
+
+    mov     ebx,[esp].low               ;pointer to low
+    mov     ecx,[esp].high             ;pointer to high
+        
+;    RDTSC
+    db 0fh, 31h                     ; opcode bytes of RDTSC: counter arrives in edx:eax
+
+    mov     [ebx],eax               ;return values
+    mov     [ecx],edx
+
+nop
+    pop     edx
+    pop     ecx
+    pop     ebx
+    ret
+
+;------------------------------------------------
+; void rdtsc_End(elow, ehigh)  - two pointers, matching the structure below
+; replaces the 64-bit value held at *elow / *ehigh with (current counter - that value)
+rdtsc_EndParams    STRUC
+                dd  6 dup (?)   ;6 pushed regs
+                dd  ?           ;return address
+    elow         dd  ?       
+    ehigh        dd  ?       
+rdtsc_EndParams    ENDS
+;------------------------------------------------
+rdtsc_End_:
+_rdtsc_End:
+    push    esi
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+    mov		edi,[esp].elow               ;pointer to low var
+    mov		esi,[esp].ehigh               ;pointer to high var
+
+;    RDTSC
+    db 0fh, 31h                     ; opcode bytes of RDTSC: counter arrives in edx:eax
+
+    mov         ebx,[edi]           ;get start values
+    mov         ecx,[esi]
+    sub         eax,ebx             ; low dword difference, borrow left in CF
+    sbb         edx,ecx             ; high dword difference including that borrow
+
+    mov         [edi],eax           ;return values
+    mov         [esi],edx
+
+    pop     edx
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;------------------------------------------------
+; adds time stamped counts and passes back average
+;------------------------------------------------
+; void addTSC(unsigned long *, unsigned long, unsigned long *);
+; arguments in order: dkTimes (dword array), dkCount (divisor / loop count), rv (receives the quotient)
+addTSCParams    STRUC
+                dd  6 dup (?)   ;6 pushed regs
+                dd  ?           ;return address
+    dkTimes     dd  ?
+    dkCount     dd  ?       
+    rv          dd  ?       
+addTSCParams    ENDS
+; NOTE(review): a dkCount of 0 would wrap the "dec edi" below and divide by zero - confirm callers never pass 0
+addTSC_:
+_addTSC:
+    push    esi
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+    xor     ebp,ebp             ;used for adc
+    mov     eax,[esp].dkTimes   ;pointer to array of TSC's
+
+    mov     edi,[esp].dkCount   ;array count
+    mov     esi,[esp].rv        ;pointer to result
+
+    xor     edx,edx
+    mov     ebx,[eax]           ;get first TSC
+
+    mov     ecx,[eax+4]         ;get next TSC
+    add     eax,8
+    
+    adc     edx,ebp             ; adds CF left by "add eax,8" (ebp is 0)
+    add     ebx,ecx             ; running 32-bit sum in ebx
+
+add_loop:
+    dec     edi
+    jz      averageVal
+; NOTE(review): two dwords were read before the loop and one more is read per pass, dkCount+1 in all - confirm with callers
+    mov     ecx,[eax]
+    add     eax,4
+; NOTE(review): "add eax,4" rewrites CF, so the adc below may never see the carry of "add ebx,ecx" - confirm
+    adc     edx,ebp
+    add     ebx,ecx
+
+    jmp     add_loop
+
+averageVal:
+    mov     eax,ebx
+    mov     ebx,[esp].dkCount   ;array count
+
+    div     ebx                 ;div edx:eax by ebx (eax=quo, edx=rem)
+
+    mov    [esi],eax            ;get average of counts
+
+the_exit:
+    pop     edx
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+    ret
+
+;************************************************
+         END
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash b/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash
new file mode 100644
index 00000000..997d22a4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv/x86/proc.ash
@@ -0,0 +1,22 @@
+;/***********************************************\
+;??? proc.ash   
+;\***********************************************/ 
+
+
+CPU_ID MACRO 
+    db 0fh                      ; Hardcoded CPUID instruction 
+    db 0a2h 
+ENDM
+
+;see proc.h
+X86         EQU 0                   ; /* 486, Pentium plain, or any other x86 compatible */
+PMMX        EQU 1                   ; /* Pentium with MMX */
+PPRO        EQU 2                   ; /* Pentium Pro */
+PII         EQU 3                   ; /* Pentium II */
+C6X86       EQU 4
+C6X86MX     EQU 5
+AMDK63D     EQU 6
+AMDK6       EQU 7
+AMDK5       EQU 8
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj
new file mode 100644
index 00000000..46938184
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.vcproj
@@ -0,0 +1,327 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="dxv"
+	ProjectGUID="{ADAC45FD-B93F-40A3-85B2-DBECA1283614}"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				AdditionalIncludeDirectories="..\include,..\..\include,..\..\..\include"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				BufferSecurityCheck="false"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				AssemblerListingLocation=""
+				ObjectFile="$(IntDir)/"
+				ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(SolutionDir)lib\win32\release\s_dxv.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\include,..\..\..\include"
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\Lib\Win32\Debug\s_dxv.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release 64|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/GS-"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				AdditionalIncludeDirectories="..\include,..\..\include,..\..\..\include"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				EnableEnhancedInstructionSet="0"
+				UsePrecompiledHeader="0"
+				AssemblerListingLocation=""
+				ObjectFile="$(IntDir)/"
+				ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				Detect64BitPortabilityProblems="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				AdditionalOptions="/machine:AMD64"
+				OutputFile="..\..\..\Lib\Win64\Release\s_dxv.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="generic"
+			>
+			<File
+				RelativePath="generic\vscreen.c"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release 64|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="generic\ximage.c"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release 64|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..02310491
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv.xcodeproj/project.pbxproj
@@ -0,0 +1,205 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0C442A480BB78AD600B3EE20 /* ximage.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C442A470BB78AD600B3EE20 /* ximage.c */; };
+		0C442A760BB78BC700B3EE20 /* vscreen.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C442A750BB78BC700B3EE20 /* vscreen.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0C442A470BB78AD600B3EE20 /* ximage.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = ximage.c; path = generic/ximage.c; sourceTree = "<group>"; };
+		0C442A750BB78BC700B3EE20 /* vscreen.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vscreen.c; path = generic/vscreen.c; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libdxv.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libdxv.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* dxv */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = dxv;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				0C442A470BB78AD600B3EE20 /* ximage.c */,
+				0C442A750BB78BC700B3EE20 /* vscreen.c */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libdxv.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		D2AAC045055464E500DB518D /* dxv */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "dxv" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = dxv;
+			productName = dxv;
+			productReference = D2AAC046055464E500DB518D /* libdxv.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "dxv" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* dxv */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* dxv */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0C442A480BB78AD600B3EE20 /* ximage.c in Sources */,
+				0C442A760BB78BC700B3EE20 /* vscreen.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = YES;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = dxv;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = (
+					ppc,
+					i386,
+				);
+				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+				GCC_MODEL_TUNING = G5;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = dxv;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = "";
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "../include ../../include ../../.. ../../../include ../../../../include";
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = "";
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../.. ../../../include";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "dxv" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "dxv" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj
new file mode 100644
index 00000000..f852c980
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{ADAC45FD-B93F-40A3-85B2-DBECA1283614}</ProjectGuid>
+    <RootNamespace>dxv</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\dxv2\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\include;..\..\..\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\vscreen.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\ximage.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters
new file mode 100644
index 00000000..99856f05
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/dxv2.vcxproj.filters
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="generic">
+      <UniqueIdentifier>{af33cf3f-226e-4ed6-9952-d31a95439e2b}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\vscreen.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\ximage.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c b/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c
new file mode 100644
index 00000000..5cc1cafb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/generic/vscreen.c
@@ -0,0 +1,179 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#include "../include/duck_dxl.h"
+#include "duck_mem.h"
+#include "../include/dxl_plugin.h"         
+/*
+ * One-bit status flags carried by a vScreen.  Within this file only `inUse`
+ * and `allocated` are read or written; the other bits are not referenced
+ * here (presumably they exist for layout parity with the xImage flags --
+ * TODO confirm).
+ */
+typedef struct tagflgs 
+{
+	unsigned inUse : 1;          /* set by DXL_CreateVScreen, cleared by DXL_DestroyVScreen, tested by validate() */
+	unsigned DXed : 1;           /* not referenced in this file */
+	unsigned clutOwner: 1;       /* not referenced in this file */
+	unsigned doCompleteBlit : 1; /* not referenced in this file */
+	unsigned keyFrame : 1;       /* not referenced in this file */
+	unsigned nullFrame : 1;      /* not referenced in this file */
+	unsigned interframe : 1;     /* not referenced in this file */
+	unsigned logo : 1;           /* not referenced in this file */
+	unsigned allocated : 1;      /* set by vScreenCreate; DXL_DestroyVScreen frees the object only when set */
+} dkInfoFlags;
+
+/*
+ * Concrete layout behind DXL_VSCREEN_HANDLE: describes the destination
+ * surface that decompressed video is written to.
+ */
+typedef struct vScreen
+{
+	DXL_OBJECT_VERSION version;	/* never assigned in this file */
+
+	unsigned char *_addr;	/* destination address; set via DXL_AlterVScreen, cleared by DXL_DestroyVScreen */
+	unsigned char *laddr;	/* address of destination and what it was the last time */
+
+	enum BITDEPTH bd;		/* format of destination */
+	enum BLITQUALITY bq;	/* blit translation mode */
+
+	short pitch, height;	/* pitch and height of dest */        
+      
+	short viewX,viewY;		/* offset/clipping viewport within destination */
+	short viewW,viewH;		/* viewport width/height, see DXL_AlterVScreenView */
+
+	dkInfoFlags dkFlags;	/* inUse / allocated status bits */
+
+} DXL_VSCREEN;
+
+#define validate(x) \
+{ \
+	if (!x) \
+		return (int) DXL_NULLSOURCE; \
+	if (!x->dkFlags.inUse) \
+		return (int) DXL_NOTINUSE; \
+}
+
+/***********************************************/
+
+/*
+ * Allocate one DXL_VSCREEN through duck_calloc and tag it as heap-owned
+ * (dkFlags.allocated) so DXL_DestroyVScreen knows to free it.
+ * Returns NULL when the allocation fails.
+ */
+DXL_VSCREEN_HANDLE
+vScreenCreate(void)
+{
+    DXL_VSCREEN_HANDLE screen =
+        (DXL_VSCREEN_HANDLE)duck_calloc(1, sizeof(DXL_VSCREEN), DMEM_GENERAL);
+
+    if (screen == NULL)
+        return NULL;
+
+    screen->dkFlags.allocated = 1;
+    return screen;
+}
+
+/*
+ * Replace the vScreen's blit quality.
+ * Returns the previous quality value, or DXL_NULLSOURCE / DXL_NOTINUSE
+ * (both negative) when dst is NULL or not in use.
+ */
+int
+DXL_SetVScreenBlitQuality(DXL_VSCREEN_HANDLE dst, enum BLITQUALITY blitquality)
+{
+    int previous;
+
+    validate(dst);
+
+    previous = dst->bq;
+    dst->bq = blitquality;
+    return previous;
+}
+
+/*
+ * Retire a vScreen: clear its destination address and inUse flag, then
+ * release the memory if the object was heap-allocated by vScreenCreate.
+ * A NULL handle is ignored.
+ */
+void
+DXL_DestroyVScreen(DXL_VSCREEN_HANDLE dst)
+{
+    if (dst == NULL)
+        return;
+
+    dst->_addr = NULL;
+    dst->dkFlags.inUse = 0;
+
+    if (dst->dkFlags.allocated)
+    {
+        duck_free(dst);
+    }
+}
+
+/*
+ * Selectively update a vScreen's destination attributes.  Each argument
+ * has a "leave unchanged" sentinel: NULL for _addr, DXRGBNULL for bd and
+ * -1 for p (pitch) and h (height).  Pitch and height are stored as short.
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE for a bad handle.
+ */
+int
+DXL_AlterVScreen(DXL_VSCREEN_HANDLE dst, unsigned char *_addr, enum BITDEPTH bd, int p, int h)
+{
+    validate(dst);
+
+    if (_addr != NULL)
+    {
+        dst->_addr = _addr;
+    }
+    if (bd != DXRGBNULL)
+    {
+        dst->bd = bd;
+    }
+    if (p != -1)
+    {
+        dst->pitch = (short) p;
+    }
+    if (h != -1)
+    {
+        dst->height = (short) h;
+    }
+
+    return DXL_OK;
+}
+
+/*
+ * Update the viewport (offset and size) of a vScreen.  Any argument that
+ * is negative leaves the corresponding field untouched; accepted values
+ * are stored as short.
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE for a bad handle.
+ */
+int
+DXL_AlterVScreenView(DXL_VSCREEN_HANDLE dst,int x,int y,int w,int h)
+{
+    validate(dst);
+
+    if (x >= 0)
+        dst->viewX = (short) x;
+    if (y >= 0)
+        dst->viewY = (short) y;
+    if (w >= 0)
+        dst->viewW = (short) w;
+    if (h >= 0)
+        dst->viewH = (short) h;
+
+    return DXL_OK;
+}
+        
+/*
+ * Create a vScreen, mark it in use and apply the initial address, format,
+ * pitch and height through DXL_AlterVScreen (so the same "leave unchanged"
+ * sentinels apply).  Returns NULL if the allocation fails.
+ */
+DXL_VSCREEN_HANDLE
+DXL_CreateVScreen(unsigned char *_addr, enum BITDEPTH bd, short p,short h)
+{
+    DXL_VSCREEN_HANDLE vScreenCreate(void);
+    DXL_VSCREEN_HANDLE screen = vScreenCreate();
+
+    if (screen != NULL)
+    {
+        /* inUse must be set first: DXL_AlterVScreen rejects handles that are not in use */
+        screen->dkFlags.inUse = 1;
+        DXL_AlterVScreen(screen, _addr, bd, p, h);
+    }
+
+    return screen;
+}
+
+/*
+ * Read back the viewport of a vScreen.  Every output pointer is optional;
+ * NULL pointers are skipped.
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE for a bad handle.
+ */
+int DXL_GetVScreenView(DXL_VSCREEN_HANDLE dst,int *x,int *y,int *w,int *h)
+{
+    validate(dst);
+
+    if (x != NULL)
+    {
+        *x = dst->viewX;
+    }
+    if (y != NULL)
+    {
+        *y = dst->viewY;
+    }
+    if (w != NULL)
+    {
+        *w = dst->viewW;
+    }
+    if (h != NULL)
+    {
+        *h = dst->viewH;
+    }
+
+    return DXL_OK;
+}
+
+
+
+/*
+ * Read back the destination attributes of a vScreen.  Every output
+ * pointer is optional; NULL pointers are skipped.
+ * Returns DXL_OK, or DXL_NULLSOURCE / DXL_NOTINUSE for a bad handle.
+ */
+int DXL_GetVScreenAttributes(DXL_VSCREEN_HANDLE dst, void **_addr, dxvBlitQuality *bq, dxvBitDepth *bd, short *pitch, short *height)
+{
+    validate(dst);
+
+    if (_addr != NULL)
+    {
+        *_addr = dst->_addr;
+    }
+    if (bq != NULL)
+    {
+        *bq = dst->bq;
+    }
+    if (bd != NULL)
+    {
+        *bd = dst->bd;
+    }
+    if (pitch != NULL)
+    {
+        *pitch = dst->pitch;
+    }
+    if (height != NULL)
+    {
+        *height = dst->height;
+    }
+
+    return DXL_OK;
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c b/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c
new file mode 100644
index 00000000..e416ce93
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/generic/ximage.c
@@ -0,0 +1,490 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+#include "duck_mem.h"
+#include "../include/dxl_plugin.h" 
+#include <ctype.h>  /* toupper */
+
+/*
+ * Algorithm registry: creator[i] builds a decompressor for fourCC[i].
+ * A fourCC of 0 marks an unused slot.  Only the first NUM_ALG entries of
+ * either table are ever searched or written.
+ */
+static CREATE_FUNC creator[NUM_ALG];
+/*
+ * One extra element that is never written, so the list handed out by
+ * DXL_GetFourCCList stays zero-terminated even when all NUM_ALG slots are
+ * registered.
+ */
+static unsigned int fourCC[NUM_ALG + 1];
+
+/* version stamp written into every DXL_XIMAGE at creation */
+static DXL_OBJECT_VERSION thisVersion = 0x01000001;
+
+/*
+ * One-bit status flags kept in every DXL_XIMAGE_BASE.  Bits not annotated
+ * with a user below are not referenced in this file (presumably they are
+ * maintained by the decompressor plug-ins -- TODO confirm).
+ */
+typedef struct tagflgs 
+{
+	unsigned inUse : 1;          /* set by DXL_CreateXImageOfType; tested by validateXImage and DXL_DestroyXImage */
+	unsigned DXed : 1;           /* cleared by DXL_AlterXImageData when new data is attached */
+	unsigned clutOwner: 1;       /* not referenced in this file */
+	unsigned doCompleteBlit : 1; /* not referenced in this file */
+	unsigned keyFrame : 1;       /* reported by DXL_IsXImageKeyFrame; not written in this file */
+	unsigned nullFrame : 1;      /* not referenced in this file */
+	unsigned interframe : 1;     /* not referenced in this file */
+	unsigned logo : 1;           /* not referenced in this file */
+	unsigned allocated : 1;      /* set by DXL_AlterXImage after a successful recreate; required by DXL_dxImageToVScreen */
+} dkInfoFlags;
+
+
+/*
+ * Algorithm-independent part of an xImage: status flags, the current
+ * compressed-data pointer/size and the callbacks a decompressor registers
+ * through the DXL_RegisterXImage* functions.
+ */
+typedef struct tXImageBase
+{  
+	dkInfoFlags dkFlags;
+	//short x,y,w,h;
+	unsigned char *addr;             /* compressed data, see DXL_AlterXImageData / DXL_GetXImageCDataAddr */
+	enum BITDEPTH *bdPrefs;          /* not referenced in this file */
+	CREATE_FUNC create;              /* creator that built this image; used by DXL_GetXImageFOURCC */
+	RECREATE_FUNC recreate;          /* optional; invoked by DXL_AlterXImage */
+	DESTROY_FUNC destroy;            /* invoked by DXL_DestroyXImage */
+    SEND_VMSG_FUNC sendVideoMessage; /* optional; invoked by DXL_SendVideoMessage */
+	DX_FUNC dx;                      /* decompress callback; invoked by DXL_dxImageToVScreen */
+	int fSize;                       /* compressed frame size, see DXL_SetXImageCSize / DXL_GetXImageCSize */
+	SET_PARAMETER_FUNC setParameter; /* optional; invoked by DXL_SetParameter */
+	GET_PARAMETER_FUNC getParameter; /* optional; invoked by DXL_GetParameter */
+} DXL_XIMAGE_BASE;
+
+/*
+ * Concrete layout behind DXL_XIMAGE_HANDLE: a version stamp, the generic
+ * base record and the opaque state returned by the algorithm's creator.
+ */
+typedef struct tXImage
+{  
+	DXL_OBJECT_VERSION version;       /* set to thisVersion by DXL_CreateXImageOfType */
+	
+	DXL_XIMAGE_BASE * xImageBasePtr;  /* allocated in DXL_CreateXImageOfType, freed with the image */
+	
+	DXL_HANDLE algorithmBasePtr;      /* value returned by the create / recreate callback */
+} DXL_XIMAGE;
+
+#define validateXImage(x) \
+{ \
+	if (!x) \
+		return (int) DXL_NULLSOURCE; \
+	if (!x->xImageBasePtr->dkFlags.inUse) \
+		return (int) DXL_NOTINUSE; \
+}
+
+//	if (!x->version != thisVersion) 
+//		return (int) DXL_INVALID_DATA; 
+
+/*
+ * Return `type` with each of its four bytes passed through toupper(), so
+ * FOURCC codes compare case-insensitively regardless of byte order.
+ *
+ * Each byte is handed to toupper() as an unsigned char value: passing a
+ * negative char is undefined behaviour, and a negative result would
+ * sign-extend and corrupt the neighbouring bytes when shifted and ORed.
+ * The result is narrowed back to 8 bits and shifted as unsigned.
+ */
+static //inline 
+unsigned int toUpperFOURCC(unsigned int type)
+{
+    unsigned int result = 0;
+    int shift;
+
+    for (shift = 0; shift < 32; shift += 8)
+    {
+        unsigned char ch = (unsigned char)((type >> shift) & 0xffu);
+
+        result |= (unsigned int)(unsigned char)toupper(ch) << shift;
+    }
+
+    return result;
+}
+
+/*
+ * Record the size of the current compressed frame on the xImage.
+ * Returns DXL_OK, or DXL_NOTINUSE when src is NULL.
+ */
+int
+DXL_SetXImageCSize(DXL_XIMAGE_HANDLE src, int temp)
+{
+    if (!src)
+    {
+        return DXL_NOTINUSE;
+    }
+
+    src->xImageBasePtr->fSize = temp;
+    return DXL_OK;
+}
+
+/*
+ * Return the compressed frame size last stored with DXL_SetXImageCSize,
+ * or 0 when src is NULL.
+ */
+int
+DXL_GetXImageCSize(DXL_XIMAGE_HANDLE src)
+{
+    return (src != NULL) ? src->xImageBasePtr->fSize : 0;
+}
+
+/*
+ * Return the compressed-data pointer currently attached to the xImage,
+ * or NULL when src is NULL.
+ */
+unsigned char *
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src)
+{
+    if (!src)
+    {
+        return NULL;
+    }
+
+    return src->xImageBasePtr->addr;
+}
+/*
+int 
+DXL_MoveXImage(DXL_XIMAGE_HANDLE src, enum OFFSETXY mode, int x, int y)
+{
+    validateXImage(src);
+    
+    if (mode != DXL_RELATIVE)
+    {
+        src->xImageBasePtr->x = 0;
+        src->xImageBasePtr->y = 0;
+    }
+    src->xImageBasePtr->x += (short) x;
+    src->xImageBasePtr->y += (short) y;
+
+    return DXL_OK;
+}
+*/
+/*
+ * Attach a new compressed-data pointer to the xImage and clear its DXed
+ * flag.  A NULL data pointer is accepted and stored as-is.
+ * Returns DXL_OK, or the validateXImage error code for a bad handle.
+ */
+int
+DXL_AlterXImageData(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+    DXL_XIMAGE_BASE *base;
+
+    validateXImage(src);
+
+    base = src->xImageBasePtr;
+    base->addr = data;
+    base->dkFlags.DXed = 0;
+
+    return DXL_OK;
+}
+
+/*
+ * Report the xImage's keyFrame flag (0 or 1).  A bad handle yields the
+ * negative validateXImage error code instead.
+ */
+int
+DXL_IsXImageKeyFrame(DXL_XIMAGE_HANDLE src)
+{
+    int isKey;
+
+    validateXImage(src);
+
+    isKey = src->xImageBasePtr->dkFlags.keyFrame;
+    return isKey;
+}
+
+
+/*
+ * Destroy an xImage: give the algorithm a chance to tear down its state
+ * through the registered destroy callback (only when the image is in use
+ * and a callback was actually registered), then free the base record and
+ * the image itself.  A NULL handle is ignored.
+ */
+void 
+DXL_DestroyXImage(DXL_XIMAGE_HANDLE src)
+{
+    DXL_XIMAGE_BASE *base;
+
+    if (src == NULL)
+        return;
+
+    base = src->xImageBasePtr;
+
+    /* a creator is not forced to register a destroy callback, so never
+       call through a NULL pointer */
+    if (base != NULL && base->dkFlags.inUse && base->destroy != NULL)
+    {
+        base->destroy(src);
+    }
+
+    /* re-read the pointer after the callback, as the original code did */
+    if (src->xImageBasePtr != NULL)
+    {
+        duck_free(src->xImageBasePtr);
+    }
+    duck_free(src);
+}
+
+/*
+ * Create an xImage for the given FOURCC.
+ *
+ * data - compressed data handed to the algorithm's creator and stored as
+ *        the image's current data pointer.
+ * type - FOURCC; upper-cased before the registry lookup.
+ *
+ * Every registered creator whose fourCC matches is tried in registration
+ * order until one returns a non-NULL algorithm handle.  Slots without a
+ * creator are skipped: a `type` of 0 matches unused slots, whose creator
+ * pointer is NULL and must not be called.
+ *
+ * Returns the new handle, or NULL on allocation failure or when no
+ * registered algorithm accepts the data.
+ */
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(unsigned char *data, unsigned int type)
+{
+    int i;
+    DXL_XIMAGE_HANDLE nImage = NULL;
+    
+    /* alloc our ximage */
+    nImage = (DXL_XIMAGE_HANDLE) duck_calloc(1, sizeof(DXL_XIMAGE), DMEM_GENERAL);
+    if(nImage == NULL) 
+    {
+    	return NULL;
+    }
+
+    nImage->version = thisVersion;
+
+    /* alloc our generic ximage base */
+    nImage->xImageBasePtr = (DXL_XIMAGE_BASE *) duck_calloc(1, sizeof(DXL_XIMAGE_BASE), DMEM_GENERAL);
+    if(nImage->xImageBasePtr == NULL) 
+    {
+    	duck_free(nImage);
+    	return NULL;
+    }
+
+    /* clear out just in case calloc does not really work */
+    nImage->algorithmBasePtr = NULL;
+    
+
+    /*
+    //convert fourCC to uppercase, fixes problem with calls to DXV with
+    //lowercase fourCC's
+    */
+    type = toUpperFOURCC(type);
+
+
+	/* try to match the fourcc to a registered algorithm */
+    for(i = 0; i < NUM_ALG; i++)
+    {
+        /* skip non-matching slots and slots that have no creator */
+        if(fourCC[i] != type || creator[i] == NULL)
+            continue;
+
+        nImage->algorithmBasePtr = creator[i](nImage, data);
+        if(nImage->algorithmBasePtr != NULL)
+        {
+            nImage->xImageBasePtr->create = creator[i];
+            break;
+        }
+    }
+
+	/* was a valid registered algorithm found ? */
+    if(nImage->algorithmBasePtr == NULL) 
+    {
+    	/* nope, so we are going to bail */
+    	duck_free(nImage->xImageBasePtr);
+    	duck_free(nImage);
+
+    	return NULL;
+	}
+	
+    nImage->xImageBasePtr->dkFlags.inUse = 1;
+    nImage->xImageBasePtr->addr = data;
+
+    return nImage;
+}
+
+
+/*
+ * Expose the internal table of registered fourCC codes.  Slots that have
+ * not been registered hold 0.  The returned pointer refers to static
+ * storage owned by this module.
+ */
+unsigned int *
+DXL_GetFourCCList(void)
+{
+    return &fourCC[0];
+}
+
+
+/*
+ * Look up the registry index of a decompressor by its fourcc.  The
+ * comparison is exact (no case folding is applied here).
+ * Returns the slot index, or DXL_NOTINUSE when no slot matches.
+ */
+int
+DXL_GetAlgHandle(unsigned int fourcc)
+{
+    int slot = 0;
+
+    while (slot < NUM_ALG)
+    {
+        if (fourCC[slot] == fourcc)
+        {
+            return slot;
+        }
+        slot++;
+    }
+
+    return DXL_NOTINUSE;
+}
+
+
+/*
+ * Find the fourcc of an xImage by matching the creator function stored in
+ * the image against the registry.
+ * Returns the fourcc, or 0 when src (or its base record) is NULL or no
+ * registered creator matches -- the same "0 on bad handle" convention used
+ * by DXL_GetXImageCSize.
+ */
+unsigned int 
+DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src)
+{
+	int i;
+
+	if (src == NULL || src->xImageBasePtr == NULL)
+		return 0;
+
+    for (i = 0; i < NUM_ALG; i++)
+    {
+		if (creator[i] == (CREATE_FUNC) src->xImageBasePtr->create) 
+        {
+			return fourCC[i];
+        }
+    }
+
+	return 0;
+}
+
+/*
+ * Decompress the xImage onto the vScreen by delegating to the dx callback
+ * registered by the algorithm.
+ *
+ * Returns the callback's result.  Returns DXL_NOTINUSE when the image was
+ * never successfully altered (allocated flag clear -- the API requires
+ * DXL_AlterXImage after creation) or when no dx callback is registered,
+ * and the validateXImage error code for a bad handle.
+ */
+int
+DXL_dxImageToVScreen(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE dst)
+{
+    DXL_XIMAGE_BASE *base;
+
+    validateXImage(src);
+
+    base = src->xImageBasePtr;
+
+    if (!base->dkFlags.allocated || base->dx == NULL)
+    {
+        return DXL_NOTINUSE;
+    }
+
+    return base->dx(src, dst);
+}
+
+/*-------------------------------------------------------------------
+
+-------------------------------------------------------------------*/
+/*
+ * Initialise the video services by resetting the algorithm registry:
+ * registering a fourcc of 0 zeroes the creator and fourCC tables.
+ * Always returns DXL_OK.
+ */
+int
+DXL_InitVideo(void)
+{
+    (void) DXL_RegisterXImage(NULL, 0L);
+
+    return DXL_OK;
+}
+
+
+/* Shutdown hook for the video services; currently does nothing. */
+void 
+DXL_ExitVideo(void)
+{                                     
+
+}
+
+
+/*
+ * Create-or-reconfigure entry point for xImages.
+ *
+ * When src is NULL a new image is created from `type` (upper-cased
+ * FOURCC); a zero type or a failed creation returns NULL.  When the image
+ * has no recreate callback it is returned unchanged.  Otherwise `data` is
+ * attached and the recreate callback is invoked with the requested
+ * format; on success the allocated flag is set.
+ *
+ * NOTE: when recreate returns NULL the image -- including a handle passed
+ * in by the caller -- is freed and NULL is returned, so the caller must
+ * not use its old handle afterwards.
+ */
+DXL_XIMAGE_HANDLE 
+DXL_AlterXImage(DXL_XIMAGE_HANDLE src, unsigned char *data, int type,
+    				enum BITDEPTH bitDepth, int width, int height)
+{
+    DXL_XIMAGE_BASE *base;
+
+    type = toUpperFOURCC(type);
+
+    if (src == NULL)
+    {
+        /* without a fourcc there is nothing to create from */
+        if (!type)
+            return NULL;
+
+        src = DXL_CreateXImageOfType(data,type);
+        if (src == NULL)
+            return NULL;
+    }
+
+    base = src->xImageBasePtr;
+
+    /* no way to recreate, assume create is good enough */
+    if (base->recreate == NULL)
+        return src;
+
+    base->addr = data;
+    src->algorithmBasePtr = base->recreate(src, data, type, bitDepth, width, height);
+
+    if (src->algorithmBasePtr != NULL)
+    {
+        src->xImageBasePtr->dkFlags.allocated = 1;
+        return src;
+    }
+
+    /* the algorithm rejected the request, so we are going to bail */
+    duck_free(src->xImageBasePtr);
+    duck_free(src);
+
+    return NULL;
+}
+
+
+/*
+ * Forward a parameter to the decompressor's setParameter callback.
+ * Returns DXL_OK after the call, or DXL_NULLSOURCE when the handle, its
+ * base record or the callback is NULL.
+ */
+int
+DXL_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter )
+{
+    if (src == NULL
+        || src->xImageBasePtr == NULL
+        || src->xImageBasePtr->setParameter == NULL)
+    {
+        return DXL_NULLSOURCE;
+    }
+
+    src->xImageBasePtr->setParameter(src, Command, Parameter);
+
+    return DXL_OK;
+}
+
+/*
+ * Query the decompressor through its getParameter callback.
+ * Returns the callback's result, or DXL_NULLSOURCE when the handle, its
+ * base record or the callback is NULL.
+ */
+int
+DXL_GetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter )
+{
+    if (src == NULL
+        || src->xImageBasePtr == NULL
+        || src->xImageBasePtr->getParameter == NULL)
+    {
+        return DXL_NULLSOURCE;
+    }
+
+    return src->xImageBasePtr->getParameter(src, Command, Parameter);
+}
+
+/*
+ * Return the algorithm-private handle stored in the xImage (the value
+ * produced by the create / recreate callback), or NULL when src is NULL --
+ * matching the NULL handling of DXL_GetXImageCDataAddr.
+ */
+DXL_HANDLE
+DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src) 
+{
+	if (src == NULL)
+		return NULL;
+
+	return src->algorithmBasePtr;
+}
+
+/*
+ * Deliver an opaque message to the decompressor.
+ * Returns the sendVideoMessage callback's result; DXL_OK when no callback
+ * is registered; the validateXImage error code for a bad handle.
+ */
+int
+DXL_SendVideoMessage(DXL_XIMAGE_HANDLE src, void *msgHandle, unsigned int msgSize)
+{
+    validateXImage(src);
+
+    if (src->xImageBasePtr->sendVideoMessage == NULL)
+    {
+        return DXL_OK;
+    }
+
+    return src->xImageBasePtr->sendVideoMessage(src, msgHandle, msgSize);
+}
+
+
+
+/*-------------------------------------------------------------------
+	CALLBACK REGISTRATION SECTION
+-------------------------------------------------------------------*/
+/*
+ * Register a creator function for a fourcc in the first free registry
+ * slot (a slot is free while its fourCC is 0).
+ *
+ * Special case: a fourcc of 0 clears both registry tables and returns
+ * DXL_OK.
+ *
+ * Returns the slot index used, or DXL_NOTINUSE when the registry is full.
+ */
+int
+DXL_RegisterXImage(CREATE_FUNC myCreator, unsigned int fourcc)
+{
+    int slot;
+
+    if (fourcc == 0)
+    {
+        duck_memset(creator, 0, sizeof(creator));
+        duck_memset(fourCC, 0, sizeof(fourCC));
+        return DXL_OK;
+    }
+
+    /* locate the first unused slot */
+    slot = 0;
+    while (slot < NUM_ALG && fourCC[slot] != 0)
+    {
+        slot++;
+    }
+
+    if (slot >= NUM_ALG)
+    {
+        return DXL_NOTINUSE;
+    }
+
+    creator[slot] = myCreator;
+    fourCC[slot] = fourcc;
+
+    return slot;
+}
+
+/*
+ * Install the recreate callback on an xImage (used by DXL_AlterXImage).
+ * src is dereferenced unchecked.  Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->recreate = thisFunc;
+    return DXL_OK;
+}
+
+/*
+ * Install the destroy callback on an xImage (used by DXL_DestroyXImage).
+ * src is dereferenced unchecked.  Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->destroy = thisFunc;
+    return DXL_OK;
+}
+
+/*
+ * Install the decompress callback on an xImage (used by
+ * DXL_dxImageToVScreen).  src is dereferenced unchecked.
+ * Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->dx = thisFunc;
+    return DXL_OK;
+}
+
+/*
+ * Install the setParameter callback on an xImage (used by
+ * DXL_SetParameter).  src is dereferenced unchecked.
+ * Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->setParameter = thisFunc;
+    return DXL_OK;
+}
+
+/*
+ * Install the getParameter callback on an xImage (used by
+ * DXL_GetParameter).  src is dereferenced unchecked.
+ * Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->getParameter = thisFunc;
+    return DXL_OK;
+}
+
+/*
+ * Install the sendVideoMessage callback on an xImage (used by
+ * DXL_SendVideoMessage).  src is dereferenced unchecked.
+ * Always returns DXL_OK.
+ */
+int
+DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc)
+{
+    DXL_XIMAGE_BASE *base = src->xImageBasePtr;
+
+    base->sendVideoMessage = thisFunc;
+    return DXL_OK;
+}
+
+/*-------------------------------------------------------------------
+
+-------------------------------------------------------------------*/
+
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h b/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h
new file mode 100644
index 00000000..d152ae29
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/include/duck_dxl.h
@@ -0,0 +1,350 @@
+#ifndef _duck_dxl_h
+#define _duck_dxl_h
+
+
+/******************************************************************************\
+<table BGCOLOR=#FFC0C0 border=1 WIDTH=100% ><tr><td><b>                                                                              
+  duck_dxl.h  </b></td><td><b> 	TrueMotion include file for decompression libraries </b>
+                                                                           
+</td></tr><tr><td>&nbsp</td><td>	Version:      6.0.0  
+</td></tr><tr><td>&nbsp</td><td>  	Created:      3/3/98                                         
+</td></tr><tr><td>&nbsp</td><td>  	Copyright (c) 1994-98, The Duck Corp. All rights reserved.
+</td></tr><tr><td>Important Objects</td><td>The On2 Decompression services try to abstract the various objects
+used to decompress and render both audio and video. This allows the overall API to flex and accommodate new 
+decompression schemes and new destinations.
+</td></tr><tr><td>DXL_XIMAGE_HANDLE</td><td>Abstract container object used to organize and control compressed
+video.
+</td></tr><tr><td>DXL_VSCREEN_HANDLE</td><td>Abstract container object used to organize and control display of
+uncompressed video to a surface.
+</td></tr><tr><td>DXL_XAUDIOSRC_HANDLE</td><td>Abstract container object used to organize and control 
+compressed audio.
+</td></tr><tr><td>DXL_AUDIODST_HANDLE</td><td>Abstract container object used to organize and control 
+rendering / playing of uncompressed audio.
+</td></tr>
+</table>
+******************************************************************************/
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* enumerated data types */
+
+/*
+ * Blit translation mode of a vScreen (see DXL_SetVScreenBlitQuality).
+ * The DXBLIT_R* members carry no description in this header (presumably
+ * reserved values -- TODO confirm).  DXBLITMAX is one past the last
+ * enumerator.
+ */
+typedef enum BLITQUALITY {
+	DXBLIT_SAME = 0,        /* Blit directly, w/o stretching */
+	DXBLIT_R1,
+	DXBLIT_R2,
+	DXBLIT_STRETCH,         /* double horizontally, skip lines vertically */
+	DXBLIT_R3,
+	DXBLIT_STRETCH_BRIGHT,  /* double horizontally, interpolate vertically */
+	DXBLIT_R4,
+	DXBLIT_R5,
+	DXBLIT_R6,
+	DXBLIT_NONE,
+	DXBLITMAX
+} dxvBlitQuality ;
+
+/*
+ * Pixel format / colour depth of a destination surface.  The numeric
+ * values are fixed explicitly.  DXRGBNULL doubles as the "leave format
+ * unchanged" sentinel accepted by DXL_AlterVScreen.  DXMAX is one past
+ * the last enumerator.
+ */
+typedef enum BITDEPTH { 
+    DXRGBNULL = 0, 
+    DXRGB8 = 1, 
+    
+	DXRGB16_555 = 2, 
+    DXRGB24 = 3, 
+    DXRGB_UNUSED = 4,
+    DXRGB16VESA = 5,
+    DXRGB8VESA = 6,
+    DXRGB16_565 = 7,
+
+    DXYUY2 = 8, 
+    DXYVU9 = 9, 
+    DXYV12 = 10, 
+    DXUYVY = 11, 
+    
+	DXRGB32 = 12, 
+    DXRGB16VESA_565 = 13, 
+	DXHALFTONE8 =14,
+	DXI420 = 15,
+	DXMAX
+} dxvBitDepth ;
+
+#define DXRGB16	DXRGB16_555
+#define DXRGB24CHAR DXRGB24
+
+/*
+ * Selects whether an x/y offset replaces the current position
+ * (DXL_ABSOLUTE) or is added to it (DXL_RELATIVE).
+ * NOTE(review): no function declared in this header takes this type --
+ * presumably legacy; confirm before removing.
+ */
+typedef enum OFFSETXY { 
+	DXL_ABSOLUTE = 0, 
+	DXL_RELATIVE 
+} dxvOffsetMode;
+
+/*
+ * Status codes returned by the DXL API.  Errors are negative, DXL_OK is 0
+ * and DXL_HOLD_FRAME is the only positive code.  Because several getters
+ * return either a value or one of these codes in the same int, callers
+ * should treat negative results as errors.
+ */
+typedef enum DXL_ERR{
+    DXL_LOW_ERR = -32000,
+    DXL_HARDWARE_ERROR = -16002,
+    DXL_HARDWARE_NOT_INITED = -16001,
+    DXL_HARDWARE_BUFFER_FULL = -16000,
+    DXL_INVALID_REQUEST = -9,
+    DXL_VERSION_CONFLICT = -8,
+    DXL_INVALID_DATA = -7,
+    DXL_INVALID_BLIT = -6,
+    DXL_BAD_DATA = -5,
+    DXL_ALLOC_FAILED = -4,
+    DXL_NULL_FRAME = -3, 
+    DXL_NULLSOURCE = -2,    /* a required handle or callback was NULL */
+    DXL_NOTINUSE = -1,      /* object not in use / not found / registry full */
+    DXL_OK = 0,
+    DXL_HOLD_FRAME = 1
+} dxvError ;
+
+
+enum IMAGETYPE { whgfw_X=0 };  /* MEW */
+enum BGMODE     { kjhdkj_X=0 }; /* MEW */
+
+
+/*********************************************************/
+
+/* definition of data handles */                                         
+
+/* Opaque handles: the structs are defined privately by the implementation. */
+typedef struct vScreen *DXL_VSCREEN_HANDLE;	/* destination surface */
+typedef struct tXImage *DXL_XIMAGE_HANDLE;	/* compressed video source */
+
+
+/* main video decompression init, exit and query */
+
+
+/*@
+@Name 			DXL_InitVideo
+@Description  		Initialize Video decompression services		
+@Return value		DXL_OK on success.
+@*/
+int DXL_InitVideo(
+void
+);
+
+
+/*@
+@Name			DXL_ExitVideo
+@Description		shutdown video decompression services.
+@Return value		none
+@*/
+void DXL_ExitVideo(void); 
+
+
+/* get pointer to the table of supported fourCCs; unused entries are 0,
+   so the list ends at the first zero entry (never read past NUM_ALG entries) */
+unsigned int *DXL_GetFourCCList(void);
+
+
+/*@
+@Name			DXL_SetXImageCSize
+@Description		Set the size of the current compressed frame		
+@Return value		DXL_OK on success, DXL_NOTINUSE if xImage is NULL
+@*/
+int DXL_SetXImageCSize(
+DXL_XIMAGE_HANDLE xImage, 	/* compressed image handle */
+int compressedSize		/* compressed image size */
+);
+
+
+
+
+/*@
+@Name 			DXL_CreateXImageOfType
+@Description		Create an xImage (decompressor) object  of a requested type based on a FOURCC. 
+@Return value		handle to xImage created by this call .
+@*/
+DXL_XIMAGE_HANDLE DXL_CreateXImageOfType(
+unsigned char *data,    	/* pointer to compressed data */
+unsigned int fccType		/* FOURCC style code indicating type of compressed data */
+);
+
+
+
+/*@
+@Name 		DXL_DestroyXImage 
+@Description  	destroy the specified xImage
+@Return value	void
+@*/
+void DXL_DestroyXImage(
+DXL_XIMAGE_HANDLE src		/* handle to compressed image */
+);
+
+
+
+/*@
+@Name 		DXL_AlterXImageData
+@Description	feed the xImage new data, get ready to decompress 	
+@Return value  	DXL_OK on success
+@*/
+int DXL_AlterXImageData(		
+DXL_XIMAGE_HANDLE src,		/* xImage, handle to compressed data */			
+unsigned char *ptrData		/* latest data to be associated with xImage */
+);
+
+
+
+/*@
+@Name 		DXL_AlterXImage
+@Description    explicitly alter attributes of an xImage 
+@Return value 	handle to compressed image
+@*/
+DXL_XIMAGE_HANDLE DXL_AlterXImage(
+DXL_XIMAGE_HANDLE src,		/* handle to compressed image */
+unsigned char *ptrData,		/* pointer to compressed data. */
+int xImType,			/* code for compress data type. */
+dxvBitDepth bitDepth ,    	/* bitdepth of decompressed data */
+int maxWidth,			/* width of decompressed image */
+int maxHeight 			/* height of decompressed image */
+);
+
+
+unsigned char * 
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src);
+
+
+
+/*@
+@Name 			DXL_GetXImageCSize
+@Description 		Get xImage compressed size
+@Return value   	returns the compressed size
+@*/
+int DXL_GetXImageCSize(
+DXL_XIMAGE_HANDLE src		/* handle to compressed image */
+);
+
+
+
+/*@
+@Name			DXL_GetXImageXYWH
+@Description  		get application specified x,y offset, and overall decompressed width and height. 
+x and y offsets are legacy fields, ignore. 		
+@Return value  		DXL_OK on success	
+@*/
+int DXL_GetXImageXYWH(
+	DXL_XIMAGE_HANDLE src,			/* the xImage Handle. */
+	int *x,int *y,int *w, int *h		/* x,y,w,h */
+	);
+
+
+/*@
+@Name 			DXL_IsXImageKeyFrame
+@Description   		return whether this xImage is a keyFrame.
+@Return value  		return whether this xImage is a keyFrame.
+@*/
+int DXL_IsXImageKeyFrame(
+	DXL_XIMAGE_HANDLE src   /* handle to compressed image */
+);
+
+
+
+/*@
+@Name 			DXL_dxImageToVScreen
+@Description		decompress and blit as a single process 
+@Return value		DXL_OK on success.
+@*/
+int DXL_dxImageToVScreen(
+	DXL_XIMAGE_HANDLE src, 		/* xImage handle. */
+	DXL_VSCREEN_HANDLE dst		/* handle to destination surface */
+	);
+
+
+/* vscreen management functions */
+
+/*@
+@Name 		DXL_CreateVScreen
+@Description	create a virtual screen for rendering, storing decompressed video.		
+@Return value	returns a DXL_VSCREEN_HANDLE
+@*/
+DXL_VSCREEN_HANDLE DXL_CreateVScreen(
+	unsigned char *addr,		/* The address where pixel data should be written */
+	dxvBitDepth colorMode, 		/* Determines the colorspace and color depth of VScreen */
+	short bytePitch,		/* offset from one raster to the next */
+	short height			/* number of rasters in a VScreen */
+	);
+
+
+
+/*@
+@Name 		DXL_AlterVScreen
+@Description	Alter address and attributes associated with a vscreen. 			
+@Return value	DXL_OK on success; DXL_NULLSOURCE or DXL_NOTINUSE if dst is NULL or not in use
+@*/
+int DXL_AlterVScreen(
+	DXL_VSCREEN_HANDLE dst, 	/* handle to a VScreen */
+	unsigned char *addr,		/* The address where pixel data should be written */
+	dxvBitDepth colorMode,		/* Determines the colorspace and color depth of VScreen */
+	int bytePitch,			/* offset from one raster to the next */
+	int height			/* number of rasters in a VScreen */
+	);
+	
+
+/* alter clipping rectangle of vScreen */
+/* not supported by all decompressors */
+int DXL_AlterVScreenClip(
+	DXL_VSCREEN_HANDLE dst,
+	int x,int y,
+	int w,int h
+	);
+
+/* alter viewport rectangle of vScreen */
+/* width/height not supported by all decompressors */
+int DXL_AlterVScreenView(
+	DXL_VSCREEN_HANDLE dst,
+	int x,int y,
+	int w,int h
+	);
+
+/* destroy a vScreen object/struct */
+void DXL_DestroyVScreen(
+	DXL_VSCREEN_HANDLE dst
+	);
+
+/* set blit mode/quality of a vScreen 
+   same (normal), stretch (black lined)
+   stretch bright (stretched w/interpolation) */    
+int DXL_SetVScreenBlitQuality(
+	DXL_VSCREEN_HANDLE dest,
+	dxvBlitQuality bq
+	);
+
+
+/* get attributes of the vScreen */
+int DXL_GetVScreenAttributes(
+    DXL_VSCREEN_HANDLE vScreen,
+    void **addr, 
+    dxvBlitQuality *bq, 
+    dxvBitDepth *bd,
+    short *pitch, 
+    short *height
+	);   
+
+/* get vScreen's current viewport rectangle
+	a viewport represents an x,y, offset and 
+	a clipping width and height */        
+int DXL_GetVScreenView(
+	DXL_VSCREEN_HANDLE dst,
+	int *x,int *y,int *w,int *h
+	);
+
+/* pass a parameter to the decompressor */
+int  DXL_SetParameter(
+	DXL_XIMAGE_HANDLE src, 
+	int Command, 
+	unsigned int Parameter 
+	);
+
+unsigned int DXL_GetXImageFOURCC(DXL_XIMAGE_HANDLE src);
+
+/* Temporary hack to dxv to allow calls to get info (jbb) */
+/*
+typedef struct tFrameInfo
+{
+    int KeyFrame;
+    int Version;
+    int Quality;
+    int vp30Flag;
+} FrameInfo;
+*/
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* include guards */
diff --git a/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h b/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h
new file mode 100644
index 00000000..0b49855a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/dxv2/include/dxl_plugin.h
@@ -0,0 +1,70 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#ifndef _dxl_plugin_h
+#define _dxl_plugin_h
+
+#include "duck_dxl.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* max number of algorithms to be supported at one time */
+#define NUM_ALG 16
+
+typedef void * DXL_HANDLE;
+
+typedef unsigned int DXL_OBJECT_VERSION;
+
+/*
+ * Callback signatures a decompressor plug-in supplies.  The xImage handle
+ * is always the first argument.
+ */
+
+/* build algorithm state for (image, compressed data); NULL means "cannot handle" */
+typedef DXL_HANDLE (*CREATE_FUNC)(DXL_XIMAGE_HANDLE, unsigned char *);
+
+/* reconfigure: (image, data, fourcc type, bit depth, width, height); NULL on failure */
+typedef DXL_HANDLE (*RECREATE_FUNC)(DXL_XIMAGE_HANDLE,void *,int,int,int,int); 
+
+/* release algorithm state when the image is destroyed */
+typedef int (*DESTROY_FUNC)(DXL_XIMAGE_HANDLE); 
+
+/* NOTE(review): purpose not documented here; pairs with DXL_RegisterXImageSeedData */
+typedef int (*SEED_DATA_FUNC)(DXL_XIMAGE_HANDLE); 
+
+/* decompress the image onto a vScreen; returns a DXL status code */
+typedef int (*DX_FUNC)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE); 
+
+/* set a parameter: (image, command, value); no result is reported */
+typedef void (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+//typedef int (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+
+/* query a parameter: (image, command, value); result is the return value */
+typedef int (*GET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+
+/* deliver an opaque message: (image, message, message size) */
+typedef int (*SEND_VMSG_FUNC)(DXL_XIMAGE_HANDLE, void *, unsigned int);
+
+
+int DXL_GetAlgHandle(unsigned int fourcc);
+DXL_HANDLE DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src);
+
+int DXL_RegisterXImage(CREATE_FUNC creator, unsigned int fourcc);
+int DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc);
+int DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc);
+int DXL_RegisterXImageSeedData(DXL_XIMAGE_HANDLE src, SEED_DATA_FUNC thisFunc);
+int DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc);
+
+int DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc);
+int DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc);
+
+int DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc);
+
+
+/*
+ * Pack four characters into an unsigned int FOURCC: ch0 occupies the least
+ * significant byte and ch3 the most significant.  Each argument is reduced
+ * to an unsigned char before shifting, so negative char values cannot
+ * sign-extend.
+ */
+#define DXL_MKFOURCC( ch0, ch1, ch2, ch3 ) \
+		( ( (unsigned int)(unsigned char)(ch3) << 24 ) | \
+		  ( (unsigned int)(unsigned char)(ch2) << 16 ) | \
+		  ( (unsigned int)(unsigned char)(ch1) << 8 )  | \
+		    (unsigned int)(unsigned char)(ch0) )
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/codec_common.h b/Src/libvpShared/corelibs/cdxv/include/codec_common.h
new file mode 100644
index 00000000..a1cf862b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/codec_common.h
@@ -0,0 +1,101 @@
+/****************************************************************************
+*
+*   Module Title :     Codec_common.h
+*
+*   Description  :     Common codec definitions header file.
+*
+****************************************************************************/
+#ifndef __INC_COMCODEC_H
+#define __INC_COMCODEC_H
+
+/****************************************************************************
+*  Include Files
+****************************************************************************/
+#include <string.h>
+#include "type_aliases.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+
+// Platform Specific Inlines
+#ifdef _MSC_VER
+ #ifndef INLINE
+  #define INLINE      __inline
+ #endif
+ #ifndef FORCEINLINE
+  #define FORCEINLINE __forceinline
+ #endif
+#else
+ #ifndef INLINE
+  #define INLINE      inline
+ #endif
+ #ifndef FORCEINLINE
+  #define FORCEINLINE inline
+ #endif
+#endif
+
+
+// Clamp x to the range 0..255.
+// NOTE: x is evaluated up to three times -- do not pass expressions with side effects.
+#define LIMIT(x)    ( (x)<0 ? 0: (x)>255 ? 255: (x) )
+/****************************************************************************
+*  Module constants.
+****************************************************************************/
+#define BASE_FRAME              0
+#define NORMAL_FRAME            1
+#define Q_TABLE_SIZE            64
+#define BLOCK_HEIGHT_WIDTH      8
+#define BLOCK_SIZE              (BLOCK_HEIGHT_WIDTH * BLOCK_HEIGHT_WIDTH)
+
+
+/****************************************************************************
+*  Types
+****************************************************************************/
+
+/* Type defining YUV data elements. */
+typedef UINT8 YUV_BUFFER_ENTRY;
+typedef UINT8 *YUV_BUFFER_ENTRY_PTR;
+
+/* Frame geometry and scaling configuration shared by the codec modules. */
+typedef struct CONFIG_TYPE
+{
+    // The size of the surface we want to draw to
+    UINT32 VideoFrameWidth;
+    UINT32 VideoFrameHeight;
+
+    // Strides of the Y and UV planes (units not stated here -- presumably bytes; TODO confirm)
+    INT32 YStride;
+    INT32 UVStride;
+
+    // The number of horizontal and vertical blocks encoded
+    UINT32 HFragPixels;
+    UINT32 VFragPixels;
+
+    // The Intended Horizontal Scale
+    UINT32 HScale;
+    UINT32 HRatio;
+
+    // The Intended Vertical Scale
+    UINT32 VScale;
+    UINT32 VRatio;
+
+    // The way in which we intended to scale (presumably a SCALE_MODE value -- TODO confirm)
+    UINT32 ScalingMode;
+
+    // Interlaced: 0 means no, 1 means yes
+    UINT32 Interlaced;
+
+	// Expanded frame dimensions (NOTE(review): meaning not documented in this header)
+	UINT32 ExpandedFrameWidth;
+	UINT32 ExpandedFrameHeight;
+
+} CONFIG_TYPE;
+
+/* Two-component signed 16-bit vector. */
+typedef struct
+{
+    INT16   x;
+    INT16   y;
+} MOTION_VECTOR;
+
+/* A coordinate shares the motion-vector layout. */
+typedef MOTION_VECTOR COORDINATE;
+/* One 16-bit list entry, and a list of 64 of them (same count as Q_TABLE_SIZE). */
+typedef INT16           Q_LIST_ENTRY;
+typedef Q_LIST_ENTRY    Q_LIST[64];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h b/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h
new file mode 100644
index 00000000..072bd723
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/codec_common_interface.h
@@ -0,0 +1,108 @@
+/****************************************************************************
+*
+*   Module Title :     codec_common_if.H
+*
+*   Description  :     Interface to video codec demo decompressor DLL
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.01 PGW 21/07/99  Added FR_INVALID_MODE_TOKEN.
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef CODEC_COMMON_INTERFACE_H
+#define CODEC_COMMON_INTERFACE_H
+
+#define __export   
+#define _export  
+#define DllExport   __declspec( dllexport )
+#define DllImport   __declspec( dllimport )
+
+// Playback ERROR Codes. 
+#define NO_DECODER_ERROR			0
+#define REMOTE_DECODER_ERROR        -1
+
+#define	DFR_BAD_DCT_COEFF			-100
+#define	DFR_ZERO_LENGTH_FRAME		-101
+#define DFR_FRAME_SIZE_INVALID		-102
+#define DFR_OUTPUT_BUFFER_OVERFLOW	-103
+#define DFR_INVALID_FRAME_HEADER    -104
+#define FR_INVALID_MODE_TOKEN       -110
+#define ETR_ALLOCATION_ERROR		-200
+#define ETR_INVALID_ROOT_PTR		-201
+#define SYNCH_ERROR					-400
+#define BUFFER_UNDERFLOW_ERROR		-500
+#define PB_IB_OVERFLOW_ERROR        -501
+
+// External error triggers
+#define PB_HEADER_CHECKSUM_ERROR    -601
+#define PB_DATA_CHECKSUM_ERROR      -602
+
+// DCT Error Codes
+#define DDCT_EXPANSION_ERROR        -700
+#define DDCT_INVALID_TOKEN_ERROR    -701
+
+// ExceptionErrors
+#define GEN_EXCEPTIONS              -800
+#define EX_UNQUAL_ERROR             -801
+
+// Unrecoverable error codes
+#define FATAL_PLAYBACK_ERROR        -1000
+#define GEN_ERROR_CREATING_CDC		-1001
+#define GEN_THREAD_CREATION_ERROR   -1002
+#define DFR_CREATE_BMP_FAILED		-1003
+
+// YUV buffer configuration structure: dimensions and stride of the Y plane
+// and of the U/V planes (which share one set of dimensions), plus a
+// pointer to each of the three planes.
+typedef struct
+{
+    int     YWidth;
+    int     YHeight;
+    int     YStride;
+
+    int     UVWidth;
+    int     UVHeight;
+    int     UVStride;
+
+    char *  YBuffer;
+    char *  UBuffer;
+    char *  VBuffer;
+
+} YUV_BUFFER_CONFIG;
+// Codec setting identifiers.  Values are assigned sequentially from 0,
+// except that C_SET_EXPERIMENTAL_MIN..C_SET_EXPERIMENTAL_MAX reserves a
+// block of 256 consecutive values; the members after it continue from
+// C_SET_EXPERIMENTAL_MAX + 1.
+typedef enum
+{
+    C_SET_KEY_FRAME,
+    C_SET_FIXED_Q,
+    C_SET_FIRSTPASS_FILE,
+    C_SET_EXPERIMENTAL_MIN,
+    C_SET_EXPERIMENTAL_MAX = C_SET_EXPERIMENTAL_MIN + 255,
+	C_SET_CHECKPROTECT,
+	C_SET_TESTMODE,
+	C_SET_INTERNAL_SIZE,
+	C_SET_RECOVERY_FRAME,
+	C_SET_REFERENCEFRAME,
+    C_SET_GOLDENFRAME
+
+#ifndef VP50_COMP_INTERFACE
+    // Specialist test facilities. 
+    // (the only entry in this section is commented out, so the #ifndef currently adds nothing)
+//    C_VCAP_PARAMS,              // DO NOT USE FOR NOW WITH VFW CODEC
+#endif
+
+} C_SETTING;
+
+// Scaling modes (values 0..3).
+// NOTE(review): presumably the values stored in CONFIG_TYPE.ScalingMode -- confirm.
+typedef enum
+{
+    MAINTAIN_ASPECT_RATIO   = 0x0,
+    SCALE_TO_FIT            = 0x1,
+    CENTER                  = 0x2,
+    OTHER                   = 0x3
+} SCALE_MODE;
+
+
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h b/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h
new file mode 100644
index 00000000..c961f5f2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/dxl_plugin.h
@@ -0,0 +1,75 @@
+//==========================================================================
+//
+//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+//  PURPOSE.
+//
+//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+//
+//--------------------------------------------------------------------------
+
+
+#ifndef _dxl_plugin_h
+#define _dxl_plugin_h
+
+#include "duck_dxl.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* max number of algorithms to be supported at one time */
+#define NUM_ALG 16
+
+typedef void * DXL_HANDLE;
+
+typedef unsigned int DXL_OBJECT_VERSION;
+
+DXL_HANDLE
+DXL_GetAlgorithmBasePtr(DXL_XIMAGE_HANDLE src);
+
+unsigned char * 
+DXL_GetXImageCDataAddr(DXL_XIMAGE_HANDLE src);
+
+/* Callback signatures; CREATE_FUNC is the type DXL_RegisterXImage takes together with a fourcc. */
+typedef DXL_HANDLE (*CREATE_FUNC)(DXL_XIMAGE_HANDLE, unsigned char *);
+/* Type accepted by DXL_RegisterXImageRecreate; the meaning of the four int arguments is not documented here. */
+typedef DXL_HANDLE (*RECREATE_FUNC)(DXL_XIMAGE_HANDLE,void *,int,int,int,int); 
+/* Type accepted by DXL_RegisterXImageDestroy. */
+typedef int (*DESTROY_FUNC)(DXL_XIMAGE_HANDLE); 
+/* NOTE(review): no DXL_RegisterXImage* function declared in this header takes a SEED_DATA_FUNC. */
+typedef int (*SEED_DATA_FUNC)(DXL_XIMAGE_HANDLE); 
+/* Type accepted by DXL_RegisterXImageDx. */
+typedef int (*DX_FUNC)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE); 
+/* Type accepted by DXL_RegisterXImageSetParameter; returns nothing (an int-returning variant is kept commented out). */
+typedef void (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+//typedef int (*SET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+/* Type accepted by DXL_RegisterXImageGetParameter; the last argument is an unsigned int by value, not a pointer. */
+typedef int (*GET_PARAMETER_FUNC)(DXL_XIMAGE_HANDLE, int , unsigned int);
+/* Type accepted by DXL_RegisterXImageSendVideoMessage. */
+typedef int (*SEND_VMSG_FUNC)(DXL_XIMAGE_HANDLE, void *, unsigned int);
+
+
+int DXL_GetAlgHandle(unsigned int fourcc);
+
+int DXL_RegisterXImage(CREATE_FUNC creator, unsigned int fourcc);
+int DXL_RegisterXImageRecreate(DXL_XIMAGE_HANDLE src, RECREATE_FUNC thisFunc);
+int DXL_RegisterXImageDestroy(DXL_XIMAGE_HANDLE src, DESTROY_FUNC thisFunc);
+int DXL_RegisterXImageDx(DXL_XIMAGE_HANDLE src, DX_FUNC thisFunc);
+
+int DXL_RegisterXImageSetParameter(DXL_XIMAGE_HANDLE src, SET_PARAMETER_FUNC thisFunc);
+int DXL_RegisterXImageGetParameter(DXL_XIMAGE_HANDLE src, GET_PARAMETER_FUNC thisFunc);
+
+int DXL_RegisterXImageSendVideoMessage(DXL_XIMAGE_HANDLE src, SEND_VMSG_FUNC thisFunc);
+
+/* Packs four character codes into one unsigned int: ch0 in bits 0-7, ch1 in 8-15, ch2 in 16-23, ch3 in 24-31. */
+#define DXL_MKFOURCC( ch0, ch1, ch2, ch3 ) \
+		( (unsigned int)(unsigned char)(ch0) | ( (unsigned int)(unsigned char)(ch1) << 8 ) |    \
+		( (unsigned int)(unsigned char)(ch2) << 16 ) | ( (unsigned int)(unsigned char)(ch3) << 24 ) )
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/postproc_if.h b/Src/libvpShared/corelibs/cdxv/include/postproc_if.h
new file mode 100644
index 00000000..40c2a450
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/postproc_if.h
@@ -0,0 +1,151 @@
+/****************************************************************************
+*
+*   Module Title :     postproc_if.h
+*
+*   Description  :     Post-processor interface header file.
+*
+****************************************************************************/
+#ifndef __INC_POSTPROC_IF_H
+#define __INC_POSTPROC_IF_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+/****************************************************************************
+*  Typedefs
+****************************************************************************/
+typedef struct POSTPROC_INSTANCE * POSTPROC_INST;
+
+/****************************************************************************
+*  Imported Functions.
+****************************************************************************/
+extern void InitPostProcessing
+( 
+	UINT32 *DCQuantScaleV2p,
+	UINT32 *DCQuantScaleUVp,
+	UINT32 *DCQuantScaleV1p,
+	UINT32 Version
+);
+
+extern void DeInitPostProcessing ( void );	// takes no arguments; ( void ) makes this a checked prototype in C
+
+extern POSTPROC_INST CreatePostProcInstance
+(
+ CONFIG_TYPE *ConfigurationInit		// configuration to setup
+);
+
+extern void DeletePostProcInstance
+(
+ POSTPROC_INST	 *pbi				// postprocessor instance to delete
+);
+
+extern void SetPPInterlacedMode(POSTPROC_INST ppi, int Interlaced);
+extern void SetDeInterlaceMode(POSTPROC_INST ppi, int DeInterlaceMode);
+extern void SetAddNoiseMode(POSTPROC_INST ppi, int AddNoiseMode);
+
+extern void ChangePostProcConfiguration
+(
+ POSTPROC_INST	pbi,				// postprocessor instance to use	
+ CONFIG_TYPE *Configuration			// configuration to change to
+);
+
+extern void PostProcess
+(
+ POSTPROC_INST	 pbi,				// postprocessor instance to use
+ INT32       Vp3VersionNo,			// version of frame
+ INT32		 FrameType,				// key or non key
+ INT32		 PostProcessingLevel,	// level of post processing to perform 
+ INT32		 FrameQIndex,			// q index value used on passed in frame
+ UINT8		*LastFrameRecon,		// reconstruction buffer : passed in
+ UINT8		*PostProcessBuffer,		// postprocessing buffer : passed in
+ UINT8		*FragInfo,				// blocks coded : passed in
+ UINT32      FragInfoElementSize,	// size of each element
+ UINT32		 FragInfoCodedMask		// mask to get at whether fragment is coded
+);
+
+extern void (*ClampLevels)
+( 
+	POSTPROC_INST pbi,
+	INT32        BlackClamp,			// number of values to clamp from 0 
+	INT32        WhiteClamp,			// number of values to clamp from 255
+	UINT8		*Src,					// reconstruction buffer : passed in
+	UINT8		*Dst					// postprocessing buffer : passed in
+);
+
+extern void LoopFilter
+(
+ POSTPROC_INST	 pbi,				// postprocessor instance to use
+ INT32		 FrameQIndex,			// q index value used on passed in frame
+ UINT8		*LastFrameRecon,		// reconstruction buffer : passed in
+ UINT8		*PostProcessBuffer,		// postprocessing buffer : passed in
+ UINT8		*FragInfo,				// blocks coded : passed in
+ UINT32      FragInfoElementSize,	// size of each element
+ UINT32		 FragInfoCodedMask		// mask to get at whether fragment is coded
+);
+
+extern void ApplyReconLoopFilter
+(
+ POSTPROC_INST	 pbi,				// postprocessor instance to use
+ INT32		 FrameQIndex,			// q index value used on passed in frame
+ UINT8		*LastFrameRecon,		// reconstruction buffer : passed in
+ UINT8		*PostProcessBuffer,		// postprocessing buffer : passed in
+ UINT8		*FragInfo,				// blocks coded : passed in
+ UINT32      FragInfoElementSize,	// size of each element
+ UINT32		 FragInfoCodedMask		// mask to get at whether fragment is coded
+);
+
+extern void ScaleOrCenter
+( 
+ POSTPROC_INST	 pbi,				// postprocessor instance to use
+ UINT8		       *FrameBuffer,	// buffer to use passed in
+ YUV_BUFFER_CONFIG * YuvConfig		// size you want to output buffer to
+);
+
+/****************************************************************************
+*  Exported Functions.
+****************************************************************************/
+extern void UpdateUMVBorder
+( 
+ POSTPROC_INST    pbi, 
+ UINT8 * DestReconPtr 
+);
+
+extern void  (*FilteringVert_12) 
+(
+ UINT32 QValue,
+ UINT8 * Src, 
+ INT32 Pitch
+); 
+
+extern void  (*FilteringHoriz_12)
+(
+ UINT32 QValue,
+ UINT8 * Src, 
+ INT32 Pitch
+); 
+
+extern void  (*FilteringVert_8)  
+(
+ UINT32 QValue,
+ UINT8 * Src, 
+ INT32 Pitch
+); 
+
+extern void  (*FilteringHoriz_8) 
+(
+ UINT32 QValue,
+ UINT8 * Src, 
+ INT32 Pitch
+); 
+
+extern void CopyFrame( POSTPROC_INST pbi, YUV_BUFFER_CONFIG *b, UINT8 *DestReconPtr);
+
+/****************************************************************************
+*  Exported Data.
+****************************************************************************/
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/preproc.h b/Src/libvpShared/corelibs/cdxv/include/preproc.h
new file mode 100644
index 00000000..98d748b4
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/preproc.h
@@ -0,0 +1,40 @@
+/****************************************************************************
+*
+*   Module Title :     preproc.h
+*
+*   Description  :     simple preprocessor
+*
+****************************************************************************/
+
+#ifndef __INC_PREPROC_H
+#define __INC_PREPROC_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "duck_mem.h"
+
+/****************************************************************************
+*  Types
+****************************************************************************/
+
+typedef struct 
+{
+	unsigned char* frameBuffer;
+	int frame;
+	unsigned int *fixedDivide;
+	// NOTE(review): the *Alloc members mirror frameBuffer/fixedDivide -- presumably the raw allocations behind them; confirm in InitPreProc/DeletePreProc.
+	unsigned char*frameBufferAlloc;
+	unsigned int *fixedDivideAlloc;
+} PreProcInstance;
+
+/****************************************************************************
+*  Functions.
+****************************************************************************/
+
+void DeletePreProc( PreProcInstance *ppi);
+int InitPreProc( PreProcInstance *ppi, int FrameSize);
+extern void spatialFilter_c( PreProcInstance *ppi,unsigned char *s,unsigned char *d,int width,int height,int pitch,int strength);
+extern void (*tempFilter)( PreProcInstance *ppi,unsigned char *s,unsigned char *d,int bytes,int strength);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/preprocif.h b/Src/libvpShared/corelibs/cdxv/include/preprocif.h
new file mode 100644
index 00000000..e941be4f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/preprocif.h
@@ -0,0 +1,64 @@
+/****************************************************************************
+*
+*   Module Title :     preprocif.h
+*
+*   Description  :     Pre-processor interface header file.
+*
+****************************************************************************/						
+
+#ifndef __PREPROC_IF_H
+#define __PREPROC_IF_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+*  Types
+****************************************************************************/
+
+typedef struct
+{
+	UINT8 * Yuv0ptr;
+	UINT8 * Yuv1ptr;
+
+	UINT8	*FragInfo;				// blocks coded : passed in
+	UINT32   FragInfoElementSize;	// size of each element
+	UINT32	 FragInfoCodedMask;		// mask to get at whether fragment is coded
+
+    UINT32 * RegionIndex;           // Gives pixel index for top left of each block 
+	UINT32 VideoFrameHeight;
+	UINT32 VideoFrameWidth;
+	UINT8 HFragPixels;
+	UINT8 VFragPixels;
+
+} SCAN_CONFIG_DATA;
+
+typedef enum
+{	SCP_FILTER_ON_OFF,
+    SCP_SET_SRF_OFFSET,
+    SCP_SET_EBO_ON_OFF,
+    SCP_SET_VCAP_LEVEL_OFFSET,
+	SCP_SET_SHOW_LOCAL
+
+} SCP_SETTINGS;
+
+typedef struct PP_INSTANCE * xPP_INST;
+
+/****************************************************************************
+*  Module statics
+****************************************************************************/
+/* Controls whether Early break out is on or off in default case */
+#define EARLY_BREAKOUT_DEFAULT  TRUE           
+
+/****************************************************************************
+*  Functions
+****************************************************************************/
+extern  void SetScanParam ( xPP_INST ppi, UINT32 ParamId, INT32 ParamValue );
+extern  UINT32 YUVAnalyseFrame ( xPP_INST ppi, UINT32 * KFIndicator );
+extern  xPP_INST CreatePPInstance ( void );
+extern  void DeletePPInstance ( xPP_INST * );
+extern  BOOL ScanYUVInit ( xPP_INST,  SCAN_CONFIG_DATA *ScanConfigPtr );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/include/vputil_if.h b/Src/libvpShared/corelibs/cdxv/include/vputil_if.h
new file mode 100644
index 00000000..63fc0128
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/include/vputil_if.h
@@ -0,0 +1,149 @@
+/****************************************************************************
+*
+*   Module Title :     vputil_if.h
+*
+*   Description  :     Codec utilities header file.
+*
+****************************************************************************/
+#ifndef __VPUTIL_IF_H
+#define __VPUTIL_IF_H
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common_interface.h"
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+extern void InitVPUtil ( void );
+
+extern void (*ReconIntra)
+( 
+ INT16 *tmpBuffer, 
+ UINT8 *ReconPtr, 
+ UINT16 *ChangePtr, 
+ UINT32 LineStep 
+);
+
+extern void (*ReconInter)
+( 
+ INT16 * tmpBuffer, 
+ UINT8 * ReconPtr, 
+ UINT8 * RefPtr, 
+ INT16 * ChangePtr, 
+ UINT32 LineStep 
+);
+
+extern void (*ReconInterHalfPixel2)
+( 
+ INT16 * tmpBuffer, 
+ UINT8  * ReconPtr, 
+ UINT8  * RefPtr1, 
+ UINT8 * RefPtr2, 
+ INT16  * ChangePtr, 
+ UINT32 LineStep 
+);
+
+extern void (*idct[65])
+(
+ INT16 *InputData, 
+ INT16 *QuantMatrix, 
+ INT16 *OutputData 
+);
+
+extern void (*idctc[65])
+( 
+ INT16 *InputData, 
+ INT16 *QuantMatrix, 
+ INT16 * OutputData 
+);
+
+extern void (*ClearSysState) ( void );
+
+extern void (*ReconBlock)
+(
+ INT16 *SrcBlock,
+ INT16 *ReconRefPtr, 
+ UINT8 *DestBlock, 
+ UINT32 LineStep
+);
+
+extern void (*SubtractBlock)
+( 
+ UINT8 *SrcBlock, 
+ INT16 *DestPtr, 
+ UINT32 LineStep 
+);
+
+extern void (*UnpackBlock)
+( 
+ UINT8 *ReconPtr, 
+ INT16 *ReconRefPtr, 
+ UINT32 ReconPixelsPerLine
+);
+
+extern void (*AverageBlock)
+( 
+ UINT8 *ReconPtr1, 
+ UINT8 *ReconPtr2, 
+ UINT16 *ReconRefPtr, 
+ UINT32 ReconPixelsPerLine
+);
+
+extern void (*CopyBlock)
+(
+ unsigned char *src, 
+ unsigned char *dest, 
+ unsigned int srcstride
+);
+
+extern void (*fdct_short)
+( 
+ INT16 * InputData, 
+ INT16 * OutputData 
+);
+
+extern void (*Copy12x12)
+(
+ const unsigned char *src, 
+ unsigned char *dest, 
+ unsigned int srcstride,
+ unsigned int deststride
+);
+
+extern void (*FilterBlockBil_8)
+( 
+ UINT8 *ReconPtr1, 
+ UINT8 *ReconPtr2, 
+ UINT8 *ReconRefPtr, 
+ UINT32 ReconPixelsPerLine, 
+ INT32 ModX, 
+ INT32 ModY 
+);
+
+extern void (*FilterBlock)
+( 
+ UINT8 *ReconPtr1, 
+ UINT8 *ReconPtr2, 
+ UINT16 *ReconRefPtr, 
+ UINT32 PixelsPerLine, 
+ INT32 ModX, 
+ INT32 ModY, 
+ BOOL UseBicubic, 
+ UINT8 BicubicAlpha
+);
+
+extern UINT32 (*FiltBlockBilGetSad)
+(
+ UINT8 *SrcPtr,
+ INT32 SrcStride,
+ UINT8 *ReconPtr1,
+ UINT8 *ReconPtr2,
+ INT32 PixelsPerLine,
+ INT32 ModX, 
+ INT32 ModY,
+ UINT32 BestSoFar
+);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c b/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c
new file mode 100644
index 00000000..9bd9cba7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/preprocfunctions.c
@@ -0,0 +1,257 @@
+/****************************************************************************
+*
+*   Module Title :     PreProcOptFunctions.c
+*
+*   Description  :     MMX or otherwise processor specific 
+*                      optimised versions of pre-processor functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.04 YWX 30-Nov-00 Added support for WMT cpu
+*   1.03 PGW 24 Jul 00 Added Column SAD function.
+*   1.02 YX  06/04/00  Optimized get row sad for xmm
+*   1.01 PGW 12/07/99  Changes to reduce uneccessary dependancies. 
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "preproc.h"
+#include "cpuidlib.h"
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Imports.
+*****************************************************************************
+*/   
+    
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Functions 
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+
+UINT32 MmxRowSAD( UINT8 * Src1, UINT8 * Src2 );
+extern UINT32 XmmRowSAD( UINT8 * Src1, UINT8 * Src2 );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MachineSpecificConfig
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi  (instance to configure)
+ *
+ *  OUTPUTS       :     ppi->MmxEnabled, ppi->XmmEnabled, ppi->RowSAD, ppi->ColSAD
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Checks for machine specific features such as MMX support,
+ *                      sets appropriate flags and function pointers.
+ *
+ *  SPECIAL NOTES :     A processor type that is not listed in the switch is
+ *                      treated as having neither MMX nor XMM, so the flags
+ *                      are never left unset.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void MachineSpecificConfig(PP_INSTANCE *ppi)
+{
+    // Identify the processor; the switch below maps it onto the two
+    // capability flags that select the SAD implementations.
+    PROCTYPE CPUType = findCPUId();
+
+    // Every PROCTYPE value that is not listed explicitly lands on the default
+    // label, so both flags are always written before they are read below.
+	switch(CPUType)
+	{
+	case X86    :
+	case PPRO   :
+	case C6X86  :
+	case C6X86MX:
+	case AMDK5  :
+	case MACG3	:
+	case MAC68K	:
+	default     :		// unrecognised processor: use the scalar routines
+		ppi->MmxEnabled = FALSE;
+		ppi->XmmEnabled = FALSE;
+		break;
+	case PII	:   
+	case AMDK63D:
+	case AMDK6  :
+	case PMMX	:   
+		ppi->MmxEnabled = TRUE;
+		ppi->XmmEnabled = FALSE;
+		break;
+	case XMM    :
+    case WMT    :
+		ppi->MmxEnabled = TRUE;
+		ppi->XmmEnabled = TRUE;
+		break;
+	}
+
+	
+	//To test We force the cpu type here
+	//ppi->MmxEnabled = FALSE;
+	//ppi->XmmEnabled = FALSE;
+
+    // If MMX supported then set to use MMX versions of functions else 
+    // use original 'C' versions.
+	if (ppi->XmmEnabled)
+	{
+		ppi->RowSAD=XmmRowSAD;
+        ppi->ColSAD = ScalarColSAD;
+	}
+	else if ( ppi->MmxEnabled )
+    {
+        ppi->RowSAD = MmxRowSAD;
+        ppi->ColSAD = ScalarColSAD;
+    }
+    else
+    {
+        ppi->RowSAD = ScalarRowSAD;
+        ppi->ColSAD = ScalarColSAD;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MmxRowSAD
+ *
+ *  INPUTS        :     UINT8 * NewDataPtr	(New Data; eight bytes are read)
+ *						UINT8 * RefDataPtr	(eight bytes are read)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Highest of two S.A.D. values.
+ * 
+ *
+ *  FUNCTION      :     Calculates the sum of the absolute differences for two groups of
+ *                      four pixels and returns the larger of the two.
+ *
+ *  SPECIAL NOTES :     Contains no emms instruction, so the MMX state is still
+ *                      in use on return (ClearMmxState in this file issues emms).
+ *                      The inline assembly uses eax and ebx.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 MmxRowSAD( UINT8 * NewDataPtr, UINT8  * RefDataPtr )
+{
+    UINT32 SadValue;
+    UINT32 SadValue1;
+	UINT32 AbsValues[2];
+    // NOTE(review): the comment on "pxor mm7, mm7" below says mmx6; the instruction clears mm7.
+    // MMX code for calculating absolute difference values	
+__asm
+	{
+		pxor        mm6, mm6					; Blank mmx6
+		pxor        mm7, mm7					; Blank mmx6
+
+		mov         eax,dword ptr [NewDataPtr]	; Load base addresses
+		mov         ebx,dword ptr [RefDataPtr]
+
+        // Calculate eight ABS difference values.
+		movq		mm0, [eax]					; Copy eight bytes to mm0
+		movq		mm1, [ebx]					; Copy eight bytes to mm1
+		movq		mm2, mm0					; Take copy of MM0
+
+		psubusb		mm0, mm1					; A-B to MM0
+		psubusb		mm1, mm2					; B-A to MM1
+		por			mm0, mm1					; OR MM0 and MM1 gives abs differences in MM0
+
+		movq		mm1, mm0					; keep a copy
+
+		// Sum together the low four bytes and the high four bytes
+		punpcklbw   mm0, mm6					; unpack low four bytes to higher precision
+		punpckhbw   mm1, mm7					; unpack high four bytes to higher precision
+		movq        mm2, mm0                    ; take a copy
+		movq        mm3, mm1                    ; take a copy
+		punpcklwd   mm0, mm6					; unpack low two words to higher precision
+		punpcklwd   mm1, mm7					; unpack low two words to higher precision
+		punpckhwd   mm2, mm6					; unpack high low two words to higher precision
+		punpckhwd   mm3, mm7					; unpack high low two words to higher precision
+		
+		paddd       mm0, mm2                    ; Accumulate intermediate results
+		paddd       mm1, mm3                    ; Accumulate intermediate results
+		movq        mm2, mm0                    ; take a copy
+		movq        mm3, mm1                    ; take a copy
+		punpckhdq   mm0, mm6					; Unpack and accumulate again
+		punpckhdq   mm1, mm7					; Unpack and accumulate again
+		punpckldq   mm2, mm6
+		punpckldq   mm3, mm7
+		paddd       mm0, mm2                    ; Accumulate final result
+		paddd       mm1, mm3                    ; Accumulate final result
+
+		// Interleave the two SAD results
+		punpckldq   mm0, mm1
+        // NOTE(review): movq stores 64 bits although the operand is spelled "dword ptr"; both AbsValues entries are read back below.
+        // Write back the abs values
+        movq        dword ptr [AbsValues], mm0  
+    }
+    
+    SadValue = AbsValues[0];
+    SadValue1 = AbsValues[1];
+    SadValue = (SadValue > SadValue1) ? SadValue : SadValue1;
+
+    return SadValue;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearMmxState()
+ *
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi  (only ppi->MmxEnabled is read)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Clears down the MMX state
+ *
+ *  SPECIAL NOTES :     Executes emms only when ppi->MmxEnabled is set;
+ *                      otherwise the call does nothing.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearMmxState(PP_INSTANCE *ppi)
+{
+    if ( ppi->MmxEnabled )
+    {
+        __asm
+	    {
+            emms									; Clear the MMX state.
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h b/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h
new file mode 100644
index 00000000..46597097
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/resource.h
@@ -0,0 +1,43 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Developer Studio generated include file.
+// Used by PreprocParams.rc
+//
+#define IDD_VCAP_PARAMS                 101
+#define IDD_PREPROC_PARAMS              101
+#define IDC_VCAP_P_TRESH_SB             1000
+#define IDC_VCAP_NOISE_SUP_SB           1001
+#define IDC_VCAP_TRIG_SB                1002
+#define IDC_SRF_CHECK                   1004
+#define IDC_SRF_TEMPORAL_CHECK          1005
+#define IDC_SC_TRADE_OFF_SB             1005
+#define IDC_RSAD_LOW_SB                 1006
+#define IDC_VCAP_PUV_TRESH_SB           1007
+#define IDC_SGC_TRESH_SB                1008
+#define IDC_SGC_TRIGGER_SB              1009
+#define IDC_SGC_UV_TRESH_SB             1010
+#define IDC_VCAP_BAR_THRESH_SB          1011
+#define IDC_VCAP_P_TRESH_ED             1012
+#define IDC_VCAP_PUV_TRESH_ED           1013
+#define IDC_VCAP_NOISE_SUP_ED           1014
+#define IDC_VCAP_TRIG_ED                1015
+#define IDC_VCAP_BAR_THRESH_ED          1016
+#define IDC_SGC_TRESH_ED                1017
+#define IDC_SGC_UV_TRESH_ED             1018
+#define IDC_SGC_TRIGGER_ED              1019
+#define IDC_SRF_MEDIAN_CHECK            1020
+#define IDC_RSAD_HIGH_SB                1020
+#define IDC_PAK_ENABLED_CHECK           1023
+#define IDC_SC_TRADE_OFF_ED             1024
+#define IDC_RSAD_LOW_ED                 1025
+#define IDC_RSAD_HIGH_ED                1026
+// NOTE(review): 101, 1005 and 1020 are each assigned to two symbols above, and control IDs run up to 1026 while _APS_NEXT_CONTROL_VALUE below is 1021 -- confirm before adding controls.
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        103
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1021
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c b/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c
new file mode 100644
index 00000000..6ae77d7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/rowdiffscan.c
@@ -0,0 +1,765 @@
+/****************************************************************************
+*
+*   Module Title :     RowDiffScan.c
+*
+*   Description  :     Pre-processor row difference Scan
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 22 AUG 00	Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "preproc.h"
+
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowDiffScan
+ *
+ *  INPUTS        :     UINT8  * YuvPtr1, YuvPtr2 
+ *								 Pointers into current and previous frame
+ *                      BOOL     EdgeRow
+ *                               Is this row an edge row.
+ *
+ *  OUTPUTS       :		INT16  * YUVDiffsPtr
+ *								 Difference map
+ *                      UINT8  * bits_map_ptr
+ *                               Pixels changed map
+ *                      INT8   * SgcPtr
+ *								 Level change score.
+ *                      INT8   * DispFragPtr
+ *                               Block update map.
+ *                      INT32  * RowDiffsPtr
+ *								 Total sig changes for row
+ *                      UINT8  * ChLocalsPtr
+ *                               Changed locals data structure
+ *
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     Initial pixel differences scan
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, 
+                  INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr, 
+                  INT8  * SgcPtr, INT8  * DispFragPtr, 
+				  UINT8 * FDiffPixels, INT32 * RowDiffsPtr, 
+                  UINT8 * ChLocalsPtr, BOOL EdgeRow )
+{
+    INT32 i;
+    INT32 FragChangedPixels;
+
+    INT16 Diff[8];
+
+    UINT32  ZeroData[2] = { 0,0 };
+    UINT8   OneData[8] = { 1,1,1,1,1,1,1,1 };
+    UINT8   ChlocalsDummyData[8] = { 8,8,8,8,8,8,8,8 };
+
+    // Cannot use kernel if at edge or if PAK disabled
+    if ( (!ppi->PAKEnabled) || EdgeRow )
+    {
+        for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+        {
+            // Reset count of pixels changed for the current fragment.
+            FragChangedPixels = 0;
+
+            // Test for break out conditions to save time. 
+			if ((*DispFragPtr == CANDIDATE_BLOCK) )//|| !ppi->EarlyBreakAllowed)
+			{
+                
+				__asm
+				{
+				
+					movd		esi, [YuvPtr1];
+					movd		ebx, [YuvPtr2];
+					movd		edx, FragChangedPixels
+					pxor		mm7, mm7;
+
+					movq		mm0, [esi]			;76543210
+					movq		mm1, [ebx]			;76543210
+
+					movq		mm2, mm0			;make a copy
+					movq		mm3, mm1			;make a copy
+
+					punpcklbw	mm0, mm7			; 3 2 1 0
+					punpcklbw	mm1, mm7			; 3 2 1 0
+
+					punpckhbw	mm2, mm7			; 7 6 5 4
+					punpckhbw   mm3, mm7			; 7 6 5 4
+
+					psubw		mm0	 mm1			; Diff[3,2,1,0]
+					psubw		mm2, mm3			; Diff[7,6,5,4]
+					
+					movq		QWORD PTR [YUVDiffsPtr], mm0
+					movq		QWORD PTR [YUVDiffsPtr], mm2					
+
+				;------------------------------------------------------
+				;	mm0, mm1, mm3, mm4, m5, mm6, mm7, Free		
+				;	mm2, keep the Diff[7 6 5 4]
+				;------------------------------------------------------
+					
+					movd		eax, ppi->LevelThresh
+
+					movd		mm1, eax			;
+					movd		mm3, eax			;
+					
+					packsdw		mm1, mm3			;
+					movq		mm4, mm1			;
+					
+					psllw		mm1, 16
+					por			mm1, mm4			;4 ppi->LevelThresh
+					
+
+				;-------------------------------------------------------
+				;	mm3, mm4, mm5, mm6, mm7 Free
+				;   
+				;-------------------------------------------------------
+
+					movd		eax, ppi->SrfThresh
+					
+					movd		mm3, eax			;
+					movd		mm4, eax			;
+
+					packsdw		mm3, mm4			;
+					movq		mm5, mm3			;
+
+					psllw		mm3, 16
+					por			mm3, mm6			;4 ppi->SrfThresh
+
+				;--------------------------------------------------------
+				;	mm0 mm2		diff[0]-diff[7]
+				;	mm1			ppi->LevelThresh
+				;	mm3			ppi->SrfThresh
+				;	mm4-mm7		free
+				;	Note,	ppi->NegLevelThresh = - ppi->LevelThresh
+				;			ppi->NegSrfThresh = - ppi->SrfThresh
+				;--------------------------------------------------------
+
+					movq		mm4, mm0			; diff[0][1][2][3]
+					movq		mm5, mm0			;
+
+					psubsw		mm4, mm1			; if diff >= LevelThresh
+					psraw		mm4, 15				; 00s(True) and ffs (False)
+					pandn		mm4, FFFFFFFFh		; ffs(True) and 00s (False)
+					psrlw		mm4, 15				; 01 (True) and 00	(False)
+
+					pcmpgtw		mm5, mm3			; if diff > SrfThresh
+													; ffs(True) and 00s (False)
+					psrlw		mm5, 15				; 01 (True) and 00  (False)					
+					pand		mm5, mm4			; 
+
+					
+					movq		mm7, mm0			; save a copy of diff[0][1][2][3]
+					pxor		mm6, mm6			; clear MM6
+
+					psubsw		mm6, mm1			; mm6 = NegLevelThresh
+					pcmpgtw		mm0, mm6			; if diff > NegLevelThresh
+													; ffs(True) and 00s (False)
+					pandn		mm0, FFFFFFFFh		; if diff <= NegLevelThresh
+													; ffs(True) and 00	(False)
+					psrlw		mm0, 15				; 01 (True) and 00  (False)
+
+					paddsw		mm7, mm3			; if diff < -NegSrfThresh
+					psraw		mm7, 15				; ffs(True)	and 00s (False)
+					
+					psrlw	    mm7, 15				; 01 (True) and 00s (False)
+					pand		mm7, mm0			;
+					
+				;----------------------------------------------------------------------------
+				; mm0, mm1, mm2, mm3, mm4, mm5, mm7		 in use
+				; mm6	free
+				;----------------------------------------------------------------------------
+					
+					por			mm5, mm7			; mm7 is free now
+					pxor		mm6, mm6			;
+					movq		mm7, mm5			;
+					punpcklwd	mm5, mm6			;
+					punpckhwd	mm7, mm6			;
+
+					paddw		mm5, mm7			;
+					movq		mm7, mm5			;
+
+					psrlq		mm7, 32				;
+					paddd		mm7, mm5			;
+
+					movd		eax, mm7			;
+					
+					add			eax, ebx
+
+					
+
+
+				// Calculate the diference values and copy to YUVDiffsPtr
+				Diff[0] = ((INT16)YuvPtr1[0]) - ((INT16)YuvPtr2[0]);
+			    Diff[1] = ((INT16)YuvPtr1[1]) - ((INT16)YuvPtr2[1]);
+			    Diff[2] = ((INT16)YuvPtr1[2]) - ((INT16)YuvPtr2[2]);
+			    Diff[3] = ((INT16)YuvPtr1[3]) - ((INT16)YuvPtr2[3]);
+                ((INT32 *)YUVDiffsPtr)[0] = ((INT32 *)Diff)[0];
+                ((INT32 *)YUVDiffsPtr)[1] = ((INT32 *)Diff)[1];
+
+				// Test against the Level and ppi->SRF thresholds and record the results
+                // Pixel 0
+				if ( Diff[0] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[0] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[0] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[0] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[0] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[0] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 1
+				if ( Diff[1] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[1] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[1] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[1] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[1] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[1] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 2
+				if ( Diff[2] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[2] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[2] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[2] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[2] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[2] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 3
+				if ( Diff[3] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[3] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[3] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[3] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[3] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[3] = 1;
+						FragChangedPixels++;
+					}
+				}
+
+                // Clear down entries in changed locals array
+                ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+
+                // Calculate the diference values and copy to YUVDiffsPtr
+			    Diff[4] = ((INT16)YuvPtr1[4]) - ((INT16)YuvPtr2[4]);
+			    Diff[5] = ((INT16)YuvPtr1[5]) - ((INT16)YuvPtr2[5]);
+			    Diff[6] = ((INT16)YuvPtr1[6]) - ((INT16)YuvPtr2[6]);
+			    Diff[7] = ((INT16)YuvPtr1[7]) - ((INT16)YuvPtr2[7]);
+                ((INT32 *)YUVDiffsPtr)[2] = ((INT32 *)Diff)[2];
+                ((INT32 *)YUVDiffsPtr)[3] = ((INT32 *)Diff)[3];
+
+				// Test against the Level and ppi->SRF thresholds and record the results
+                // Pixel 4
+				if ( Diff[4] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[4] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[4] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[4] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[4] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[4] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 5
+				if ( Diff[5] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[5] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[5] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[5] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[5] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[5] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 6
+				if ( Diff[6] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[6] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[6] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[6] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[6] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[6] = 1;
+						FragChangedPixels++;
+					}
+				}
+                // Pixel 7
+				if ( Diff[7] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+					if ( Diff[7] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[7] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[7] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+					if ( Diff[7] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[7] = 1;
+						FragChangedPixels++;
+					}
+				}
+
+                // Clear down entries in changed locals array
+                ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+	        }
+            else
+            {
+                // For EBO coded blocks mark all pixels as changed.
+                if ( *DispFragPtr > BLOCK_NOT_CODED )
+                {
+                    ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+                    ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+                    ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+                    ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+                }
+                else
+                {
+                    ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+                    ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+                }
+            }
+
+			*RowDiffsPtr += FragChangedPixels;
+			*FDiffPixels += (UINT8)FragChangedPixels;
+
+			YuvPtr1 += ppi->HFragPixels;
+			YuvPtr2 += ppi->HFragPixels;
+			bits_map_ptr += ppi->HFragPixels;
+            ChLocalsPtr += ppi->HFragPixels;
+			YUVDiffsPtr += ppi->HFragPixels;
+			SgcPtr ++;
+			FDiffPixels ++;
+
+			// If we have a lot of changed pixels for this fragment on this row then 
+			// the fragment is almost sure to be picked (e.g. through the line search) so we
+			// can mark it as selected and then ignore it.
+			// if ( ppi->EarlyBreakAllowed )
+			{
+				if (FragChangedPixels >= 7)
+				{
+					*DispFragPtr = BLOCK_CODED;
+				}
+			}
+			DispFragPtr++;    
+		}
+    }
+    else
+    {
+        for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+        {
+            // Reset count of pixels changed for the current fragment.
+            FragChangedPixels = 0;
+
+            // Test for break out conditions to save time. 
+			if ((*DispFragPtr == CANDIDATE_BLOCK) )//|| !ppi->EarlyBreakAllowed)
+			{
+                // Calculate the diference values and copy to YUVDiffsPtr
+			    Diff[0] = ((INT16)YuvPtr1[0]) - ((INT16)YuvPtr2[0]);
+			    Diff[1] = ((INT16)YuvPtr1[1]) - ((INT16)YuvPtr2[1]);
+			    Diff[2] = ((INT16)YuvPtr1[2]) - ((INT16)YuvPtr2[2]);
+			    Diff[3] = ((INT16)YuvPtr1[3]) - ((INT16)YuvPtr2[3]);
+                ((INT32 *)YUVDiffsPtr)[0] = ((INT32 *)Diff)[0];
+                ((INT32 *)YUVDiffsPtr)[1] = ((INT32 *)Diff)[1];
+
+				// Test against the Level and ppi->SRF thresholds and record the results
+                // Pixel 0
+				if ( Diff[0] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[0] > ppi->SrfThresh) && (Diff[0] <= ppi->HighChange) )
+						Diff[0] = (int)ApplyPakLowPass( ppi, &YuvPtr1[0] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[0] );
+
+					if ( Diff[0] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[0] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[0] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[0] < ppi->NegSrfThresh) && (Diff[0] >= ppi->NegHighChange) )
+						Diff[0] = (int)ApplyPakLowPass( ppi, &YuvPtr1[0] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[0] );
+
+					if ( Diff[0] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[0] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 1
+				if ( Diff[1] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[1] > ppi->SrfThresh) && (Diff[1] <= ppi->HighChange) )
+						Diff[1] = (int)ApplyPakLowPass( ppi, &YuvPtr1[1] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[1] );
+
+					if ( Diff[1] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[1] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[1] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[1] < ppi->NegSrfThresh) && (Diff[1] >= ppi->NegHighChange) )
+						Diff[1] = (int)ApplyPakLowPass( ppi, &YuvPtr1[1] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[1] );
+
+					if ( Diff[1] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[1] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 2
+				if ( Diff[2] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[2] > ppi->SrfThresh) && (Diff[2] <= ppi->HighChange) )
+						Diff[2] = (int)ApplyPakLowPass( ppi, &YuvPtr1[2] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[2] );
+
+					if ( Diff[2] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[2] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[2] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[2] < ppi->NegSrfThresh) && (Diff[2] >= ppi->NegHighChange) )
+						Diff[2] = (int)ApplyPakLowPass( ppi, &YuvPtr1[2] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[2] );
+
+					if ( Diff[2] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[2] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 3
+				if ( Diff[3] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[3] > ppi->SrfThresh) && (Diff[3] <= ppi->HighChange) )
+						Diff[3] = (int)ApplyPakLowPass( ppi, &YuvPtr1[3] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[3] );
+
+					if ( Diff[3] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[3] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[3] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[3] < ppi->NegSrfThresh) && (Diff[3] >= ppi->NegHighChange) )
+						Diff[3] = (int)ApplyPakLowPass( ppi, &YuvPtr1[3] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[3] );
+
+					if ( Diff[3] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[3] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Clear down entries in changed locals array
+                ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+
+                // Calculate the diference values and copy to YUVDiffsPtr
+			    Diff[4] = ((INT16)YuvPtr1[4]) - ((INT16)YuvPtr2[4]);
+			    Diff[5] = ((INT16)YuvPtr1[5]) - ((INT16)YuvPtr2[5]);
+			    Diff[6] = ((INT16)YuvPtr1[6]) - ((INT16)YuvPtr2[6]);
+			    Diff[7] = ((INT16)YuvPtr1[7]) - ((INT16)YuvPtr2[7]);
+                ((INT32 *)YUVDiffsPtr)[2] = ((INT32 *)Diff)[2];
+                ((INT32 *)YUVDiffsPtr)[3] = ((INT32 *)Diff)[3];
+
+				// Test against the Level and ppi->SRF thresholds and record the results
+                // Pixel 4
+				if ( Diff[4] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[4] > ppi->SrfThresh) && (Diff[4] <= ppi->HighChange) )
+						Diff[4] = (int)ApplyPakLowPass( ppi, &YuvPtr1[4] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[4] );
+
+					if ( Diff[4] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[4] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[4] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[4] < ppi->NegSrfThresh) && (Diff[4] >= ppi->NegHighChange) )
+						Diff[4] = (int)ApplyPakLowPass( ppi, &YuvPtr1[4] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[4] );
+
+					if ( Diff[4] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[4] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 5
+				if ( Diff[5] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[5] > ppi->SrfThresh) && (Diff[5] <= ppi->HighChange) )
+						Diff[5] = (int)ApplyPakLowPass( ppi, &YuvPtr1[5] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[5] );
+
+					if ( Diff[5] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[5] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[5] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[5] < ppi->NegSrfThresh) && (Diff[5] >= ppi->NegHighChange) )
+						Diff[5] = (int)ApplyPakLowPass( ppi, &YuvPtr1[5] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[5] );
+
+					if ( Diff[5] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[5] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 6
+				if ( Diff[6] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+			        if ( (Diff[6] > ppi->SrfThresh) && (Diff[6] <= ppi->HighChange) )
+						Diff[6] = (int)ApplyPakLowPass( ppi, &YuvPtr1[6] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[6] );
+
+					if ( Diff[6] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[6] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[6] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[6] < ppi->NegSrfThresh) && (Diff[6] >= ppi->NegHighChange) )
+						Diff[6] = (int)ApplyPakLowPass( ppi, &YuvPtr1[6] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[6] );
+
+					if ( Diff[6] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[6] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Pixel 7
+				if ( Diff[7] >= ppi->LevelThresh )
+				{         
+					SgcPtr[0]++;
+
+					// If the level change is still suspect then apply PAK kernel.
+			        if ( (Diff[7] > ppi->SrfThresh) && (Diff[7] <= ppi->HighChange) )
+						Diff[7] = (int)ApplyPakLowPass( ppi, &YuvPtr1[7] ) - 
+							      (int)ApplyPakLowPass( ppi, &YuvPtr2[7] );
+
+					if ( Diff[7] > ppi->SrfThresh )
+					{          
+						bits_map_ptr[7] = 1;
+						FragChangedPixels++;
+					}    
+				}
+				else if ( Diff[7] <= ppi->NegLevelThresh )
+				{
+					SgcPtr[0]--;
+
+					// If the level change is still suspect then apply PAK kernel.
+					if ( (Diff[7] < ppi->NegSrfThresh) && (Diff[7] >= ppi->NegHighChange) )
+						Diff[7] = (int)ApplyPakLowPass( ppi, &YuvPtr1[7] ) - 
+                                  (int)ApplyPakLowPass( ppi, &YuvPtr2[7] );
+
+					if ( Diff[7] < ppi->NegSrfThresh )
+					{          
+						bits_map_ptr[7] = 1;
+						FragChangedPixels++;
+					}
+    			}
+
+                // Clear down entries in changed locals array
+                ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+            }
+            else
+            {
+                // For EBO coded blocks mark all pixels as changed.
+                if ( *DispFragPtr > BLOCK_NOT_CODED )
+                {
+                    ((UINT32 *)bits_map_ptr)[0] = ((UINT32 *)OneData)[0];
+                    ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ChlocalsDummyData)[0];
+
+                    ((UINT32 *)bits_map_ptr)[1] = ((UINT32 *)OneData)[1];
+                    ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ChlocalsDummyData)[1];
+                }
+                else
+                {
+                    ((UINT32 *)ChLocalsPtr)[0] = ((UINT32 *)ZeroData)[0];
+                    ((UINT32 *)ChLocalsPtr)[1] = ((UINT32 *)ZeroData)[1];
+                }
+            }
+
+			*RowDiffsPtr += FragChangedPixels;
+			*FDiffPixels += (UINT8)FragChangedPixels;
+
+            YuvPtr1 += ppi->HFragPixels;
+            YuvPtr2 += ppi->HFragPixels;
+            bits_map_ptr += ppi->HFragPixels;
+            ChLocalsPtr += ppi->HFragPixels;
+            YUVDiffsPtr += ppi->HFragPixels;
+            SgcPtr ++;
+			FDiffPixels ++;
+
+			// If we have a lot of changed pixels for this fragment on this row then 
+			// the fragment is almost sure to be picked (e.g. through the line search) so we
+			// can mark it as selected and then ignore it.
+//			if ( ppi->EarlyBreakAllowed )
+			{
+				if (FragChangedPixels >= 7)
+				{
+					*DispFragPtr = BLOCK_CODED;
+				}
+			}
+			DispFragPtr++;    
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm b/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm
new file mode 100644
index 00000000..b162ad9d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/Win32/xmmrowsad.asm
@@ -0,0 +1,88 @@
+;------------------------------------------------
+XmmRowSADParams  STRUC				; stack layout seen through [esp] after XmmRowSAD's "push ebx"
+                    dd  ?			;1 pushed reg (the saved ebx)
+                    dd  ?           ;return address
+    NewDataPtr      dd  ?			;first argument in the XmmRowSAD prototype comment
+    RefDataPtr      dd  ?			;second argument in the XmmRowSAD prototype comment
+XmmRowSADParams  ENDS
+;------------------------------------------------
+
+INCLUDE iaxmm.inc
+ 
+        .586
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .MMX
+
+; macros
+
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+
+        ALIGN 32
+
+
+        .CODE
+
+NAME XmmRowSAD
+
+PUBLIC XmmRowSAD_
+PUBLIC _XmmRowSAD
+ 
+ 
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+
+;------------------------------------------------
+;UINT32 XmmRowSAD( UINT8 * NewDataPtr, UINT8  * RefDataPtr) 
+;
+XmmRowSAD_:
+_XmmRowSAD:
+
+    push    ebx 
+	mov         eax,(XmmRowSADParams PTR [esp]).NewDataPtr	; Load base addresses
+	mov         ebx,(XmmRowSADParams PTR [esp]).RefDataPtr
+    
+;
+; Returns in EAX the larger of two partial SADs: bytes 0-3 and bytes 4-7
+; of the eight bytes at NewDataPtr against the eight bytes at RefDataPtr.
+;
+; ESP = Stack Pointer                      MM0 = New bytes 0-3, then the result
+; EBX = RefDataPtr (pushed / popped here)  MM1 = Zero, used for unpacking
+; EAX = NewDataPtr, then return value      MM2 = New bytes 4-7
+; ECX, EDX, ESI, EDI, EBP = not touched    MM3 = Ref bytes 0-3
+;                                          MM4 = Ref bytes 4-7
+;
+
+
+		movq		mm0, QWORD PTR [eax]		; copy eight bytes from NewDataPtr to mm0
+		movq		mm3, QWORD PTR [ebx]		; copy eight bytes from RefDataPtr to mm3
+		
+		pxor		mm1, mm1					; clear mm1 for unpacking
+
+		movq		mm2, mm0					; make a copy
+		movq		mm4, mm3					; make a copy 
+
+		punpcklbw	mm0, mm1					; unpack the lower four bytes (zero byte after each)
+		punpcklbw   mm3, mm1					; unpack the lower four bytes (zero byte after each)
+
+		psadbw		mm0, mm3					; SAD of bytes 0-3; the interleaved zero bytes add nothing
+		punpckhbw   mm2, mm1					; unpack the higher four bytes
+		punpckhbw   mm4, mm1					; unpack the higher four bytes
+
+		psadbw		mm2, mm4					; SAD of bytes 4-7
+
+        pop     ebx
+		pmaxsw		mm0, mm2					; keep the larger of the two partial SADs
+		movd		eax, mm0					; return value
+
+    ret
+
+;************************************************
+        END
+
+END
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c b/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c
new file mode 100644
index 00000000..d543dbbe
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/blockmap.c
@@ -0,0 +1,290 @@
+/****************************************************************************
+*
+*   Module Title :     BlockMap.c
+*
+*   Description  :     Contains functions used to create the block map
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.08 PGW 28 Feb 01 Removal of history buffer mechanism.
+*   1.07 PGW 04 Oct 00 Changes to RowBarEnhBlockMap()
+*   1.06 JBB 03 Aug 00 Fixed Problem in which rownumber was compared to 
+*                      PlaneHFragments instead of PlaneVFragments, added 
+*                      statistic output functions
+*   1.05 PGW 27/07/00  Experiments with motion score.
+*   1.04 JBB 30/05/00  Removed hard coded size limits
+*   1.03 PGW 18/02/00  Changed weighting for History blocks. 
+*                      Redundant functions deleted.
+*					   Deglobalization.
+*   1.02 PGW 12/07/99  Changes to reduce uneccessary dependancies. 
+*   1.01 PGW 21/06/99  Alter function of RowBarEnhBlockMap() for VFW codec.
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Frames
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include <string.h>
+
+#include "preproc.h"
+
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/  
+
+/****************************************************************************
+*  Module Types
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Imported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Foreward References
+*****************************************************************************
+*/              
+
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/              
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowBarEnhBlockMap
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi
+ *                              Supplies PlaneHFragments and PlaneVFragments.
+ *                      UINT32 * FragScorePtr   (not used)
+ *                      INT8   * FragSgcPtr     (not used)
+ *                      INT8   * UpdatedBlockMapPtr
+ *                              Current row of the main block map; entries in
+ *                              the rows directly above and below are also read.
+ *                      UINT32   RowNumber
+ *
+ *  OUTPUTS       :     INT8   * BarBlockMapPtr
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     BAR Enhances block map on a row by row basis.
+ *                      A block at or below CANDIDATE_BLOCK is marked
+ *                      BLOCK_CODED_BAR in the bar map when any of its (up to
+ *                      eight) neighbours in the main map is above
+ *                      BLOCK_NOT_CODED.  All other bar map entries for the
+ *                      row are set to BLOCK_NOT_CODED.
+ *
+ *  SPECIAL NOTES :     Neighbours that would fall outside the plane are not
+ *                      read: no row above on the first row, no row below on
+ *                      the last row, no left / right column for the first /
+ *                      last block of a row.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void RowBarEnhBlockMap( PP_INSTANCE *ppi, 
+					    UINT32 * FragScorePtr, 
+						INT8   * FragSgcPtr,
+						INT8   * UpdatedBlockMapPtr,
+						INT8   * BarBlockMapPtr,
+						UINT32 RowNumber )
+{
+	INT32 HFrags  = (INT32)ppi->PlaneHFragments;	// blocks per row, also the row stride
+	INT32 LastCol = HFrags - 1;
+
+	// Which neighbouring rows exist.  A plane with a single row has neither.
+	INT32 HasAbove = ( RowNumber != 0 );
+	INT32 HasBelow = ( RowNumber != (UINT32)(ppi->PlaneVFragments-1) );
+
+	INT32 i;
+
+	// Not needed by the neighbour test; kept in the signature for callers.
+	(void)FragScorePtr;
+	(void)FragSgcPtr;
+
+	// Start by blanking the row in the bar block map structure.
+	memset( BarBlockMapPtr, BLOCK_NOT_CODED, ppi->PlaneHFragments );
+
+	// For each fragment in the row.
+	for ( i = 0; i < HFrags; i ++ )
+	{
+		INT32 First;
+		INT32 Last;
+		INT32 j;
+		INT32 Updated = 0;
+
+		// Test for CANDIDATE_BLOCK or CANDIDATE_BLOCK_LOW
+		// Uncoded or coded blocks will be ignored.
+		if ( UpdatedBlockMapPtr[i] > CANDIDATE_BLOCK )
+		{
+			continue;
+		}
+
+		// Columns covered by the neighbourhood, clipped so that the first and
+		// last blocks of the row do not look past the row ends.
+		First = ( i == 0 )       ? i : (i - 1);
+		Last  = ( i == LastCol ) ? i : (i + 1);
+
+		// Is one of the immediate neighbours updated in the main map.
+		for ( j = First; (j <= Last) && !Updated; j ++ )
+		{
+			if ( (HasAbove && (UpdatedBlockMapPtr[j-HFrags] > BLOCK_NOT_CODED)) ||
+				 ((j != i) && (UpdatedBlockMapPtr[j] > BLOCK_NOT_CODED)) ||
+				 (HasBelow && (UpdatedBlockMapPtr[j+HFrags] > BLOCK_NOT_CODED)) )
+			{
+				Updated = 1;
+			}
+		}
+
+		if ( Updated )
+		{
+			BarBlockMapPtr[i] = BLOCK_CODED_BAR;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BarCopyBack
+ *
+ *  INPUTS        :     INT8  * BarBlockMapPtr
+ *
+ *  OUTPUTS       :     INT8  * UpdatedBlockMapPtr 
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies BAR blocks back into main block map.
+ *
+ *  SPECIAL NOTES :     None.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void BarCopyBack( PP_INSTANCE *ppi, 
+				  INT8  * UpdatedBlockMapPtr,
+				  INT8  * BarBlockMapPtr )
+{
+	INT32 Col = 0;
+	while ( Col < ppi->PlaneHFragments )
+	{
+		// Only entries the bar map holds above BLOCK_NOT_CODED are copied back.
+		if ( BarBlockMapPtr[Col] > BLOCK_NOT_CODED )
+		{
+			UpdatedBlockMapPtr[Col] = BarBlockMapPtr[Col];
+		}
+		Col ++;
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CreateOutputDisplayMap
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi
+ *                              Supplies the fragment counts read below.
+ *                      INT8 *  InternalFragmentsPtr 
+ *                              Fragment list using internal format.
+ *
+ *  OUTPUTS       :     ppi->OutputBlocksUpdated, ppi->KFIndicator
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Creates a block update map in the format expected by the caller.
+ *
+ *  SPECIAL NOTES :     The output block height and width must be an integer
+ *                      multiple of the internal value.  
+ *                      KFIndicator is set to 0 when the Y plane is too small
+ *                      for the 0-100 scaling divisor to be non-zero.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CreateOutputDisplayMap
+(
+ PP_INSTANCE *ppi, 
+ INT8		 *InternalFragmentsPtr
+)
+{ 
+    UINT32 i;
+	UINT32 YBand = 	(ppi->ScanYPlaneFragments/8);	// 1/8th of Y image.	
+
+//#define DISPLAY_STATS
+#ifdef DISPLAY_STATS
+#include <stdio.h>
+	{
+
+		FILE * StatsFilePtr;
+		StatsFilePtr = fopen( "c:\\display_stats.stt", "a" ); 
+		if ( StatsFilePtr )
+		{
+            int i;
+            for(i=0;i<ppi->ScanYPlaneFragments;i++)
+            {
+                if(i%ppi->ScanHFragments  == 0 )
+                    fprintf( StatsFilePtr , "\n");
+
+                fprintf( StatsFilePtr, "%2d", 
+                    InternalFragmentsPtr[i]);
+            }
+            fprintf( StatsFilePtr , "\n");
+			fclose( StatsFilePtr );
+
+		}
+	}
+#endif    
+    
+	ppi->OutputBlocksUpdated = 0;
+    for ( i = 0; i < ppi->ScanFrameFragments; i++ )
+    {
+		if ( InternalFragmentsPtr[i] > BLOCK_NOT_CODED ) 
+        {
+            ppi->OutputBlocksUpdated ++;
+			setBlockCoded(i);
+        }
+		else
+		{
+			setBlockUncoded(i);
+		}
+    }
+
+	// Now calculate a key frame candidate indicator.
+	// This is based upon Y data only and ignores the top and bottom 1/8 of the image.
+	// Blocks at or below BLOCK_CODED_BAR (e.g. BAR blocks) are not counted.
+    ppi->KFIndicator = 0;
+    for ( i = YBand; i < (ppi->ScanYPlaneFragments - YBand); i++ )
+    {
+		if ( InternalFragmentsPtr[i] > BLOCK_CODED_BAR ) 
+        {
+            ppi->KFIndicator ++;
+        }
+    }
+
+	// Convert the KF score to a range 0-100 (0 when the divisor would be zero)
+	ppi->KFIndicator = ( ((ppi->ScanYPlaneFragments*3)/4) != 0 )
+		? ((ppi->KFIndicator*100)/((ppi->ScanYPlaneFragments*3)/4))
+		: 0;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c b/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c
new file mode 100644
index 00000000..29efc90e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/clamp.c
@@ -0,0 +1,96 @@
+/****************************************************************************
+ *        
+ *   Module Title :     clamp.c
+ *
+ *   Description  :     Clamps Y plane pixel levels to black/white limits using a lookup table.
+ *
+ *   AUTHOR       :     Jim Bankoski
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *   1.09 YWX 26-Sep-01 Changed the default bandHeight from 5 to 4
+ *   1.08 YWX 23-Jul-00 Changed horizontal scaling function names
+ *   1.07 JBB 04 Dec 00 Added new Center vs Scale Bits
+ *   1.06 YWX 01-Dec-00 Removed bi-cubic scale functions
+ *   1.05 YWX 18-Oct-00 Added 1-2 scale functions
+ *   1.04 YWX 11-Oct-00 Added ratio check to determine scaling or centering
+ *   1.03 YWX 09-Oct-00 Added functions that do different scaling in horizontal
+ *                      and vertical directions
+ *   1.02 YWX 04-Oct-00 Added 3-5 scaling functions
+ *   1.01 YWX 03-Oct-00 Added a set of 4-5 scaling functions
+ *   1.00 JBB 15 Sep 00 New Configuration baseline.
+ *
+ *****************************************************************************
+ */
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#include "postp.h"
+#include <stdio.h>
+
+/****************************************************************************
+ *  Imported
+ *****************************************************************************
+ */
+
+void ClampLevels_C( 
+	POSTPROC_INSTANCE *pbi,
+	INT32        BlackClamp,			// levels below this value are raised to it
+	INT32        WhiteClamp,			// levels above this value are lowered to it
+	UINT8		*Src,					// reconstruction buffer : passed in
+	UINT8		*Dst					// postprocessing buffer : passed in
+	)
+{
+	// One table entry per 8 bit level. 256 entries are needed: a table of 255
+	// entries would be indexed out of bounds by a pixel value of 255.
+	unsigned char clamped[256];
+	int			  width = pbi->HFragments*8;
+	int			  height = pbi->VFragments*8;				// Y plane will be done in two passes
+	UINT8		 *SrcPtr = Src + pbi->ReconYDataOffset;
+	UINT8		 *DestPtr = Dst + pbi->ReconYDataOffset;
+	UINT32		  LineLength = pbi->YStride * 2;				// pitch is doubled for interlacing
+	int			  row, col;
+	int			  i;
+
+	// set up clamping table so we can avoid ifs while clamping.
+	// Levels inside the limits map to themselves, so every entry is initialised.
+	for(i=0;i<256;i++)
+	{
+		clamped[i] = (unsigned char)i;
+
+		if(i<BlackClamp)
+			clamped[i] = (unsigned char)BlackClamp;
+
+		if(i>WhiteClamp)
+			clamped[i] = (unsigned char)WhiteClamp;
+	}
+
+	// clamping is for y only!
+	// NOTE(review): each pixel is read from Dst and written to Src - confirm this direction is intended.
+	for ( row = 0 ; row < height ; row ++)
+	{
+		for (col = 0; col < width ; col ++)
+			SrcPtr[col]=clamped[DestPtr[col]];
+		SrcPtr += LineLength;
+		DestPtr += LineLength;
+    }
+}
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+ 
+       
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c b/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c
new file mode 100644
index 00000000..d0c33145
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/cscanyuv.c
@@ -0,0 +1,2750 @@
+/****************************************************************************
+*
+*   Module Title :     SCAN_YUV
+*
+*   Description  :     Content analysis and scoring functions for YUV 411.
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.12 PGW 27 Apr 01 Changes to use last frame coded list passed in from codec.
+*   1.11 PGW 28 Feb 01 Removal of requirement for a separate pre-processor output buffer.
+*   1.10 PGW 04 Oct 00 Bug fixes to SadPass2() and changes to how it is called.
+*					   Changes to ConsolidateDiffScanResults()
+*   1.09 PGW 29 Aug 00 Correction to defaults in SetVcapLevelOffset()
+*   1.08 JBB 03 Aug 00 Cleaned up a bit (memset full buffer)
+*                      Fixed Problem with Pak Filter wrapping over edges
+*   1.07 PGW 24 Jul 00 Added column scan function. Experiment with PAK off.
+*                      Tweaks to filter thresholds.
+*   1.06 PGW 10 Jul 00 Changes to RowDiffScan() to reduce number of conditionals.
+*   1.05 PGW 22/06/00  Filtering threshold tweaks.
+*   1.04 JBB 30/05/00  Removed hard coded size limits
+*	1.03 YX	 13/04/00  Comment out some if() testings 
+*   1.02 PGW 16/03/00  Changes to SetVcapLevelOffset() to provide 
+*                      various pre-set filter levels. 
+*   1.01 PGW 12/07/99  Changes to reduce unnecessary dependencies. 
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "preproc.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+#define MIN_STEP_THRESH 6		// Lower limit applied to StepThresh in InitScanMapArrays()
+
+
+#define SCORE_MULT_LOW    0.5	// Lower limit for entries of AbsDiff_ScoreMultiplierTable[]
+#define SCORE_MULT_MEDIUM 2.0	// NOTE(review): no use of this is visible in this part of the file
+#define SCORE_MULT_HIGH   4	// Upper limit for entries of AbsDiff_ScoreMultiplierTable[]
+
+/****************************************************************************
+*  Explicit Imports
+*****************************************************************************
+*/
+
+
+extern void ClearMmxState(PP_INSTANCE *ppi);
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+UINT32 LineLengthScores[ MAX_SEARCH_LINE_LEN + 1 ] = { 0, 0, 0, 0, 2, 4, 12, 24 };	// Compared with PrimaryBlockThreshold in InitScanMapArrays() to limit MaxLineSearchLen
+UINT32 BodyNeighbourScore = 8;
+double DiffDevisor = 0.0625; // 1/16. Scales an absolute pixel difference into AbsDiff_ScoreMultiplierTable[]
+UINT8  LineSearchTripTresh = 16;	// Re-derived from PrimaryBlockThreshold by InitScanMapArrays()
+double LowVarianceThresh = 200.0;
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/              
+
+BOOL RowSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 *  DispFragPtr );
+BOOL ColSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 *  DispFragPtr );
+
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, 
+                  INT16 * YUVDiffPtr, UINT8 * bits_map_ptr, 
+                  INT8 * SgcPtr, INT8  * DispFragPtr, 
+				  UINT8 * FDiffPixels, INT32 * RowDiffsPtr, 
+                  UINT8 * ChLocalsPtr,  BOOL EdgeRow );
+
+void SadPass2( PP_INSTANCE *ppi, INT32 RowNumber, INT8 *  DispFragPtr );
+
+void ConsolidateDiffScanResults( PP_INSTANCE *ppi, UINT8 * FDiffPixels, INT8 * SgcScores, INT8 * DispFragPtr1 );
+
+void RowChangedLocalsScan( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr, INT8 * DispFragPtr,
+                           UINT8   RowType );
+
+
+void NoiseScoreRow( PP_INSTANCE *ppi, UINT8  * PixelMapPtr, UINT8 * ChLocalsPtr, 
+				    INT16  * YUVDiffsPtr, 
+                    UINT8  * PixelNoiseScorePtr, 
+                    UINT32 * FragScorePtr, 
+					INT8   * DispFragPtr,
+                    INT32  * RowDiffsPtr );
+
+void PrimaryEdgeScoreRow( PP_INSTANCE *ppi, 
+						  UINT8  * ChangedLocalsPtr, INT16 * YUVDiffsPtr, 
+                          UINT8  * PixelNoiseScorePtr, 
+                          UINT32 * FragScorePtr,
+						  INT8   * DispFragPtr,
+                          UINT8    RowType );
+
+void LineSearchScoreRow( PP_INSTANCE *ppi, 
+						 UINT8  * ChangedLocalsPtr, INT16 * YUVDiffsPtr, 
+                         UINT8  * PixelNoiseScorePtr, 
+                         UINT32 * FragScorePtr, 
+						 INT8   * DispFragPtr,
+                         INT32    RowNumber );
+
+UINT8 LineSearchScorePixel( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber );
+void PixelLineSearch( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber, UINT8 direction, UINT32 * line_length );
+double GetLocalVarianceMultiplier( PP_INSTANCE *ppi, INT16 * YUVDiffPtr, UINT32 PlaneLineLength );
+
+//void  RowCopy( PP_INSTANCE *ppi, UINT32 BlockMapIndex );
+UINT8 ApplyPakLowPass( PP_INSTANCE *ppi, UINT8 * SrcPtr );
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/              
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InitScanMapArrays
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Initialise the display and score maps
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void InitScanMapArrays(PP_INSTANCE *ppi)
+{
+	INT32  AbsDiff;
+	UINT8  HalfSrf;
+	UINT8  Boost;
+	double Mult;
+
+	// Point the level, srf and PAK table pointers at entry 255 of their tables,
+	// so that AnalysePlane() can index them with values from -255 to 255.
+	ppi->SrfThreshTablePtr = &(ppi->SrfThreshTable[255]);
+	ppi->SgcThreshTablePtr = &(ppi->SgcThreshTable[255]);
+	ppi->SrfPakThreshTablePtr = &(ppi->SrfPakThreshTable[255]);
+
+	// Zero the fragment level score arrays and the row changed pixel counts.
+	memset( ppi->FragScores, 0, ppi->ScanFrameFragments * sizeof(UINT32) );
+	memset( ppi->SameGreyDirPixels, 0, ppi->ScanFrameFragments );
+	memset( ppi->FragDiffPixels, 0, ppi->ScanFrameFragments );
+	memset( (void *)ppi->RowChangedPixels, 0, 3* ppi->ScanConfig.VideoFrameHeight * sizeof(INT32) );
+
+	// Start the frame with every fragment marked as not coded.
+	memset( ppi->ScanDisplayFragments, BLOCK_NOT_CODED, ppi->ScanFrameFragments );
+
+	// Step used for the noise boost bands: half the grey SRF threshold, but
+	// never less than MIN_STEP_THRESH.
+	HalfSrf = (UINT8)(ppi->SRFGreyThresh >> 1);
+	if ( HalfSrf < MIN_STEP_THRESH )
+		HalfSrf = MIN_STEP_THRESH;
+	ppi->SrfThresh = (int)ppi->SRFGreyThresh;
+
+	// Build the tables that are indexed by the absolute value of a pixel change.
+	for ( AbsDiff = 0; AbsDiff < 256; AbsDiff++ )
+	{
+		// Score multiplier: the difference scaled by DiffDevisor and limited
+		// to the range SCORE_MULT_LOW to SCORE_MULT_HIGH.
+		Mult = (double)AbsDiff * DiffDevisor;
+		if ( Mult < SCORE_MULT_LOW )
+			Mult = SCORE_MULT_LOW;
+		else if ( Mult > SCORE_MULT_HIGH )
+			Mult = SCORE_MULT_HIGH;
+		ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] = Mult;
+
+		// Noise score boost: one is added for each of the limits
+		// SrfThresh + 1, 4 and 6 steps that the difference has reached.
+		Boost = 0;
+		if ( AbsDiff >= (ppi->SrfThresh + HalfSrf) )
+			Boost++;
+		if ( AbsDiff >= (ppi->SrfThresh + (HalfSrf * 4)) )
+			Boost++;
+		if ( AbsDiff >= (ppi->SrfThresh + (HalfSrf * 6)) )
+			Boost++;
+		ppi->NoiseScoreBoostTable[AbsDiff] = Boost;
+	}
+
+	// Line search trip threshold: 16, or one above the primary block threshold when that is below 16.
+	LineSearchTripTresh = 16;
+	if ( LineSearchTripTresh > ppi->PrimaryBlockThreshold )
+		LineSearchTripTresh = (UINT8)(ppi->PrimaryBlockThreshold + 1);
+
+	// Reduce the line search length while the score below it still exceeds the block threshold.
+	ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
+	while ( (ppi->MaxLineSearchLen > 0) && (LineLengthScores[ppi->MaxLineSearchLen-1] > ppi->PrimaryBlockThreshold) )
+		ppi->MaxLineSearchLen -= 1;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AnalysePlane
+ *
+ *  INPUTS        :     PlanePtr0/1     Pointers to the first pixel in the plane 
+ *                                       for source and reference images  
+ *                      FragArrayOffset  Start offset in fragment arrays.
+ *                      PWidth           Width of an image plane in pixels.
+ *                      PHeight          Height of image plane in pixels
+ *                      PStride          Plane stride (the number to be added to 
+ *                                       a pixel index to get to the corresponding 
+ *                                       pixel in the next line (can be different 
+ *                                       from PWidth))
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Analyses and filters the image plane defined by the inputs.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void AnalysePlane( PP_INSTANCE *ppi, UINT8 * PlanePtr0, UINT8 * PlanePtr1, UINT32 FragArrayOffset, UINT32 PWidth, UINT32 PHeight, UINT32 PStride ) 
+{
+    UINT8  * RawPlanePtr0;
+    UINT8  * RawPlanePtr1;
+
+    INT16  * YUVDiffsPtr;
+    INT16  * YUVDiffsPtr1;
+    INT16  * YUVDiffsPtr2;
+        
+    UINT32 FragIndex;
+    UINT32 ScoreFragIndex1;
+    UINT32 ScoreFragIndex2;
+    UINT32 ScoreFragIndex3;
+    UINT32 ScoreFragIndex4;
+
+    BOOL   UpdatedOrCandidateBlocks = FALSE;
+    
+    UINT8  * ChLocalsPtr0;
+    UINT8  * ChLocalsPtr1;
+    UINT8  * ChLocalsPtr2;
+
+	UINT8  * PixelsChangedPtr0;
+	UINT8  * PixelsChangedPtr1;
+
+    UINT8  * PixelScoresPtr1;
+    UINT8  * PixelScoresPtr2;
+
+	INT8   * DispFragPtr0;
+	INT8   * DispFragPtr1;
+	INT8   * DispFragPtr2;
+
+    UINT32 * FragScoresPtr1;
+    UINT32 * FragScoresPtr2;
+
+    INT32  * RowDiffsPtr;
+    INT32  * RowDiffsPtr1;
+    INT32  * RowDiffsPtr2;
+
+    INT32  i,j; 
+
+	INT32  RowNumber1;
+	INT32  RowNumber2;
+	INT32  RowNumber3;
+	INT32  RowNumber4;
+
+    BOOL   EdgeRow;
+    INT32  LineSearchRowNumber = 0;
+	
+	// Variables used as temporary stores for frequently used values.
+	INT32  Row0Mod3;
+	INT32  Row1Mod3;
+	INT32  Row2Mod3;
+	INT32  BlockRowPixels;
+
+
+    /* Set pixel difference threshold. A fragment array offset of 0 selects the luminance settings. */
+	if ( FragArrayOffset == 0 )
+	{
+		/* Luminance */
+		ppi->LevelThresh = (int)ppi->SgcLevelThresh;
+        ppi->NegLevelThresh = -ppi->LevelThresh;
+
+		ppi->SrfThresh = (int)ppi->SRFGreyThresh;
+        ppi->NegSrfThresh = -ppi->SrfThresh;
+
+ 	    // Scores correction for Y pixels.
+        ppi->YUVPlaneCorrectionFactor = 1.0;
+
+		ppi->BlockThreshold = ppi->PrimaryBlockThreshold;
+		ppi->BlockSgcThresh = ppi->SgcThresh;
+	}
+	else
+	{
+		/* Chrominance */
+		ppi->LevelThresh = (int)ppi->SuvcLevelThresh;
+        ppi->NegLevelThresh = -ppi->LevelThresh;
+
+        ppi->SrfThresh = (int)ppi->SRFColThresh;
+        ppi->NegSrfThresh = -ppi->SrfThresh;
+
+		// Scores correction for UV pixels.
+        ppi->YUVPlaneCorrectionFactor = 1.5;
+
+		// Block thresholds different for subsampled U and V blocks
+		ppi->BlockThreshold = (UINT32)(ppi->PrimaryBlockThreshold / ppi->UVBlockThreshCorrection);
+		ppi->BlockSgcThresh = (UINT32)(ppi->SgcThresh / ppi->UVSgcCorrection);
+	}
+
+    // Set high change thresh where PAK not needed. This must be done before the
+    // PAK thresh table is built below, because that table is derived from these limits.
+    ppi->HighChange = ppi->SrfThresh * 4;
+    ppi->NegHighChange = -ppi->HighChange;
+
+    // Initialise the SRF thresh table and pointer.
+    memset( ppi->SrfThreshTable, 1, 512 );
+    for ( i = ppi->NegSrfThresh; i <= ppi->SrfThresh; i++ )
+    {
+        ppi->SrfThreshTablePtr[i] = 0;
+    }
+    
+    // Initialise the PAK thresh table.
+    for ( i = -255; i <= 255; i++ )
+    {
+		if ( ppi->SrfThreshTablePtr[i] && (i <= ppi->HighChange) && (i >= ppi->NegHighChange) )
+            ppi->SrfPakThreshTablePtr[i] = 1;
+        else
+            ppi->SrfPakThreshTablePtr[i] = 0;
+    }
+
+    // Initialise the SGc lookup table
+    for ( i = -255; i <= 255; i++ )
+    {
+        if ( i <= ppi->NegLevelThresh )
+            ppi->SgcThreshTablePtr[i] = -1;
+        else if ( i >= ppi->LevelThresh )
+            ppi->SgcThreshTablePtr[i] = 1;
+        else
+            ppi->SgcThreshTablePtr[i] = 0;
+    }
+
+    // Set up plane dimension variables
+    ppi->PlaneHFragments = PWidth / ppi->HFragPixels;
+    ppi->PlaneVFragments = PHeight / ppi->VFragPixels;
+    ppi->PlaneWidth = PWidth;
+    ppi->PlaneHeight = PHeight;
+    ppi->PlaneStride = PStride;
+
+    // Set up local pointers into the raw image data.
+    RawPlanePtr0 = (UINT8 *)PlanePtr0;
+    RawPlanePtr1 = (UINT8 *)PlanePtr1;
+   
+    // Note the sizes of the circular buffers.
+    ppi->YuvDiffsCircularBufferSize = YDIFF_CB_ROWS * ppi->PlaneWidth;
+    ppi->ChLocalsCircularBufferSize = CHLOCALS_CB_ROWS * ppi->PlaneWidth;
+	ppi->PixelMapCircularBufferSize = PMAP_CB_ROWS * ppi->PlaneWidth;
+
+    // Set up row difference pointers.
+    RowDiffsPtr = ppi->RowChangedPixels;
+    RowDiffsPtr1 = ppi->RowChangedPixels;
+    RowDiffsPtr2 = ppi->RowChangedPixels;
+
+	BlockRowPixels = ppi->PlaneWidth * ppi->VFragPixels;
+
+    for ( i = 0; i < (ppi->PlaneVFragments + 4); i++ )
+    {
+		RowNumber1 = (i - 1);
+		RowNumber2 = (i - 2);
+		RowNumber3 = (i - 3);
+		RowNumber4 = (i - 4);
+
+		// Pre calculate some frequently used values
+		Row0Mod3 = i % 3;
+		Row1Mod3 = RowNumber1 % 3;
+		Row2Mod3 = RowNumber2 % 3;
+
+        //  The row diff scan only applies while i is a valid fragment row; the trailing iterations skip it
+        if ( i < ppi->PlaneVFragments )
+        {
+		    FragIndex = (i * ppi->PlaneHFragments) + FragArrayOffset;
+            YUVDiffsPtr = &ppi->yuv_differences[Row0Mod3 * BlockRowPixels];
+            
+			PixelsChangedPtr0 = (UINT8 *)(&ppi->PixelChangedMap[Row0Mod3 * BlockRowPixels]);
+			DispFragPtr0 =  &ppi->ScanDisplayFragments[FragIndex];
+
+            ChLocalsPtr0 = (UINT8 *)(&ppi->ChLocals[Row0Mod3 * BlockRowPixels]);
+
+        }
+
+        // Set up the changed locals pointer to trail behind by one row of fragments.
+        if ( i > 0 )
+        {
+            // The ch locals and noise scans are only valid while row (i - 1) is inside the plane
+            if ( RowNumber1 < ppi->PlaneVFragments )
+            {
+                ScoreFragIndex1 = (RowNumber1 * ppi->PlaneHFragments) + FragArrayOffset;
+          
+                ChLocalsPtr1 = (UINT8 *)(&ppi->ChLocals[Row1Mod3 * BlockRowPixels]);
+				PixelsChangedPtr1 = (UINT8 *)(&ppi->PixelChangedMap[(Row1Mod3) * BlockRowPixels]);
+
+				PixelScoresPtr1 = &ppi->PixelScores[(RowNumber1 % 4) * BlockRowPixels];
+
+                YUVDiffsPtr1 = &ppi->yuv_differences[Row1Mod3 * BlockRowPixels];
+                FragScoresPtr1 = &ppi->FragScores[ScoreFragIndex1];
+				DispFragPtr1 = &ppi->ScanDisplayFragments[ScoreFragIndex1];
+
+            }
+
+            if ( RowNumber2 >= 0 )
+            {
+                ScoreFragIndex2 = (RowNumber2 * ppi->PlaneHFragments) + FragArrayOffset;
+                ChLocalsPtr2 = (UINT8 *)(&ppi->ChLocals[Row2Mod3 * BlockRowPixels]);
+                YUVDiffsPtr2 = &ppi->yuv_differences[Row2Mod3 * BlockRowPixels];
+
+				PixelScoresPtr2 = &ppi->PixelScores[(RowNumber2 % 4) * BlockRowPixels];
+
+                FragScoresPtr2 =  &ppi->FragScores[ScoreFragIndex2];
+				DispFragPtr2 = &ppi->ScanDisplayFragments[ScoreFragIndex2];
+            }
+            else
+            {
+                ChLocalsPtr2 = NULL;
+            }
+        }
+        else
+        {
+            ChLocalsPtr1 = NULL;
+            ChLocalsPtr2 = NULL;
+        }
+
+		// Fast break out test for obvious yes and no cases in this row of blocks
+		if ( i < ppi->PlaneVFragments )
+		{
+			UpdatedOrCandidateBlocks = RowSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
+			if( ColSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 ) )
+				UpdatedOrCandidateBlocks = TRUE;
+
+//			SadPass2( ppi, i, DispFragPtr0 );
+		}
+        else	// ????? Not needed now as we always do RowSadScan etc.
+        {
+            // Make sure we still call other functions if RowSadScan() etc. disabled
+            UpdatedOrCandidateBlocks = TRUE;
+        }
+
+		// Consolidation and fast break out tests at Row 1 level
+		if ( (i > 0) && (RowNumber1 < ppi->PlaneVFragments) )
+		{
+			// Mark as coded any candidate block that lies adjacent to a coded block.
+			SadPass2( ppi, RowNumber1, DispFragPtr1 );
+
+			// Check results of diff scan in last set of blocks. 
+		    // Eliminate NO cases and add in +SGC cases
+			ConsolidateDiffScanResults( ppi, &ppi->FragDiffPixels[ScoreFragIndex1], &ppi->SameGreyDirPixels[ScoreFragIndex1], DispFragPtr1 );
+		}
+
+        for ( j = 0; j < ppi->VFragPixels; j++ )
+        {
+            // The trailing iterations (i >= PlaneVFragments) do not apply
+            if ( i < ppi->PlaneVFragments )
+            {
+                /* Is the current pixel row the first or the last row of the plane. */
+                EdgeRow = ( ( (i == 0) && (j == 0) ) ||
+                            ( (i == (ppi->PlaneVFragments - 1)) && (j == (ppi->VFragPixels - 1)) ) );
+
+                // Clear the arrays that will be used for the changed pixels maps
+                memset( PixelsChangedPtr0, 0, ppi->PlaneWidth );
+
+                // Difference scan and map each row
+                if ( UpdatedOrCandidateBlocks )
+                {
+                    // Scan the row for interesting differences
+					// Also clear the array that will be used for changed locals map
+                    RowDiffScan( ppi, RawPlanePtr0, RawPlanePtr1, 
+                                 YUVDiffsPtr, PixelsChangedPtr0, 
+                                 &ppi->SameGreyDirPixels[FragIndex], 
+                                 DispFragPtr0, &ppi->FragDiffPixels[FragIndex], 
+							     RowDiffsPtr, ChLocalsPtr0, EdgeRow);
+                }
+                else
+                {
+					// Clear the array that will be used for changed locals map
+					memset( ChLocalsPtr0, 0, ppi->PlaneWidth );
+                }
+
+                // The actual image plane pointers must be incremented by stride as this may be 
+                // different (more) than the plane width. Our own internal buffers use ppi->PlaneWidth.
+                RawPlanePtr0 += ppi->PlaneStride;
+                RawPlanePtr1 += ppi->PlaneStride;
+				PixelsChangedPtr0 += ppi->PlaneWidth;
+                ChLocalsPtr0 += ppi->PlaneWidth;
+                YUVDiffsPtr += ppi->PlaneWidth;
+                RowDiffsPtr++;
+            }
+
+            // Run behind calculating the changed locals data and noise scores.
+            if ( ChLocalsPtr1 != NULL )
+            {
+                // Last few iterations do not apply
+                if ( RowNumber1 < ppi->PlaneVFragments )
+                {
+                    // Blank the next row in the pixel scores data structure.
+	                memset( PixelScoresPtr1, 0, ppi->PlaneWidth );
+
+                    // Don't bother doing anything if there are no changed pixels in this row
+                    if ( *RowDiffsPtr1 )
+                    {
+					    // Last valid row is a special case
+                        if ( i < ppi->PlaneVFragments )
+                            RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, DispFragPtr1, (UINT8)( (((i-1)==0) && (j==0)) ? FIRST_ROW : NOT_EDGE_ROW) );
+                        else    
+                            RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1, DispFragPtr1, (UINT8)((j==(ppi->VFragPixels-1)) ? LAST_ROW : NOT_EDGE_ROW) );
+
+                        NoiseScoreRow( ppi, PixelsChangedPtr1, ChLocalsPtr1, YUVDiffsPtr1,
+                                       PixelScoresPtr1, FragScoresPtr1, DispFragPtr1, RowDiffsPtr1 );
+                    }
+
+                    ChLocalsPtr1 += ppi->PlaneWidth;
+					PixelsChangedPtr1 += ppi->PlaneWidth;
+                    YUVDiffsPtr1 += ppi->PlaneWidth;
+                    PixelScoresPtr1 += ppi->PlaneWidth;
+                    RowDiffsPtr1 ++;
+                }
+
+                // Run edge enhancement algorithms
+                if ( RowNumber2 < ppi->PlaneVFragments )
+                {
+					if ( ChLocalsPtr2 != NULL )
+					{
+						// Don't bother doing anything if there are no changed pixels in this row
+						if ( *RowDiffsPtr2 )
+						{
+							if ( RowNumber1 < ppi->PlaneVFragments )
+							{
+								PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+													 PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+													 (UINT8)( (((i-2)==0) && (j==0)) ? FIRST_ROW : NOT_EDGE_ROW)  );
+							}
+							else
+							{
+								// Edge enhancement
+								PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+													 PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+													 (UINT8)((j==(ppi->VFragPixels-1)) ? LAST_ROW : NOT_EDGE_ROW) );
+							}
+
+							// Recursive line search
+							LineSearchScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
+												PixelScoresPtr2, FragScoresPtr2, DispFragPtr2,
+												LineSearchRowNumber );
+						}
+
+						ChLocalsPtr2 += ppi->PlaneWidth;
+						YUVDiffsPtr2 += ppi->PlaneWidth;
+						PixelScoresPtr2 += ppi->PlaneWidth;
+						LineSearchRowNumber += 1;
+						RowDiffsPtr2 ++;
+					}
+				}
+            }
+        }
+
+		// BAR algorithm
+		if ( (RowNumber3 >= 0) && (RowNumber3 < ppi->PlaneVFragments) )
+		{
+            ScoreFragIndex3 = (RowNumber3 * ppi->PlaneHFragments) + FragArrayOffset;
+			RowBarEnhBlockMap(ppi,  &ppi->FragScores[ScoreFragIndex3], 
+			   				   &ppi->SameGreyDirPixels[ScoreFragIndex3],
+							   &ppi->ScanDisplayFragments[ScoreFragIndex3],
+							   &ppi->BarBlockMap[(RowNumber3 % 3) * ppi->PlaneHFragments],
+							   RowNumber3 );
+		}
+
+		// BAR copy back and "ppi->SRF filtering" or "pixel copy back"
+		if ( (RowNumber4 >= 0) && (RowNumber4 < ppi->PlaneVFragments) )
+		{
+            // BAR copy back stage must lag by one more row to avoid BAR blocks
+			// being used in BAR decisions.
+            ScoreFragIndex4 = (RowNumber4 * ppi->PlaneHFragments) + FragArrayOffset;
+
+			BarCopyBack(ppi, &ppi->ScanDisplayFragments[ScoreFragIndex4],
+						&ppi->BarBlockMap[(RowNumber4 % 3) * ppi->PlaneHFragments]);
+
+/*
+            // "Apply ppi->SRF filtering to" or "copy back" pixels.
+			PixelScoresPtr4 = &ppi->PixelScores[(RowNumber4 % 4) * BlockRowPixels];
+*/
+            // Copy over the data from any blocks marked for update into the output buffer.
+            //RowCopy(ppi, ScoreFragIndex4);
+		}
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowSadScan
+ *
+ *  INPUTS        :     UINT8  * YuvPtr1, YuvPtr2 
+ *								 Pointers into current and previous frame
+ *
+ *  OUTPUTS       :		INT8   * DispFragPtr
+ *								 Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ *  RETURNS       :     TRUE if row contains candidate or coded blocks else FALSE
+ *
+ *  FUNCTION      :     Preliminary fast scan based upon local SAD scores of 4 pixel groups
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+BOOL RowSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 *  DispFragPtr )
+{
+	INT32    i, j;
+	UINT32   GrpSad;
+	UINT32   LocalGrpLowSadThresh = ppi->ModifiedGrpLowSadThresh;
+	UINT32   LocalGrpHighSadThresh = ppi->ModifiedGrpHighSadThresh;
+	INT8   * LocalDispFragPtr;
+	UINT8  * LocalYuvPtr1;		// Byte pointers: the pixel data is never read through a wider type here,
+	UINT8  * LocalYuvPtr2;		// so no alignment is assumed for the plane pointers.
+
+    BOOL     InterestingBlocksInRow = FALSE;
+
+    // For each row of pixels in the row of blocks
+    for ( j = 0; j < ppi->VFragPixels; j++ )
+    {
+		// Set local block map pointer.
+		LocalDispFragPtr = DispFragPtr;
+
+		// Set the local pixel data pointers for this row.
+		LocalYuvPtr1 = YuvPtr1;
+		LocalYuvPtr2 = YuvPtr2;
+
+		// Scan along the row of pixels, one block wide group at a time.
+		// If the block to which a group of pixels belongs is already marked for update then no SAD is calculated.
+		for ( i = 0; i < ppi->PlaneHFragments; i ++ )
+		{
+			if ( *LocalDispFragPtr <= BLOCK_NOT_CODED )
+			{
+				// Calculate the SAD score for the block row		    
+				GrpSad = ppi->RowSAD( LocalYuvPtr1, LocalYuvPtr2 );
+
+				// Now test the group SAD score
+				if ( GrpSad > LocalGrpLowSadThresh )
+				{
+					// If SAD very high we must update else we have candidate block
+					if ( GrpSad > LocalGrpHighSadThresh )
+					{
+						// Force update
+						*LocalDispFragPtr = BLOCK_CODED;
+					}
+					else
+					{
+						// Possible Update required
+						*LocalDispFragPtr = CANDIDATE_BLOCK;
+					}
+                    InterestingBlocksInRow = TRUE;
+				}
+			}
+			// Block already marked above BLOCK_NOT_CODED: the row is of interest (PGW 27/APR/2001)
+			else
+                InterestingBlocksInRow = TRUE;
+
+			LocalDispFragPtr++;
+			// Step the pixel pointers on by one block width, as ColSadScan() does.
+			LocalYuvPtr1 += ppi->HFragPixels;
+			LocalYuvPtr2 += ppi->HFragPixels;
+		}
+
+		// Increment the base data pointers to the start of the next line.
+		YuvPtr1 += ppi->PlaneStride;
+		YuvPtr2 += ppi->PlaneStride;
+	}
+
+    // This code is PC specific
+    if ( ppi->MmxEnabled )
+    {
+        ClearMmxState(ppi);
+    }
+
+    return InterestingBlocksInRow;
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ColSadScan
+ *
+ *  INPUTS        :     UINT8  * YuvPtr1, YuvPtr2 
+ *								 Pointers into current and previous frame
+ *
+ *  OUTPUTS       :		INT8   * DispFragPtr
+ *								 Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ *  RETURNS       :     TRUE if row contains candidate or coded blocks else FALSE
+ *
+ *  FUNCTION      :     Preliminary fast scan based upon local SAD scores of 4 pixel groups
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+BOOL ColSadScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, INT8 *  DispFragPtr )
+{
+	/*
+	 * Fast first pass over one row of blocks using the column SAD routine
+	 * installed in ppi->ColSAD.
+	 *
+	 * ppi          Pre-processor instance; supplies the thresholds, the plane
+	 *              geometry and the SAD function pointer.
+	 * YuvPtr1/2    Pointers to the first pixel of the row of blocks in each of
+	 *              the two frames being compared.
+	 * DispFragPtr  Block map entries for this row. Entries at or below
+	 *              BLOCK_NOT_CODED may be changed to BLOCK_CODED or
+	 *              CANDIDATE_BLOCK; other entries are not modified.
+	 *
+	 * Returns TRUE if any block in the row was already above BLOCK_NOT_CODED
+	 * or had a SAD above the low threshold, otherwise FALSE.
+	 */
+
+	// Thresholds are copied into locals before the scan starts.
+	UINT32   LowSadThresh = ppi->ModifiedGrpLowSadThresh;
+	UINT32   HighSadThresh = ppi->ModifiedGrpHighSadThresh;
+	UINT32   BlockSad;
+	INT32    Frag;
+	BOOL     RowIsInteresting = FALSE;
+
+	// Visit each block in the row of blocks. The pixel pointers and the block
+	// map pointer all move on by one block per iteration.
+	for ( Frag = 0; Frag < ppi->PlaneHFragments; Frag++ )
+	{
+		if ( *DispFragPtr > BLOCK_NOT_CODED )
+		{
+			// Already marked to be coded: no SAD is calculated, but the
+			// row is still flagged as interesting.
+			RowIsInteresting = TRUE;
+		}
+		else
+		{
+			// Column SAD score for this block.
+			BlockSad = ppi->ColSAD( ppi, YuvPtr1, YuvPtr2 );
+
+			// At or below the low threshold the block is left as it is.
+			if ( BlockSad > LowSadThresh )
+			{
+				// Above the high threshold the block must be updated,
+				// otherwise it is only a candidate for update.
+				*DispFragPtr = ( BlockSad > HighSadThresh ) ? BLOCK_CODED : CANDIDATE_BLOCK;
+				RowIsInteresting = TRUE;
+			}
+		}
+
+		// Move on to the next block map entry.
+		DispFragPtr++;
+
+		// Move the pixel pointers on by the width of one block.
+		YuvPtr1 += ppi->HFragPixels;
+		YuvPtr2 += ppi->HFragPixels;
+	}
+
+    // This code is PC specific
+    if ( ppi->MmxEnabled )
+    {
+        ClearMmxState(ppi);
+    }
+
+    return RowIsInteresting;
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     SadPass2
+ *
+ *  INPUTS        :     INT32    RowNumber
+ *								 Fragment row number
+ *						INT8  *  DispFragPtr
+ *								 Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *
+ *  OUTPUTS       :		INT8  *  DispFragPtr
+ *								 Fragment update map (-1 = ???, 0 = No, >0 = Yes)
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This second pass should only be used when speed is critical.
+ *                      The function revisits the classification of CANDIDATE_BLOCKS
+ *                      if they are adjacent to one or more CODED_BLOCKS.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void SadPass2( PP_INSTANCE *ppi, INT32 RowNumber, INT8 *  DispFragPtr )
+{
+	// Width of one fragment row; also the stride between rows of the map.
+	const INT32 Cols     = (INT32)ppi->PlaneHFragments;
+
+	// Which neighbouring rows exist. A single-row plane has neither, so the
+	// row below is no longer read when RowNumber is 0 and there is only one row.
+	const BOOL  HasAbove = ( RowNumber > 0 );
+	const BOOL  HasBelow = ( RowNumber < ((INT32)ppi->PlaneVFragments - 1) );
+
+	INT32 i;
+
+	// Nothing to do (and nothing safe to copy) for an empty row.
+	if ( Cols <= 0 )
+		return;
+
+	for ( i = 0; i < Cols; i++ )
+	{
+		INT8 NewState = DispFragPtr[i];
+
+		// Only candidates are revisited; every other state is copied through.
+		if ( NewState == CANDIDATE_BLOCK )
+		{
+			// Horizontal neighbour range, clipped at the ends of the row so
+			// that the first block never looks left and the last never looks
+			// right (this also covers a row that is a single block wide).
+			const INT32 FirstDx = ( i > 0 )        ? -1 : 0;
+			const INT32 LastDx  = ( i < Cols - 1 ) ?  1 : 0;
+			BOOL  CodedNeighbour = FALSE;
+			INT32 dx;
+
+			for ( dx = FirstDx; (dx <= LastDx) && !CodedNeighbour; dx++ )
+			{
+				// Same row: skip the block itself.
+				if ( (dx != 0) && (DispFragPtr[i + dx] == BLOCK_CODED) )
+					CodedNeighbour = TRUE;
+
+				// Row above.
+				if ( HasAbove && (DispFragPtr[i + dx - Cols] == BLOCK_CODED) )
+					CodedNeighbour = TRUE;
+
+				// Row below.
+				if ( HasBelow && (DispFragPtr[i + dx + Cols] == BLOCK_CODED) )
+					CodedNeighbour = TRUE;
+			}
+
+			// A candidate touching a BLOCK_CODED neighbour is promoted.
+			if ( CodedNeighbour )
+				NewState = BLOCK_CODED_LOW;
+		}
+
+		// Results go to the scratch row so that neighbour tests for later
+		// blocks in this row still see the unmodified input states.
+		ppi->TmpCodedMap[i] = NewState;
+	}
+
+    // Now copy back the modified Fragment data
+	memcpy( &DispFragPtr[0], &ppi->TmpCodedMap[0], Cols );
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowDiffScanFragment
+ *
+ *  INPUTS        :     UINT8  * YuvPtr1, YuvPtr2 
+ *								 First pixel of the fragment in each of the two frames
+ *                      INT32    PakStart, PakEnd
+ *                               Half open range [PakStart,PakEnd) of pixel indices 
+ *                               for which the PAK low pass may be applied.
+ *                               An empty range disables it completely.
+ *
+ *  OUTPUTS       :		INT16  * YUVDiffsPtr
+ *								 Raw difference for each pixel of the fragment
+ *                      UINT8  * bits_map_ptr
+ *                               SrfThreshTablePtr result for each pixel
+ *                      INT8   * SgcPtr
+ *								 Accumulates SgcThreshTablePtr results
+ *
+ *  RETURNS       :     Sum of the SrfThreshTablePtr results for the fragment.
+ *
+ *  FUNCTION      :     Difference scan of one candidate fragment on one row.
+ *                      Private helper for RowDiffScan().
+ *
+ *  SPECIAL NOTES :     The raw difference is what gets stored and SGC tested;
+ *                      only the SRF test sees the low passed difference. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+static INT32 RowDiffScanFragment( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, 
+                                  INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr, 
+                                  INT8  * SgcPtr, INT32 PakStart, INT32 PakEnd )
+{
+    INT32 j;
+    INT32 ChangedPixels = 0;
+    INT16 Diff;     // Temp local workspace.
+
+    for ( j = 0; j < ppi->HFragPixels; j++ )
+    {
+        // Take a local copy of the measured difference.
+        Diff = ((INT16)YuvPtr1[j]) - ((INT16)YuvPtr2[j]);
+
+        // Store the actual difference value
+        YUVDiffsPtr[j] = Diff;
+
+        // Test against the Level thresholds and record the results
+        SgcPtr[0] += ppi->SgcThreshTablePtr[Diff];
+
+        // Inside the permitted range, and only when the PAK table flags this
+        // difference, replace it by the difference of the low passed pixels.
+        // The range test comes first so the PAK table is never touched when
+        // the low pass is disabled.
+        if ( (j >= PakStart) && (j < PakEnd) && ppi->SrfPakThreshTablePtr[Diff] )
+            Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) - 
+                   (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
+
+        // Test against the SRF thresholds
+        bits_map_ptr[j] = ppi->SrfThreshTablePtr[Diff];
+        ChangedPixels += ppi->SrfThreshTablePtr[Diff];
+    }
+
+    return ChangedPixels;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowDiffScan
+ *
+ *  INPUTS        :     UINT8  * YuvPtr1, YuvPtr2 
+ *								 Pointers into current and previous frame
+ *                      BOOL     EdgeRow
+ *                               Is this row an edge row.
+ *
+ *  OUTPUTS       :		INT16  * YUVDiffsPtr
+ *								 Difference map
+ *                      UINT8  * bits_map_ptr
+ *                               Pixels changed map
+ *                      INT8   * SgcPtr
+ *								 Level change score.
+ *                      INT8   * DispFragPtr
+ *                               Block update map.
+ *                      UINT8  * FDiffPixels
+ *                               Per fragment changed pixel counts (accumulated)
+ *                      INT32  * RowDiffsPtr
+ *								 Total sig changes for row
+ *                      UINT8 *  ChLocalsPtr
+ *                               Changed locals data structure
+ *
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     Initial pixel differences scan
+ *
+ *  SPECIAL NOTES :     The PAK low pass is used only when ppi->PAKEnabled is set
+ *                      and the row is not an edge row. It is then skipped for the
+ *                      first pixel of the first fragment and the last pixel of the
+ *                      last fragment of the row (presumably because the filter
+ *                      reads horizontal neighbours - confirm in ApplyPakLowPass).
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2, 
+                  INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr, 
+                  INT8  * SgcPtr, INT8  * DispFragPtr, 
+				  UINT8 * FDiffPixels, INT32 * RowDiffsPtr, 
+                  UINT8 * ChLocalsPtr, BOOL EdgeRow )
+{
+    INT32 i;
+    INT32 FragChangedPixels;
+    INT32 PakStart;
+    INT32 PakEnd;
+
+    // Cannot use kernel if at edge or if PAK disabled
+    const BOOL UsePak = ( ppi->PAKEnabled && !EdgeRow );
+
+    for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+    {
+        // Reset count of pixels changed for the current fragment.
+        FragChangedPixels = 0;
+
+        if ( *DispFragPtr == CANDIDATE_BLOCK )
+        {
+            // Clear down entries in changed locals array
+            memset( ChLocalsPtr, 0, ppi->HFragPixels );
+
+            // Work out where the low pass is allowed in this fragment.
+            // A row that is a single fragment wide is both first and last.
+            PakStart = 0;
+            PakEnd   = 0;
+            if ( UsePak )
+            {
+                PakStart = ( i == 0 ) ? 1 : 0;
+                PakEnd   = ( (i + ppi->HFragPixels) >= ppi->PlaneWidth ) 
+                           ? (INT32)(ppi->HFragPixels - 1) 
+                           : (INT32)ppi->HFragPixels;
+            }
+
+            FragChangedPixels = RowDiffScanFragment( ppi, YuvPtr1, YuvPtr2, 
+                                                     YUVDiffsPtr, bits_map_ptr, 
+                                                     SgcPtr, PakStart, PakEnd );
+        }
+        else if ( *DispFragPtr > BLOCK_NOT_CODED )
+        {
+            // For EBO coded blocks mark all pixels as changed, with the
+            // dummy changed locals value of 8 for every pixel.
+            memset( bits_map_ptr, 1, ppi->HFragPixels );
+            memset( ChLocalsPtr, 8, ppi->HFragPixels );
+        }
+        else
+        {
+            // Not coded: just clear the changed locals entries.
+            memset( ChLocalsPtr, 0, ppi->HFragPixels );
+        }
+
+        *RowDiffsPtr += FragChangedPixels;
+        *FDiffPixels += (UINT8)FragChangedPixels;
+
+        // If we have a lot of changed pixels for this fragment on this row then 
+        // the fragment is almost sure to be picked (e.g. through the line search) so we
+        // can mark it as selected and then ignore it.
+        if ( FragChangedPixels >= 7 )
+        {
+            *DispFragPtr = BLOCK_CODED_LOW;
+        }
+
+        // Step everything on to the next fragment.
+        YuvPtr1 += ppi->HFragPixels;
+        YuvPtr2 += ppi->HFragPixels;
+        bits_map_ptr += ppi->HFragPixels;
+        ChLocalsPtr += ppi->HFragPixels;
+        YUVDiffsPtr += ppi->HFragPixels;
+        SgcPtr ++;
+        FDiffPixels ++;
+        DispFragPtr++;    
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ConsolidateDiffScanResults
+ *
+ *  INPUTS        :     UINT8  * FDiffPixels
+ *								 Fragment changed pixels records
+ *                      INT8   * SgcScoresPtr
+ *								 Fragment SGC records
+ *                      INT8   * DispFragPtr
+ *                               Fragment update map (-1 = ???, 0 = No, 1 = Yes)
+ *
+ *  OUTPUTS       :		UINT8  * DispFragPtr
+ *								 Fragment update map (-1 = ???, 0 = No, 1 = Yes)
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     Considers new information from difference scan and, if necessary, 
+ *                      updates output map.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void ConsolidateDiffScanResults( PP_INSTANCE *ppi, UINT8 * FDiffPixels, INT8 * SgcScoresPtr, INT8 * DispFragPtr )
+{
+	INT32  Frag;
+	INT8 * State;
+
+	for ( Frag = 0; Frag < ppi->PlaneHFragments; Frag++ )
+	{
+		State = &DispFragPtr[Frag];
+
+		// Definite YES and NO cases are left alone; only blocks that are 
+		// still candidates after the difference scan are reconsidered.
+		if ( *State != CANDIDATE_BLOCK )
+			continue;
+
+		if ( (UINT32)abs(SgcScoresPtr[Frag]) > ppi->BlockSgcThresh )
+		{
+			// Magnitude of the Sgc score is over threshold: mark for update.
+			*State = BLOCK_CODED_SGC;
+		}
+		else if ( FDiffPixels[Frag] == 0 )
+		{
+			// No changed pixels were recorded. The block is demoted rather
+			// than set to BLOCK_NOT_CODED: it drops out of the main tests 
+			// but will still be considered a candidate in RowBarEnhBlockMap()
+			*State = CANDIDATE_BLOCK_LOW;
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     RowChangedLocalsScan
+ *
+ *  INPUTS        :     UINT8  * PixelMapPtr.
+ *                      UINT8  * ChLocalsPtr.
+ *                      INT8   * DispFragPtr
+ *                      UINT8  * RowType
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     Calculates changed locals for changed pixels
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void RowChangedLocalsScan( PP_INSTANCE *ppi, UINT8 * PixelMapPtr, UINT8 * ChLocalsPtr, 
+						   INT8  * DispFragPtr, UINT8   RowType )
+{
+	UINT8 * AboveRow;		// Current pixel position in the previous map row
+	UINT8 * ThisRow;		// Current pixel position in this map row
+	UINT8 * BelowRow;		// Current pixel position in the next map row
+	UINT8   changed_locals;
+	BOOL    HasAbove = TRUE;
+	BOOL    HasBelow = TRUE;
+	BOOL    HasLeft;
+	BOOL    HasRight;
+	INT32   i, j;
+	INT32   LastRowIndex = ppi->PlaneWidth - 1;
+
+	// Decide which neighbouring rows take part. FIRST_ROW has nothing above;
+	// any other edge row type is treated as the last row and has nothing below.
+	if ( RowType != NOT_EDGE_ROW )
+	{
+		if ( RowType == FIRST_ROW )
+			HasAbove = FALSE;
+		else
+			HasBelow = FALSE;
+	}
+
+	// Set up the line based pointers into the bits changed map, wrapping
+	// at either end of the circular buffer.
+	AboveRow = PixelMapPtr - ppi->PlaneWidth;
+	if ( AboveRow < ppi->PixelChangedMap )
+		AboveRow += ppi->PixelMapCircularBufferSize;
+
+	ThisRow = PixelMapPtr;
+
+	BelowRow = PixelMapPtr + ppi->PlaneWidth;
+	if ( BelowRow >= (ppi->PixelChangedMap + ppi->PixelMapCircularBufferSize) )
+		BelowRow -= ppi->PixelMapCircularBufferSize;
+
+	// Scan through the row of pixels and calculate changed locals.
+	for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+	{
+		// Only candidate fragments get a per pixel neighbour count.
+		if ( *DispFragPtr == CANDIDATE_BLOCK )
+		{
+			for ( j = 0; j < ppi->HFragPixels; j++ )
+			{
+				// Nothing is recorded unless the pixel itself has changed.
+				if ( !ThisRow[j] )
+					continue;
+
+				// Left and right neighbours are skipped at the row ends.
+				HasLeft  = ( (i > 0) || (j > 0) );
+				HasRight = ( (i + j) < LastRowIndex );
+
+				changed_locals = 0;
+
+				if ( HasLeft )
+					changed_locals += ThisRow[j-1];
+				if ( HasRight )
+					changed_locals += ThisRow[j+1];
+
+				if ( HasAbove )
+				{
+					changed_locals += AboveRow[j];
+					if ( HasLeft )
+						changed_locals += AboveRow[j-1];
+					if ( HasRight )
+						changed_locals += AboveRow[j+1];
+				}
+
+				if ( HasBelow )
+				{
+					changed_locals += BelowRow[j];
+					if ( HasLeft )
+						changed_locals += BelowRow[j-1];
+					if ( HasRight )
+						changed_locals += BelowRow[j+1];
+				}
+
+				// Store the number of changed locals (OR-ed into the entry).
+				ChLocalsPtr[j] |= changed_locals;
+			}
+		}
+		else if ( *DispFragPtr > BLOCK_NOT_CODED )
+		{
+			// Already coded fragment: every pixel gets the dummy value 8.
+			memset( ChLocalsPtr, 8, ppi->HFragPixels );
+		}
+
+		// Step pointers on to the next fragment.
+		ChLocalsPtr += ppi->HFragPixels;
+		AboveRow += ppi->HFragPixels;
+		ThisRow += ppi->HFragPixels;
+		BelowRow += ppi->HFragPixels;
+		DispFragPtr++;    
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     NoiseScoreRow
+ *
+ *  INPUTS        :     UINT8  * PixelMapPtr.
+ *                      INT16  * YUVDiffsPtr,
+ *                      UINT8  * PixelNoiseScorePtr
+ *                      UINT32 * FragScorePtr
+ *                      INT8   * DispFragPtr
+ *                      INT32  * RowDiffsPtr 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     Calculates the noise scores for a row of pixels.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void NoiseScoreRow( PP_INSTANCE *ppi, UINT8  * PixelMapPtr, UINT8 * ChLocalsPtr, 
+				    INT16  * YUVDiffsPtr, 
+                    UINT8  * PixelNoiseScorePtr, 
+                    UINT32 * FragScorePtr, 
+					INT8   * DispFragPtr,
+                    INT32  * RowDiffsPtr )
+{ 
+    INT32  Col, Pix;
+    UINT8  Locals; 
+    INT32  PixelScore;
+    UINT32 FragTotal;
+    INT32  Magnitude;
+
+    // One iteration per fragment; all per fragment and per pixel pointers are
+    // stepped together in the loop header so that skipped fragments stay in step.
+    for ( Col = 0; 
+          Col < ppi->PlaneWidth; 
+          Col += ppi->HFragPixels,
+          FragScorePtr++,
+          DispFragPtr++,
+          PixelNoiseScorePtr += ppi->HFragPixels,
+          PixelMapPtr += ppi->HFragPixels,
+          ChLocalsPtr += ppi->HFragPixels,
+          YUVDiffsPtr += ppi->HFragPixels )
+    {
+        // Only fragments that are still candidates are scored.
+        if ( *DispFragPtr != CANDIDATE_BLOCK )
+            continue;
+
+        // Reset the cumulative fragment score.
+        FragTotal = 0;
+
+        for ( Pix = 0; Pix < ppi->HFragPixels; Pix++ )
+        {
+            // Pixels not flagged in the changed map contribute nothing.
+            if ( !PixelMapPtr[Pix] )
+                continue;
+
+            Magnitude = (INT32)( abs(YUVDiffsPtr[Pix]) );
+            Locals = ChLocalsPtr[Pix];
+
+            // Give this pixel a score based on changed locals and level of its own change.
+            PixelScore = (1 + ((INT32)(Locals + ppi->NoiseScoreBoostTable[Magnitude]) - ppi->NoiseSupLevel));  
+
+            if ( PixelScore <= 0 )
+            {
+                // Set -ve values to 0
+                PixelScore = 0;
+
+                // With no changed locals either, clear the pixel changed flag
+                // and take it back out of the row total to speed later stages.
+                if ( Locals == 0 )
+                {
+                    PixelMapPtr[Pix] = 0; 
+                    *RowDiffsPtr -= 1;
+                }
+            }
+            else
+            {
+                // Non zero scores are scaled by a level based multiplier,
+                // but never allowed to fall below 1.
+                PixelScore = (INT32)( (double)PixelScore * ppi->AbsDiff_ScoreMultiplierTable[Magnitude] );
+                if ( PixelScore < 1 )
+                    PixelScore = 1;
+            }
+
+            // Update the pixel scores etc.
+            PixelNoiseScorePtr[Pix] = (UINT8)PixelScore;
+            FragTotal += (UINT32)PixelScore;
+        }
+
+        // Add fragment score (with plane correction factor) into main data structure
+        *FragScorePtr += (INT32)(FragTotal * ppi->YUVPlaneCorrectionFactor);
+
+        // If score is greater than trip threshold then mark block for update.
+        if ( *FragScorePtr > ppi->BlockThreshold )
+            *DispFragPtr = BLOCK_CODED_LOW;
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PrimaryEdgeScoreRow
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi           (instance data: plane sizes, thresholds, ChLocals buffer)
+ *                      UINT8  * ChangedLocalsPtr   (changed-locals counts for this row)
+ *                      INT16  * YUVDiffsPtr        (pixel differences for this row)
+ *                      UINT8  * PixelNoiseScorePtr (per-pixel scores for this row, updated)
+ *                      UINT32 * FragScorePtr, INT8 * DispFragPtr  (one entry per fragment, updated)
+ *                      UINT8    RowType            (NOT_EDGE_ROW, LAST_ROW, or anything else = first row)
+ *
+ *  OUTPUTS       :     PixelNoiseScorePtr[], FragScorePtr[] and DispFragPtr[] are updated in place.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Calculates the primary edge scores for a row of pixels.
+ *                      Only fragments flagged CANDIDATE_BLOCK are scored; a fragment whose
+ *                      accumulated score exceeds ppi->BlockThreshold becomes BLOCK_CODED_LOW.
+ *
+ *  SPECIAL NOTES :     The rows above and below wrap within the ppi->ChLocals circular buffer.
+ *  ERRORS        :     None.
+ ****************************************************************************/   
+void PrimaryEdgeScoreRow( PP_INSTANCE *ppi, UINT8  * ChangedLocalsPtr, INT16 * YUVDiffsPtr, 
+                          UINT8  * PixelNoiseScorePtr, 
+                          UINT32 * FragScorePtr,
+						  INT8   * DispFragPtr,
+                          UINT8    RowType )
+{ 
+    UINT32 BodyNeighbours;
+    UINT32 AbsDiff;
+    UINT8  changed_locals = 0; 
+    INT32  Score;
+    UINT32 FragScore;
+	UINT8  * CHLocalsPtr0;		// Row above
+	UINT8  * CHLocalsPtr1;		// Current row
+	UINT8  * CHLocalsPtr2;		// Row below
+    INT32  i,j;					// i: first pixel of the fragment, j: pixel within the fragment
+	INT32  LastRowIndex = ppi->PlaneWidth - 1;	// Index of the last pixel in the row
+
+	// Set up pointers into the previous, current and next rows of the changed locals buffer. Each is offset by -1 so that [0], [1], [2] are the left, centre and right neighbours.
+	CHLocalsPtr0 = ChangedLocalsPtr - ppi->PlaneWidth;
+	if ( CHLocalsPtr0 < ppi->ChLocals )
+		CHLocalsPtr0 += ppi->ChLocalsCircularBufferSize;
+	CHLocalsPtr0 -= 1;	
+	
+	CHLocalsPtr1 = ChangedLocalsPtr - 1;
+	
+	CHLocalsPtr2 = ChangedLocalsPtr + ppi->PlaneWidth;
+	if ( CHLocalsPtr2 >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+		CHLocalsPtr2 -= ppi->ChLocalsCircularBufferSize;
+	CHLocalsPtr2 -= 1;	
+
+
+    /* The defining rule used here is as follows. */
+    /* An edge pixel has 3-5 changed locals. */
+    /* And one or more of these changed locals has itself got 7-8 changed locals. */
+
+    if ( RowType == NOT_EDGE_ROW )
+    {
+		/* Loop for all pixels in the row. */
+		for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+		{
+			// Does the fragment contain anything interesting to work with.
+			if ( *DispFragPtr == CANDIDATE_BLOCK )
+			{
+				// Reset the cumulative fragment score.
+				FragScore = 0;
+
+				// Pixels grouped along the row into fragments
+				for ( j = 0; j < ppi->HFragPixels; j++ )
+				{
+					// How many changed locals has the current pixel got.
+					changed_locals = ChangedLocalsPtr[j];
+
+					// Is the pixel a suitable candidate
+					if ( (changed_locals > 2) && (changed_locals < 6) )                   
+					{
+						// The pixel may qualify... have a closer look. 
+						BodyNeighbours = 0;
+
+						// Count the number of "BodyNeighbours" .. Pixels
+						//  that have 7 or more changed neighbours. 
+						if ( (i > 0) || (j > 0 ) )		// Not the first pixel on the line: left-hand neighbours exist
+						{
+							if ( CHLocalsPtr0[0] >= 7 )
+								BodyNeighbours++;
+							if ( CHLocalsPtr1[0] >= 7 )
+								BodyNeighbours++;
+							if ( CHLocalsPtr2[0] >= 7 )
+								BodyNeighbours++;
+						}
+
+						if ( CHLocalsPtr0[1] >= 7 )
+							BodyNeighbours++;
+						if ( CHLocalsPtr2[1] >= 7 )
+							BodyNeighbours++;
+
+						if ( (i + j) < LastRowIndex )	// Not the last pixel on the line: right-hand neighbours exist
+						{
+							if ( CHLocalsPtr0[2] >= 7 )
+								BodyNeighbours++;
+							if ( CHLocalsPtr1[2] >= 7 )
+								BodyNeighbours++;
+							if ( CHLocalsPtr2[2] >= 7 )
+								BodyNeighbours++;
+						}
+
+						if ( BodyNeighbours > 0 )
+						{
+							AbsDiff = abs( YUVDiffsPtr[j] );
+							Score = (INT32)( (double)(BodyNeighbours * BodyNeighbourScore) * 
+								             ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
+							if ( Score < 1 )
+								Score = 1;
+
+							/* Increment the score by a value determined by the number of body neighbours. */
+							PixelNoiseScorePtr[j] += (UINT8)Score;  // NOTE(review): UINT8 accumulation can wrap - confirm the score range
+							FragScore += (UINT32)Score;
+						}
+					}
+
+					// Increment pointers into changed locals buffer
+					CHLocalsPtr0 ++;
+					CHLocalsPtr1 ++;
+					CHLocalsPtr2 ++;
+				}
+
+				// Add fragment score (with plane correction factor) into main data structure
+				*FragScorePtr += (INT32)(FragScore * ppi->YUVPlaneCorrectionFactor);
+
+				// If score is greater than trip threshold then mark block for update.
+				if ( *FragScorePtr > ppi->BlockThreshold )
+				{
+					*DispFragPtr = BLOCK_CODED_LOW;
+				}
+
+			}
+			else   // Nothing to do for this fragment group
+			{
+				// Advance pointers into changed locals buffer
+				CHLocalsPtr0 += ppi->HFragPixels;
+				CHLocalsPtr1 += ppi->HFragPixels;
+				CHLocalsPtr2 += ppi->HFragPixels;
+			}
+
+			// Increment the various pointers
+			FragScorePtr++;
+			DispFragPtr++;
+			PixelNoiseScorePtr += ppi->HFragPixels;
+			ChangedLocalsPtr += ppi->HFragPixels;
+			YUVDiffsPtr += ppi->HFragPixels;
+		}  
+	}
+	else		// This is either the top or bottom row of pixels in a plane.
+    {
+		/* Loop for all pixels in the row. */
+		for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
+		{
+			// Does the fragment contain anything interesting to work with.
+			if ( *DispFragPtr == CANDIDATE_BLOCK )
+			{
+				// Reset the cumulative fragment score.
+				FragScore = 0;
+
+				// Pixels grouped along the row into fragments
+				for ( j = 0; j < ppi->HFragPixels; j++ )
+				{
+					// How many changed locals has the current pixel got.
+					changed_locals = ChangedLocalsPtr[j];
+
+					// Is the pixel a suitable candidate
+					if ( (changed_locals > 2) && (changed_locals < 6) )                   
+					{
+						/* The pixel may qualify... have a closer look. */
+						BodyNeighbours = 0;
+
+						/* Count the number of "BodyNeighbours" .. Pixels
+						*  that have 7 or more changed neighbours. */
+						if ( RowType == LAST_ROW )
+						{
+							// Test for cases where it could be the first pixel on the line
+							if ( (i > 0) || (j > 0) )
+							{
+								if ( CHLocalsPtr0[0] >= 7 )
+									BodyNeighbours++;
+								if ( CHLocalsPtr1[0] >= 7 )
+									BodyNeighbours++;
+							}
+
+							if ( CHLocalsPtr0[1] >= 7 )
+								BodyNeighbours++;
+
+							// Test for the end of line case
+ 							if ( (i + j) < LastRowIndex )
+							{
+								if ( CHLocalsPtr0[2] >= 7 )
+									BodyNeighbours++;
+
+								if ( CHLocalsPtr1[2] >= 7 )
+									BodyNeighbours++;
+							}
+						}
+						else  // FIRST ROW
+						{
+							// Test for cases where it could be the first pixel on the line
+							if ( (i > 0) || (j > 0) )
+							{
+								if ( CHLocalsPtr1[0] >= 7 )
+									BodyNeighbours++;
+								if ( CHLocalsPtr2[0] >= 7 )
+									BodyNeighbours++;
+							}
+
+							// Pixel directly below
+							if ( CHLocalsPtr2[1] >= 7 )
+								BodyNeighbours++;
+							// Test for the end of line case
+ 							if ( (i + j) < LastRowIndex )
+							{
+								if ( CHLocalsPtr1[2] >= 7 )
+									BodyNeighbours++;
+								if ( CHLocalsPtr2[2] >= 7 )
+									BodyNeighbours++;
+							}
+						}
+
+						// Allocate a score according to the number of Body neighbours.
+						if ( BodyNeighbours > 0 )
+						{
+							AbsDiff = abs( YUVDiffsPtr[j] );
+							Score = (INT32)( (double)(BodyNeighbours * BodyNeighbourScore) * 
+								             ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
+							if ( Score < 1 )
+								Score = 1;
+
+							PixelNoiseScorePtr[j] += (UINT8)Score;  // NOTE(review): UINT8 accumulation can wrap - confirm the score range
+							FragScore += (UINT32)Score;
+						}
+					}
+
+					// Increment pointers into changed locals buffer
+					CHLocalsPtr0 ++;
+					CHLocalsPtr1 ++;
+					CHLocalsPtr2 ++;
+				}
+
+				// Add fragment score (with plane correction factor) into main data structure
+				*FragScorePtr += (INT32)(FragScore * ppi->YUVPlaneCorrectionFactor);
+
+				// If score is greater than trip threshold then mark block for update.
+				if ( *FragScorePtr > ppi->BlockThreshold )
+				{
+					*DispFragPtr = BLOCK_CODED_LOW;
+				}
+
+			}
+			else   // Nothing to do for this fragment group
+			{
+				// Advance pointers into changed locals buffer
+				CHLocalsPtr0 += ppi->HFragPixels;
+				CHLocalsPtr1 += ppi->HFragPixels;
+				CHLocalsPtr2 += ppi->HFragPixels;
+			}
+
+			// Increment the various pointers
+			FragScorePtr++;
+			DispFragPtr++;
+			PixelNoiseScorePtr += ppi->HFragPixels;
+			ChangedLocalsPtr += ppi->HFragPixels;
+			YUVDiffsPtr += ppi->HFragPixels;
+		}  
+	}
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LineSearchScoreRow
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi           (instance data)
+ *                      UINT8  * ChangedLocalsPtr   (changed-locals counts for this row)
+ *                      INT16  * YUVDiffsPtr        (pixel differences for this row)
+ *                      UINT8  * PixelNoiseScorePtr (per-pixel scores for this row)
+ *                      UINT32 * FragScorePtr       (one score per fragment in the row)
+ *                      INT8   * DispFragPtr        (one state flag per fragment in the row)
+ *                      INT32    RowNumber          (row number within the plane)
+ *
+ *  OUTPUTS       :     PixelNoiseScorePtr[], FragScorePtr[] and DispFragPtr[] are
+ *                      updated in place.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Calculates the line match scores for a row of pixels.
+ *
+ *  SPECIAL NOTES :     Only fragments flagged CANDIDATE_BLOCK are examined.
+ *                      Within them a pixel is scored when it has 2-5 changed
+ *                      locals and its noise score is still below
+ *                      LineSearchTripTresh.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+void LineSearchScoreRow( PP_INSTANCE *ppi, UINT8  * ChangedLocalsPtr, INT16 * YUVDiffsPtr, 
+                         UINT8  * PixelNoiseScorePtr, 
+                         UINT32 * FragScorePtr, 
+						 INT8   * DispFragPtr,
+                         INT32    RowNumber )
+{ 
+    INT32  FragStart;              // Column of the first pixel in the current fragment
+    INT32  FragIndex = 0;          // Index of the current fragment within the row
+    INT32  Offset;                 // Pixel offset within the current fragment
+    INT32  PixelScore;
+    UINT32 RowFragScore;
+    UINT32 DiffMagnitude;
+    UINT8  LocalsCount;
+    UINT8  * FragLocals;
+    UINT8  * FragNoise;
+    INT16  * FragDiffs;
+
+    // Step along the row one fragment at a time.
+    for ( FragStart = 0; FragStart < ppi->PlaneWidth; FragStart += ppi->HFragPixels, FragIndex++ )
+    {
+        // Fragments that are not candidates are left untouched.
+        if ( DispFragPtr[FragIndex] != CANDIDATE_BLOCK )
+            continue;
+
+        // Locate this fragment's pixels within the row buffers.
+        FragLocals = ChangedLocalsPtr + FragStart;
+        FragNoise  = PixelNoiseScorePtr + FragStart;
+        FragDiffs  = YUVDiffsPtr + FragStart;
+        RowFragScore = 0;
+
+        // Examine each pixel of the fragment in turn.
+        for ( Offset = 0; Offset < ppi->HFragPixels; Offset++ )
+        {
+            LocalsCount = FragLocals[Offset];
+
+            // Skip pixels outside the 2-5 range or already at the trip threshold.
+            if ( (LocalsCount <= 1) || (LocalsCount >= 6) ||
+                 (FragNoise[Offset] >= LineSearchTripTresh) )
+                continue;
+
+            // A zero line score contributes nothing.
+            PixelScore = (INT32)LineSearchScorePixel( ppi, &FragLocals[Offset], RowNumber, FragStart + Offset );
+            if ( PixelScore == 0 )
+                continue;
+
+            // Weight the line score by the size of the pixel difference (minimum result 1).
+            DiffMagnitude = abs( FragDiffs[Offset] );
+            PixelScore = (INT32)( (double)PixelScore * ppi->AbsDiff_ScoreMultiplierTable[DiffMagnitude] );
+            if ( PixelScore < 1 )
+                PixelScore = 1;
+
+            FragNoise[Offset] += (UINT8)PixelScore;
+            RowFragScore += (UINT32)PixelScore;
+        }
+
+        // Fold the fragment total (with plane correction factor) into the main data structure.
+        FragScorePtr[FragIndex] += (INT32)(RowFragScore * ppi->YUVPlaneCorrectionFactor);
+
+        // Once past the trip threshold the block is marked for update.
+        if ( FragScorePtr[FragIndex] > ppi->BlockThreshold )
+            DispFragPtr[FragIndex] = BLOCK_CODED_LOW;
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LineSearchBothWays
+ *
+ *  INPUTS        :     As for LineSearchScorePixel, plus two opposing search directions.
+ *
+ *  RETURNS       :     Line length through the pixel, never more than
+ *                      ppi->MaxLineSearchLen. 0 if the pixel does not qualify.
+ *
+ *  FUNCTION      :     Searches in FirstDir and, if that alone does not reach
+ *                      the maximum length, in SecondDir as well.
+ *
+ ****************************************************************************/   
+static UINT32 LineSearchBothWays( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber,
+                                  UINT8 FirstDir, UINT8 SecondDir )
+{
+    UINT32 FirstLength = 0;
+    UINT32 SecondLength = 0;
+    UINT32 Combined;
+
+    PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, ColNumber, FirstDir, &FirstLength );
+
+    // Zero means the start pixel itself was rejected, so the opposite search would give
+    // zero too. Without this test "0 + 0 - 1" wraps round and is clamped to the maximum.
+    if ( FirstLength == 0 )
+        return 0;
+
+    if ( !(FirstLength < ppi->MaxLineSearchLen) )
+        return FirstLength;
+
+    // Both searches count the start pixel, hence the "- 1".
+    PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber, ColNumber, SecondDir, &SecondLength );
+    Combined = FirstLength + SecondLength - 1;
+    if ( Combined > ppi->MaxLineSearchLen )
+        Combined = ppi->MaxLineSearchLen;
+
+    return Combined;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LineSearchScorePixel 
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi            (instance data)
+ *                      UINT8 * ChangedLocalsPtr     (map entry for this pixel)
+ *                      INT32   RowNumber			 (Row number)
+ *                      INT32   ColNumber            (Column number within a row)
+ *
+ *  RETURNS       :     A pixel line search score (the LineLengthScores[] entry for the longer line)
+ *
+ *  FUNCTION      :     Returns a Line Search score for a pixel.
+ *
+ ****************************************************************************/   
+UINT8 LineSearchScorePixel( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber )
+{                   
+    UINT32 line_length; 
+    UINT32 line_length2 = 0; 
+    UINT32 line_length_score; 
+
+	// Look UP and DOWN
+    line_length = LineSearchBothWays( ppi, ChangedLocalsPtr, RowNumber, ColNumber, UP, DOWN );
+
+	// If no max length line found then look left and right                
+	if ( line_length < ppi->MaxLineSearchLen )
+	    line_length2 = LineSearchBothWays( ppi, ChangedLocalsPtr, RowNumber, ColNumber, LEFT, RIGHT );
+
+	/* Take the largest line length */
+	if ( line_length2 > line_length )
+	    line_length = line_length2;
+
+   	line_length_score = LineLengthScores[line_length];
+    return (UINT8)line_length_score;
+}
+
+
+
+/****************************************************************************
+ *                                  
+ *  ROUTINE       :     PixelLineSearch
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi         (instance data)
+ *                      UINT8 * ChangedLocalsPtr  (Map entry for this pixel)
+ *                      INT32   RowNumber		  (Row number)
+ *                      INT32   ColNumber         (Column number within a row)
+ *                      UINT8   direction         (UP, DOWN, LEFT or RIGHT)
+ *
+ *  OUTPUTS       :     UINT32 * line_length      (in: length so far, out: best length found)
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Recursive function for tracking along a line of pixels
+ *                      obeying a specific set of rules
+ *
+ *  SPECIAL NOTES :     The row and column are range checked before the map entry
+ *                      is read, so a position outside the plane never touches memory.
+ *
+ *  ERRORS        :     None.
+ ****************************************************************************/
+void PixelLineSearch( PP_INSTANCE *ppi, UINT8 * ChangedLocalsPtr, INT32 RowNumber, INT32 ColNumber, UINT8 direction, UINT32 * line_length )
+{   
+    // Exit if we have fallen off the edge of either the image plane or the row, or if the pixel
+	// does not qualify. The position is tested first so the map is never read outside the plane.
+    if ( (RowNumber < 0) ||
+         (RowNumber >= ppi->PlaneHeight) ||
+		 (ColNumber < 0) ||
+		 (ColNumber >= ppi->PlaneWidth) ||
+         ((*ChangedLocalsPtr) <= 1) ||
+         ((*ChangedLocalsPtr) >= 6) )
+    {
+        // If not then it isn't part of any line.
+        return;
+    }
+
+    if (*line_length < ppi->MaxLineSearchLen)
+    {   
+        UINT32 TmpLineLength; 
+        UINT32 BestLineLength;
+		UINT8 * search_ptr;
+
+        // Increment the line length to include this pixel. 
+        *line_length += 1;
+        BestLineLength = *line_length;
+         
+        // Continue search 
+        // up  
+        if ( direction == UP )
+        {
+            TmpLineLength = *line_length;
+
+			search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+			if ( search_ptr < ppi->ChLocals )
+				search_ptr += ppi->ChLocalsCircularBufferSize;
+
+            PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber, direction, &TmpLineLength );    
+        
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+        
+        // up and left    
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == UP) || (direction == LEFT)) )
+        {   
+            TmpLineLength = *line_length;
+            
+			search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+			if ( search_ptr < ppi->ChLocals )
+				search_ptr += ppi->ChLocalsCircularBufferSize;
+			search_ptr -= 1;
+
+			PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber - 1, direction,  &TmpLineLength );    
+            
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        } 
+        
+        // up and right
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == UP) || (direction == RIGHT)) )
+        {   
+            TmpLineLength = *line_length;
+
+			search_ptr = ChangedLocalsPtr - ppi->PlaneWidth;
+			if ( search_ptr < ppi->ChLocals )
+				search_ptr += ppi->ChLocalsCircularBufferSize;
+			search_ptr += 1;
+
+            PixelLineSearch( ppi, search_ptr, RowNumber - 1, ColNumber + 1, direction, &TmpLineLength );   
+            
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+        
+        // left
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == LEFT ) )
+        {   
+            TmpLineLength = *line_length;
+            PixelLineSearch( ppi, ChangedLocalsPtr - 1, RowNumber, ColNumber - 1, direction, &TmpLineLength );    
+                
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }      
+              
+        // right
+        if ( (BestLineLength < ppi->MaxLineSearchLen) && ( direction == RIGHT ) )
+        {   
+            TmpLineLength = *line_length;
+            PixelLineSearch( ppi, ChangedLocalsPtr + 1, RowNumber, ColNumber + 1, direction, &TmpLineLength );    
+                
+            if ( TmpLineLength > BestLineLength )
+                BestLineLength = TmpLineLength;
+        }
+        
+        // Down...            
+        if ( BestLineLength < ppi->MaxLineSearchLen )
+        {   
+            TmpLineLength = *line_length;
+            // down
+            if ( direction == DOWN )
+            {
+				search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+				if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+					search_ptr -= ppi->ChLocalsCircularBufferSize;
+
+                PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber, direction, &TmpLineLength );    
+                
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            }
+            
+
+            // down and left    
+            if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == DOWN) || (direction == LEFT)) )
+            {   
+                TmpLineLength = *line_length;
+				
+				search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+				if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+					search_ptr -= ppi->ChLocalsCircularBufferSize;
+				search_ptr -= 1;
+
+                PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber - 1, direction, &TmpLineLength );    
+                
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            } 
+            
+            // down and right
+            if ( (BestLineLength < ppi->MaxLineSearchLen) && ((direction == DOWN) || (direction == RIGHT)) )
+            {   
+                TmpLineLength = *line_length;
+
+				search_ptr = ChangedLocalsPtr + ppi->PlaneWidth;
+				if ( search_ptr >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
+					search_ptr -= ppi->ChLocalsCircularBufferSize;
+				search_ptr += 1;
+                
+				PixelLineSearch( ppi, search_ptr, RowNumber + 1, ColNumber + 1, direction, &TmpLineLength );   
+                
+                if ( TmpLineLength > BestLineLength )
+                    BestLineLength = TmpLineLength;
+            }
+        }    
+        
+        // Note the search value for this pixel.  
+        *line_length = BestLineLength;
+    }
+
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ScanCalcPixelIndexTable
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi  (fragment counts and frame dimensions)
+ *
+ *  OUTPUTS       :     ppi->ScanPixelIndexTable is filled in.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Initialises the pixel index table used in the scan module.
+ *                      Each entry holds the pixel offset computed for one fragment:
+ *                      the Y plane entries first, then the U and V plane entries.
+ *
+ *  SPECIAL NOTES :     The U and V entries use half the fragment columns and half
+ *                      the frame width, and are offset by ppi->YFramePixels.
+ *  ERRORS        :     None.
+ ****************************************************************************/
+void ScanCalcPixelIndexTable(PP_INSTANCE *ppi)
+{
+    UINT32   FragNum;
+    UINT32 * Entry = ppi->ScanPixelIndexTable;
+
+    // Y plane: offset of the fragment's row first, then its column offset within that row.
+    for ( FragNum = 0; FragNum < (ppi->ScanYPlaneFragments); FragNum++, Entry++ )
+    {
+        *Entry  = ((FragNum / ppi->ScanHFragments) * ppi->VFragPixels * ppi->ScanConfig.VideoFrameWidth);
+        *Entry += ((FragNum % ppi->ScanHFragments) * ppi->HFragPixels);
+    }
+
+    // U and V planes: these entries follow directly after the Y plane entries.
+    Entry = &ppi->ScanPixelIndexTable[ppi->ScanYPlaneFragments];
+    for ( FragNum = 0; FragNum < (ppi->ScanUVPlaneFragments * 2); FragNum++, Entry++ )
+    {
+        *Entry  = ((FragNum / (ppi->ScanHFragments >> 1) ) *
+                   (ppi->VFragPixels * (ppi->ScanConfig.VideoFrameWidth >> 1)) );
+        *Entry += ((FragNum % (ppi->ScanHFragments >> 1) ) * ppi->HFragPixels) + ppi->YFramePixels;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     SetVcapLevelOffset
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi    (instance whose parameters are set)
+ *                      INT32         Level  (0 to 6; anything else selects the fallback set)
+ *
+ *  OUTPUTS       :     Nine threshold/level fields of ppi plus ppi->PAKEnabled.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Configures VCAP parameters to one of a set of pre-defined
+ *                      alternatives.
+ *
+ *  SPECIAL NOTES :     Every path writes the same ten fields. The fallback set
+ *                      matches level 2 except that GrpLowSadThresh is 10, not 8.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void SetVcapLevelOffset( PP_INSTANCE *ppi, INT32 Level ) 
+{
+    if ( Level == 0 )                   // The only level with PAK disabled
+    {
+        ppi->PAKEnabled = FALSE;
+        ppi->SRFGreyThresh = 1;
+        ppi->SRFColThresh = 1;
+        ppi->NoiseSupLevel = 2;
+        ppi->SgcLevelThresh = 1;
+        ppi->SuvcLevelThresh = 1;
+        ppi->GrpLowSadThresh = 6;
+        ppi->GrpHighSadThresh = 24;
+        ppi->PrimaryBlockThreshold = 2;
+        ppi->SgcThresh = 10;
+    }
+
+    else if ( Level == 1 )
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 2;
+        ppi->SRFColThresh = 2;
+        ppi->NoiseSupLevel = 2;
+        ppi->SgcLevelThresh = 2;
+        ppi->SuvcLevelThresh = 2;
+        ppi->GrpLowSadThresh = 8;
+        ppi->GrpHighSadThresh = 32;
+        ppi->PrimaryBlockThreshold = 5;
+        ppi->SgcThresh = 12;
+    }
+
+    else if ( Level == 2 )              // Default VP3 settings
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 3;
+        ppi->SRFColThresh = 3;
+        ppi->NoiseSupLevel = 2;
+        ppi->SgcLevelThresh = 2;
+        ppi->SuvcLevelThresh = 2;
+        ppi->GrpLowSadThresh = 8;
+        ppi->GrpHighSadThresh = 32;
+        ppi->PrimaryBlockThreshold = 5;
+        ppi->SgcThresh = 16;
+    }
+
+    else if ( Level == 3 )
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 4;
+        ppi->SRFColThresh = 4;
+        ppi->NoiseSupLevel = 3;
+        ppi->SgcLevelThresh = 3;
+        ppi->SuvcLevelThresh = 3;
+        ppi->GrpLowSadThresh = 10;
+        ppi->GrpHighSadThresh = 48;
+        ppi->PrimaryBlockThreshold = 5;
+        ppi->SgcThresh = 18;
+    }
+
+    else if ( Level == 4 )
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 5;
+        ppi->SRFColThresh = 5;
+        ppi->NoiseSupLevel = 3;
+        ppi->SgcLevelThresh = 4;
+        ppi->SuvcLevelThresh = 4;
+        ppi->GrpLowSadThresh = 12;
+        ppi->GrpHighSadThresh = 48;
+        ppi->PrimaryBlockThreshold = 5;
+        ppi->SgcThresh = 20;
+    }
+
+    else if ( Level == 5 )              // Default live narrow band settings
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 6;
+        ppi->SRFColThresh = 6;
+        ppi->NoiseSupLevel = 3;
+        ppi->SgcLevelThresh = 4;
+        ppi->SuvcLevelThresh = 4;
+        ppi->GrpLowSadThresh = 12;
+        ppi->GrpHighSadThresh = 64;
+        ppi->PrimaryBlockThreshold = 10;
+        ppi->SgcThresh = 24;
+    }
+
+    else if ( Level == 6 )              // As level 5 but with SRFColThresh raised to 7
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 6;
+        ppi->SRFColThresh = 7;
+        ppi->NoiseSupLevel = 3;
+        ppi->SgcLevelThresh = 4;
+        ppi->SuvcLevelThresh = 4;
+        ppi->GrpLowSadThresh = 12;
+        ppi->GrpHighSadThresh = 64;
+        ppi->PrimaryBlockThreshold = 10;
+        ppi->SgcThresh = 24;
+    }
+
+    else                                // Any other level: fallback set
+    {
+        ppi->PAKEnabled = TRUE;
+        ppi->SRFGreyThresh = 3;
+        ppi->SRFColThresh = 3;
+        ppi->NoiseSupLevel = 2;
+        ppi->SgcLevelThresh = 2;
+        ppi->SuvcLevelThresh = 2;
+        ppi->GrpLowSadThresh = 10;
+        ppi->GrpHighSadThresh = 32;
+        ppi->PrimaryBlockThreshold = 5;
+        ppi->SgcThresh = 16;
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AddDiffTriple
+ *
+ *  INPUTS        :     INT16 * CentrePtr  (middle of three horizontally adjacent differences)
+ *
+ *  OUTPUTS       :     INT32 * XSum       (running sum of the values)
+ *                      INT32 * XXSum      (running sum of the squared values)
+ *
+ *  FUNCTION      :     Adds CentrePtr[-1], CentrePtr[0] and CentrePtr[1] to the
+ *                      running totals used by GetLocalVarianceMultiplier.
+ *
+ ****************************************************************************/   
+static void AddDiffTriple( INT16 * CentrePtr, INT32 * XSum, INT32 * XXSum )
+{
+    INT32 Offset;
+    INT32 DiffVal;
+
+    for ( Offset = -1; Offset <= 1; Offset++ )
+    {
+        DiffVal = CentrePtr[Offset];
+        *XSum += DiffVal;
+        *XXSum += DiffVal * DiffVal;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     GetLocalVarianceMultiplier
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi           (owner of the yuv_differences circular buffer)
+ *                      INT16 *   MasterYUVDiffPtr  (difference value for the pixel of interest)
+ *                      UINT32    PlaneLineLength   (distance in entries between rows)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Score multiplier:
+ *                        1.5 if the variance is above 2 * LowVarianceThresh,
+ *                        1.0 if it is above LowVarianceThresh,
+ *                        0.5 otherwise.
+ *
+ *  FUNCTION      :     Calculates a score correction based on local variance
+ *                      over the 3x3 neighbourhood centred on MasterYUVDiffPtr.
+ *
+ *  SPECIAL NOTES :     Reads one entry either side of the pixel in each row.
+ *                      NOTE(review): presumably never called for the first or last
+ *                      column of a row - confirm against the caller.
+ *                      A next-row pointer one past the buffer end wraps to the start.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+double GetLocalVarianceMultiplier( PP_INSTANCE *ppi, INT16 * MasterYUVDiffPtr, UINT32 PlaneLineLength )
+{
+	INT32	XSum=0;
+	INT32	XXSum=0;
+    double  LocalVariance;
+    double  VarMultiplier;
+    INT16 * YUVDiffPtr;
+
+    // Previous row (wrap round to the far end of the buffer if necessary)
+    YUVDiffPtr = MasterYUVDiffPtr - PlaneLineLength;
+    if ( YUVDiffPtr < ppi->yuv_differences )
+        YUVDiffPtr += ppi->YuvDiffsCircularBufferSize;
+    AddDiffTriple( YUVDiffPtr, &XSum, &XXSum );
+
+    // Current row
+    AddDiffTriple( MasterYUVDiffPtr, &XSum, &XXSum );
+
+    // Next row (wrap back to the start of the buffer if necessary). The entry one
+    // past the end is not part of the buffer, so the test must be >= and not >.
+    YUVDiffPtr = MasterYUVDiffPtr + PlaneLineLength;
+	if ( YUVDiffPtr >= &ppi->yuv_differences[ppi->YuvDiffsCircularBufferSize] )
+		YUVDiffPtr -= ppi->YuvDiffsCircularBufferSize;
+    AddDiffTriple( YUVDiffPtr, &XSum, &XXSum );
+
+	// Population variance of the nine values: mean of squares minus square of mean
+	// (0.1111 and 0.012346 approximate 1/9 and 1/81).
+	LocalVariance = ((double)XXSum * 0.1111) - ((double)XSum * (double)XSum * 0.012346);
+
+    if ( LocalVariance > 2 * LowVarianceThresh )
+    {
+        VarMultiplier = 1.5;
+    }
+    else if ( LocalVariance > LowVarianceThresh )
+    {
+        VarMultiplier = 1.0;
+    }
+    else
+    {
+        VarMultiplier = 0.5;
+    }
+
+    return VarMultiplier;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ScalarRowSAD
+ *
+ *  INPUTS        :     UINT8 * Src1   (first row of 8 pixels)
+ *                      UINT8 * Src2   (second row of 8 pixels)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The larger of the two 4 pixel sums of absolute
+ *                      differences.
+ *
+ *  FUNCTION      :     Calculates a sum of the absolute differences for each of
+ *                      two groups of 4 pixels (pixels 0-3 and pixels 4-7) and
+ *                      returns the larger of the two values.
+ *
+ *  SPECIAL NOTES :     Eight pixels are read from each source, so both must
+ *                      be at least that long.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/   
+UINT32 ScalarRowSAD( UINT8 * Src1, UINT8 * Src2 )
+{
+	UINT32 GroupSad[2] = { 0, 0 };
+	INT32  Pixel;
+
+	// Pixels 0-3 accumulate into GroupSad[0], pixels 4-7 into GroupSad[1].
+	for ( Pixel = 0; Pixel < 8; Pixel++ )
+		GroupSad[Pixel >> 2] += abs( Src1[Pixel] - Src2[Pixel] );
+
+	// Report whichever group of four differs the most.
+	if ( GroupSad[0] > GroupSad[1] )
+		return GroupSad[0];
+
+	return GroupSad[1];
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ScalarColSAD
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi   (supplies PlaneStride)
+ *						UINT8 * Src1       (top left pixel of the first block)
+ *                      UINT8 * Src2       (top left pixel of the second block)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The maximum 4 pixel column SAD found in the 8 rows by
+ *                      8 columns examined.
+ *
+ *  FUNCTION      :     Splits the 8 rows into an upper and a lower group of 4.
+ *                      For each group and each of the 8 columns it sums the
+ *                      absolute differences down the 4 rows, then returns the
+ *                      largest of the 16 sums.
+ *
+ *  SPECIAL NOTES :     Successive rows are ppi->PlaneStride bytes apart in both
+ *                      sources.
+ *                      Both source pointers are advanced locally only; the
+ *                      caller's pointers are not affected.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ *
+ ****************************************************************************/   
+UINT32 ScalarColSAD( PP_INSTANCE *ppi, UINT8 * Src1, UINT8 * Src2 )
+{
+	UINT32 ColumnSad[8];
+	UINT32 Largest = 0;
+	UINT32 Half;
+	UINT32 Row;
+	UINT32 Col;
+
+	// The block is handled as an upper and then a lower half of four rows each.
+	for ( Half = 0; Half < 2; Half++ )
+	{
+		// Start each half with clean column totals.
+		for ( Col = 0; Col < 8; Col++ )
+		{
+			ColumnSad[Col] = 0;
+		}
+
+		// Accumulate the column differences over the four rows of this half.
+		for ( Row = 0; Row < 4; Row++ )
+		{
+			for ( Col = 0; Col < 8; Col++ )
+			{
+				ColumnSad[Col] += abs( Src1[Col] - Src2[Col] );
+			}
+
+			// Step both sources down one row.
+			Src1 += ppi->PlaneStride;
+			Src2 += ppi->PlaneStride;
+		}
+
+		// Keep the largest column total seen so far.
+		for ( Col = 0; Col < 8; Col++ )
+		{
+			if ( ColumnSad[Col] > Largest )
+				Largest = ColumnSad[Col];
+		}
+	}
+
+	return Largest;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ApplyPakLowPass
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi  (supplies PlaneStride)
+ *                      UINT8 * SrcPtr     (central point in kernel)
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Filtered value.
+ *
+ *  FUNCTION      :     Applies a moderate low pass filter at the given location:
+ *                      the eight surrounding pixels are summed and divided by 8.
+ *
+ *  SPECIAL NOTES :     The centre pixel itself is not part of the sum.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT8 ApplyPakLowPass( PP_INSTANCE *ppi, UINT8 * SrcPtr )
+{
+	UINT8 * Above = SrcPtr - ppi->PlaneStride;        // Note the use of stride not width.
+	UINT8 * Below = SrcPtr + ppi->PlaneStride;
+	UINT32  Sum;
+
+	Sum  = (UINT32)Above[-1] + (UINT32)Above[0] + (UINT32)Above[1];
+	Sum += (UINT32)SrcPtr[-1] + (UINT32)SrcPtr[1];
+	Sum += (UINT32)Below[-1] + (UINT32)Below[0] + (UINT32)Below[1];
+	return (UINT8)( Sum >> 3 );
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c
new file mode 100644
index 00000000..7c9070c5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocfunctions.c
@@ -0,0 +1,110 @@
+/****************************************************************************
+*
+*   Module Title :     PreProcFunctions.c
+*
+*   Description  :     
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 22 Aug 00  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "preproc.h"
+#ifdef _MSC_VER 
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+#endif
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Imports.
+*****************************************************************************
+*/   
+    
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Functions 
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MachineSpecificConfig
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi : instance to configure.
+ *
+ *  OUTPUTS       :     ppi->RowSAD and ppi->ColSAD are set.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Selects the row/column SAD function pointers. This
+ *                      version always installs the scalar routines.
+ *
+ *  SPECIAL NOTES :     No CPU feature detection is performed here. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void MachineSpecificConfig(PP_INSTANCE *ppi)
+{
+    // No feature flags are queried: the scalar routines are used unconditionally.
+    ppi->RowSAD = ScalarRowSAD;
+    ppi->ColSAD = ScalarColSAD;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearMmxState()
+ *
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi : unused.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Placeholder for clearing down the MMX state. This
+ *                      version has nothing to clear and returns at once.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearMmxState(PP_INSTANCE *ppi)
+{
+	(void)ppi;      // intentionally a no-op in this version
+}
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c
new file mode 100644
index 00000000..9df699bd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocglobals.c
@@ -0,0 +1,501 @@
+/****************************************************************************
+*
+*   Module Title :     PreProcGlobals
+*
+*   Description  :     Pre-processor module globals.
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.07 PGW 20 Feb 01 Disable history buffer mechanism.
+*   1.06 JBB 20 Sep 00 duck_ memory allocation calls
+*   1.05 JBB 02 Aug 00 Checked duck_malloc return codes
+*   1.04 PGW 24 Jul 00 Deleted BeThreshold & ShowVcapPramsDlg.
+*   1.03 PGW 10 Jul 00 Added KFIndicator. 
+*   1.02 JBB 30/05/00  Removed hard coded size limits
+*   1.01 PGW 12/07/99  Changes to reduce unnecessary dependencies. 
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+#include "preprocconf.h"
+#include "preproc.h"
+#include <stdlib.h>
+#include "duck_mem.h"
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+
+
+//PP_INSTANCE *ppi;
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PDeleteFragmentInfo
+ *
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi : pre-processor instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Placeholder for releasing fragment allocations.
+ *                      The body is currently empty.
+ *
+ *  SPECIAL NOTES :     Called by PAllocateFragmentInfo(). 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void PDeleteFragmentInfo(PP_INSTANCE * ppi)
+{
+
+	// duck_free prior allocs if present (there are none at the moment)
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PAllocateFragmentInfo
+ *
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi : pre-processor instance.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Placeholder for the fragment allocations. It only
+ *                      calls PDeleteFragmentInfo(); nothing is allocated.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void PAllocateFragmentInfo(PP_INSTANCE * ppi)
+{
+
+	// clear any existing info
+	PDeleteFragmentInfo(ppi);
+
+	// Perform Fragment Allocations (none are made at present)
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PDeleteFrameInfo
+ *
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi : instance owning the buffers.
+ *
+ *  OUTPUTS       :     The twelve frame buffer pointer pairs are zeroed.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Frees each per-frame buffer that is currently
+ *                      allocated and clears its raw and working pointers.
+ *
+ *  SPECIAL NOTES :     Safe to call repeatedly; null buffers are skipped. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void PDeleteFrameInfo(PP_INSTANCE * ppi)
+{
+	// -- ScanPixelIndexTable --
+	if ( ppi->ScanPixelIndexTableAlloc != 0 )
+		duck_free( ppi->ScanPixelIndexTableAlloc );
+	ppi->ScanPixelIndexTable      = 0;
+	ppi->ScanPixelIndexTableAlloc = 0;
+	// -- ScanDisplayFragments --
+	if ( ppi->ScanDisplayFragmentsAlloc != 0 )
+		duck_free( ppi->ScanDisplayFragmentsAlloc );
+	ppi->ScanDisplayFragments      = 0;
+	ppi->ScanDisplayFragmentsAlloc = 0;
+	// -- FragScores --
+	if ( ppi->FragScoresAlloc != 0 )
+		duck_free( ppi->FragScoresAlloc );
+	ppi->FragScores      = 0;
+	ppi->FragScoresAlloc = 0;
+	// -- SameGreyDirPixels --
+	if ( ppi->SameGreyDirPixelsAlloc != 0 )
+		duck_free( ppi->SameGreyDirPixelsAlloc );
+	ppi->SameGreyDirPixels      = 0;
+	ppi->SameGreyDirPixelsAlloc = 0;
+	// -- FragDiffPixels --
+	if ( ppi->FragDiffPixelsAlloc != 0 )
+		duck_free( ppi->FragDiffPixelsAlloc );
+	ppi->FragDiffPixels      = 0;
+	ppi->FragDiffPixelsAlloc = 0;
+	// -- BarBlockMap --
+	if ( ppi->BarBlockMapAlloc != 0 )
+		duck_free( ppi->BarBlockMapAlloc );
+	ppi->BarBlockMap      = 0;
+	ppi->BarBlockMapAlloc = 0;
+	// -- TmpCodedMap --
+	if ( ppi->TmpCodedMapAlloc != 0 )
+		duck_free( ppi->TmpCodedMapAlloc );
+	ppi->TmpCodedMap      = 0;
+	ppi->TmpCodedMapAlloc = 0;
+	// -- RowChangedPixels --
+	if ( ppi->RowChangedPixelsAlloc != 0 )
+		duck_free( ppi->RowChangedPixelsAlloc );
+	ppi->RowChangedPixels      = 0;
+	ppi->RowChangedPixelsAlloc = 0;
+	// -- PixelScores --
+	if ( ppi->PixelScoresAlloc != 0 )
+		duck_free( ppi->PixelScoresAlloc );
+	ppi->PixelScores      = 0;
+	ppi->PixelScoresAlloc = 0;
+	// -- PixelChangedMap --
+	if ( ppi->PixelChangedMapAlloc != 0 )
+		duck_free( ppi->PixelChangedMapAlloc );
+	ppi->PixelChangedMap      = 0;
+	ppi->PixelChangedMapAlloc = 0;
+	// -- ChLocals --
+	if ( ppi->ChLocalsAlloc != 0 )
+		duck_free( ppi->ChLocalsAlloc );
+	ppi->ChLocals      = 0;
+	ppi->ChLocalsAlloc = 0;
+	// -- yuv_differences --
+	if ( ppi->yuv_differencesAlloc != 0 )
+		duck_free( ppi->yuv_differencesAlloc );
+	ppi->yuv_differences      = 0;
+	ppi->yuv_differencesAlloc = 0;
+}
+
+
+#define ROUNDUP32(X) ( ( ( (size_t) (X) ) + 31 ) & ~(size_t)31 )
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PAllocateFrameInfo
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi : instance whose ScanFrameFragments,
+ *                      ScanHFragments and ScanConfig sizes are already set.
+ *
+ *  OUTPUTS       :     The per-frame buffers of ppi (raw and aligned pointers).
+ *
+ *  RETURNS       :     TRUE on success, FALSE if any allocation failed.
+ *
+ *  FUNCTION      :     Frees any previous frame buffers, then allocates each
+ *                      one with 32 spare bytes and rounds the working pointer
+ *                      up to a 32 byte boundary with ROUNDUP32.
+ *
+ *  SPECIAL NOTES :     ROUNDUP32 works in size_t so that pointers wider than
+ *                      32 bits are not truncated. On failure everything
+ *                      allocated so far is released again.
+ *
+ ****************************************************************************/
+BOOL PAllocateFrameInfo(PP_INSTANCE * ppi)
+{
+    PDeleteFrameInfo(ppi);
+
+	ppi->ScanPixelIndexTableAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT32), DMEM_GENERAL);
+    if(!ppi->ScanPixelIndexTableAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->ScanPixelIndexTable = (UINT32 *) ROUNDUP32(ppi->ScanPixelIndexTableAlloc);
+
+	ppi->ScanDisplayFragmentsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->ScanDisplayFragmentsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->ScanDisplayFragments = (INT8 *) ROUNDUP32(ppi->ScanDisplayFragmentsAlloc);
+
+	ppi->FragScoresAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT32), DMEM_GENERAL);
+    if(!ppi->FragScoresAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->FragScores = (UINT32 *) ROUNDUP32(ppi->FragScoresAlloc);
+
+	ppi->SameGreyDirPixelsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->SameGreyDirPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->SameGreyDirPixels = (INT8 *) ROUNDUP32(ppi->SameGreyDirPixelsAlloc);
+
+	ppi->FragDiffPixelsAlloc = duck_malloc(32 + ppi->ScanFrameFragments*sizeof(UINT8), DMEM_GENERAL);
+    if(!ppi->FragDiffPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->FragDiffPixels = (UINT8 *) ROUNDUP32(ppi->FragDiffPixelsAlloc);
+
+	ppi->BarBlockMapAlloc = duck_malloc(32 + 3 * ppi->ScanHFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->BarBlockMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->BarBlockMap = (INT8 *) ROUNDUP32(ppi->BarBlockMapAlloc);
+
+	ppi->TmpCodedMapAlloc = duck_malloc(32 + ppi->ScanHFragments*sizeof(INT8), DMEM_GENERAL);
+    if(!ppi->TmpCodedMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->TmpCodedMap = (INT8 *) ROUNDUP32(ppi->TmpCodedMapAlloc);
+
+	ppi->RowChangedPixelsAlloc = duck_malloc(32 + 3 * ppi->ScanConfig.VideoFrameHeight *sizeof(INT32), DMEM_GENERAL);
+    if(!ppi->RowChangedPixelsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->RowChangedPixels = (INT32 *) ROUNDUP32(ppi->RowChangedPixelsAlloc);
+
+	ppi->PixelScoresAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth* sizeof(UINT8) * PSCORE_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->PixelScoresAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->PixelScores = (UINT8 *) ROUNDUP32(ppi->PixelScoresAlloc);
+
+    ppi->PixelChangedMapAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(UINT8) * PMAP_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->PixelChangedMapAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->PixelChangedMap = ( UINT8 *) ROUNDUP32(ppi->PixelChangedMapAlloc);
+
+    ppi->ChLocalsAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(UINT8) * CHLOCALS_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->ChLocalsAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->ChLocals = (UINT8 *) ROUNDUP32(ppi->ChLocalsAlloc);
+
+    ppi->yuv_differencesAlloc = duck_malloc(32 + ppi->ScanConfig.VideoFrameWidth*sizeof(INT16) * YDIFF_CB_ROWS, DMEM_GENERAL);
+    if(!ppi->yuv_differencesAlloc) {PDeleteFrameInfo(ppi);return FALSE;}
+	ppi->yuv_differences = (INT16 *) ROUNDUP32(ppi->yuv_differencesAlloc);
+
+    return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeletePPInstance
+ *
+ *  INPUTS        :     PP_INSTANCE **ppi : address of the instance pointer.
+ *
+ *  OUTPUTS       :     *ppi is set to 0.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Frees the frame buffers and then the instance itself.
+ *
+ *  SPECIAL NOTES :     A null ppi or null *ppi is ignored, so the call is
+ *                      harmless when there is no instance to delete.
+ *
+ *  ERRORS        :     None.
+ ****************************************************************************/
+void DeletePPInstance(PP_INSTANCE **ppi)
+{
+	if ( !ppi || !*ppi )
+		return;
+
+    PDeleteFrameInfo(*ppi);
+	duck_free(*ppi);
+	*ppi=0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CreatePPInstance
+ *
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The new instance, or NULL if it could not be allocated.
+ * 
+ *
+ *  FUNCTION      :     Allocates a pre-processor instance and sets its
+ *                      fields to their defaults. No frame buffers are made.
+ *
+ *  SPECIAL NOTES :     Release the instance with DeletePPInstance(). 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+PP_INSTANCE * CreatePPInstance(void)
+{
+	PP_INSTANCE *ppi;
+
+	// Allocate the structure. duck_calloc() is presumably calloc-like (zero
+	// filled), but the assignments below spell out the defaults rather
+	// than depend on that.
+	ppi = duck_calloc(1, sizeof(PP_INSTANCE), DMEM_GENERAL);
+
+	// The allocation can fail: hand that back to the caller as NULL rather
+	// than writing through a null pointer below.
+	if ( !ppi )
+	{
+		return NULL;
+	}
+
+	ppi->OutputBlocksUpdated = 0;
+	ppi->KFIndicator = 0;
+
+// Initializations
+    ppi->VideoYPlaneWidth = 0;
+    ppi->VideoYPlaneHeight = 0;
+    ppi->VideoUVPlaneWidth = 0;
+    ppi->VideoUVPlaneHeight = 0;
+    
+    ppi->VideoYPlaneStride = 0;
+    ppi->VideoUPlaneStride = 0;
+    ppi->VideoVPlaneStride = 0;
+    
+    /* Scan control variables. */
+	ppi->HFragPixels = 8;
+	ppi->VFragPixels = 8;
+    
+    ppi->ScanFrameFragments = 0 ;
+    ppi->ScanYPlaneFragments = 0;
+    ppi->ScanUVPlaneFragments= 0;
+    ppi->ScanHFragments= 0;
+    ppi->ScanVFragments= 0;
+    
+    ppi->YFramePixels = 0; 
+    ppi->UVFramePixels = 0; 
+    ppi->TotFramePixels = 0;
+    
+	
+	ppi->SRFGreyThresh = 4;
+	ppi->SRFColThresh = 5;
+	ppi->NoiseSupLevel = 3;
+	ppi->SgcLevelThresh = 3;
+	ppi->SuvcLevelThresh = 4;
+	
+	// Variables controlling S.A.D. break outs.
+	ppi->GrpLowSadThresh = 10;
+	ppi->GrpHighSadThresh = 64;
+	ppi->PrimaryBlockThreshold = 5;
+	ppi->SgcThresh = 16;			   // (Default values for 8x8 blocks).
+    
+    ppi->PAKEnabled = FALSE; //TRUE;
+    
+    ppi->LevelThresh = 0; // no initialization in Paul's
+    ppi->NegLevelThresh = 0; // no initialization in Paul's
+    ppi->SrfThresh = 0; // no initialization in Paul's
+    ppi->NegSrfThresh = 0; // no initialization in Paul's
+    ppi->HighChange = 0; // no initialization in Paul's
+    ppi->NegHighChange = 0; // no initialization in Paul's
+    
+    ppi->ModifiedGrpLowSadThresh = 0;
+    ppi->ModifiedGrpHighSadThresh = 0; // no initialization in Paul's
+    
+    ppi->PlaneHFragments = 0; 
+    ppi->PlaneVFragments = 0;
+    ppi->PlaneHeight = 0;
+    ppi->PlaneWidth = 0;
+    ppi->PlaneStride = 0;
+    
+    ppi->BlockThreshold = 0; // no initialization in Paul's
+    ppi->BlockSgcThresh = 0;
+    ppi->UVBlockThreshCorrection = 1.25;
+    ppi->UVSgcCorrection = 1.5;
+    
+    ppi->SpeedCritical = 3;
+    
+    // PC specific variables
+    ppi->MmxEnabled = FALSE;
+    
+    ppi->YUVPlaneCorrectionFactor = 0;	// no initialization in Paul's
+    ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
+    
+    ppi->YuvDiffsCircularBufferSize = 0; // no initialization in Paul's
+    ppi->ChLocalsCircularBufferSize = 0;
+    ppi->PixelMapCircularBufferSize = 0;
+    
+    // Function pointers for mmx switches
+    ppi->RowSAD = 0;
+    ppi->ColSAD = 0;
+
+	// Per-frame work buffers. Each has a raw allocation pointer ("...Alloc")
+	// and a working pointer derived from it; both start out null and are
+	// filled in by PAllocateFrameInfo().
+	ppi->ScanPixelIndexTableAlloc= 0;
+	ppi->ScanPixelIndexTable= 0;
+
+	ppi->ScanDisplayFragmentsAlloc= 0;
+	ppi->ScanDisplayFragments= 0;
+
+	ppi->FragScoresAlloc= 0;
+	ppi->FragScores= 0;
+
+	ppi->SameGreyDirPixelsAlloc= 0;
+	ppi->SameGreyDirPixels= 0;
+
+	ppi->FragDiffPixelsAlloc= 0;
+	ppi->FragDiffPixels= 0;
+
+	ppi->BarBlockMapAlloc= 0;
+	ppi->BarBlockMap= 0;
+
+	ppi->TmpCodedMapAlloc= 0;
+	ppi->TmpCodedMap= 0;
+
+	ppi->RowChangedPixelsAlloc= 0;
+	ppi->RowChangedPixels= 0;
+
+	ppi->PixelScoresAlloc= 0;
+	ppi->PixelScores= 0;
+
+	ppi->PixelChangedMapAlloc= 0;
+	ppi->PixelChangedMap= 0;
+
+	ppi->ChLocalsAlloc= 0;
+	ppi->ChLocals= 0;
+
+	ppi->yuv_differencesAlloc= 0;
+	ppi->yuv_differences= 0;
+
+
+	return ppi;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VPPInitLibrary
+ *
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Library initialisation entry point.
+ *
+ *  SPECIAL NOTES :     The body is empty; the call currently does nothing. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VPPInitLibrary(void)
+{
+
+}
+
+/*********************************************************/
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VPPDeInitLibrary
+ *
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Library de-initialisation entry point.
+ *
+ *  SPECIAL NOTES :     The body is empty; the call currently does nothing. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VPPDeInitLibrary(void)
+{
+
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c
new file mode 100644
index 00000000..2f02f60c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/generic/preprocif.c
@@ -0,0 +1,252 @@
+/****************************************************************************
+*
+*   Module Title :     PreProcIf.c
+*
+*   Description  :     Pre-processor dll interface module.
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.09 PGW 27 Apr 01 Changes to use last frame coded list passed in from codec.
+*					   Removed code to set Y from UV.
+*   1.08 PGW 28 Feb 01 Removal of history buffer functionality.
+*   1.07 PGW 28 Feb 01 Removal of pre-processor output buffer.
+*   1.06 JBB 03 Aug 00 Added Malloc Checks
+*   1.05 PGW 27 Jul 00 Removed SetVcapParams() plus other housekeeping.
+*   1.04 PGW 10 Jul 00 Removed unused functions GetBlockStats(), BlockChangeVariance() 
+*					   and GetBlockCategories().
+*					   Change interface to YUVAnalyseFrame() to include KF indicator.
+*   1.03 PGW 22/06/00  Removed speed specific code.
+*   1.02 JBB 30/05/00  Removed hard coded size limits
+*   1.01 PGW 12/07/99  Changes to reduce unnecessary dependencies. 
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include <string.h> 
+#include "type_aliases.h"
+#include "preproc.h"
+
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+#define MIN_STEP_THRESH 6
+
+#define VARIANCE_THRESH			200
+#define LOW_VARIANCE_THRESH		100
+#define HIGH_SCORE				400
+
+
+/****************************************************************************
+*  Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/              
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ScanYUVInit
+ *
+ *  INPUTS        :     PP_INSTANCE * ppi
+ *                          Instance to initialise.
+ *                      SCAN_CONFIG_DATA * ScanConfigPtr
+ *                          Configuration data.
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     TRUE, or FALSE if the frame buffers were not allocated.
+ *
+ *  FUNCTION      :     Initialises the scan process. 
+ *
+ *  SPECIAL NOTES :     ppi->HFragPixels and ppi->VFragPixels must be non-zero.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+extern BOOL PAllocateFrameInfo(PP_INSTANCE * ppi);
+BOOL ScanYUVInit( PP_INSTANCE *  ppi, SCAN_CONFIG_DATA * ScanConfigPtr)
+{
+	// Select the machine specific routines first.
+	MachineSpecificConfig( ppi );
+
+	// Copy the caller's configuration into the instance, field by field.
+	ppi->ScanConfig.VideoFrameWidth     = ScanConfigPtr->VideoFrameWidth;
+	ppi->ScanConfig.VideoFrameHeight    = ScanConfigPtr->VideoFrameHeight;
+	ppi->ScanConfig.HFragPixels         = ScanConfigPtr->HFragPixels;
+	ppi->ScanConfig.VFragPixels         = ScanConfigPtr->VFragPixels;
+	ppi->ScanConfig.RegionIndex         = ScanConfigPtr->RegionIndex;
+	ppi->ScanConfig.Yuv0ptr             = ScanConfigPtr->Yuv0ptr;
+	ppi->ScanConfig.Yuv1ptr             = ScanConfigPtr->Yuv1ptr;
+	ppi->ScanConfig.FragInfo            = ScanConfigPtr->FragInfo;
+	ppi->ScanConfig.FragInfoElementSize = ScanConfigPtr->FragInfoElementSize;
+	ppi->ScanConfig.FragInfoCodedMask   = ScanConfigPtr->FragInfoCodedMask;
+
+	// UV plane sizes: half the frame size in each direction.
+	ppi->VideoUVPlaneHeight = ScanConfigPtr->VideoFrameHeight / 2;
+	ppi->VideoUVPlaneWidth  = ScanConfigPtr->VideoFrameWidth / 2;
+
+	// Pixel counts for the Y plane, one UV plane, and all three planes.
+	ppi->YFramePixels   = ppi->ScanConfig.VideoFrameWidth * ppi->ScanConfig.VideoFrameHeight;
+	ppi->UVFramePixels  = ppi->VideoUVPlaneWidth * ppi->VideoUVPlaneHeight;
+	ppi->TotFramePixels = ppi->YFramePixels + (2 * ppi->UVFramePixels);
+
+	// Fragment counts: per row and column, per plane, then for the frame.
+	// The fragment size used is ppi->HFragPixels / ppi->VFragPixels, not
+	// the ScanConfig copy made above.
+	ppi->ScanHFragments       = ppi->ScanConfig.VideoFrameWidth / ppi->HFragPixels;
+	ppi->ScanVFragments       = ppi->ScanConfig.VideoFrameHeight / ppi->VFragPixels;
+	ppi->ScanYPlaneFragments  = ppi->YFramePixels / (ppi->HFragPixels * ppi->VFragPixels);
+	ppi->ScanUVPlaneFragments = ppi->UVFramePixels / (ppi->HFragPixels * ppi->VFragPixels);
+	ppi->ScanFrameFragments   = ppi->ScanYPlaneFragments + (2 * ppi->ScanUVPlaneFragments);
+
+	// The frame buffers are allocated only after the sizes above are known.
+	// The remaining set-up is skipped if that allocation fails.
+	if ( PAllocateFrameInfo(ppi) )
+	{
+		ScanCalcPixelIndexTable(ppi);   // scan pixel index table
+		InitScanMapArrays(ppi);         // scan arrays
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     YUVAnalyseFrame
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi : instance set up by ScanYUVInit().
+ *  OUTPUTS       :     *KFIndicator receives ppi->KFIndicator.
+ *
+ *  RETURNS       :     ppi->OutputBlocksUpdated (a motion level weighting).
+ *
+ *  FUNCTION      :     Scores the fragments for the YUV planes 
+ *
+ *  SPECIAL NOTES :     U and V are analysed before Y. Blocks the input map
+ *                      already flags as coded are marked up front.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 YUVAnalyseFrame( PP_INSTANCE *ppi, UINT32 * KFIndicator ) 
+{  
+	UINT32 i;
+
+	/* Initialise the map arrays. */
+	InitScanMapArrays(ppi);
+
+	/**********  PGW 27/APR/2001 ***********/
+	// 	If the block is already marked as coded in the input block map then 
+	//  mark it as coded here to avoid unnecessary pre-processor work.
+	for ( i = 0; i < ppi->ScanFrameFragments; i++ )
+	{
+		if ( blockCoded(i) )
+			ppi->ScanDisplayFragments[i] = BLOCK_ALREADY_MARKED_FOR_CODING;
+	}
+
+    // Working copies of the high and low SAD thresholds. They are taken
+    // unchanged from the configured values; no motion based adjustment is
+    // applied here.
+    ppi->ModifiedGrpLowSadThresh = ppi->GrpLowSadThresh;
+    ppi->ModifiedGrpHighSadThresh = ppi->GrpHighSadThresh;
+    // testing force every block with any change to get coded
+    //ppi->ModifiedGrpHighSadThresh = 0;
+
+    // Set up the internal plane height and width variables.
+    ppi->VideoYPlaneWidth = ppi->ScanConfig.VideoFrameWidth;
+    ppi->VideoYPlaneHeight = ppi->ScanConfig.VideoFrameHeight;
+	ppi->VideoUVPlaneWidth = ppi->ScanConfig.VideoFrameWidth / 2;
+	ppi->VideoUVPlaneHeight = ppi->ScanConfig.VideoFrameHeight / 2;
+
+    // To start with *** TBD **** the strides will be set from the widths
+    ppi->VideoYPlaneStride = ppi->VideoYPlaneWidth;
+    ppi->VideoUPlaneStride = ppi->VideoUVPlaneWidth;
+    ppi->VideoVPlaneStride = ppi->VideoUVPlaneWidth;
+    
+    // Set up the plane pointers. Each input buffer is addressed as a Y plane
+    // of YFramePixels pixels followed directly by the U plane and then the
+    // V plane, each of UVFramePixels pixels.
+    ppi->YPlanePtr0 = ppi->ScanConfig.Yuv0ptr;
+    ppi->YPlanePtr1 = ppi->ScanConfig.Yuv1ptr;
+    ppi->UPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels);
+    ppi->UPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels);
+    ppi->VPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels + ppi->UVFramePixels);
+    ppi->VPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels + ppi->UVFramePixels);
+
+    // Analyse the U and V planes. 
+    AnalysePlane( ppi, ppi->UPlanePtr0, ppi->UPlanePtr1, ppi->ScanYPlaneFragments, ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight, ppi->VideoUPlaneStride );
+    AnalysePlane( ppi, ppi->VPlanePtr0, ppi->VPlanePtr1, (ppi->ScanYPlaneFragments + ppi->ScanUVPlaneFragments), ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight, ppi->VideoVPlaneStride );
+
+    // Now analyse the Y plane.
+    AnalysePlane( ppi, ppi->YPlanePtr0, ppi->YPlanePtr1, 0, ppi->VideoYPlaneWidth, ppi->VideoYPlaneHeight, ppi->VideoYPlaneStride );
+    
+    // Create an output block map for the calling process. 
+	CreateOutputDisplayMap( ppi, ppi->ScanDisplayFragments);
+	
+	// Set the candidate key frame indicator (0-100)
+	*KFIndicator = ppi->KFIndicator;
+
+	// Return the normalised block count (this is actually a motion level 
+    // weighting not a true block count).
+	return ppi->OutputBlocksUpdated;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     SetScanParam
+ *
+ *  INPUTS        :     PP_INSTANCE *ppi
+ *                          Instance to update.
+ *                      UINT32 ParamId
+ *                          Identifies the parameter.
+ *                      INT32 ParamValue
+ *                          New value for it.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Sets a scan parameter. 
+ *
+ *  SPECIAL NOTES :     Unrecognised parameter ids are silently ignored.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void SetScanParam( PP_INSTANCE *ppi, UINT32 ParamId, INT32 ParamValue ) 
+{
+	// SCP_SET_VCAP_LEVEL_OFFSET is the only id acted upon; every other
+	// value of ParamId falls through and leaves the instance untouched.
+	if ( ParamId == SCP_SET_VCAP_LEVEL_OFFSET )
+	{
+		SetVcapLevelOffset(ppi, ParamValue);
+	}
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h b/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h
new file mode 100644
index 00000000..59a3b1c8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/include/preproc.h
@@ -0,0 +1,343 @@
+/****************************************************************************
+*
+*   Module Title :     preproc.h
+*
+*   Description  :     Content analysis module header
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.08 PGW 27 Apr 01  Removed code to set Y from UV
+*   1.07 PGW 28 Feb 01  Removal of history buffer functionality.
+*   1.06 PGW 04 Oct 00  Added CANDIDATE_BLOCK_LOW.
+*   1.05 PGW 24 Jul 00  Added Column SAD functions. Deleted BeThreshold.
+*   1.04 PGW 13 Jul 00	Added BLOCK_CODED_LOW. Deleted BLOCK_CODED_EXTRA.
+*   1.03 PGW 10 Jul 00  Added lookup tables to reduce number of conditionals 
+*						in RowDiffScan(). Removed old "ifdef 0"ed code.
+*						Added KFIndicator.
+*   1.02 JBB 30/05/00   Removed hard coded size limits
+*   1.01 YX  06/04/00   Added XMMEnabled for optimizations
+*   1.00 PGW 16/06/96   Configuration baseline.
+*
+*****************************************************************************
+*/						
+
+#include "preprocconf.h"
+#include "type_aliases.h"
+#include "preprocif.h"
+
+/* Constants. */
+#define OUTPUT_BLOCK_HEIGHT		8
+#define OUTPUT_BLOCK_WIDTH		8
+
+#define INTERNAL_BLOCK_HEIGHT   8
+#define INTERNAL_BLOCK_WIDTH	8
+
+#define FILTER_BLOCK_SIZE		(INTERNAL_BLOCK_WIDTH * INTERNAL_BLOCK_HEIGHT)
+
+/* NEW Line search values. */ 
+#define UP      0
+#define DOWN    1
+#define LEFT    2
+#define RIGHT   3
+
+/* Low Pass Filter levels. */
+#define NO_LOW_PASS         0
+#define VERY_LOW_LOW_PASS   1               
+#define LOW_LOW_PASS        2               
+#define MODERATE_LOW_PASS   5               
+#define HIGH_LOW_PASS       7               
+#define VERY_HIGH_LOW_PASS  9    
+
+#define FIRST_ROW           0
+#define NOT_EDGE_ROW        1
+#define LAST_ROW            2      
+
+#define YDIFF_CB_ROWS			(INTERNAL_BLOCK_HEIGHT * 3)
+#define CHLOCALS_CB_ROWS		(INTERNAL_BLOCK_HEIGHT * 3)
+#define PMAP_CB_ROWS			(INTERNAL_BLOCK_HEIGHT * 3)
+#define FRAG_PIXEL_DIFF_ROWS	(INTERNAL_BLOCK_HEIGHT * 3)
+#define PSCORE_CB_ROWS			(INTERNAL_BLOCK_HEIGHT * 4)
+// Parenthesised, like the *_CIRC_BUFFER_SIZE macros below, so the product stays intact inside larger expressions.
+#define PIXEL_SCORES_BUFFER_SIZE	(SCAN_MAX_LINE_LENGTH * PSCORE_CB_ROWS)
+
+#define YUV_DIFFS_CIRC_BUFFER_SIZE	(SCAN_MAX_LINE_LENGTH * YDIFF_CB_ROWS)
+#define CH_LOCALS_CIRC_BUFFER_SIZE	(SCAN_MAX_LINE_LENGTH * CHLOCALS_CB_ROWS)
+#define PIXEL_MAP_CIRC_BUFFER_SIZE  (SCAN_MAX_LINE_LENGTH * PMAP_CB_ROWS)
+
+// Status values in block coding map
+#define CANDIDATE_BLOCK_LOW					-2
+#define CANDIDATE_BLOCK						-1
+#define BLOCK_NOT_CODED						0
+#define BLOCK_CODED_BAR 					3		
+#define BLOCK_ALREADY_MARKED_FOR_CODING		4
+#define BLOCK_CODED_SGC						4	
+#define BLOCK_CODED_LOW						4	
+#define BLOCK_CODED 						5	
+
+#define MAX_PREV_FRAMES             16
+#define MAX_SEARCH_LINE_LEN 7   
+
+/******************************************************************/
+/* Type definitions. */
+/******************************************************************/
+#define blockCoded(i) (ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]&ppi->ScanConfig.FragInfoCodedMask)
+#define setBlockCoded(i) ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]|=ppi->ScanConfig.FragInfoCodedMask;
+#define setBlockUncoded(i) ppi->ScanConfig.FragInfo[(i)*ppi->ScanConfig.FragInfoElementSize]&=(~ppi->ScanConfig.FragInfoCodedMask);
+
+typedef struct PP_INSTANCE
+{    // State held by one instance of the content analysis module.
+     UINT32 *ScanPixelIndexTableAlloc;		
+     INT8   *ScanDisplayFragmentsAlloc;
+
+     UINT32 *FragScoresAlloc;               // The individual frame difference ratings.    
+     INT8   *SameGreyDirPixelsAlloc;
+     INT8   *BarBlockMapAlloc;
+
+     // Number of pixels changed by diff threshold in row of a fragment. 
+     UINT8  *FragDiffPixelsAlloc;  
+
+     UINT8  *PixelScoresAlloc;  
+     UINT8  *PixelChangedMapAlloc;
+     UINT8  *ChLocalsAlloc;
+     INT16  *yuv_differencesAlloc;  
+     INT32  *RowChangedPixelsAlloc;
+	 INT8   *TmpCodedMapAlloc;
+     // NOTE(review): each pointer below has an ...Alloc twin above; presumably the twin is the raw allocation - confirm.
+     UINT32 *ScanPixelIndexTable;		// Indexed by fragment number (see ScanGetFragIndex).
+     INT8   *ScanDisplayFragments;      // Passed to CreateOutputDisplayMap() as the internal fragment map.
+
+     UINT32 *FragScores;               // The individual frame difference ratings.    
+     INT8   *SameGreyDirPixels;
+     INT8   *BarBlockMap;
+
+     // Number of pixels changed by diff threshold in row of a fragment. 
+     UINT8  *FragDiffPixels;  
+
+     UINT8  *PixelScores;  
+     UINT8  *PixelChangedMap;
+     UINT8  *ChLocals;
+     INT16  *yuv_differences;  
+     INT32  *RowChangedPixels;
+	 INT8   *TmpCodedMap;
+
+     // Plane pointers and dimension variables
+     UINT8 * YPlanePtr0;
+     UINT8 * YPlanePtr1;
+     UINT8 * UPlanePtr0;
+     UINT8 * UPlanePtr1;
+     UINT8 * VPlanePtr0;                // ScanConfig.Yuv0ptr + YFramePixels + UVFramePixels
+     UINT8 * VPlanePtr1;                // ScanConfig.Yuv1ptr + YFramePixels + UVFramePixels
+
+     UINT32  VideoYPlaneWidth;
+     UINT32  VideoYPlaneHeight;
+     UINT32  VideoUVPlaneWidth;
+     UINT32  VideoUVPlaneHeight;
+
+     UINT32  VideoYPlaneStride;
+     UINT32  VideoUPlaneStride;
+     UINT32  VideoVPlaneStride;
+
+/* Scan control variables. */
+     UINT8   HFragPixels;
+     UINT8   VFragPixels;
+
+     UINT32  ScanFrameFragments;
+     UINT32  ScanYPlaneFragments;       // Also used as the fragment array offset of the U plane.
+     UINT32  ScanUVPlaneFragments;      // Added to ScanYPlaneFragments for the V plane offset.
+     UINT32  ScanHFragments;
+     UINT32  ScanVFragments;
+
+     UINT32  YFramePixels; 
+     UINT32  UVFramePixels; 
+     UINT32  TotFramePixels;
+
+     BOOL	   SgcOnOff;
+
+     UINT32  SgcThresh;
+
+     UINT32  OutputBlocksUpdated;       // Returned to the caller: a motion level weighting, not a true block count.
+	 UINT32  KFIndicator;               // Candidate key frame indicator (0-100).
+
+     BOOL	   ScanSRF_Enabled;
+
+/* The VCAP scan configuration. */
+     SCAN_CONFIG_DATA ScanConfig;
+
+     BOOL    VcapOn;
+
+     INT32 SRFGreyThresh;
+     INT32 SRFColThresh;
+     INT32 SgcLevelThresh;
+     INT32 SuvcLevelThresh;
+
+     INT32 SRFGreyThreshOffset;
+     INT32 SRFColThreshOffset;
+     INT32 SgcLevelThreshOffset;
+     INT32 SuvcLevelThreshOffset;
+
+     UINT32 NoiseSupLevel;
+
+	/* Block Thresholds. */
+     UINT32 PrimaryBlockThreshold;
+
+     INT32  SRFLevel;
+     INT32  SRFLevelOffset;
+
+     BOOL   PAKEnabled;
+
+     BOOL   EBO_Enabled;
+     BOOL   CategorisationEnabled;
+
+     int    LevelThresh; 
+     int    NegLevelThresh; 
+     int    SrfThresh;
+     int    NegSrfThresh;
+     int    HighChange;
+     int    NegHighChange;     
+
+     // Threshold lookup tables
+	 UINT8 SrfPakThreshTable[512];
+	 UINT8 * SrfPakThreshTablePtr;
+	 UINT8 SrfThreshTable[512];
+	 UINT8 * SrfThreshTablePtr;
+	 UINT8 SgcThreshTable[512];
+	 UINT8 * SgcThreshTablePtr;
+
+     // Variables controlling S.A.D. break outs.
+     UINT32 GrpLowSadThresh;
+     UINT32 GrpHighSadThresh;
+     UINT32 ModifiedGrpLowSadThresh;
+     UINT32 ModifiedGrpHighSadThresh;
+
+     INT32  PlaneHFragments;
+     INT32  PlaneVFragments;
+     INT32  PlaneHeight;
+     INT32  PlaneWidth;
+     INT32  PlaneStride;
+
+     UINT32 BlockThreshold;
+     UINT32 BlockSgcThresh;
+     double UVBlockThreshCorrection;
+     double UVSgcCorrection;
+
+     UINT32 SpeedCritical;
+
+// Live test harness specific.
+
+// PC specific variables
+	BOOL  MmxEnabled;
+	BOOL  XmmEnabled;
+
+	double YUVPlaneCorrectionFactor;	
+	double AbsDiff_ScoreMultiplierTable[256];
+	UINT8  NoiseScoreBoostTable[256];
+	UINT8  MaxLineSearchLen;
+
+	INT32 YuvDiffsCircularBufferSize;
+	INT32 ChLocalsCircularBufferSize;
+	INT32 PixelMapCircularBufferSize;
+
+	// Temp stats variable
+	UINT32 TotBlocksUpdated;
+
+	// Function pointers for mmx switches
+	UINT32 (*RowSAD)(UINT8 *, UINT8 * );            
+	UINT32 (*ColSAD)(xPP_INST ppi, UINT8 *, UINT8 * );            
+
+} PP_INSTANCE;
+
+/******************************************************************/
+/* Function prototypes. */
+/******************************************************************/
+
+// Returns the ScanPixelIndexTable entry held for fragment FragmentNo.
+INLINE UINT32 ScanGetFragIndex( PP_INSTANCE *ppi, UINT32 FragmentNo )
+{
+    return *( ppi->ScanPixelIndexTable + FragmentNo );
+}
+
+
+extern void InitScanMapArrays
+(
+ PP_INSTANCE *ppi
+);
+
+extern void AnalysePlane
+(
+ PP_INSTANCE *ppi, UINT8 * PlanePtr0, UINT8 * PlanePtr1, UINT32 FragArrayOffset, UINT32 PWidth, UINT32 PHeight, UINT32 PStride 
+);
+
+extern void ScanCalcPixelIndexTable
+(
+ PP_INSTANCE *ppi
+);
+
+extern void CreateOutputDisplayMap
+(
+ PP_INSTANCE *ppi, 
+ INT8		 *InternalFragmentsPtr
+);
+
+extern void SetVcapLevelOffset
+(
+ PP_INSTANCE *ppi, INT32 LevelOffset 
+);
+
+//  Analysis functions
+extern void RowBarEnhBlockMap
+(
+ PP_INSTANCE *ppi, 
+ UINT32 * FragScorePtr, 
+ INT8   * FragSgcPtr,
+ INT8   * UpdatedBlockMapPtr,
+ INT8   * BarBlockMapPtr,
+ UINT32 RowNumber 
+);
+
+extern void BarCopyBack
+(
+ PP_INSTANCE *ppi, 
+ INT8  * UpdatedBlockMapPtr,
+ INT8  * BarBlockMapPtr 
+);
+
+// Secondary filter functions
+extern UINT8 ApplyLowPass
+(
+ PP_INSTANCE *ppi, UINT8 * SrcPtr, UINT32 PlaneLineLength, INT32 Level 
+);
+
+// PC specific functions
+extern void MachineSpecificConfig
+(
+ 
+);
+extern void ClearMmx
+(
+ PP_INSTANCE *ppi
+);
+
+extern UINT32 ScalarRowSAD
+(
+ UINT8 * Src1, UINT8 * Src2 
+);
+extern UINT32 ScalarColSAD
+(
+ PP_INSTANCE *ppi, UINT8 * Src1, UINT8 * Src2 
+);
+
+extern PP_INSTANCE * CreatePPInstance
+(
+ void
+);
+extern void DeletePPInstance
+(
+ PP_INSTANCE **ppi
+);
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h b/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h
new file mode 100644
index 00000000..d75980c7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/pp/include/preprocconf.h
@@ -0,0 +1,17 @@
+/****************************************************************************
+*
+*   Module Title :     PreProcConf.H
+*
+*   Description  :     Content analysis module configuration header
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 PGW 11/10/98  Header to control different configurations
+*
+*****************************************************************************
+*/						
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/Makefile b/Src/libvpShared/corelibs/cdxv/preproc/Makefile
new file mode 100644
index 00000000..88ff6918
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/Makefile
@@ -0,0 +1,54 @@
+## Target to build
+
+TARGET 		=libpreproc
+
+## TOOLS
+CC      	= ecc
+LD      	= ecc
+AR      	= ar
+OBJDUMP 	= objdump
+RM      	= rm -f
+
+## Directories
+TOPDIR  		=C:\DuckSoft
+PRIVATEINCLUDE  =${TOPDIR}\private\include
+PRIVATEINCLUDE2 =${TOPDIR}\private\include\vp60
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include 
+
+
+CURRENTDIR 		=${TOPDIR}\private\corelibs\cdxv\preproc
+LIBDIR			=${TOPDIR}\private\corelibs\lib\mapca 
+
+## Compile Flags
+ALLINCLUDES     =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${PRIVATEINCLUDE2} 
+VP6DEFINES		=-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES      =-DMAPCA
+ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
+DEBUG			=-O2
+CFLAGS 			=-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+				-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+				-magen_interroutine_padding
+ALLFLAGS 		= $(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files
+OBJS		= 	preproc.o	 \
+
+SRCS		= $(OBJS:.o=.c)
+
+ARTARGET	= ${TARGET}.a
+
+# archive
+
+ARTARGET:${OBJS}
+	${AR} -cr ${ARTARGET} ${OBJS}
+	mv ${ARTARGET} ${LIBDIR}
+
+${OBJS} : ${SRCS}
+	$(CC) $(ALLFLAGS) -c $*.c -o $*.o
+
+clean:
+	${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.c b/Src/libvpShared/corelibs/cdxv/preproc/preproc.c
new file mode 100644
index 00000000..46e8e3bd
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.c
@@ -0,0 +1,693 @@
+/****************************************************************************
+*
+*   Module Title :     preproc.c
+*
+*   Description  :     Simple pre-processor.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+
+#include <stddef.h>
+#include "memory.h"
+#include "preproc.h"
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define FRAMECOUNT 7	// History values kept per byte by the temporal filters.
+#define ROUNDUP32(X) ( ( ( (size_t) (X) ) + 31 ) & ~( (size_t) 31 ) )	// Next multiple of 32, computed at full pointer width.
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern void GetProcessorFlags (int *MmxEnabled, int *XmmEnabled, int *WmtEnabled );
+
+/****************************************************************************
+*  Exported Global Variables
+****************************************************************************/
+void (*tempFilter)( PreProcInstance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength );
+
+#ifndef MAPCA
+/****************************************************************************
+ *
+ *  ROUTINE       : spatialFilter_wmt
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  unsigned char *s     : Pointer to source frame.
+ *                  unsigned char *d     : Pointer to destination frame.
+ *                  int width            : Width of images.
+ *	                int height           : Height of images.
+ *                  int pitch            : Stride of images.
+ *	                int strength         : Strength of filter to apply.
+ *
+ *  OUTPUTS       : d receives the filtered image.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs a closeness adjusted spatial blur over each
+ *                  pixel's 3x3 neighbourhood, eight pixels per pass.
+ *  SPECIAL NOTES : First/last rows and columns are copied unfiltered. The
+ *                  8-byte loads may still read past column width-1.
+ ****************************************************************************/
+void spatialFilter_wmt
+(
+	PreProcInstance *ppi,
+	unsigned char *s,
+	unsigned char *d,
+	int width,
+	int height,
+	int pitch,
+	int strength
+)
+{
+	int i;
+	int row = 1;
+	int PixelOffsets[] = 
+	{ 
+		-pitch-1,	-pitch,	-pitch+1,
+			  -1,		 0,       +1,
+		 pitch-1,	 pitch,	 pitch+1
+	};
+	unsigned char *frameptr = ppi->frameBuffer;
+	
+    __declspec(align(16)) unsigned short threes[]  = { 3, 3, 3, 3, 3, 3, 3, 3};
+	__declspec(align(16)) unsigned short sixteens[]= {16,16,16,16,16,16,16,16};
+
+	memcpy ( d, s, width );
+	
+    d += pitch;
+	s += pitch;
+	
+    do
+	{
+		// NOTE: By doing it this way I am ensuring that pixels will always be unaligned!!!
+		int col = 1;
+		d[0] = s[0];
+		d[width - 1] = s[width - 1];
+		do
+		{
+			__declspec(align(16)) unsigned short counts[8];
+			__declspec(align(16)) unsigned short sums[8];
+			_asm 
+			{
+				mov			esi, s					// get the source line 
+				add         esi, col				// add the column offset 
+				pxor		xmm1,xmm1				// accumulator
+				pxor		xmm2,xmm2				// count 
+				pxor        xmm7,xmm7				// 0s for use with unpack
+
+		        movq		xmm3, QWORD PTR [esi]   // get 8 pixels
+				punpcklbw   xmm3, xmm7				// unpack to shorts
+				xor			eax, eax				// neighbor iterator
+
+NextNeighbor:
+				mov			ecx, [PixelOffsets+eax*4] // get eax index pixel neighbor offset
+				movq        xmm4, QWORD PTR [esi + ecx]  // get ecx index neighbor values
+				punpcklbw   xmm4, xmm7				// xmm4 unpacked neighbor values
+				movdqa      xmm6, xmm4              // save the pixel values
+				psubsw      xmm4, xmm3              // subtracted pixel values
+				pmullw		xmm4, xmm4				// square xmm4 
+				movd        xmm5, strength
+				psrlw       xmm4, xmm5				// should be strength
+				pmullw      xmm4, threes			// 3 * modifier
+				movdqa		xmm5, sixteens			// 16s
+				psubusw     xmm5, xmm4				// 16 - modifiers
+				movdqa		xmm4, xmm5				// save the modifiers
+				pmullw      xmm4, xmm6				// multiplier values
+				paddusw     xmm1, xmm4              // accumulator
+				paddusw     xmm2, xmm5              // count
+				inc         eax						// next neighbor
+				cmp			eax,9					// there are nine neigbors
+				jne         NextNeighbor
+
+				movdqa      counts, xmm2
+				psrlw       xmm2,1                  // divide count by 2 for rounding
+				paddusw     xmm1,xmm2				// rounding added in
+
+				mov			frameptr,esi
+
+				movdqa      sums, xmm1
+			}
+			// Write back interior columns only: the last group of eight must not touch d[width-1] or beyond.
+			for ( i=0; i<8 && col+i<width-1; i++ )
+			{
+				int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
+				blurvalue >>= 16;
+				d[col+i] = blurvalue;
+			}
+			col += 8;
+
+		} while ( col<width-1 );
+
+		d += pitch;
+		s += pitch;
+		++row;
+    } while ( row<height-1 );
+
+	memcpy ( d, s, width );
+	__asm emms
+}
+#endif
+/****************************************************************************
+ *
+ *  ROUTINE       : tempFilter_c
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  unsigned char *s     : Pointer to source frame.
+ *                  unsigned char *d     : Pointer to destination frame.
+ *                  int bytes            : Number of bytes to filter.
+ *	                int strength         : Strength of filter to apply.
+ *
+ *  OUTPUTS       : d receives the filtered bytes; ppi->frameBuffer and
+ *                  ppi->frame are updated.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs a closeness adjusted temporal blur: each byte
+ *                  is blended with its FRAMECOUNT stored history values.
+ *  SPECIAL NOTES : Destination frame can be same as source frame.
+ *                  Nothing is read or written when bytes <= 0.
+ ****************************************************************************/
+void tempFilter_c
+(
+	PreProcInstance *ppi,
+	unsigned char *s,
+	unsigned char *d,
+	int bytes,
+	int strength
+)
+{
+	int byte;
+	int frame;
+
+	// The history buffer holds FRAMECOUNT consecutive values for byte 0,
+	// then FRAMECOUNT for byte 1, and so on; frameptr walks it in step
+	// with the byte loop.
+	unsigned char *frameptr = ppi->frameBuffer;
+
+	if ( ppi->frame == 0 )
+	{
+		// First frame: there is no history yet, so every history slot is
+		// seeded with the current value and the input passes through.
+		for ( byte = 0; byte < bytes; byte++ )
+		{
+			for ( frame = 0; frame < FRAMECOUNT; frame++ )
+			{
+				*frameptr++ = s[byte];
+			}
+
+			d[byte] = s[byte];
+		}
+	}
+	else
+	{
+		// Slot replaced by this frame; it cycles through all FRAMECOUNT
+		// slots, so the oldest value is always the one overwritten.
+		int offset = (ppi->frame % FRAMECOUNT);
+
+		for ( byte = 0; byte < bytes; byte++ )
+		{
+			int accumulator = 0;
+			int count = 0;
+
+			frameptr[offset] = s[byte];
+
+			for ( frame = 0; frame < FRAMECOUNT; frame++ )
+			{
+				int pixelValue = *frameptr++;
+				int modifier   = s[byte] - pixelValue;
+
+				// Weight is 16 for an identical value and drops to 0 as
+				// the squared difference (scaled by strength) grows.
+				modifier  *= modifier;
+				modifier >>= strength;
+				modifier  *= 3;
+
+				if ( modifier > 16 )
+					modifier = 16;
+
+				modifier = 16 - modifier;
+
+				accumulator += modifier * pixelValue;
+				count       += modifier;
+			}
+
+			// Rounded weighted mean: multiply by the 16.16 reciprocal of
+			// count (the slot just written guarantees count >= 16).
+			accumulator += (count >> 1);
+			accumulator *= ppi->fixedDivide[count];
+			accumulator >>= 16;
+
+			d[byte] = accumulator;
+		}
+	}
+
+	// The frame counter advances even when bytes <= 0, as it did before.
+	++ppi->frame;
+}
+#ifndef MAPCA
+/****************************************************************************
+ *
+ *  ROUTINE       : tempFilter_wmt
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  unsigned char *s     : Pointer to source frame.
+ *                  unsigned char *d     : Pointer to destination frame.
+ *                  int bytes            : Number of bytes to filter.
+ *	                int strength         : Strength of filter to apply.
+ *
+ *  OUTPUTS       : d receives the filtered bytes; ppi->frameBuffer and
+ *                  ppi->frame are updated.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs a closeness adjusted temporal blur, eight bytes
+ *                  per pass, in inline assembly using the xmm registers.
+ *  SPECIAL NOTES : Destination frame can be same as source frame.
+ *                  NOTE(review): presumably bytes is a multiple of 8 - confirm.
+ ****************************************************************************/
+void tempFilter_wmt
+(
+	PreProcInstance *ppi,
+	unsigned char *s,
+	unsigned char *d,
+	int bytes,
+	int strength
+)
+{
+	int byte = 0;
+	unsigned char * frameptr = ppi->frameBuffer;
+	// Per-lane constants for the eight 16-bit lanes used by the assembly below.
+    __declspec(align(16)) unsigned short threes[]  ={ 3, 3, 3, 3, 3, 3, 3, 3};
+	__declspec(align(16)) unsigned short sixteens[]={16,16,16,16,16,16,16,16};
+
+	if ( ppi->frame == 0 )
+	{
+		do
+		{
+			int i;
+			int frame = 0;
+			// First frame: seed all FRAMECOUNT 8-byte history slots of this group with the input.
+			do
+			{
+			    for ( i=0; i<8; i++ )
+				{
+					*frameptr = s[byte+i];
+					++frameptr;
+				}
+				++frame;
+			} while ( frame < FRAMECOUNT );
+			// The first frame itself is passed through unfiltered.
+		    for ( i=0; i<8; i++ )
+				d[byte+i] = s[byte+i];
+
+			byte += 8;
+			
+		} while ( byte < bytes );
+    }
+	else
+	{
+		int i;
+		int offset2 = (ppi->frame % FRAMECOUNT);	// history slot overwritten by this frame
+		
+        do
+		{
+			__declspec(align(16)) unsigned short counts[8];
+			__declspec(align(16)) unsigned short sums[8];
+			int accumulator = 0;	// not referenced below
+			int count = 0;			// not referenced below
+			int frame = 0;			// not referenced below
+			_asm 
+			{
+        		mov         eax,offset2	
+				mov			edi,s					// source pixels
+				pxor		xmm1,xmm1				// accumulator
+
+				pxor        xmm7,xmm7				// zeros for unpacking bytes to words
+
+				mov         esi,frameptr			// history slots for this group of 8 bytes
+				pxor		xmm2,xmm2				// count 
+
+		        movq		xmm3, QWORD PTR [edi]       
+				// store the new pixels into history slot offset2 before blending
+				movq		QWORD PTR [esi+8*eax],xmm3					
+
+				punpcklbw   xmm3, xmm2				// xmm3 source pixels
+				mov			ecx,  FRAMECOUNT
+
+NextFrame:
+				movq        xmm4, QWORD PTR [esi]   // get frame buffer values
+				punpcklbw   xmm4, xmm7				// xmm4 frame buffer pixels
+				movdqa      xmm6, xmm4              // save the pixel values
+				psubsw      xmm4, xmm3              // subtracted pixel values
+				pmullw		xmm4, xmm4				// square xmm4 
+				movd        xmm5, strength
+				psrlw       xmm4, xmm5				// shift right by strength
+				pmullw      xmm4, threes			// 3 * modifier
+				movdqa		xmm5, sixteens			// 16s
+				psubusw     xmm5, xmm4				// 16 - modifiers
+				movdqa		xmm4, xmm5				// save the modifiers
+				pmullw      xmm4, xmm6				// multiplier values
+				paddusw     xmm1, xmm4              // accumulator
+				paddusw     xmm2, xmm5              // count
+				add         esi, 8					// next frame
+				dec         ecx						// one fewer history frame to visit
+				jnz         NextFrame
+
+				movdqa      counts, xmm2
+				psrlw       xmm2,1                  // divide count by 2 for rounding
+				paddusw     xmm1,xmm2				// rounding added in
+
+				mov			frameptr,esi
+
+				movdqa      sums, xmm1
+			}
+			// Divide each lane's sum by its count using the 0x10000/n reciprocal table.
+			for ( i=0; i<8; i++ )
+			{
+				int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
+				blurvalue >>= 16;
+				d[i] = blurvalue;
+			}
+			s += 8;
+			d += 8;
+			byte += 8;
+		} while ( byte < bytes );
+	}
+	++ppi->frame;
+	__asm emms
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : tempFilter_mmx
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  unsigned char *s     : Pointer to source frame.
+ *                  unsigned char *d     : Pointer to destination frame.
+ *                  int bytes            : Number of bytes to filter.
+ *	                int strength         : Strength of filter to apply.
+ *
+ *  OUTPUTS       : d receives the filtered bytes; ppi->frameBuffer and
+ *                  ppi->frame are updated.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs a closeness adjusted temporal blur, four bytes
+ *                  per pass, in inline assembly using the mm registers.
+ *  SPECIAL NOTES : Destination frame can be same as source frame.
+ *                  NOTE(review): presumably bytes is a multiple of 4 - confirm.
+ ****************************************************************************/
+void tempFilter_mmx
+(
+	PreProcInstance *ppi,
+	unsigned char *s,
+	unsigned char *d,
+	int bytes,
+	int strength
+)
+{
+	int byte = 0;
+	unsigned char *frameptr = ppi->frameBuffer;
+	// Per-lane constants for the four 16-bit lanes used by the assembly below.
+    __declspec(align(16)) unsigned short threes[]  ={ 3, 3, 3, 3};
+	__declspec(align(16)) unsigned short sixteens[]={16,16,16,16};
+
+	if ( ppi->frame == 0 )
+	{
+		do
+		{
+			int i;
+			int frame = 0;
+			// First frame: seed all FRAMECOUNT 4-byte history slots of this group with the input.
+			do
+			{
+			    for ( i=0; i<4; i++ )
+				{
+					*frameptr = s[byte+i];
+					++frameptr;
+				}
+				++frame;
+			} while ( frame < FRAMECOUNT );
+			// The first frame itself is passed through unfiltered.
+		    for ( i=0; i<4; i++ )
+				d[byte+i] = s[byte+i];
+
+			byte += 4;
+			
+		} while ( byte < bytes );
+	}
+	else
+	{
+		int i;
+		int offset2 = (ppi->frame % FRAMECOUNT);	// history slot overwritten by this frame
+		do
+		{
+			__declspec(align(16)) unsigned short counts[8];	// only the first four entries are written and read
+			__declspec(align(16)) unsigned short sums[8];	// only the first four entries are written and read
+			int accumulator = 0;	// not referenced below
+			int count = 0;			// not referenced below
+			int frame = 0;			// not referenced below
+			_asm 
+			{
+
+				mov         eax,offset2	
+				mov			edi,s					// source pixels
+				pxor		mm1,mm1				    // accumulator
+				pxor        mm7,mm7				    // zeros for unpacking bytes to words
+
+				mov         esi,frameptr			// history slots for this group of 4 bytes
+				pxor		mm2,mm2				    // count 
+
+		        movd		mm3, DWORD PTR [edi]       
+				movd		DWORD PTR [esi+4*eax],mm3	// store the new pixels into history slot offset2
+
+				punpcklbw   mm3, mm2				// mm3 source pixels
+				mov			ecx,  FRAMECOUNT
+
+NextFrame:
+				movd        mm4, DWORD PTR [esi]    // get frame buffer values
+				punpcklbw   mm4, mm7				// mm4 frame buffer pixels
+				movq	    mm6, mm4                // save the pixel values
+				psubsw      mm4, mm3                // subtracted pixel values
+				pmullw		mm4, mm4				// square mm4 
+				movd        mm5, strength
+				psrlw       mm4, mm5				// shift right by strength
+				pmullw      mm4, threes			    // 3 * modifier
+				movq		mm5, sixteens			// 16s
+				psubusw     mm5, mm4				// 16 - modifiers
+				movq		mm4, mm5				// save the modifiers
+				pmullw      mm4, mm6				// multiplier values
+				paddusw     mm1, mm4                // accumulator
+				paddusw     mm2, mm5                // count
+				add         esi, 4					// next frame
+				dec         ecx						// one fewer history frame to visit
+				jnz         NextFrame
+
+				movq        counts, mm2
+				psrlw       mm2,1                   // divide count by 2 for rounding
+				paddusw     mm1,mm2				    // rounding added in
+
+				mov			frameptr,esi
+
+				movq        sums, mm1
+
+			}
+			// Divide each lane's sum by its count using the 0x10000/n reciprocal table.
+			for ( i=0; i<4; i++ )
+			{
+				int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
+				blurvalue >>= 16;
+				d[i] = blurvalue;
+			}
+			s += 4;
+			d += 4;
+			byte += 4;
+		} while ( byte < bytes );
+	}
+	++ppi->frame;
+	__asm emms
+}
+#endif
+/****************************************************************************
+ *
+ *  ROUTINE       : DeletePreProc
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *
+ *  OUTPUTS       : The four buffer pointers in ppi are cleared to 0.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Frees the buffers owned by a pre-processing instance.
+ *
+ *  SPECIAL NOTES : The PreProcInstance structure itself is not freed.
+ *
+ ****************************************************************************/
+void DeletePreProc ( PreProcInstance *ppi )
+{
+    // Frame history: release the raw block, then clear both pointers to it.
+    if ( ppi->frameBufferAlloc != 0 ) duck_free ( ppi->frameBufferAlloc );
+    ppi->frameBufferAlloc = 0;
+    ppi->frameBuffer      = 0;
+
+    // Reciprocal table: same treatment.
+    if ( ppi->fixedDivideAlloc != 0 ) duck_free ( ppi->fixedDivideAlloc );
+    ppi->fixedDivideAlloc = 0;
+    ppi->fixedDivide      = 0;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : InitPreProc
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  int FrameSize        : Number of bytes in one frame.
+ *
+ *  OUTPUTS       : ppi's frame and divide buffers; the global tempFilter.
+ *
+ *  RETURNS       : int: 1 if successful, 0 if failed.
+ *
+ *  FUNCTION      : Initializes pre-processor instance: selects the
+ *                  temporal filter and allocates the working buffers.
+ *  SPECIAL NOTES : Existing buffers are released via DeletePreProc first.
+ *                  ppi->frame is not reset here.
+ ****************************************************************************/
+int InitPreProc ( PreProcInstance *ppi, int FrameSize )
+{
+	int i;
+	int MmxEnabled;
+	int XmmEnabled; 
+	int WmtEnabled;
+#ifndef MAPCA
+	GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
+
+	if ( WmtEnabled )
+		tempFilter = tempFilter_wmt;
+	else if ( MmxEnabled )
+		tempFilter = tempFilter_mmx;
+	else 
+#endif
+		tempFilter = tempFilter_c;
+
+	DeletePreProc ( ppi );
+	// FRAMECOUNT history bytes per frame byte, plus 32 bytes of slack for the 32-byte alignment.
+	ppi->frameBufferAlloc = duck_malloc ( 32+FrameSize*FRAMECOUNT*sizeof(unsigned char), DMEM_GENERAL );
+    if ( !ppi->frameBufferAlloc ) { DeletePreProc( ppi ); return 0; }
+    ppi->frameBuffer = (unsigned char *) ROUNDUP32( ppi->frameBufferAlloc );
+
+	ppi->fixedDivideAlloc = duck_malloc ( 32+255*sizeof(unsigned int), DMEM_GENERAL );
+    if ( !ppi->fixedDivideAlloc ) { DeletePreProc( ppi ); return 0; }
+    ppi->fixedDivide = (unsigned int *) ROUNDUP32( ppi->fixedDivideAlloc );
+	ppi->fixedDivide[0] = 0;	// 0 has no reciprocal; give the entry a defined value.
+	for ( i=1; i<255; i++ )
+		ppi->fixedDivide[i] = 0x10000 / i;
+	return 1;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : spatialFilter_c
+ *
+ *  INPUTS        : PreProcInstance *ppi : Pointer to pre-processor instance.
+ *                  unsigned char *s     : Pointer to source frame.
+ *                  unsigned char *d     : Pointer to destination frame.
+ *                  int width            : Width of images.
+ *	                int height           : Height of images.
+ *                  int pitch            : Stride of images.
+ *	                int strength         : Strength of filter to apply.
+ *
+ *  OUTPUTS       : d receives the filtered image.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs a closeness adjusted spatial blur over each
+ *                  pixel's 3x3 neighbourhood.
+ *  SPECIAL NOTES : First/last rows and columns are copied unfiltered. With
+ *                  d == s later pixels see already filtered neighbours.
+ ****************************************************************************/
+void spatialFilter_c
+(
+	PreProcInstance *ppi,
+	unsigned char *s,
+	unsigned char *d,
+	int width,
+	int height,
+	int pitch,
+	int strength
+)
+{
+	int row;
+	int col;
+	int neighbor;
+	int PixelOffsets[9];
+
+	// An empty image has no rows to copy or filter.
+	if ( width <= 0 || height <= 0 )
+		return;
+
+	// Offsets from a pixel to the nine members of its 3x3 neighbourhood;
+	// entry 4 is the pixel itself.
+	PixelOffsets[0] = -pitch - 1;
+	PixelOffsets[1] = -pitch;
+	PixelOffsets[2] = -pitch + 1;
+	PixelOffsets[3] =		 - 1;
+	PixelOffsets[4] =		   0;
+	PixelOffsets[5] =		 + 1;
+	PixelOffsets[6] =  pitch - 1;
+	PixelOffsets[7] =  pitch    ;
+	PixelOffsets[8] =  pitch + 1;
+
+	// The top row has no row above it, so it is copied unfiltered.
+	memcpy ( d, s, width );
+
+	d += pitch;
+	s += pitch;
+
+	// Interior rows only; with fewer than three rows this loop is skipped.
+	for ( row = 1; row < height-1; row++ )
+	{
+		// Left and right edge pixels are copied unfiltered.
+		d[0] = s[0];
+		d[width - 1] = s[width - 1];
+
+		for ( col = 1; col < width-1; col++ )
+		{
+			int accumulator = 0;
+			int count = 0;
+
+			for ( neighbor = 0; neighbor < 9; neighbor++ )
+			{
+				int pixelValue = s[ col + PixelOffsets[neighbor] ];
+				int modifier   = s[col] - pixelValue;
+
+				// Weight is 16 for an identical value, falling to 0 as the squared difference grows.
+				modifier  *= modifier;
+				modifier >>= strength;
+				modifier  *= 3;
+
+				if ( modifier > 16 )
+					modifier = 16;
+
+				modifier = 16 - modifier;
+
+				accumulator += modifier * pixelValue;
+				count       += modifier;
+			}
+
+			// Rounded weighted mean: multiply by the 16.16 reciprocal of
+			// count (the centre pixel guarantees count >= 16).
+			accumulator += (count >> 1);
+			accumulator *= ppi->fixedDivide[count];
+			accumulator >>= 16;
+
+			d[col] = accumulator;
+		}
+
+		d += pitch;
+		s += pitch;
+	}
+
+	// The bottom row is likewise copied, when it is distinct from the top.
+	if ( height > 1 )
+		memcpy ( d, s, width );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln b/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln
new file mode 100644
index 00000000..cac9c7e0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.sln
@@ -0,0 +1,23 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "preproc", "preproc.vcproj", "{0FDF0DE2-6841-4C51-A008-A08C42E50948}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectDependencies) = postSolution
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{0FDF0DE2-6841-4C51-A008-A08C42E50948}.Debug.ActiveCfg = Debug|Win32
+		{0FDF0DE2-6841-4C51-A008-A08C42E50948}.Debug.Build.0 = Debug|Win32
+		{0FDF0DE2-6841-4C51-A008-A08C42E50948}.Release.ActiveCfg = Release|Win32
+		{0FDF0DE2-6841-4C51-A008-A08C42E50948}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
diff --git a/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj b/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj
new file mode 100644
index 00000000..52b65f22
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/preproc/preproc.vcproj
@@ -0,0 +1,302 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="preproc"
+	ProjectGUID="{0FDF0DE2-6841-4C51-A008-A08C42E50948}"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				AdditionalIncludeDirectories="..\vp60\include,..\include,..\..\include,.\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				BufferSecurityCheck="false"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderFile=".\Release/preproc.pch"
+				AssemblerListingLocation=""
+				ObjectFile="$(IntDir)/"
+				ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(SolutionDir)lib\win32\release\s_preproc.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\Debug"
+			IntermediateDirectory=".\Debug"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\vp60\include,..\..\include,.\include,..\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				PrecompiledHeaderFile=".\Debug/preproc.pch"
+				AssemblerListingLocation=".\Debug/"
+				ObjectFile=".\Debug/"
+				ProgramDataBaseFileName=".\Debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\Lib\Win32\Debug\s_preproc.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release 64|Win32"
+			OutputDirectory="$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/GS-"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
+				AdditionalIncludeDirectories="..\vp60\include,..\include,..\..\include,.\include,..\..\..\include,..\..\..\..\include,..\..\..\..\include\vp60"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderFile=".\Release/preproc.pch"
+				AssemblerListingLocation=""
+				ObjectFile="$(IntDir)/"
+				ProgramDataBaseFileName="$(IntDir)/vc70.pdb"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				AdditionalOptions="/machine:AMD64"
+				OutputFile="..\..\..\Lib\Win64\Release\s_preproc.lib"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+			>
+			<File
+				RelativePath="preproc.c"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release 64|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl"
+			>
+			<File
+				RelativePath="..\include\preproc.h"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c
new file mode 100644
index 00000000..0ac90ff1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/OptFunctions.c
@@ -0,0 +1,315 @@
+/****************************************************************************
+*
+*   Module Title :     OptFunctions.c
+*
+*   Description  :     MMX or otherwise processor specific 
+*                      optimised versions of functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+ *  1.08 JBB 13 Jun 01 VP4 Code Clean Out
+*   1.07 JBB 26/01/01  Removed unused function
+*	1.06 YWX 23/05/00  Remove the clamping in MmxReconPostProcess()
+*	1.05 YWX 15/05/00  Added MmxReconPostProcess()
+*   1.04 SJL 03/14/00  Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2. 
+*   1.03 PGW 12/10/99  Changes to reduce unnecessary dependencies. 
+*   1.02 PGW 30/08/99  Minor changes to MmxReconInterHalfPixel2().
+*   1.01 PGW 13/07/99  Changes to keep reconstruction data to 16 bit
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/* 
+    Use Tim's optimized version.
+*/
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              // Strict type checking. 
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Imports.
+*****************************************************************************
+*/   
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Functions 
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/  
+
+INT16 Ones[4]               = {1,1,1,1};
+INT16 OneTwoEight[4]        = {128,128,128,128};
+UINT8 Eight128s[8]          = {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+                                      
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearSysStateC()
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ * 
+ *
+ *  FUNCTION      :     Does nothing; the body is intentionally empty.
+ *
+ *  SPECIAL NOTES :     Presumably the plain C counterpart of ClearMmx(), for
+ *                      builds/CPUs with no MMX state to reset (TODO confirm).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearSysStateC(void)
+{
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearMmx()
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ * 
+ *
+ *  FUNCTION      :     Clears down the MMX state by executing EMMS
+ *
+ *  SPECIAL NOTES :     The MMX routines in this file do not execute EMMS
+ *                      themselves (warning 4799 is disabled above).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+	__asm
+	{
+		emms									; Clear the MMX state.
+	}
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MMXReconIntra
+ *
+ *  INPUTS        :     PB_INSTANCE * pbi
+ *                               Not referenced by this routine
+ *
+ *                      INT16 *  idct
+ *                               Pointer to the idct output for this block (64 INT16 values)
+ *
+ *                      INT32    stride
+ *                               Byte step between successive rows of dest
+ *
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer (8 bytes stored per row, 8 rows)
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - MMX version. Each row is packed
+ *                      to signed 8 bit with saturation, then XORed with 0x80 (adds 128)
+ *
+ *  SPECIAL NOTES :     Tim Murphy's optimized version. EMMS is not executed here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MMXReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    __asm
+    {
+        // u    pipe
+        //   v  pipe
+        mov         eax,[idct]              ; Signed 16 bit inputs
+          mov         edx,[dest]            ; Signed 8 bit outputs
+        movq        mm0,[Eight128s]         ; Set mm0 to 0x8080808080808080
+          ;
+        mov         ebx,[stride]            ; Line stride in output buffer
+          lea         ecx,[eax+128]         ; Endpoint in input buffer
+loop_label:                                 ;
+        movq        mm2,[eax]               ; First four input values
+          ;
+        packsswb    mm2,[eax+8]             ; pack with next(high) four values
+          por         mm0,mm0               ; stall
+        pxor        mm2,mm0                 ; Convert result to unsigned (same as add 128)
+          lea         eax,[eax + 16]        ; Step source buffer
+        cmp         eax,ecx                 ; are we done
+          ;
+        movq        [edx],mm2               ; store results
+          ;
+        lea         edx,[edx+ebx]           ; Step output buffer
+          jc          loop_label            ; Loop back if we are not done
+    }
+    // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MmxReconInter
+ *
+ *  INPUTS        :     PB_INSTANCE * pbi
+ *                               Unused (explicitly cast to void)
+ *                      UINT8 *  RefPtr
+ *                               The last frame reference (8 bytes read per row)
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data (64 values, read in order)
+ *                      UINT32   LineStep
+ *                               Byte step between rows of both RefPtr and ReconPtr
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction (8 bytes stored per row)
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change: adds them as
+ *                      16 bit values and packs to unsigned 8 bit with saturation
+ *
+ *  SPECIAL NOTES :     8 rows are processed. EMMS is not executed here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MmxReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) pbi;
+
+ _asm {
+	push	edi
+;;	 mov	ebx, [ref]
+;;	mov		ecx, [diff]
+;;	 mov	eax, [dest]
+;;	mov		edx, [stride]
+	 mov	ebx, [RefPtr]
+	mov		ecx, [ChangePtr]
+	 mov	eax, [ReconPtr]
+	mov		edx, [LineStep]
+	 pxor	mm0, mm0
+	lea		edi, [ecx + 128]
+	 ;
+  L:
+	movq	mm2, [ebx]			; (+3 misaligned) 8 reference pixels
+	 ;
+	movq	mm4, [ecx]			; first 4 changes
+	 movq	mm3, mm2
+	movq	mm5, [ecx + 8]		; last 4 changes
+	 punpcklbw mm2, mm0			; turn first 4 refs into positive 16-bit #s
+	paddsw	mm2, mm4			; add in first 4 changes
+	 punpckhbw mm3, mm0			; turn last 4 refs into positive 16-bit #s
+	paddsw	mm3, mm5			; add in last 4 changes
+	 add	ebx, edx			; next row of reference pixels
+	packuswb mm2, mm3			; pack result to unsigned 8-bit values
+	 lea	ecx, [ecx + 16]		; next row of changes
+	cmp		ecx, edi			; are we done?
+	 ;
+	movq	[eax], mm2			; store result
+	 ;
+	lea		eax, [eax+edx]		; next row of output
+	 jc		L					; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+	pop		edi
+ }
+}
+
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CopyBlockMMX
+ *
+ *  INPUTS        :     unsigned char *src       First byte of the source block
+ *                      unsigned int   srcstride Byte step between rows (used for src AND dest)
+ *
+ *  OUTPUTS       :     unsigned char *dest      Receives the copied block
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies an 8 row by 8 byte block from source to destination
+ *
+ *  SPECIAL NOTES :     Rows are moved four at a time via mm0-mm3. EMMS is not executed here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+	unsigned char *s = src;
+	unsigned char *d = dest;
+	unsigned int stride = srcstride;
+	// recon copy 
+	_asm
+	{
+			mov		ecx, [stride]
+			mov		eax, [s]
+			mov		ebx, [d]
+			lea		edx, [ecx + ecx * 2]
+
+			movq	mm0, [eax]
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			lea		eax, [eax + ecx*4]
+
+			movq	[ebx], mm0
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+
+			lea		ebx, [ebx + ecx * 4]
+
+			movq	mm0, [eax]
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			movq	[ebx], mm0
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+	}
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c
new file mode 100644
index 00000000..d1106ec7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/WmtOptFunctions.c
@@ -0,0 +1,204 @@
+ /****************************************************************************
+ *
+ *   Module Title :     WmtOptFunctions.c
+ *
+ *   Description  :     willamette processor specific 
+ *                      optimised versions of functions
+ *
+ *   AUTHOR      :		Yaowu Xu
+ *
+ *	 Special Note:		
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *
+ *  1.04 JBB 13 Jun 01 VP4 Code Clean Out
+ *   1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * 			Added push and pop ebx in WmtReconIntra
+ *   1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ *   1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ *   1.00 YWX 14/06/00  Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+ 
+/* 
+    Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+#define STRICT              // Strict type checking. 
+
+#include "codec_common.h"
+
+#include "pbdll.h"
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+/**************************************************************************** 
+ *  Imports.
+ *****************************************************************************
+ */   
+
+
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions 
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */  
+
+
+
+_declspec(align(16)) static  UINT8 Eight128s[8] =  {128,128,128,128,128,128,128,128};
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+                                      
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconIntra
+ *
+ *  INPUTS        :     PB_INSTANCE * pbi
+ *                               Not referenced by this routine
+ *                      INT16 *  idct
+ *                               Pointer to the output from the idct for this block
+ *                               (64 values); read with MOVDQA so must be 16 byte aligned
+ *
+ *                      INT32    stride
+ *                               Byte step between successive rows of dest
+ *
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer (8 bytes stored per row, 8 rows)
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - wmt version. Each row is packed
+ *                      to signed 8 bit with saturation, then XORed with 0x80 (adds 128)
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( PB_INSTANCE *pbi, UINT8 * dest, INT16 * idct, INT32 stride )
+{
+    __asm
+    {
+	
+		push		ebx
+
+        mov         eax,[idct]						; Signed 16 bit inputs
+        mov         edx,[dest]						; Unsigned 8 bit outputs
+
+        movq		xmm0,QWORD PTR [Eight128s]		; Set xmm0 to 0x000000000000008080808080808080
+		pxor		xmm3, xmm3						; set xmm3 to 0
+													;
+        mov         ebx,[stride]					; Line stride in output buffer
+        lea         ecx,[eax+128]					; Endpoint in input buffer
+
+loop_label:                                 
+
+        movdqa		xmm2,XMMWORD PTR [eax]			; Read the eight inputs
+		packsswb	xmm2,xmm3						;		
+		
+		pxor        xmm2,xmm0						; Convert result to unsigned (same as add 128)
+        lea         eax,[eax + 16]					; Step source buffer
+
+        cmp         eax,ecx							; are we done
+        movq		QWORD PTR [edx],xmm2			; store results
+
+        lea         edx,[edx+ebx]					; Step output buffer
+        jc          loop_label						; Loop back if we are not done
+
+		pop			ebx
+    }
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconInter
+ *
+ *  INPUTS        :     PB_INSTANCE * pbi
+ *                               Unused (explicitly cast to void)
+ *                      UINT8 *  RefPtr
+ *                               The last frame reference (8 bytes read per row)
+ *                      INT16 *  ChangePtr
+ *                               Change data (64 values); read with MOVDQA so must be 16 byte aligned
+ *                      UINT32   LineStep
+ *                               Byte step between rows of both RefPtr and ReconPtr
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction (8 bytes stored per row)
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change: adds them as
+ *                      16 bit values and packs to unsigned 8 bit with saturation
+ *
+ *  SPECIAL NOTES :     All 8 changes of a row are added by one PADDSW; 8 rows are processed.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconInter( PB_INSTANCE *pbi, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) pbi;
+
+ _asm {
+		push	edi
+		
+		mov		ebx, [RefPtr]
+		mov		ecx, [ChangePtr]
+
+		mov		eax, [ReconPtr]
+		mov		edx, [LineStep]
+
+		pxor	xmm0, xmm0
+		lea		edi, [ecx + 128]
+  L:
+		movq	xmm2, QWORD ptr [ebx]		; (+3 misaligned) 8 reference pixels
+		movdqa	xmm4, XMMWORD ptr [ecx]		; 8 changes
+		
+		punpcklbw xmm2, xmm0				; 
+
+		add	ebx, edx						; next row of reference pixels
+		paddsw	xmm2, xmm4					; add in first 4 changes
+
+		lea		ecx, [ecx + 16]				; next row of changes
+		packuswb xmm2, xmm0					; pack result to unsigned 8-bit values
+
+		cmp		ecx, edi					; are we done?
+		movq	QWORD PTR [eax], xmm2		; store result
+
+		lea		eax, [eax+edx]				; next row of output
+		jc		L							; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+		pop		edi
+ }
+
+}
+
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c
new file mode 100644
index 00000000..b5486f05
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/dsystemdependant.c
@@ -0,0 +1,369 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.19 YWX 15-Jun-01 added function pointer setups for new deblocking filter
+*   1.18 YWX 26-Apr-01 Fixed the cpu frequency detection bug caused by Sleep()
+*   1.17 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
+*   1.16 JBB 26-Jan-01 Cleaned out unused function
+*   1.15 YWX 08-dec-00 Added WMT PostProcessor and 
+*                        moved function declarations into _head files
+*   1.14 JBB 30 NOV 00 Version number changes 
+*   1.13 YWX 03-Nov-00 Optimized postprocessor filters
+*   1.12 YWX 02-Nov-00 Added new loopfilter function pointers
+*   1.11 YWX 19-Oct-00 Added 1-2 Scaling functions pointers
+*   1.10 jbb 16 oct 00 added ifdefs to insure version code
+*   1.09 YWX 04-Oct-00 Added function pointers for scaling 
+*   1.08 YWX 06 Sep 00 Added function pointers for new deringing filter 
+*                      using frag baseed Q Value.
+*   1.07 JBB 21 Aug 00 New More Blurry in high variance area deringer
+*	1.06 YWX 2  Aug 00 Added function pointers for postprocess  
+*	1.05 YWX 15/05/00  Added functions to check processor frequency
+*					   and more function pointers for postprocessor
+*	1.04 YWX 08/05/00  Added function pointers setup for postprocess
+*   1.03 SJL 20/04/00  Added ability to enable the new dequant code.
+*   1.02 SJL 22/03/00  Function pointers for the loop filter.
+*   1.01 JBB 21/03/00  More Function Pointers for optimized playback
+*   1.00 PGW 12/10/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "pbdll.h" 
+#pragma warning(disable:4115)
+#include <windows.h>
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+//extern void ReadTokens_c(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void  (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+extern void UnPackVideo_C(PB_INSTANCE *pbi);
+extern void UnPackVideo2(PB_INSTANCE *pbi);
+
+extern void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+extern void VP5_BuildQuantIndex_ForMMX(QUANTIZER *pbi);
+extern void VP5_BuildQuantIndex_ForWMT(QUANTIZER *pbi);
+
+
+//extern void ReadTokens_mmx(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void UnPackVideoMMX_LL (PB_INSTANCE *pbi);
+extern void ClearMmx(void);
+extern void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+//extern void ReadTokensPredict_c( PB_INSTANCE *pbi, UINT32 BlockSize, UINT32 Hpos );
+
+/****************************************************************************
+*  Explicit imports
+*****************************************************************************
+*/
+extern unsigned int CPUFrequency;
+
+//extern MmxEnabled;          // Is MMX enabled flag
+
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+ 
+
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+
+              
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_readTSC
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     unsigned long *tsc
+ *                               Receives the low 32 bits (EAX) of the counter only
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Read the cpu time stamp counter. CPUID is executed first,
+ *                      presumably to serialise execution before RDTSC (TODO confirm)
+ *
+ *  SPECIAL NOTES :     RDTSC was introduced with the Pentium processor, so this
+ *                      routine is expected to work on Pentium and above only.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void VP5_readTSC(unsigned long *tsc)
+{
+	int time;
+	
+	__asm 
+	{
+        pushad
+        cpuid
+		rdtsc
+		mov time,eax
+        popad
+	}
+
+	*tsc=time;
+	return;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_GetProcessorFrequency()
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     The frequency in MHz, snapped to the nearest multiple of
+ *                      50 MHz or 66.67 MHz; 0 if it could not be measured
+ *
+ *  FUNCTION      :     Check the processor's working frequency by counting TSC
+ *                      ticks across a 5 ms wait timed by the performance counter
+ *
+ *  SPECIAL NOTES :     This function should only be used here. Limited tests 
+ *                      have verified it works till 166MHz Pentium with MMX. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+unsigned long VP5_GetProcessorFrequency()
+{
+	LARGE_INTEGER pf;						// Performance counter frequency (counts/sec)
+	LARGE_INTEGER startcount, endcount;
+	unsigned long tsc1, tsc2, time1;
+	unsigned long cpufreq = 0;				// Stays 0 when no measurement is possible
+	unsigned long Nearest66Mhz, Nearest50Mhz;
+	unsigned long Delta66, Delta50;
+
+	// If the cpu does not support the high resolution counter, return 0
+	if( QueryPerformanceFrequency(&pf) && pf.QuadPart > 0 )
+	{
+		// read the counter and TSC at start
+		QueryPerformanceCounter(&startcount);
+		VP5_readTSC(&tsc1);
+
+		// Busy-wait for 5 ms. Testing the elapsed difference (instead of
+		// time2 < time1+5) stays correct when timeGetTime() wraps around.
+		time1 = timeGetTime();
+		while( (DWORD)(timeGetTime() - time1) < 5 )
+			;
+
+		// read the counter and TSC at end
+		QueryPerformanceCounter(&endcount);
+		VP5_readTSC(&tsc2);
+
+		// calculate the frequency, but only if the counter advanced so that
+		// we never divide by zero. tsc2 - tsc1 is deliberately modulo 2^32.
+		if( endcount.QuadPart > startcount.QuadPart )
+		{
+			cpufreq = (unsigned long)( (double)( tsc2 - tsc1 )
+				* (double)pf.QuadPart
+				/ (double)( endcount.QuadPart - startcount.QuadPart )
+				/ 1000000 );
+		}
+	}
+
+	// Snap to the nearest multiple of 66.67 MHz or of 50 MHz, whichever is
+	// closer. The operands are unsigned, so the deltas are formed without abs().
+	Nearest66Mhz = ((cpufreq * 3 + 100)/200 * 200) / 3;
+	Delta66 = (Nearest66Mhz > cpufreq) ? (Nearest66Mhz - cpufreq) : (cpufreq - Nearest66Mhz);
+	Nearest50Mhz = ((cpufreq + 25)/50 * 50);
+	Delta50 = (Nearest50Mhz > cpufreq) ? (Nearest50Mhz - cpufreq) : (cpufreq - Nearest50Mhz);
+
+	if( Delta50 < Delta66 )
+		cpufreq = Nearest50Mhz;
+	else
+	{
+		cpufreq = Nearest66Mhz;
+		if( cpufreq == 666 )				// integer division truncates 666.67; report 667
+			cpufreq = 667;
+	}
+	return cpufreq;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_DMachineSpecificConfig
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     Sets the global function pointer VP5_BuildQuantIndex
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Queries the processor feature flags with GetProcessorFlags()
+ *                      and points VP5_BuildQuantIndex at the matching variant:
+ *                      Willamette if available, else MMX, else the generic 'C' one.
+ *
+ *  SPECIAL NOTES :     The XMM flag is fetched but not acted upon. MMX_ENABLED is
+ *                      defined below but is not referenced by this routine.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define MMX_ENABLED 1
+void VP5_DMachineSpecificConfig(void)
+{
+	INT32 HasMmx;
+	INT32 HasXmm;
+	INT32 HasWmt;
+	// The generic 'C' version is used unless a better match is found below.
+	void (*Selected)( QUANTIZER * ) = VP5_BuildQuantIndex_Generic;
+
+	GetProcessorFlags( &HasMmx, &HasXmm, &HasWmt );
+
+	// Pick the most capable variant the processor reports:
+	// Willamette takes precedence over MMX.
+	if ( HasWmt )
+	{
+		Selected = VP5_BuildQuantIndex_ForWMT;
+	}
+	else if ( HasMmx )
+	{
+		Selected = VP5_BuildQuantIndex_ForMMX;
+	}
+
+	VP5_BuildQuantIndex = Selected;
+
+//	ReadTokens = ReadTokensPredict_c;
+
+}
+
+// Issues a warning message in a task-modal message box with an exclamation icon
+void VP5_IssueWarning( char * WarningMessage )
+{
+    // Issue the warning message (no owner window, NULL caption)
+    MessageBox(NULL, WarningMessage, NULL, MB_ICONEXCLAMATION | MB_TASKMODAL );
+}
+
+// Pause/Sleep for SleepMs milliseconds (passed straight to the Win32 Sleep call)
+void VP5_PauseProcess( unsigned int SleepMs )
+{
+    Sleep( SleepMs );
+}
+
+char * VP5_SytemGlobalAlloc( unsigned int Size )  
+{   // Returns Size bytes from GlobalAlloc with the GPTR flags; release with VP5_SystemGlobalFree
+    return GlobalAlloc( GPTR, Size );  
+}
+
+void VP5_SystemGlobalFree( char * MemPtr )
+{   // Hands MemPtr to GlobalFree; presumably a pointer from VP5_SytemGlobalAlloc (TODO confirm)
+    GlobalFree( (HGLOBAL) MemPtr );
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_SetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *   pbi        Decoder instance to update
+ *                      PB_COMMAND_TYPE Command    Which setting to change
+ *                      UINT32          Parameter  New value; for
+ *                                      PBC_SET_REFERENCEFRAME it is cast to a
+ *                                      YUV_BUFFER_CONFIG pointer
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *  
+ *  FUNCTION      :     Generalised command interface to decoder.
+ *
+ *  SPECIAL NOTES :     The body is compiled only when POSTPROCESS is defined;
+ *                      otherwise nothing happens. Unlisted commands are ignored.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CCONV VP5_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 Parameter )
+{
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    // Plain settings: the value is stored as given.
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    // Presumably copies the supplied frame into both reference frames (TODO confirm).
+    case PBC_SET_REFERENCEFRAME:
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->LastFrameRecon);
+        CopyFrame( pbi->postproc, (YUV_BUFFER_CONFIG *) Parameter, pbi->GoldenFrame);
+        break;
+
+    // An explicit level, except 9 which selects the level as for 70% free CPU.
+    case PBC_SET_POSTPROC:
+        if( Parameter != 9 )
+        {
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        else
+        {
+            VP5_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        }
+        break;
+
+    // Choose the level from the frame size relative to the free share
+    // (Parameter, in percent) of the processor frequency.
+    case PBC_SET_CPUFREE:
+    {
+        double FrameArea = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight;
+        double SpareMhz = pbi->ProcessorFrequency * Parameter / 100;
+        double Load = 100 * sqrt(1.0*FrameArea) / SpareMhz;
+
+        pbi->CPUFree = Parameter;
+        pbi->PostProcessingLevel = ( Load > 150 ) ? 0
+                                 : ( Load > 100 ) ? 8
+                                 : ( Load >  90 ) ? 4
+                                 : ( Load >  80 ) ? 5
+                                 :                  6;
+        break;
+    }
+    default:
+        break;
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c
new file mode 100644
index 00000000..e76e7d49
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/quantindexmmx.c
@@ -0,0 +1,377 @@
+/****************************************************************************
+*
+*   Module Title :     quantindexmmx.c
+*
+*   Description  :     
+*
+*    AUTHOR      :     
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.03 JBB 15Nov00   Removed unnecessary ifdefs
+*   1.02 JBB 26Jul00   Removed unnecessary macro
+*	1.01 YWX 26 JUL 00 Bug Fixing, used WMT TI(x) for MMX processors
+*   1.00 SJL 14/04/00  
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#define STRICT              /* Strict type checking. */
+#include "codec_common.h"
+#include "quantize.h"
+#define MIN16 ((1<<16)-1)
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/ 
+       
+/****************************************************************************
+*  Imported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Imported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/    
+          
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/
+static UINT32 dequant_index[64] =    // zig-zag order: output position -> index into the 8x8 block
+{	0,  1,  8,  16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+ 
+static UINT32 dequant_indexMMX[64] =    // inverse permutation of dequant_index: block index -> zig-zag position
+{
+    0,  1,   5,  6, 14, 15, 27, 28,
+    2,  4,   7, 13, 16, 26, 29, 42,
+    3,  8,  12, 17, 25, 30, 41, 43,
+    9,  11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54, 
+    20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61,
+    35, 36, 48, 49, 57, 58, 62, 63
+};
+/*
+    transIndexMMX: used to unravel the coeffs into the order required by the
+    MMX idct (see mmxidct.cxx); it is indexed with a dequant_indexMMX value.
+*/
+static UINT32 transIndexMMX[64] = 
+{
+     0,  8,  1,  2,    9, 16, 24, 17,
+    10,  3, 32, 11,   18, 25,  4, 12,
+     5, 26, 19, 40,   33, 34, 41, 48,
+    27,  6, 13, 20,   28, 21, 14,  7,
+
+    56, 49, 42, 35,   43, 50, 57, 36, 
+    15, 22, 29, 30,   23, 44, 37, 58,
+    51, 59, 38, 45,   52, 31, 60, 53,
+    46, 39, 47, 54,   61, 62, 55, 63
+};
+
+static UINT32 transIndexWMT[64] =    // counterpart of transIndexMMX, installed by VP5_BuildQuantIndex_ForWMT
+{	
+	 0,  8,  1,  2,   9, 16, 24, 17,
+	10,  3,  4, 11,	 18, 25, 32, 40,
+    33, 26, 19, 12,   5,  6, 13, 20,
+    27, 34, 41, 48,  56, 49, 42, 35,
+    28, 21, 14,  7,  15, 22, 29, 36, 
+    43, 50, 57, 58,  51, 44, 37, 30,
+    23, 31, 38, 45,  52, 59, 60, 53,
+    46, 39, 47, 54,  61, 62, 55, 63
+};
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildQuantIndex_ForMMX
+ *
+ *  INPUTS        :     
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Builds the quant_index table in a transposed order.  
+ *
+ *  SPECIAL NOTES :     
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_BuildQuantIndex_ForMMX(QUANTIZER *pbi)
+{
+    INT32 Coeff = 0;
+
+    // publish the MMX idct ordering through the quantizer instance
+    pbi->transIndex = transIndexMMX;
+    // quant_index[ transIndexMMX[ dequant_indexMMX[n] ] ] = n for every n
+    while ( Coeff < BLOCK_SIZE )
+    {
+        pbi->quant_index[ transIndexMMX[ dequant_indexMMX[Coeff] ] ] = Coeff;
+        Coeff++;
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     BuildQuantIndex_ForWMT
+ *
+ *  INPUTS        :     
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Builds the quant_index table in a transposed order.  
+ *
+ *  SPECIAL NOTES :     
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void VP5_BuildQuantIndex_ForWMT(QUANTIZER *pbi)
+{
+    INT32 Coeff = 0;
+
+    // publish the WMT ordering through the quantizer instance
+    pbi->transIndex = transIndexWMT;
+    // quant_index[ transIndexWMT[ dequant_indexMMX[n] ] ] = n for every n
+    while ( Coeff < BLOCK_SIZE )
+    {
+        pbi->quant_index[ transIndexWMT[ dequant_indexMMX[Coeff] ] ] = Coeff;
+        Coeff++;
+    }
+}
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_quantize_wmt
+ *
+ *  INPUTS        :     pbi - quantizer tables; DCT_block - 64 coefficients,
+ *                      overwritten in place; bp - index into QTableSelect
+ *
+ *  OUTPUTS       :     quantized_list - the 64 quantized values.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Quantizes one block: DC in C, then all 64 with SSE2.  
+ *
+ *  SPECIAL NOTES :     movdqa is used on DCT_block, so it must be 16 byte
+ *                      aligned. Entries 1..63 are output via dequant_index.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_quantize_wmt( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+    UINT32  i, j;
+    
+    INT32 * QuantRoundPtr = pbi->QuantRound[QTableSelect[bp]];
+    INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+    INT32 * ZBinPtr = pbi->ZeroBinSize[QTableSelect[bp]];    // not read again in this function
+
+    INT16 * DCT_blockPtr = DCT_block;
+	INT32 temp;
+	INT32 NonZeroACs = 0;    // not read again in this function
+	INT16 *round = &pbi->round[0];
+	INT16 *mult = &pbi->mult[0];
+	INT16 *zbin = &pbi->zbin[0];
+
+	// DC quantization in C: 0 inside +/-QuantRound[0], else QuantCoeffs * (DC +/- round), with MIN16 added when negative
+	temp = 0;
+	if ( DCT_blockPtr[0] >= QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] + QuantRoundPtr[0] ) ;
+	else if ( DCT_blockPtr[0] <= -QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] - QuantRoundPtr[0] ) + MIN16;
+	quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+	// this quantizer stores its results back in the source!! (DCT_block[0] included; the DC result was taken above)
+	__asm
+	{
+
+		// setup and collect registers
+		mov			esi, DCT_block
+		xor         ecx, ecx        // byte offset into DCT_block
+		mov			edi, round
+		movdqu      xmm2, [edi]		// first 8 round values, reused for every group
+		mov         edi, mult
+		movdqu      xmm3, [edi]     // first 8 quantizer values, reused for every group
+		mov         edi, zbin
+		movdqu      xmm4, [edi]  	// first 8 zerobin values, reused for every group
+	
+		// 8 coefficients at a time loop 
+next8:
+		movdqa      xmm0, [esi+ecx]	// get source values (aligned load)
+		movdqa      xmm1, xmm0		// copy, turned into a sign mask next
+		psraw		xmm1, 15		// negative all 1's postive all 0's
+
+        // get the absolute value of the input values
+		pxor        xmm0, xmm1      // one's complement of negatives 
+		psubw       xmm0, xmm1      // xmm0 = abs coeffs
+
+		// zero bin coefficients
+		movdqa      xmm5, xmm0 
+        pcmpgtw     xmm5, xmm4      // mask = abs(Coeffs) > ZBin (signed compare)
+		pand        xmm0, xmm5      // coefficients not above ZBin become 0
+
+		// calculate & round quantizer
+		paddw		xmm0, xmm2      // Coeff + Quant Round
+        pmulhuw     xmm0, xmm3      // *QuantCoeffs >> 16
+
+
+		// get back the sign bit
+        pxor        xmm0, xmm1      // ones complement of negatives
+        psubw       xmm0, xmm1      // negatives are back as negative
+
+		// output the results
+		movdqa      [esi+ecx], xmm0 
+
+		// loop back to the next set
+		add         ecx, 16			
+		cmp			ecx, 128        // 128 bytes = 64 INT16 coefficients
+		jl          next8
+	}
+
+	// zigzagify: entries 1..63 only, entry 0 was written by the DC code above
+    for( i = 1; i < 64; i++)
+    {
+		// Zig Zag order
+		j = dequant_index[i];
+		quantized_list[i] = DCT_block[j];
+    }
+
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_quantize_mmx
+ *
+ *  INPUTS        :     pbi - quantizer tables; DCT_block - 64 coefficients,
+ *                      overwritten in place; bp - index into QTableSelect
+ *
+ *  OUTPUTS       :     quantized_list - the 64 quantized values.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Quantizes one block: DC in C, then all 64 in mm regs.  
+ *
+ *  SPECIAL NOTES :     NOTE(review): no emms is issued here; presumably the
+ *                      caller clears MMX state before x87 code - confirm.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_quantize_mmx( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+    UINT32  i, j;
+    
+    INT32 * QuantRoundPtr = pbi->QuantRound[QTableSelect[bp]];
+    INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+    INT32 * ZBinPtr = pbi->ZeroBinSize[QTableSelect[bp]];    // not read again in this function
+
+    INT16 * DCT_blockPtr = DCT_block;
+	INT32 temp;
+	INT32 NonZeroACs = 0;    // not read again in this function
+	INT16 *round = &pbi->round[0];
+	INT16 *mult = &pbi->mult[0];
+	INT16 *zbin = &pbi->zbin[0];
+
+	// DC quantization in C: 0 inside +/-QuantRound[0], else QuantCoeffs * (DC +/- round), with MIN16 added when negative
+	temp = 0;
+	if ( DCT_blockPtr[0] >= QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] + QuantRoundPtr[0] ) ;
+	else if ( DCT_blockPtr[0] <= -QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_blockPtr[0] - QuantRoundPtr[0] ) + MIN16;
+	quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+	// this quantizer stores its results back in the source!! (DCT_block[0] included; the DC result was taken above)
+	__asm
+	{
+
+		// setup and collect registers
+		mov			esi, DCT_block
+		xor         ecx, ecx        // byte offset into DCT_block
+		mov			edi, round
+		movq        mm2, [edi]		// first 4 round values, reused for every group
+		mov         edi, mult
+		movq        mm3, [edi]     // first 4 quantizer values, reused for every group
+		mov         edi, zbin
+		movq        mm4, [edi]  	// first 4 zerobin values, reused for every group
+	
+		// 4 coefficients at a time loop (one 8 byte mm register per pass)
+next4:
+		movq        mm0, [esi+ecx]	// get source values
+		movq        mm1, mm0		// copy, turned into a sign mask next
+		psraw		mm1, 15			// negative all 1's postive all 0's
+
+        // get the absolute value of the input values
+		pxor        mm0, mm1		// one's complement of negatives 
+		psubw       mm0, mm1		// mm0 = abs coeffs
+
+		// zero bin coefficients
+		movq        mm5, mm0 
+        pcmpgtw     mm5, mm4		// mask = abs(Coeffs) > ZBin (signed compare)
+		pand        mm0, mm5		// coefficients not above ZBin become 0
+
+		// calculate & round quantizer
+		paddw		mm0, mm2		// Coeff + Quant Round
+        pmulhuw     mm0, mm3		// *QuantCoeffs >> 16; NOTE(review): pmulhuw arrived with SSE, not base MMX - confirm dispatch
+
+
+		// get back the sign bit
+        pxor        mm0, mm1		// ones complement of negatives
+        psubw       mm0, mm1		// negatives are back as negative
+
+		// output the results
+		movq        [esi+ecx], mm0 
+
+		// loop back to the next set
+		add         ecx, 8			
+		cmp			ecx, 128        // 128 bytes = 64 INT16 coefficients
+		jl          next4
+	}
+
+	// zigzagify: entries 1..63 only, entry 0 was written by the DC code above
+    for( i = 1; i < 64; i++)
+    {
+		// Zig Zag order
+		j = dequant_index[i];
+		quantized_list[i] = DCT_block[j];
+    }
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c
new file mode 100644
index 00000000..8df2b37f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/Win32/timer.c
@@ -0,0 +1,147 @@
+/****************************************************************************
+*
+*   Module Title :     Timer.C
+*
+*   Description  :     Video CODEC timer module
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*   
+*   1.01 PGW 09/07/99  Added code to support profile timing
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/					  
+
+#define STRICT              /* Strict type checking. */
+#define INC_WIN_HEADER      1
+#include <windows.h>
+
+#include "type_aliases.h"
+#include <mmsystem.h> 
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+                
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/              
+
+// Used for calculation of elapsed time
+UINT32 LastCPUTime;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyInitTimer
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Placeholder for timer initialisation.
+ *
+ *  SPECIAL NOTES :     The body is empty in this file: nothing is set up. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void MyInitTimer( void )
+{
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyGetTime
+ *
+ *  INPUTS        :     
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Time in ms since startup.
+ *
+ *  FUNCTION      :     Provides a model independant interface for getting times.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 MyGetTime( void )
+{
+    /* Millisecond tick taken from timeGetTime(). As the original note here
+     * warned, Win32 does not guarantee it to better than 16ms. */
+    UINT32 Milliseconds = timeGetTime();
+    return Milliseconds;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MyGetElapsedCpuTime
+ *
+ *  INPUTS        :     None (reads and updates the global LastCPUTime).
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Time stamp counter ticks since the last call, / 256
+ *
+ *  FUNCTION      :     Calculate the CPU time elapsed since the last call
+ *
+ *  SPECIAL NOTES :     The first call measures from LastCPUTime's initial 
+ *                      value. Correct across one wrap of the 32 bit count.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT32 MyGetElapsedCpuTime( void )
+{
+    UINT32 CurrCPUTime[2];                          // rdtsc result: [0] = EAX (low), [1] = EDX (high)
+    UINT32 CurrentCpuTime;                          // counter bits 8..39 packed into 32 bits
+    UINT32 ElapsedTime;
+
+__asm
+	{
+        rdtsc                                       ; Get CPU time into EDX:EAX
+
+        mov         dword ptr [CurrCPUTime], eax    ; Save to the local array
+        mov         dword ptr [CurrCPUTime+4], edx   
+    }
+
+    // Drop the low 8 bits of the 64 bit count and keep the next 32, so one
+    // unit of the returned value is 256 counter ticks.
+    CurrCPUTime[0] = (CurrCPUTime[0] >> 8);
+    CurrCPUTime[1] = (CurrCPUTime[1] & 0x000000FF) << 24;
+    CurrentCpuTime = CurrCPUTime[0] | CurrCPUTime[1];
+
+    // Unsigned subtraction is modular, so the difference is already right
+    // when the packed counter has wrapped since the previous call; no
+    // separate wrap-around branch is needed (the former one added 0xFFFF to
+    // Last - Current, which is not the elapsed count). The mask keeps the
+    // result to 32 bits even if UINT32 were wider than that on some build.
+    ElapsedTime = (CurrentCpuTime - LastCPUTime) & 0xFFFFFFFF;
+
+    // Remember this reading as the baseline for the next call.
+    LastCPUTime = CurrentCpuTime;
+
+    return ElapsedTime;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c
new file mode 100644
index 00000000..0ebfeedf
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DFrameR.c
@@ -0,0 +1,380 @@
+/****************************************************************************
+*
+*   Module Title :     DFrameR.C
+*
+*   Description  :     Functions to read
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.18 YWX 17/dec/02 Added DeInterlacedMode setup
+*   1.17 YWX 05/08/02  Added initialization of postprocessor 's interlaced flag
+*   1.16 JBB 13 Jun 01 VP4 Code Clean Out
+*	1.15 AWG 08-Jun-01 Added support for DCT16
+*   1.14 JBB 04 May 01 Added set of ReadTokens Function for VP5
+*   1.13 JBB 04 Dec 00 Added new Center vs Scale Bits
+*   1.12 JBB 30 NOV 00 Version number changes 
+*   1.11 JBB 14 Oct 00 Added ifdefs around version specific code
+*   1.10 PGW 06 Oct 00 QThreshTable[] made instance specific.
+*					   Changes to LoadFrameHeader() to call InitQTables().
+*	1.09 YWX 25 Aug 00 Added version number check
+*   1.08 JBB 22 Aug 00 Ansi C conversion
+*   1.07 JBB 27 Jul 00 Malloc checks
+*   1.06 PGW 20/03/00  Removed InterIntra mode flag.
+*	1.05 JBB 27/01/99  Globals Removed, use of PB_INSTANCE, Bit Management Functions
+*   1.04 PGW 17/12/99  Changes to Synch code to reflect the fact that 0 length
+*                      frames are no longer legal (simply not transmittedd)
+*                      Note that this change is only relevant to the live version 
+*                      of the codec
+*   1.03 PGW 15/11/99  Added support for VP3 version ID.
+*   1.02 PGW 30/08/99  Use bit functions to read header data.
+*                      Changes to way bytes are read.
+*   1.01 PGW 16/08/99  Header changes for VFW version and key frames.
+*   1.00 PGW 22/06/99  pbi->Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+#include "pbdll.h"
+#include "duck_mem.h" 
+#include "boolhuff.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+#define START_SIZE  0
+#define END_SIZE    1
+
+#define READ_BUFFER_EMPTY_WAIT  20
+ 
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/             
+#ifndef MAPCA
+static const UINT32 loMaskTbl_VP31[] = { 0,    // entry n (0..32) has the n lowest bits set
+	1, 3, 7, 15,
+	31, 63, 127, 255,
+	0x1ff, 0x3ff, 0x7ff, 0xfff,
+	0x1fff, 0x3fff, 0x7fff, 0xffff,
+	0x1FFFF, 0x3FFFF, 0x7FFFF, 0xfFFFF,
+	0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF,
+	0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, 0xffffFFFF
+};
+
+static const UINT32 hiMaskTbl_VP31[] = { 0,    // entry n (0..32) has the n highest bits set
+	0x80000000, 0xC0000000, 0xE0000000, 0xF0000000,
+	0xF8000000, 0xFC000000, 0xFE000000, 0xFF000000,
+	0xFF800000, 0xFFC00000, 0xFFE00000, 0xFFF00000,
+	0xFFF80000, 0xFFFC0000, 0xFFFE0000, 0xFFFF0000,
+	0xFFFF8000, 0xFFFFC000, 0xFFFFE000, 0xFFFFF000,
+	0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00, 0xFFFFFF00,
+	0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0, 0xFFFFFFF0,
+	0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE, 0xFFFFFFFF
+};
+
+#endif
+/****************************************************************************
+*  Forward References.
+*****************************************************************************
+*/              
+static BOOL LoadFrameHeader(PB_INSTANCE *pbi);
+
+
+/****************************************************************************
+*  Imports
+*****************************************************************************
+*/              
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LoadFrame
+ *
+ *  INPUTS        :     None 
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     FALSE if an Error is detected or the frame is empty else TRUE.
+ *
+ *  FUNCTION      :     Loads a frame and decodes the fragment arrays.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+BOOL VP5_LoadFrame(PB_INSTANCE *pbi)
+{
+    // Note: the ExtractInit() call that once preceded the header parse is
+    // commented out in this version, so nothing is initialised here.
+    // Parse the frame header (which also carries the frame size on key
+    // frames) and reduce the outcome to exactly TRUE or FALSE.
+    if ( LoadFrameHeader(pbi) )
+    {
+        return TRUE;
+    }
+    else
+    {
+        return FALSE;
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     LoadFrameHeader
+ *
+ *  INPUTS        :     pbi - decoder instance; header bits come from pbi->br.
+ *
+ *  OUTPUTS       :     Frame type, Q index and (key frames) sizes in pbi.
+ *
+ *  RETURNS       :     FALSE if a key frame's version is too new, else TRUE.
+ *
+ *  FUNCTION      :     Loads and interprets the frame header.
+ *
+ *  SPECIAL NOTES :     Key frames re-run VP5_InitFrameDetails on a size change. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+// Rounds X up to the next multiple of 32 (not referenced by LoadFrameHeader itself)
+#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) ) 
+static BOOL LoadFrameHeader(PB_INSTANCE *pbi)
+{
+	UINT8  VersionByte0;    // Must be 0 for VP30b and later (read below but never checked)
+    UINT8  DctQMask;
+    UINT8  SpareBits;       // Spare cfg bits (read to consume them, value not used)
+	UINT8  Unused;
+
+    BOOL   RetVal = TRUE;
+
+    // Is the frame an inter frame or a key frame
+    pbi->FrameType = DecodeBool(&pbi->br, 128);
+    
+	// unused bit (must still be consumed to stay in step with the stream)
+    Unused = DecodeBool(&pbi->br, 128);
+
+    // Quality (Q) index, 6 bits
+    DctQMask = (UINT8)VP5_bitread( &pbi->br,   6 );
+		
+
+	// If the frame was a base frame then read the version, flags and frame dimensions. 
+	if ( (pbi->FrameType == BASE_FRAME) )
+	{
+        // Version byte (stored but not examined) followed by the 5 bit version number
+    	VersionByte0 = (UINT8)VP5_bitread( &pbi->br,   8 );
+	    pbi->Vp3VersionNo = (UINT8)VP5_bitread( &pbi->br,   5 );
+
+		if(pbi->Vp3VersionNo > CURRENT_DECODE_VERSION)    // stream is newer than this decoder: give up
+		{
+			RetVal = FALSE;
+			return RetVal;
+		}
+		// Initialise version specific quantiser values
+		VP5_InitQTables( pbi->quantizer, pbi->Vp3VersionNo );
+
+        // Read the type / coding method for the key frame.
+        pbi->KeyFrameType = (UINT8)DecodeBool(&pbi->br, 128);
+
+        SpareBits = (UINT8)DecodeBool(&pbi->br, 128);
+
+		// is this keyframe section of the file interlaced
+		pbi->Configuration.Interlaced = (UINT32)DecodeBool(&pbi->br, 128);		
+#ifndef MAPCA
+		SetPPInterlacedMode(pbi->postproc, pbi->Configuration.Interlaced);
+        if(pbi->Configuration.Interlaced)
+        {
+            SetDeInterlaceMode(pbi->postproc, pbi->DeInterlaceMode);
+        }
+#endif
+        // Frame dimensions and scaling set-up
+         {             
+             UINT32 HFragments;             
+             UINT32 VFragments;             
+             UINT32 HOldScaled;
+             UINT32 VOldScaled;
+             UINT32 HNewScaled;
+             UINT32 VNewScaled;
+			 UINT32 OutputHFragments;
+			 UINT32 OutputVFragments;
+
+             VFragments = 2 * ((UINT8)VP5_bitread( &pbi->br,   8 ));             // coded size, vertical first
+             HFragments = 2 * ((UINT8)VP5_bitread( &pbi->br,   8 ));              
+
+             OutputVFragments = 2 * ((UINT8)VP5_bitread( &pbi->br,   8 ));             // output size, vertical first
+             OutputHFragments = 2 * ((UINT8)VP5_bitread( &pbi->br,   8 ));              
+
+             if(pbi->Configuration.HRatio == 0)    // guard the divisions below
+                 pbi->Configuration.HRatio = 1;
+
+             if(pbi->Configuration.VRatio == 0)
+                 pbi->Configuration.VRatio = 1;
+
+             HOldScaled = pbi->Configuration.HScale * pbi->HFragments * 8 / pbi->Configuration.HRatio;    // scaled size before this header is applied
+             VOldScaled = pbi->Configuration.VScale * pbi->VFragments * 8 / pbi->Configuration.VRatio;
+
+			 pbi->Configuration.ExpandedFrameWidth = OutputHFragments * 8;
+			 pbi->Configuration.ExpandedFrameHeight = OutputVFragments * 8;
+
+			 if(VFragments >= OutputVFragments)    // vertical Scale/Ratio: 1/1, 5/4, 5/3 or 2/1
+			 {
+	             pbi->Configuration.VScale = 1;
+		         pbi->Configuration.VRatio = 1;
+			 }
+			 else if (5*VFragments >= 4*OutputVFragments)
+			 {
+	             pbi->Configuration.VScale = 5;
+		         pbi->Configuration.VRatio = 4;
+			 }
+			 else if (5*VFragments >= 3*OutputVFragments)
+			 {
+	             pbi->Configuration.VScale = 5;
+		         pbi->Configuration.VRatio = 3;
+			 }
+			 else
+			 {
+	             pbi->Configuration.VScale = 2;
+		         pbi->Configuration.VRatio = 1;
+			 }
+
+			 if(HFragments >= OutputHFragments)    // horizontal Scale/Ratio, same thresholds
+			 {
+	             pbi->Configuration.HScale = 1;
+		         pbi->Configuration.HRatio = 1;
+			 }
+			 else if (5*HFragments >= 4*OutputHFragments)
+			 {
+	             pbi->Configuration.HScale = 5;
+		         pbi->Configuration.HRatio = 4;
+			 }
+			 else if (5*HFragments >= 3*OutputHFragments)
+			 {
+	             pbi->Configuration.HScale = 5;
+		         pbi->Configuration.HRatio = 3;
+			 }
+			 else
+			 {
+	             pbi->Configuration.HScale = 2;
+		         pbi->Configuration.HRatio = 1;
+			 }
+
+             HNewScaled = pbi->Configuration.HScale * HFragments * 8 / pbi->Configuration.HRatio;    // scaled size with the new values
+             VNewScaled = pbi->Configuration.VScale * VFragments * 8 / pbi->Configuration.VRatio;
+
+			 pbi->ScaleWidth = HNewScaled;
+			 pbi->ScaleHeight = VNewScaled; 
+
+             pbi->Configuration.ScalingMode = ((UINT32)VP5_bitread( &pbi->br, 2 ));
+
+             // we have a new input size
+             if( VFragments != pbi->VFragments ||                
+                 HFragments != pbi->HFragments)             
+             {
+                 // Store the new size (8 pixels per fragment); NOTE(review): any result of VP5_InitFrameDetails is not examined here
+                 pbi->Configuration.VideoFrameWidth = HFragments*8;                 
+                 pbi->Configuration.VideoFrameHeight = VFragments*8;                  
+				 VP5_InitFrameDetails(pbi);
+             }
+
+
+             // we have a new intermediate buffer clean the screen 
+             if( pbi->ScaleBuffer != 0 &&
+                 (HOldScaled != HNewScaled ||
+                  VOldScaled != VNewScaled ) )
+             {
+                 // turn the screen black!! (first region 0x00, following half-size region 0x80; presumably Y then U+V - confirm layout)
+                 memset(pbi->ScaleBuffer, 0x0, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) );                 
+                 memset(pbi->ScaleBuffer + 	(pbi->OutputWidth+32) * (pbi->OutputHeight+32),
+					 0x80, (pbi->OutputWidth+32) * (pbi->OutputHeight+32) / 2 );                                   
+             }
+         }         
+    }
+	
+	// Set this frame quality value from Q Index (done for key and inter frames alike)
+	pbi->quantizer->FrameQIndex = DctQMask;
+#ifdef MAPCA
+    SetFLimit(DctQMask);
+    SetSimpleDeblockFlimit(DctQMask);
+#endif
+    pbi->quantizer->ThisFrameQuantizerValue = pbi->quantizer->QThreshTable[DctQMask];
+	VP5_UpdateQ( pbi->quantizer, pbi->Vp3VersionNo );  
+
+    return RetVal;                    
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_SetFrameType
+ *
+ *  INPUTS        :     pbi - instance to update; FrType - a frame type.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Sets the current frame type.
+ *
+ *  SPECIAL NOTES :     Both switch arms store FrType unchanged. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_SetFrameType( PB_INSTANCE *pbi,UINT8 FrType )
+{ 
+    /* Record the requested frame type; every value is stored as given. */
+    switch ( FrType )
+    {  
+    
+    case BASE_FRAME:
+        pbi->FrameType = FrType;
+        break;
+        
+    default:
+        pbi->FrameType = FrType;
+        break;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_GetFrameType
+ *
+ *  INPUTS        :     pbi - instance to query.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The current frame type.
+ *
+ *  FUNCTION      :     Gets the current frame type.
+ *
+ *  SPECIAL NOTES :     Returns pbi->FrameType converted to UINT8. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+UINT8 VP5_GetFrameType(PB_INSTANCE *pbi)
+{
+    return pbi->FrameType; 
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c
new file mode 100644
index 00000000..fad7e93e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/DSystemDependant.c
@@ -0,0 +1,198 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.20 YWX 06-Nov-01 Configuration Baseline for C only version
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "pbdll.h" 
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+//extern void ReadTokens_c(PB_INSTANCE *pbi, INT32 * HuffIndices );
+extern void  (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+extern void UnPackVideo_C(PB_INSTANCE *pbi);
+extern void UnPackVideo2(PB_INSTANCE *pbi);
+
+extern void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+
+/****************************************************************************
+*  Explicit imports
+*****************************************************************************
+*/
+
+extern unsigned int CPUFrequency;
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+ 
+
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+
+              
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+/****************************************************************************
+ * 
+ *  ROUTINE       :     GetProcessorFrequency()
+ *
+ *  INPUTS        :     None
+ *                   
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     Always 0 in this C only version
+ *
+ *  FUNCTION      :     Stub: the processor frequency is not measured here 
+ *
+ *  SPECIAL NOTES :     Nothing is measured here, so callers receive 0 
+ *						rather than a frequency in MHz. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+unsigned long VP5_GetProcessorFrequency()
+{
+
+    return 0;
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MachineSpecificConfig
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Points VP5_BuildQuantIndex at the generic routine; 
+ *                      no machine specific features are checked for here.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DMachineSpecificConfig(void)
+{
+    VP5_BuildQuantIndex = VP5_BuildQuantIndex_Generic;
+}
+
+// Warning hook: this version discards the message (the cast only marks the parameter as used)
+void VP5_IssueWarning( char * WarningMessage )
+{
+	(void) WarningMessage;
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_SetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi, PB_COMMAND_TYPE Command
+ *                      UINT32          Parameter
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *  
+ *  FUNCTION      :     Generalised command interface to decoder.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CCONV VP5_SetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 Parameter )
+{
+    // pbi       - decoder instance whose settings are changed
+    // Command   - which setting to change; unknown commands are ignored
+    // Parameter - new value for that setting
+    // Nothing at all happens unless POSTPROCESS is defined.
+
+#if defined(POSTPROCESS)
+    switch ( Command )
+    {
+    case PBC_SET_CPUFREE:
+        // No frame size or processor frequency enters the calculation in
+        // this version: the load figure was the constant 100*10. That
+        // always exceeded the first threshold (150), so level 0 was the
+        // only reachable outcome; the branches for the lower thresholds
+        // (100, 90, 80) could never run and have been removed. The values
+        // stored are exactly those the full chain produced.
+        pbi->CPUFree = Parameter;
+        pbi->PostProcessingLevel = 0;
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+        // Accepted but ignored in this version.
+        break;
+
+    case PBC_SET_POSTPROC:
+        if( Parameter == 9 )
+        {
+            // Level 9 is redirected to the PBC_SET_CPUFREE handling with 70.
+            VP5_SetPbParam( pbi, PBC_SET_CPUFREE, 70);
+        }
+        else
+        {
+            // Explicit level: clear CPUFree and store the level as given.
+            pbi->CPUFree = 0;
+            pbi->PostProcessingLevel = Parameter;
+        }
+        break;
+
+    case PBC_SET_DEINTERLACEMODE:
+        pbi->DeInterlaceMode = Parameter;
+        break;
+
+    case PBC_SET_BLACKCLAMP:
+        pbi->BlackClamp = Parameter;
+        break;
+
+    case PBC_SET_WHITECLAMP:
+        pbi->WhiteClamp = Parameter;
+        break;
+
+    default:
+        // Any other command leaves the instance untouched.
+        break;
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c
new file mode 100644
index 00000000..ff639f75
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/FrameIni.c
@@ -0,0 +1,484 @@
+/****************************************************************************
+*
+*   Module Title :     FrameIni.c
+*
+*   Description  :     Video CODEC playback module
+*
+*    AUTHOR      :     JimBankoski
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.21 YWX 06-Nov-01 Changed to align the MB coeffs buffer memory
+*   1.20 JBB 13-Jun-01 VP4 Code Clean Out
+*	1.19 AWG 11-Jun-01 Added support for DCT16
+*   1.18 JBB 24-May-01 Fixed Memory Allocation problem and frame recon prob
+*   1.17 JBB 09-Apr-01 CPUFree persistence
+*	1.16 SJL 05-Apr-01 Fixed MAC compile errors.
+*	1.15 JBB 23-Mar-01 New DC prediction
+*   1.14 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+*   1.13 JBB 30 NOV 00 Version number changes 
+*   1.12 JBB 15-NOV-00 cleaned out ifdefs
+*   1.11 JBB 17-oct-00 Ifdefs around version information
+*   1.10 YWX 17-Oct-00 Added Initialization of block coordinates for 
+*                      new loop filtering strategy
+*   1.09 YWX 11-Oct-00 Added LastFrameNoMvRecon and LastFrameNoMvReconAlloc 
+*   1.08 SJL 25 Aug 00 Fixed Mac compile error
+*   1.08 JBB 24 Aug 00 Removed extraneous definition of load and decode
+*   1.07 SJL 16 Aug 00 Fixed Mac compile error
+*   1.06 JBB 28 jul 00 Added fragment variance array for post processor
+*   1.05 JBB 27Jul00   Added checks on Mallocs
+*   1.04 SJL 24Jul00   Changed Frees to DUCK_FREE for Mac utilization
+*	1.03 YWX 08/05/00  Added #if defined(POSTPROCESS) for postprocess 
+*	1.02 JBB 05/05/00  Added Post Processing Buffer & Block Quality Buffers
+*   1.01 YWX 06/04/00  Alligned more buffers for speed
+*	1.00 JBB 27/01/99  Globals Removed, use of PB_INSTANCE, common between 
+*                      compressor and decompressor
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#include "pbdll.h"
+#include "stdlib.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+                
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/  
+
+/****************************************************************************
+*  Imports 
+*****************************************************************************
+*/  
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/  
+
+static const struct  // table of 12 (row, col) pairs
+{
+	INT32 row;  // every entry below has row <= 0
+	INT32 col;  // negative in the two entries whose row is 0
+} NearMacroBlocks[12] =  // VP5_InitFrameDetails passes each pair to MBOffset() to fill pbi->mvNearOffset[]
+{
+	{ -1, 0 },
+	{ 0, -1 },
+	{ -1, -1 },
+	{ -1, 1 },
+	{ -2, 0 },
+	{ 0, -2 },
+	{ -1, -2 },
+	{ -2, -1 },
+	{ -2, 1 },
+	{ -1, 2 },
+	{ -2, -2 },
+	{ -2, 2 }
+};
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+void InitializeFragCoordinates(PB_INSTANCE *pbi);  // prototype only: FrameIni.c neither defines nor calls it
+/****************************************************************************
+*  Explicit Imports
+*****************************************************************************
+*/
+
+
+#include "duck_mem.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_DeleteFragmentInfo
+ *
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : instance whose fragment buffers are released
+ *
+ *  OUTPUTS       :     Every <name>Alloc pointer handled here and its <name> alias are set to 0.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Frees the per-fragment / per-macroblock buffers set up by
+ *                      VP5_AllocateFragmentInfo.
+ *  SPECIAL NOTES :     Each buffer is freed only if its Alloc pointer is non-zero,
+ *                      so the Alloc fields must be 0 or valid on entry.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeleteFragmentInfo(PB_INSTANCE * pbi)
+{
+	// Each buffer is tracked twice: <name>Alloc is the pointer given to duck_free,
+	// <name> is the aligned alias derived from it; both are zeroed afterwards.
+#ifndef MAPCA
+    if( pbi->mbi.CoeffsAlloc)
+        duck_free(pbi->mbi.CoeffsAlloc);
+    pbi->mbi.CoeffsAlloc = 0;
+    pbi->mbi.Coeffs=0;
+#endif
+
+    if(	pbi->FragInfoAlloc)
+		duck_free(pbi->FragInfoAlloc);
+    pbi->FragInfoAlloc = 0;
+    pbi->FragInfo = 0;
+
+	if(	pbi->fc.AboveYAlloc)
+		duck_free(pbi->fc.AboveYAlloc);
+    pbi->fc.AboveYAlloc = 0;
+    pbi->fc.AboveY = 0;
+
+	if(	pbi->fc.AboveUAlloc)
+		duck_free(pbi->fc.AboveUAlloc);
+    pbi->fc.AboveUAlloc = 0;
+    pbi->fc.AboveU = 0;
+
+	if(	pbi->fc.AboveVAlloc)
+		duck_free(pbi->fc.AboveVAlloc);
+    pbi->fc.AboveVAlloc = 0;
+    pbi->fc.AboveV = 0;
+
+	if(	pbi->MBInterlacedAlloc)
+		duck_free(pbi->MBInterlacedAlloc);
+    pbi->MBInterlacedAlloc = 0;
+    pbi->MBInterlaced = 0;
+
+	if(	pbi->MBMotionVectorAlloc)
+		duck_free(pbi->MBMotionVectorAlloc);
+    pbi->MBMotionVectorAlloc = 0;
+    pbi->MBMotionVector = 0;
+
+	if(	pbi->predictionModeAlloc)
+		duck_free(pbi->predictionModeAlloc);
+    pbi->predictionModeAlloc = 0;
+    pbi->predictionMode = 0;
+    // the two buffers below are compiled in only when MAPCA is defined
+#ifdef MAPCA
+    if(pbi->ReferenceBlocksAlloc)
+        duck_free(pbi->ReferenceBlocksAlloc);
+    pbi->ReferenceBlocksAlloc = 0;
+    pbi->ReferenceBlocks = 0;
+
+    if(pbi->ReconstructedMBsAlloc)
+			duck_free(pbi->ReconstructedMBsAlloc);
+	pbi->ReconstructedMBsAlloc=0;	
+    pbi->ReconstructedMBs =0;
+#endif
+
+
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_AllocateFragmentInfo
+ *
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : HFragments, MacroBlocks and UnitFragments
+ *                      must already be set; they size the buffers.
+ *  OUTPUTS       :     <name>Alloc holds each raw allocation, <name> its 32-byte
+ *                      aligned alias inside that allocation.
+ *  RETURNS       :     TRUE on success; FALSE if any allocation failed, in which
+ *                      case everything allocated so far has been released again.
+ *
+ *  FUNCTION      :     Releases any previous fragment buffers and allocates new ones.
+ *
+ *  SPECIAL NOTES :     ROUNDUP32 rounds an address up to the next multiple of 32.
+ *                      It works in size_t with a width-independent mask so the
+ *                      upper bits of a pointer wider than 32 bits are preserved.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define ROUNDUP32(X) ( ( (size_t)(X) + 31 ) & ~(size_t)31 )
+BOOL VP5_AllocateFragmentInfo(PB_INSTANCE * pbi)
+{
+	// every request below adds 32 bytes so the aligned alias still fits inside the allocation
+	// clear any existing info
+	VP5_DeleteFragmentInfo(pbi);
+#ifndef MAPCA
+    pbi->mbi.CoeffsAlloc = (Q_LIST_ENTRY(*)[72]) duck_malloc(32 + sizeof(Q_LIST_ENTRY)*72*6, DMEM_GENERAL);
+    if(!pbi->mbi.CoeffsAlloc) {VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->mbi.Coeffs = (Q_LIST_ENTRY(*)[72])ROUNDUP32(pbi->mbi.CoeffsAlloc);
+#endif
+	// context allocations (NOTE(review): sized with BLOCK_CONTEXT but typed BLOCK_CONTEXTA - confirm the sizes agree)
+    pbi->fc.AboveYAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveYAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->fc.AboveY = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveYAlloc);
+
+    pbi->fc.AboveUAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveUAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->fc.AboveU = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveUAlloc);
+
+    pbi->fc.AboveVAlloc = (BLOCK_CONTEXTA *) duck_malloc(32 + (8+pbi->HFragments / 2) * sizeof(BLOCK_CONTEXT), DMEM_GENERAL);
+    if(!pbi->fc.AboveVAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->fc.AboveV = (BLOCK_CONTEXTA *) ROUNDUP32(pbi->fc.AboveVAlloc);
+
+	// per-macroblock arrays: one entry for each of pbi->MacroBlocks
+	// the encoder is the only thing using this move it to compdll
+    pbi->MBInterlacedAlloc = (char *) duck_malloc(32+pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if(!pbi->MBInterlacedAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->MBInterlaced = (char *) ROUNDUP32(pbi->MBInterlacedAlloc );
+
+    pbi->predictionModeAlloc = (char *) duck_malloc(32+pbi->MacroBlocks * sizeof(char), DMEM_GENERAL);
+    if(!pbi->predictionModeAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->predictionMode = (char *) ROUNDUP32(pbi->predictionModeAlloc );
+
+    pbi->MBMotionVectorAlloc = (MOTION_VECTORA *) duck_malloc(32+pbi->MacroBlocks * sizeof(MOTION_VECTORA ), DMEM_GENERAL);
+    if(!pbi->MBMotionVectorAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->MBMotionVector = (MOTION_VECTORA  *) ROUNDUP32(pbi->MBMotionVectorAlloc );
+
+	// per-fragment array: one FRAG_INFO for each of pbi->UnitFragments
+	// the encoder is the only thing using this move it to compdll
+    pbi->FragInfoAlloc = (FRAG_INFO *) duck_malloc(32+pbi->UnitFragments * sizeof(FRAG_INFO), DMEM_GENERAL);
+    if(!pbi->FragInfoAlloc) { VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->FragInfo = (FRAG_INFO *) ROUNDUP32(pbi->FragInfoAlloc );
+
+	// fixed-size scratch buffers that exist only in MAPCA builds
+#ifdef MAPCA
+    pbi->ReferenceBlocksAlloc=(UINT8(*)[192])duck_malloc(32 + 6*192, DMEM_GENERAL);
+    if(!pbi->ReferenceBlocksAlloc){ VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->ReferenceBlocks = (UINT8(*)[192])ROUNDUP32(pbi->ReferenceBlocksAlloc); 
+
+    pbi->ReconstructedMBsAlloc = (UINT8*) duck_malloc(32 + 768, DMEM_GENERAL);
+    if(!pbi->ReconstructedMBsAlloc){ VP5_DeleteFragmentInfo(pbi); return FALSE;}
+    pbi->ReconstructedMBs = (UINT8*) ROUNDUP32(pbi->ReconstructedMBsAlloc);
+#endif
+
+    return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_DeleteFrameInfo
+ *
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : instance whose frame buffers are released
+ *
+ *  OUTPUTS       :     The four <name>Alloc pointers and their aligned aliases are 0.
+ *
+ *  RETURNS       :     None.
+ * 
+ *
+ *  FUNCTION      :     Frees the reconstruction, golden, last-frame and post-process
+ *                      buffers set up by VP5_AllocateFrameInfo.
+ *  SPECIAL NOTES :     Each buffer is freed only if its Alloc pointer is non-zero.
+ *                      ScaleBufferAlloc (set in VP5_InitFrameDetails) is not
+ *                      released here.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeleteFrameInfo(PB_INSTANCE * pbi)
+{
+	if(pbi->ThisFrameReconAlloc )
+		duck_free(pbi->ThisFrameReconAlloc );
+	if(pbi->GoldenFrameAlloc)
+		duck_free(pbi->GoldenFrameAlloc);
+	if(pbi->LastFrameReconAlloc)
+		duck_free(pbi->LastFrameReconAlloc);
+	if(pbi->PostProcessBufferAlloc)
+		duck_free(pbi->PostProcessBufferAlloc);
+	// forget the raw allocations
+	pbi->ThisFrameReconAlloc = 0;
+	pbi->GoldenFrameAlloc = 0;
+	pbi->LastFrameReconAlloc = 0;
+	pbi->PostProcessBufferAlloc = 0;
+	// and the aligned aliases that pointed into them, so none is left dangling
+	pbi->ThisFrameRecon = 0;
+	pbi->GoldenFrame = 0;
+	pbi->LastFrameRecon = 0;
+	pbi->PostProcessBuffer = 0;
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_AllocateFrameInfo
+ *
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Configuration.YStride must already be set
+ *                      unsigned int FrameSize : entries per frame buffer
+ *  OUTPUTS       :     ThisFrameRecon, GoldenFrame, LastFrameRecon and
+ *                      PostProcessBuffer (plus their <name>Alloc raw pointers).
+ *  RETURNS       :     TRUE on success; FALSE if any allocation failed, in which
+ *                      case the buffers allocated so far have been released again.
+ *
+ *  FUNCTION      :     Releases any previous frame buffers and allocates four new ones.
+ *
+ *  SPECIAL NOTES :     The aligned aliases are assigned only after all four
+ *                      allocations have succeeded.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+BOOL VP5_AllocateFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize)
+{
+
+	// clear any existing info
+	VP5_DeleteFrameInfo(pbi);
+
+	// allocate frames: each request is 32 (alignment slack) + YStride + FrameSize entries
+
+	// (JBB+YX ) Added 2 extra lines to framebuffer so that copy12x12
+	// doesn't fail when we have a large motion vector in V 
+	// on the last v block.  Note : We never use these pixels
+	// anyway so this doesn't hurt anything
+	// (the extra pbi->Configuration.YStride bytes in each request below are that padding)
+	pbi->ThisFrameReconAlloc = (UINT8 *)duck_malloc(32+pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if(!pbi->ThisFrameReconAlloc) { VP5_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->GoldenFrameAlloc = (UINT8 *)duck_malloc(32+pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY ), DMEM_GENERAL);
+    if(!pbi->GoldenFrameAlloc) { VP5_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->LastFrameReconAlloc = (UINT8 *)duck_malloc(32+pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if(!pbi->LastFrameReconAlloc) { VP5_DeleteFrameInfo(pbi); return FALSE;}
+
+	pbi->PostProcessBufferAlloc = (UINT8 *)duck_malloc(32+pbi->Configuration.YStride+FrameSize*sizeof(YUV_BUFFER_ENTRY), DMEM_GENERAL);
+    if(!pbi->PostProcessBufferAlloc) { VP5_DeleteFrameInfo(pbi); return FALSE;}
+
+
+	// adjust up to the next 32 byte boundary
+	pbi->ThisFrameRecon = (unsigned char *) ROUNDUP32(pbi->ThisFrameReconAlloc );
+	pbi->GoldenFrame = (unsigned char *) ROUNDUP32(pbi->GoldenFrameAlloc );
+	pbi->LastFrameRecon = (unsigned char *) ROUNDUP32(pbi->LastFrameReconAlloc );
+	pbi->PostProcessBuffer = (unsigned char *) ROUNDUP32( pbi->PostProcessBufferAlloc );
+
+    return TRUE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_InitFrameDetails
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Configuration.VideoFrameWidth/Height and
+ *                      H/VFragPixels must already be set (they are read here).
+ *  OUTPUTS       :     Plane sizes, strides, offsets, MB dimensions and buffers in pbi.
+ *
+ *  RETURNS       :     TRUE on success, FALSE if a fragment or frame allocation failed.
+ *
+ *  FUNCTION      :     Derives the frame geometry from the configuration and
+ *                      (re)allocates the fragment and frame buffers.
+ *  SPECIAL NOTES :     The optional scale buffer allocation is not checked, and
+ *                      nothing in this file frees it.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+BOOL VP5_InitFrameDetails(PB_INSTANCE *pbi)
+{
+	int FrameSize;
+	UINT32 i;
+	// re-issue a previously stored CPUFree value through the command interface
+	if(pbi->CPUFree > 0 )
+		VP5_SetPbParam( pbi, PBC_SET_CPUFREE, pbi->CPUFree );
+
+    /* Set the frame size etc. */                                                        
+    pbi->YPlaneSize = pbi->Configuration.VideoFrameWidth * pbi->Configuration.VideoFrameHeight; 
+    pbi->UVPlaneSize = pbi->YPlaneSize / 4;  
+    pbi->HFragments = pbi->Configuration.VideoFrameWidth / pbi->Configuration.HFragPixels;
+    pbi->VFragments = pbi->Configuration.VideoFrameHeight / pbi->Configuration.VFragPixels;
+    pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2;
+	pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments;
+	pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
+    // reconstruction buffers are larger than the picture: STRIDE_EXTRA is added in both directions
+    pbi->Configuration.YStride = (pbi->Configuration.VideoFrameWidth + STRIDE_EXTRA);
+    pbi->Configuration.UVStride = pbi->Configuration.YStride / 2;
+    pbi->ReconYPlaneSize = pbi->Configuration.YStride * (pbi->Configuration.VideoFrameHeight + STRIDE_EXTRA);
+    pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
+	FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
+    // plane offsets: Y first, then U, then V, for both the plain and the recon layout
+    pbi->YDataOffset = 0;
+    pbi->UDataOffset = pbi->YPlaneSize;
+    pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
+    pbi->ReconYDataOffset = 0;//(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER;
+    pbi->ReconUDataOffset = pbi->ReconYPlaneSize;// + (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
+    pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize;// + (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
+
+	// Image dimensions in Macro-Blocks (16-pixel units, rounded up, plus 4 extra rows/columns)
+	pbi->MBRows  = 4+(pbi->Configuration.VideoFrameHeight/16)  + ( pbi->Configuration.VideoFrameHeight%16 ? 1 : 0 );
+	pbi->MBCols  = 4+(pbi->Configuration.VideoFrameWidth/16)  + ( pbi->Configuration.VideoFrameWidth%16 ? 1 : 0 );
+	pbi->MacroBlocks = pbi->MBRows * pbi->MBCols;
+
+	// turn the 12 (row, col) pairs of NearMacroBlocks into mvNearOffset entries via MBOffset
+	for(i=0;i<12;i++)
+	{
+		pbi->mvNearOffset[i] = MBOffset(NearMacroBlocks[i].row, NearMacroBlocks[i].col);
+	}
+#ifndef MAPCA
+	ChangePostProcConfiguration(pbi->postproc, &pbi->Configuration);
+#endif
+	if(!VP5_AllocateFragmentInfo(pbi))
+        return FALSE;
+
+	if(!VP5_AllocateFrameInfo(pbi, FrameSize))
+    {
+        VP5_DeleteFragmentInfo(pbi);
+        return FALSE;
+    }
+
+	// We have a differently output size than our scaling provides
+	if( pbi->ScaleBuffer == 0 && pbi->OutputWidth &&
+		(pbi->Configuration.VideoFrameWidth != pbi->OutputWidth ||
+		pbi->Configuration.VideoFrameHeight != pbi->OutputHeight ) )
+	{
+		// we add 32 to outputwidth to insure that we have enough to overscale (ie scale to a size that's bigger 
+		// than our output size) we do this now even though we don't use it so that we don't have to check border conditions
+		pbi->ScaleBufferAlloc = (UINT8 *) 
+			duck_malloc(32 + 3 * 
+			(pbi->OutputWidth + 32) * 
+			(pbi->OutputHeight + 32)* 
+			sizeof(YUV_BUFFER_ENTRY) / 2, DMEM_GENERAL);  
+		// NOTE(review): the duck_malloc result is not checked; on failure the function still returns TRUE
+		pbi->ScaleBuffer = (UINT8 *) ROUNDUP32(pbi->ScaleBufferAlloc );                  
+	}
+	
+	// this is just so the post processor will work !!
+	for(i=0;i<pbi->UnitFragments;i++)
+		pbi->FragInfo[i].DisplayFragment = 1;
+
+
+    return TRUE;
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_InitialiseConfiguration
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : instance whose Configuration is written
+ *
+ *  OUTPUTS       :     pbi->Configuration.HFragPixels and VFragPixels.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Sets up the default starting pbi->Configuration.
+ *
+ *  SPECIAL NOTES :     Only the two fragment dimensions are set; no other
+ *                      Configuration field is touched here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_InitialiseConfiguration(PB_INSTANCE *pbi)
+{  
+
+    // IDCT table initialisation
+    //InitDctTables();
+    // fragments are 8 pixels wide and 8 pixels high
+    pbi->Configuration.HFragPixels = 8;
+    pbi->Configuration.VFragPixels = 8;
+} 
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c
new file mode 100644
index 00000000..3e42b9b3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/Huffman.c
@@ -0,0 +1,285 @@
+/****************************************************************************
+*
+*   Module Title :     Huffman.c
+*
+*   Description  :     Video CODEC
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.13 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+*   1.12 JBB 13-Jun-01 VP4 Code Clean Out
+*   1.11 SJL 22-Mar-01 Fixed MAC compile errors
+*   1.10 JBX 22-Mar-01 Changed size of SORT_NODE array to 1024;
+*   1.09 JBB 26 Jan 00 Reworked Huffman to remove dynamic allocation and 
+*                      to condense tree storage.
+*   1.08 PGW 11 Oct 00 Changes to support different entropy tables for 
+*					   different encoder versions.
+*   1.07 PGW 17/03/00  Further Entropy changes.
+*   1.06 PGW 15/03/00  Updated entropy tables.
+*	1.05 JBB 27/01/99  Globals Removed, use of PB_INSTANCE, Bit Management
+*   1.04 PGW 05/11/99  Changes to support AC range entropy tables.
+*   1.03 PGW 12/10/99  Changes to reduce uneccessary dependancies.
+*   1.02 PGW 19/07/99  Deleted the funtion DecodeHuffToken().
+*   1.01 PGW 15/07/99  Added inline bit extraction to DecodeHuffToken().
+*   1.00 PGW 07/07/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "systemdependant.h"
+#include "huffman.h"
+#include "pbdll.h"
+#include "boolhuff.h"
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Forward references.
+*****************************************************************************
+*/       
+ 
+void VP5_BuildHuffTree(
+    HUFF_NODE *hn, 
+    unsigned int *counts, 
+    int values );
+
+void VP5_CreateCodeArray( HUFF_NODE *hn,
+                      int node,
+                      unsigned int *codearray,
+                      unsigned char *lengtharray,
+					  int codevalue, 
+                      int codelength );
+                      
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/  
+typedef struct _SORT_NODE  // NOTE(review): not referenced by any code in Huffman.c; the tree builder uses sortnode
+{
+    int next;
+    int freq;
+    unsigned char value;
+} SORT_NODE;
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/              
+
+//***********************************************************
+// Jim's version of Eric's condensed huffman trees!
+
+
+typedef struct _sortnode   // entry of the freq-sorted, index-linked list used by VP5_BuildHuffTree
+{
+    int next;              // index of the next entry in the sn[] array; -1 ends the list
+    int freq;              // count for a token, or the summed count of a merged pair
+    tokenorptr value;      // selector 1: value is a token; selector 0: value is an index into hn[]
+} sortnode;
+
+
+// links sn[node] into the ascending-by-freq list headed by *startnode (-1 = empty list)
+static void InsertSorted( 
+    sortnode *sn,        // array that holds every list entry
+    int node,            // index of the entry to insert; its freq must already be set
+    int *startnode  )    // in/out: index of the list head, updated when node becomes the head
+{
+    int which = *startnode;
+    int prior = *startnode;
+
+    // find the position at which to insert the node (an equal freq stops the scan, so ties go in front)
+    while( which != -1 && sn[node].freq > sn[which].freq )
+    {
+        prior = which;
+        which = sn[which].next;
+    }
+
+    if(which == *startnode)  // scan never advanced: node becomes the new head
+    {
+        *startnode = node;
+        sn[node].next = which;
+    }
+    else                     // splice node between prior and which (which may be -1, i.e. the tail)
+    {
+        sn[prior].next = node;
+        sn[node].next = which;
+    }
+}
+
+// builds the condensed huffman tree for `values` tokens into hn[]; returns nothing, the root ends up at hn[0]
+void VP5_BuildHuffTree(
+    HUFF_NODE *hn,           // out: internal nodes, written from index values-2 down to 0
+    unsigned int *counts,    // in/out: per-token counts; zero entries are overwritten with 1
+    int values )             // number of tokens
+{
+    int i;
+    sortnode sn[256];        // NOTE(review): leaves plus merged nodes need 2*values-1 entries, so values must be <= 128
+    int sncount=0;
+    int startnode=0;         // NOTE(review): with values == 0 the while loop below reads an unset sn[0]
+
+    // note we are creating the huffman tree in 
+    // reverse order so that the root will always be 0
+    int huffptr=values-1;
+
+    // set up our sorted linked list of values 
+    // or pointers into the huffman tree
+    for(i=0;i<values;i++)
+    {
+        sn[i].value.selector = 1;
+        sn[i].value.value = i;
+        if(counts[i] == 0)
+            counts[i] = 1;
+        sn[i].freq = counts[i];
+        sn[i].next = -1;
+    }
+    sncount=values;
+
+    // connected the above list into a linked list
+    for(i=1;i<values;i++)
+    {
+        InsertSorted(sn,i,&startnode);
+    }
+
+    // while there is more than one node in our linked list
+    while(sn[startnode].next!=-1)
+    {
+        int first = startnode;             // lowest freq entry
+        int second = sn[startnode].next;   // next lowest freq entry
+        int sumfreq = sn[first].freq + sn[second].freq;
+
+        // setup new merged huffman node
+        --huffptr;
+        hn[huffptr].leftunion.left = sn[first].value;
+        hn[huffptr].rightunion.right = sn[second].value;
+        hn[huffptr].freq = 256 * sn[first].freq / sumfreq;  // left child's share of the pair, scaled to 256
+
+        // set up new merged sort node pointing to our huffnode
+        sn[sncount].value.selector = 0;
+        sn[sncount].value.value = huffptr;
+        sn[sncount].freq = sumfreq;
+        sn[sncount].next = -1;
+
+        // remove the two nodes we just merged from the linked list
+        startnode = sn[second].next;
+
+        // insert the new sort node into the proper location
+        InsertSorted(sn, sncount, &startnode);
+
+        // account for new nodes
+        sncount++;
+
+    }
+    return ;
+}
+
+void VP5_CreateCodeArray( HUFF_NODE *hn,          // tree nodes (see VP5_BuildHuffTree)
+                      int node,                   // index in hn[] of the subtree to walk
+                      unsigned int *codearray,    // out: code bits, indexed by token value
+                      unsigned char *lengtharray, // out: code length in bits, indexed by token value
+					  int codevalue,              // bits accumulated on the path to this node
+                      int codelength )            // number of bits in codevalue
+{    
+    // a left branch appends a 0 bit, a right branch appends a 1 bit
+    /* If we are at a leaf then fill in a code array entry. */
+    /* Recursive calls to scan down the tree. */
+    if( hn[node].leftunion.left.selector )
+    {
+        codearray[hn[node].leftunion.left.value] = (codevalue<<1)+0;
+        lengtharray[hn[node].leftunion.left.value] = codelength+1;
+    }
+    else
+    {
+        VP5_CreateCodeArray( 
+            hn, 
+            hn[node].leftunion.left.value,
+            codearray,
+            lengtharray, 
+            ((codevalue << 1) + 0), 
+            (codelength + 1)
+            );
+    }
+    
+    if( hn[node].rightunion.right.selector )
+    {
+        codearray[hn[node].rightunion.right.value] = (codevalue<<1)+1;
+        lengtharray[hn[node].rightunion.right.value] = codelength+1;
+    }
+    else
+    {
+        VP5_CreateCodeArray( 
+            hn, 
+            hn[node].rightunion.right.value,
+            codearray,
+            lengtharray, 
+            ((codevalue << 1) + 1), 
+            (codelength + 1)
+            );
+    }    
+}
+
+int VP5_DecodeValue(      // walks hn[] from index 0 and returns the value of the leaf it reaches
+    BOOL_CODER *bc,       // decoder state handed to DecodeBool
+    HUFF_NODE *hn         // tree nodes (see VP5_BuildHuffTree)
+    )
+{
+    tokenorptr torp;
+	torp.value=0;         // start at hn[0]
+	torp.selector=0;
+    // Loop searches down through tree based upon bits read from the bitstream 
+    // until it hits a leaf at which point we have decoded a token
+
+    do
+    {
+		if(DecodeBool(bc, hn[torp.value].freq))  // non-zero result selects the right child
+		{
+	        torp = hn[torp.value].rightunion.right;
+		}
+		else
+		{
+	        torp = hn[torp.value].leftunion.left;
+		}
+    }
+	while ( !(torp.selector));  // selector set means torp is a leaf
+
+	return torp.value;
+}
+
+void VP5_EncodeValue(     // emits the low `length` bits of `value`, most significant first
+	BOOL_CODER *bc,       // encoder state; bc->MeasureCost selects EncodeBool2 over EncodeBool
+    HUFF_NODE *hn,        // tree nodes (see VP5_BuildHuffTree); the walk starts at hn[0]
+    int value,            // code bits for the token
+    int length)           // number of bits in value; nothing is emitted when length <= 0
+{
+    int i;
+    int node = 0;
+    for(i=length-1;i>=0;i--)
+    {
+        int v= (value>>i) & 1;
+        // each bit is coded with the freq of the node reached so far
+		if ( bc->MeasureCost )
+			EncodeBool2(bc,(BOOL) v , hn[node].freq);
+		else
+			EncodeBool(bc,(BOOL) v , hn[node].freq);
+        // NOTE(review): the child's selector is not checked here - presumably length equals the code's depth, confirm
+        if(v)
+        {
+            node=hn[node].rightunion.right.value;
+        }
+        else
+        {
+            node=hn[node].leftunion.left.value;
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c
new file mode 100644
index 00000000..e0025016
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/MvEntropy.c
@@ -0,0 +1,710 @@
+/****************************************************************************
+*
+*   Module Title :     MvEntropy.c
+*
+*   Description  :     Video CODEC: Motion vector entropy module.
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.03 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+*   1.02 JBB 13 Jun 01 VP4 Code Clean Out
+*   1.01 PGW 23 Jan 01  Module created.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Header Frames
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "systemdependant.h"
+#include "codec_common.h"
+#include "codec_common_interface.h"
+#include "huffman.h"
+#include "pbdll.h"
+
+
+/****************************************************************************
+*  Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+// VP5 MV coding tables: 63 index values in 0..15, mirrored about the central 0 entry.
+UINT8 VP5_MvTableIndex[MV_ENTROPY_TOKENS] = 
+{ 15, 15, 14, 14, 13, 13, 13, 13,   // first 31 entries: values fall from 15 down to 1
+  12, 12, 12, 12, 11, 11, 11, 11,
+  10, 10, 9,  9,  8,  8,  7,  7,  
+  6,  6,  5,  4,  3,  2,  1,  
+  0,                                 // centre entry (index 31)
+  1,  2,  3,  4,  5,  6,  6,         // last 31 entries: mirror image, rising from 1 to 15
+  7,  7,  8,  8,  9,  9,  10, 10, 
+  11, 11, 11, 11, 12, 12, 12, 12, 
+  13, 13, 13, 13, 14, 14, 15, 15
+};  // NOTE(review): presumably maps an MV token to an entropy table number - the lookup is not in this view
+
+HUFF_NODE XMvHuffTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS-1];   // per table: one node fewer than there are tokens; no initialiser here
+UINT32 XMvPatternTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS];     // NOTE(review): presumably the code bit pattern per token - confirm where it is filled
+UINT8  XMvBitsTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS];        // NOTE(review): presumably the code length in bits per token - confirm where it is filled
+
+static UINT32 VP5_XMvFrequencyCounts[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS] =   // 16 initialiser lines, one per table row, written flat (no inner braces)
+{
+ 67 , 15 , 32 ,  4 , 21 ,  8 , 33 ,  2 , 32 ,  6 , 29 ,  4 , 25 ,  5 , 83 , 16 , 58 ,  3 , 59 ,  7 , 75 , 11 ,129 , 19 ,126 , 22 ,159 , 29 ,207 , 88 ,636 ,388 ,579 ,125 ,269 , 66 ,223 , 33 ,177 , 15 ,118 , 14 ,130 , 14 , 81 , 10 , 91 , 25 ,106 , 10 , 68 , 12 , 51 , 10 , 61 , 13 , 46 ,  8 , 55 ,  6 , 63 , 29 ,207 ,  
+ 77 ,  6 , 23 ,  1 , 19 ,  1 , 25 ,  4 , 38 ,  2 , 39 ,  6 , 48 ,  6 ,116 , 25 ,110 , 16 ,105 , 16 ,125 , 26 ,223 , 28 ,279 , 56 ,407 , 98 ,509 ,238 ,921 ,461 ,1260 ,628 ,857 ,173 ,543 , 74 ,368 , 31 ,284 , 28 ,207 , 21 ,159 , 21 ,180 , 38 ,166 , 14 , 56 ,  5 , 42 ,  8 , 48 ,  5 , 35 ,  4 , 29 ,  8 , 37 , 10 , 84 ,  
+  7 ,  3 ,  3 ,  1 ,  4 ,  0 ,  3 ,  1 ,  4 ,  0 ,  4 ,  0 , 10 ,  0 , 13 ,  7 , 18 ,  1 , 17 ,  2 , 22 ,  5 , 36 ,  5 , 52 , 15 , 83 , 15 ,116 , 35 ,220 ,144 ,391 ,262 ,282 ,178 ,235 , 52 ,150 , 23 , 83 , 12 , 74 ,  4 , 44 ,  8 , 46 ,  9 , 36 ,  1 , 18 ,  1 , 10 ,  1 ,  7 ,  3 ,  8 ,  0 ,  6 ,  2 ,  6 ,  0 , 15 ,  
+ 25 ,  3 , 17 ,  3 ,  5 ,  2 ,  9 ,  1 , 23 ,  0 , 21 ,  5 , 17 ,  6 , 51 ,  7 , 61 ,  9 , 48 ,  5 , 76 ,  7 , 93 , 11 ,125 , 26 ,190 , 54 ,271 , 98 ,502 ,194 ,604 ,221 ,606 ,258 ,639 ,296 ,527 , 56 ,282 , 32 ,195 , 17 ,138 , 11 ,135 , 18 ,132 ,  3 , 50 ,  1 , 31 ,  6 , 34 ,  3 , 23 ,  3 , 17 ,  1 , 20 ,  3 , 49 ,  
+ 10 ,  0 ,  3 ,  1 ,  4 ,  0 ,  2 ,  1 ,  3 ,  1 ,  2 ,  1 ,  4 ,  0 ,  7 ,  7 , 11 ,  0 , 12 ,  2 , 16 ,  2 , 21 ,  2 , 31 , 10 , 40 , 10 , 46 , 20 ,115 , 59 ,140 ,106 ,222 ,106 ,225 , 76 ,199 , 82 ,126 ,  9 , 67 ,  4 , 38 ,  2 , 36 , 12 , 34 ,  0 ,  8 ,  0 ,  7 ,  1 ,  9 ,  0 ,  7 ,  3 ,  8 ,  2 , 12 ,  0 , 17 ,  
+ 28 ,  5 , 11 ,  1 , 12 ,  2 ,  8 ,  1 , 14 ,  2 , 19 ,  2 , 19 ,  2 , 38 ,  8 , 38 ,  2 , 47 ,  4 , 54 ,  5 , 81 , 11 ,103 , 16 ,124 , 23 ,183 , 56 ,376 ,178 ,451 ,142 ,487 ,125 ,549 ,185 ,552 , 89 ,443 ,170 ,358 , 34 ,180 , 16 ,167 , 31 ,152 , 12 , 49 ,  7 , 39 ,  5 , 37 ,  5 , 33 ,  4 , 20 ,  4 , 25 , 10 , 59 ,  
+ 22 ,  3 , 18 ,  0 ,  6 ,  2 ,  8 ,  1 , 15 ,  0 , 10 ,  0 , 17 ,  6 , 40 ,  6 , 48 ,  6 , 50 ,  3 , 55 ,  7 , 83 , 14 ,100 , 11 , 99 , 14 ,151 , 39 ,309 ,159 ,370 ,124 ,487 ,176 ,686 ,188 ,704 ,121 ,606 , 79 ,385 , 68 ,368 ,126 ,331 , 62 ,200 , 16 ,101 ,  5 , 74 ,  6 , 79 ,  5 , 57 ,  9 , 47 ,  6 , 47 ,  6 , 75 ,  
+ 15 ,  2 , 10 ,  0 ,  5 ,  2 ,  7 ,  3 , 11 ,  3 , 13 ,  0 , 15 ,  0 , 25 ,  1 , 27 ,  2 , 35 ,  3 , 46 ,  7 , 48 ,  7 , 62 , 15 , 77 , 13 ,112 , 32 ,220 ,104 ,251 , 64 ,242 , 72 ,347 , 95 ,525 , 79 ,699 , 65 ,533 , 41 ,240 , 13 ,254 , 77 ,264 , 72 ,179 ,  8 , 89 ,  7 , 65 ,  3 , 61 ,  3 , 45 ,  2 , 49 ,  8 , 68 ,  
+ 14 ,  3 ,  7 ,  0 ,  7 ,  1 ,  7 ,  0 ,  8 ,  3 , 10 ,  0 ,  7 ,  0 , 21 ,  4 , 28 ,  2 , 17 ,  0 , 33 ,  7 , 26 ,  2 , 40 ,  4 , 48 ,  9 , 70 , 13 ,147 , 84 ,160 , 41 ,167 , 41 ,202 , 42 ,301 , 32 ,475 , 29 ,410 , 38 ,333 , 17 ,265 , 31 ,171 , 10 ,117 , 16 ,156 , 72 ,146 ,  6 , 64 ,  5 , 48 ,  2 , 49 , 12 , 80 ,  
+ 14 ,  2 ,  6 ,  0 ,  2 ,  0 ,  3 ,  1 ,  2 ,  0 ,  6 ,  1 ,  4 ,  1 , 14 ,  5 , 12 ,  0 , 13 ,  1 , 24 ,  2 , 20 ,  5 , 20 ,  3 , 32 ,  7 , 43 , 11 , 93 , 46 , 96 , 25 , 96 , 18 ,117 , 22 ,137 , 14 ,140 , 17 ,256 , 14 ,388 , 13 ,330 , 20 ,112 ,  4 , 81 ,  7 , 63 ,  6 , 68 , 16 , 79 , 43 , 71 ,  7 , 43 , 10 , 80 ,  
+ 19 ,  2 ,  3 ,  0 ,  6 ,  0 ,  3 ,  0 , 11 ,  1 ,  8 ,  2 ,  7 ,  0 , 18 ,  4 , 11 ,  1 , 17 ,  3 , 20 ,  3 , 15 ,  2 , 33 ,  9 , 33 , 11 , 51 , 14 ,127 , 64 ,148 , 35 ,106 , 16 ,107 , 21 ,119 , 11 ,133 , 18 ,195 , 19 ,287 , 11 ,267 , 15 ,200 ,  3 ,101 ,  7 , 86 ,  7 , 56 ,  3 , 49 ,  8 , 65 , 11 ,100 , 63 ,163 ,  
+ 25 ,  5 , 12 ,  0 ,  8 ,  0 ,  7 ,  0 ,  8 ,  1 , 18 ,  0 , 21 ,  0 , 42 ,  5 , 33 ,  1 , 22 ,  5 , 23 ,  5 , 29 ,  4 , 51 , 17 , 47 , 14 , 69 , 28 ,189 ,104 ,200 , 15 ,131 , 33 ,121 , 18 ,146 , 26 ,162 , 18 ,155 , 12 ,175 , 12 ,278 , 21 ,331 , 13 ,240 ,  7 ,148 ,  9 ,104 , 11 , 84 , 15 , 83 ,  8 ,163 , 71 ,339 ,  
+ 17 ,  4 ,  8 ,  0 ,  5 ,  0 ,  6 ,  0 ,  5 ,  0 , 13 ,  0 ,  3 ,  1 , 12 ,  4 , 14 ,  1 ,  8 ,  2 , 14 ,  1 , 20 ,  0 , 16 ,  0 , 23 ,  7 , 34 , 10 , 63 , 44 , 85 , 19 , 53 , 13 , 66 , 12 , 63 , 10 , 70 ,  5 , 66 , 12 , 49 ,  5 ,106 ,  8 , 93 , 11 , 81 ,  6 , 87 ,  2 ,101 ,  3 , 78 ,  3 , 58 ,  4 , 93 , 32 ,155 ,  
+ 13 ,  3 ,  4 ,  1 ,  8 ,  0 ,  3 ,  1 ,  6 ,  1 , 10 ,  0 ,  8 ,  0 , 11 ,  3 , 14 ,  2 ,  9 ,  2 , 13 ,  4 , 14 ,  3 , 13 ,  4 , 18 ,  6 , 20 , 10 , 66 , 16 , 57 , 10 , 27 ,  9 , 44 , 10 , 35 ,  4 , 30 ,  7 , 42 ,  5 , 38 ,  1 , 42 ,  9 , 48 ,  2 , 42 ,  1 , 41 ,  6 , 52 ,  2 , 70 ,  2 , 71 ,  3 , 77 , 16 ,130 ,  
+  3 ,  2 ,  2 ,  1 ,  2 ,  1 ,  1 ,  0 ,  3 ,  0 ,  4 ,  0 ,  2 ,  0 ,  6 ,  4 ,  9 ,  0 ,  6 ,  3 ,  7 ,  1 ,  6 ,  1 ,  8 ,  0 , 12 ,  2 , 17 ,  4 , 50 , 16 , 50 ,  3 , 22 ,  8 , 24 ,  4 , 20 ,  3 , 20 ,  3 , 27 ,  1 , 24 ,  2 , 41 ,  1 , 31 ,  1 , 29 ,  1 , 24 ,  0 , 30 ,  0 , 31 ,  2 , 29 ,  1 , 36 , 15 , 76 ,  
+ 35 ,  7 , 15 ,  1 ,  6 ,  0 ,  3 ,  1 ,  9 ,  1 , 11 ,  0 , 10 ,  1 , 24 ,  4 , 28 ,  0 ,  9 ,  1 , 15 ,  6 , 23 ,  2 , 29 ,  5 , 32 ,  3 , 40 , 15 ,121 , 64 ,136 , 17 , 49 , 11 , 49 , 12 , 50 , 10 , 64 ,  4 , 53 ,  7 , 42 ,  9 , 57 ,  8 , 75 ,  3 , 47 ,  3 , 39 ,  7 , 38 ,  2 , 35 ,  7 , 52 ,  3 , 87 , 40 ,194 ,  
+ 
+
+/* Disabled alternative data, not compiled: 16 flat rows, then 16 brace-delimited rows.
+ 95 ,284 , 30 , 82 , 36 ,143 , 37 ,148 , 34 ,174 , 45 ,164 , 56 ,241 ,101 ,690 ,145 ,296 ,101 ,283 ,118 ,313 ,129 ,346 ,136 ,448 ,185 ,543 ,283 ,1035 ,725 ,3842 ,1009 ,1273 ,347 ,757 ,243 ,543 ,225 ,381 ,162 ,362 ,130 ,294 ,124 ,358 ,188 ,722 ,128 ,260 , 90 ,200 , 64 ,200 , 64 ,160 , 50 ,153 , 34 ,129 , 60 ,374 ,171 ,  
+ 90 ,162 , 21 , 68 , 15 , 76 , 26 , 83 , 25 ,123 , 34 ,119 , 46 ,158 , 80 ,374 , 87 ,232 , 84 ,204 , 90 ,247 ,138 ,283 ,155 ,398 ,217 ,453 ,244 ,771 ,556 ,2108 ,915 ,1372 ,406 ,579 ,239 ,393 ,169 ,273 ,156 ,246 , 88 ,183 , 84 ,197 ,108 ,383 , 72 ,178 , 35 ,105 , 42 ,127 , 31 , 97 , 25 , 68 , 20 , 61 , 23 ,122 , 78 ,  
+ 88 ,255 , 38 ,112 , 31 ,139 , 47 ,134 , 46 ,215 , 47 ,205 , 78 ,250 ,123 ,672 ,133 ,328 ,117 ,295 ,137 ,344 ,139 ,371 ,197 ,472 ,212 ,620 ,322 ,980 ,694 ,2402 ,1096 ,2773 ,1079 ,1699 ,549 ,971 ,317 ,650 ,277 ,552 ,185 ,381 ,147 ,428 ,206 ,799 ,125 ,293 , 82 ,212 , 64 ,211 , 60 ,151 , 71 ,163 , 29 ,126 , 33 ,280 ,120 ,  
+ 40 , 78 , 25 , 29 , 19 , 57 , 18 , 53 , 14 , 58 , 33 , 85 , 36 ,100 , 56 ,224 , 52 ,109 , 47 ,133 , 44 ,122 , 70 ,134 , 70 ,200 , 81 ,208 ,142 ,343 ,223 ,827 ,373 ,1018 ,512 ,933 ,314 ,509 ,170 ,300 ,111 ,189 , 80 ,164 , 76 ,143 , 77 ,298 , 78 ,112 , 34 , 85 , 40 , 57 , 20 , 59 , 20 , 47 , 11 , 50 , 22 , 73 , 56 ,  
+ 65 ,161 , 25 , 59 , 18 ,103 , 37 , 88 , 45 ,106 , 39 ,105 , 51 ,172 , 76 ,492 ,103 ,214 , 59 ,212 , 65 ,246 ,104 ,281 ,126 ,340 ,153 ,381 ,214 ,604 ,357 ,1262 ,444 ,1463 ,679 ,1863 ,785 ,1296 ,449 ,685 ,307 ,517 ,162 ,314 ,161 ,366 ,132 ,606 ,112 ,232 , 68 ,187 , 70 ,161 , 41 ,123 , 38 ,119 , 36 ,112 , 35 ,194 , 99 ,  
+ 46 , 76 , 18 , 37 , 16 , 52 , 12 , 54 ,  7 , 48 , 14 , 69 , 28 ,112 , 38 ,219 , 57 ,116 , 56 , 96 , 37 , 93 , 38 ,121 , 82 ,178 ,107 ,177 , 78 ,259 ,181 ,491 ,214 ,554 ,302 ,717 ,382 ,788 ,306 ,459 ,199 ,311 ,134 ,235 ,120 ,207 , 85 ,308 , 70 ,135 , 45 , 79 , 41 , 80 , 23 , 72 , 12 , 54 ,  7 , 56 , 23 ,113 , 59 ,  
+111 ,226 , 21 ,100 , 36 ,125 , 50 ,158 , 62 ,172 , 55 ,177 , 71 ,288 ,113 ,633 ,129 ,326 , 91 ,268 ,113 ,362 ,133 ,321 ,136 ,424 ,156 ,461 ,240 ,643 ,470 ,1336 ,464 ,1271 ,503 ,1739 ,898 ,2410 ,961 ,1777 ,643 ,1105 ,332 ,709 ,304 ,619 ,294 ,973 ,213 ,386 ,123 ,282 , 78 ,254 , 79 ,222 , 54 ,185 , 57 ,160 , 57 ,264 ,153 ,  
+ 80 ,188 , 28 , 87 , 31 ,117 , 35 ,108 , 38 ,147 , 54 ,184 , 58 ,188 ,121 ,584 ,108 ,312 , 93 ,231 , 89 ,272 ,109 ,303 ,132 ,326 ,146 ,377 ,156 ,496 ,297 ,1039 ,320 ,825 ,281 ,1010 ,459 ,1403 ,696 ,2078 ,895 ,1575 ,441 ,800 ,260 ,567 ,315 ,891 ,239 ,357 ,118 ,240 , 91 ,212 , 93 ,184 , 69 ,174 , 56 ,129 , 52 ,284 ,120 ,  
+ 89 ,177 , 17 , 70 , 23 , 77 , 39 ,110 , 31 ,147 , 44 ,138 , 64 ,183 , 81 ,461 , 87 ,234 , 79 ,236 , 70 ,253 ,113 ,244 ,125 ,290 ,111 ,241 ,124 ,384 ,221 ,940 ,251 ,599 ,220 ,602 ,320 ,840 ,336 ,1185 ,638 ,1811 ,664 ,1197 ,307 ,751 ,344 ,942 ,213 ,377 ,131 ,259 , 69 ,201 , 66 ,164 , 48 ,171 , 53 ,136 , 36 ,272 ,142 ,  
+ 91 ,130 , 27 , 69 , 21 , 85 , 23 , 93 , 36 ,142 , 26 ,121 , 30 ,182 , 65 ,427 , 83 ,201 , 59 ,175 , 64 ,211 , 67 ,198 ,100 ,257 ,107 ,245 ,139 ,322 ,184 ,723 ,214 ,430 ,169 ,445 ,208 ,572 ,228 ,650 ,328 ,1034 ,483 ,1618 ,676 ,1115 ,348 ,1025 ,233 ,413 ,137 ,271 , 81 ,227 , 77 ,170 , 45 ,144 , 56 ,111 , 45 ,178 ,115 ,  
+ 89 ,200 , 30 , 88 , 21 ,110 , 35 ,102 , 39 ,130 , 46 ,148 , 35 ,234 , 95 ,522 ,128 ,236 , 85 ,188 , 81 ,233 , 84 ,251 , 84 ,282 ,117 ,285 ,121 ,370 ,193 ,806 ,217 ,484 ,162 ,469 ,161 ,494 ,205 ,518 ,241 ,622 ,259 ,918 ,594 ,1431 ,675 ,1505 ,338 ,455 ,129 ,273 , 84 ,236 , 82 ,201 , 80 ,142 , 44 ,142 , 31 ,294 ,162 ,  
+237 ,546 , 61 ,167 , 63 ,238 , 85 ,255 , 71 ,357 , 81 ,323 , 97 ,417 ,186 ,1285 ,302 ,588 ,189 ,469 ,174 ,517 ,224 ,561 ,207 ,619 ,234 ,593 ,285 ,800 ,469 ,1935 ,468 ,983 ,329 ,845 ,281 ,927 ,328 ,944 ,378 ,980 ,372 ,1070 ,523 ,1544 ,857 ,3091 ,995 ,1478 ,465 ,811 ,245 ,613 ,188 ,442 ,166 ,445 ,132 ,334 ,123 ,759 ,360 ,  
+ 86 ,220 , 34 , 85 , 27 , 93 , 35 ,102 , 39 ,113 , 57 ,138 , 45 ,182 , 71 ,489 , 98 ,221 , 84 ,170 , 78 ,230 , 93 ,248 , 81 ,237 ,109 ,255 ,126 ,365 ,188 ,752 ,209 ,422 ,133 ,331 ,163 ,400 ,133 ,401 ,151 ,410 ,155 ,425 ,153 ,468 ,194 ,916 ,246 ,491 ,205 ,413 ,201 ,373 ,153 ,278 , 96 ,197 , 76 ,162 , 72 ,342 ,210 ,  
+ 68 ,184 , 17 , 52 , 12 , 86 , 35 , 70 , 18 ,100 , 38 ,102 , 35 ,134 , 76 ,373 , 55 ,185 , 48 ,130 , 65 ,200 , 68 ,177 , 77 ,225 , 90 ,186 , 73 ,271 ,128 ,509 ,126 ,343 ,103 ,286 ,103 ,297 ,133 ,252 ,107 ,269 , 87 ,244 ,114 ,289 ,160 ,621 ,145 ,281 ,101 ,270 ,120 ,265 ,132 ,257 ,142 ,320 ,115 ,201 , 58 ,329 ,193 ,  
+ 52 , 96 ,  4 , 25 , 12 , 39 ,  9 , 36 , 15 , 45 , 22 , 32 , 22 , 64 , 36 ,154 , 34 , 79 , 23 , 61 , 28 , 92 , 18 , 66 , 30 , 85 , 41 , 90 , 48 ,140 , 55 ,250 , 57 ,112 , 42 ,103 , 38 ,128 , 44 , 99 , 43 ,112 , 45 ,115 , 38 ,131 , 63 ,288 , 57 ,138 , 31 , 99 , 31 , 94 , 37 , 96 , 30 ,131 , 45 ,117 , 52 ,161 , 94 ,  
+147 ,381 , 25 ,105 , 21 ,147 , 37 ,119 , 43 ,177 , 37 ,168 , 49 ,204 ,106 ,620 ,102 ,238 , 65 ,200 , 57 ,231 , 97 ,191 ,101 ,224 , 89 ,222 , 95 ,394 ,199 ,748 ,171 ,384 ,125 ,322 ,100 ,301 ,118 ,289 ,125 ,304 , 92 ,234 ,100 ,331 ,163 ,834 ,175 ,334 , 81 ,274 , 64 ,266 , 85 ,212 ,113 ,285 , 63 ,206 , 96 ,690 ,402 ,  
+	{
+		  36,      69,       8,      21,       8,      28,      15,      29,
+		  11,      32,      14,      33,      15,      41,      25,     109,
+		  22,      61,      32,      62,      32,      78,      49,      91,
+		  64,     126,     102,     165,     207,     615,    1860,    2163,
+		1514,     598,     254,     180,     105,     131,      75,      70,
+		  43,      60,      38,      48,      31,      60,      36,     105,
+		  26,      47,      29,      35,      17,      32,      16,      25,
+		   7,      29,       8,      21,       7,      49,      49,
+	},
+	{
+		  11,      22,       6,       8,       8,      11,       5,      11,
+		   1,      14,       5,      16,      13,      23,      14,      32,
+		  18,      29,      25,      32,      26,      37,      32,      52,
+		  55,      83,      94,     145,     201,     458,    1233,    2600,
+		1790,    1313,     420,     221,     125,     126,      94,      71,
+		  38,      41,      30,      40,      26,      29,      22,      47,
+		  24,      23,      12,      17,      15,      14,      14,      14,
+		   8,       9,       9,       9,       2,      17,      23,
+	},
+	{
+		  29,      32,       9,      22,       7,      21,      10,      11,
+		   5,      27,      16,      17,      13,      23,      20,      59,
+		  14,      35,      14,      32,      25,      41,      43,      53,
+		  40,      75,      61,     135,     146,     298,     635,    1241,
+		2473,    1648,    1042,     437,     182,     146,      94,      94,
+		  55,      49,      37,      52,      28,      51,      34,      49,
+		  22,      26,      13,      22,      15,      23,      10,      20,
+		  14,      26,      10,      14,       5,      27,      37,
+	},
+	{
+		  18,      18,      10,      10,      16,      20,       6,      22,
+		  16,      30,      14,      38,      16,      24,      12,      59,
+		  28,      61,      36,      53,      18,      59,      59,      57,
+		  43,      86,     100,     137,     178,     291,     560,    1069,
+		1243,    1919,    1102,     866,     346,     270,     153,     137,
+		  90,      63,      43,      34,      38,      61,      36,      57,
+		  24,      47,      18,      30,      26,      24,      10,      24,
+		  14,      36,       8,      12,       4,      34,      34,
+	},
+	{
+		  38,      61,      15,      22,      12,      33,      22,      33,
+		  17,      35,      10,      33,      12,      27,      15,      86,
+		  25,      63,      33,      78,      35,      71,      55,      89,
+		  55,     165,      83,     132,     172,     256,     462,     778,
+		 671,     989,    1261,    1045,     803,     524,     259,     231,
+		 109,     109,      61,      89,      48,      76,      66,     101,
+		  55,      58,      20,      45,      20,      33,      27,      40,
+		  12,      33,      10,      25,       2,      71,      53,
+	},
+	{
+		  39,      35,       7,      39,      10,      31,      24,      21,
+		  35,      46,      35,      46,      21,      46,      17,      78,
+		  24,      49,      24,      99,      67,      74,      42,      92,
+		  56,     127,      95,     202,     191,     414,     475,     613,
+		 652,     656,     652,    1042,     680,     769,     574,     333,
+		 205,     195,      88,      74,      88,      74,      70,      81,
+		  53,      95,      46,      39,      24,      46,      14,      67,
+		  24,      17,       7,      28,      14,      63,      28,
+	},
+	{
+		  57,      58,       7,      29,       7,      36,       7,      58,
+		  27,      47,      22,      58,      23,      53,      33,      91,
+		  29,      84,      38,      57,      66,      69,      79,      84,
+		  77,      97,      93,     169,     139,     285,     364,     597,
+		 480,     388,     448,     636,     774,     995,     691,     551,
+		 320,     241,     160,     169,      99,     125,      79,     123,
+		  68,      79,      33,      60,      20,      47,      34,      62,
+		  22,      62,      25,      51,      27,      84,      77,
+	},
+	{
+		  91,      88,      16,      40,      13,      32,      10,      42,
+		  16,      53,      21,      50,      13,      66,      53,     107,
+		  34,      72,      29,      58,      32,      74,      64,      88,
+		  85,     109,      96,     168,     149,     326,     323,     495,
+		 374,     425,     364,     471,     412,     554,     589,     787,
+		 527,     460,     313,     214,     163,     168,     149,     184,
+		 104,     109,      61,      93,      66,      80,      37,      64,
+		  29,      40,      40,      29,      13,      72,      66,
+	},
+	{
+		  76,      83,       6,      56,      16,      46,      36,      73,
+		  26,      69,      36,      69,      53,      66,      56,     103,
+		  66,     106,      49,      79,      36,      93,      83,     133,
+		  79,     119,      89,     136,     119,     233,     226,     442,
+		 319,     306,     236,     312,     266,     409,     372,     532,
+		 502,     675,     496,     386,     236,     262,     239,     206,
+		 163,     143,      93,      96,      59,      99,      53,      93,
+		  53,      59,      33,      53,      36,      99,     123,
+	},
+	{
+		  60,      86,      18,      33,      18,      63,      11,      60,
+		  18,     105,      30,      48,      56,      90,      33,      82,
+		  78,     101,      30,     101,      67,     116,      75,     123,
+		  45,     108,      97,     131,      71,     213,     217,     330,
+		 292,     255,     180,     210,     243,     371,     258,     390,
+		 240,     562,     446,     675,     480,     498,     318,     348,
+		 225,     247,     112,     135,      93,      82,      48,      90,
+		  67,      90,      52,      45,      22,      90,      97,
+	},
+	{
+		  76,     129,      10,      45,       3,      48,      20,      66,
+		  20,      73,      17,      59,      17,      76,      31,     125,
+		  55,      94,      38,      80,      41,     108,      80,     111,
+		  87,     104,      55,     108,      76,     160,     234,     451,
+		 290,     248,     199,     223,     139,     297,     150,     318,
+		 199,     342,     321,     489,     433,     605,     563,     510,
+		 391,     286,     171,     139,     115,     108,      73,     108,
+		  59,     115,      48,      31,      20,     139,     132,
+	},
+	{
+		  93,     100,      15,      36,      24,      51,      17,      58,
+		  26,      58,      15,      68,      20,      58,      55,     201,
+		  79,      96,      39,      85,      49,     125,      53,      85,
+		  51,      89,      83,     112,      96,     186,     224,     440,
+		 224,     190,     115,     178,     100,     193,     114,     250,
+		 171,     229,     157,     294,     247,     421,     437,     859,
+		 482,     530,     336,     285,     186,     231,     142,     155,
+		  77,     112,      76,      74,      47,     125,     144,
+	},
+	{
+		 137,      99,      12,      54,      15,      54,      25,      60,
+		  25,      76,      25,      70,      44,      54,      76,     127,
+		  60,      70,      19,      99,      67,     127,      54,     127,
+		  79,      95,      60,      76,      60,     172,     134,     338,
+		 213,     213,      89,     150,     137,     210,     111,     182,
+		  95,     255,     105,     255,     150,     322,     265,     427,
+		 255,     472,     408,     443,     415,     495,     306,     287,
+		 178,     175,     134,     111,      83,     185,     252,
+	},
+	{
+		 105,     149,      28,      64,      32,      72,      24,      72,
+		  12,      64,      24,      64,      12,      64,      48,     157,
+		  80,      76,      40,      76,      56,     117,      64,     109,
+		  76,     141,      64,     109,      76,     198,     145,     307,
+		 133,     137,     109,     125,      93,     226,      80,     153,
+		 113,     214,      97,     234,      88,     218,     258,     384,
+		 234,     234,     214,     283,     222,     432,     311,     465,
+		 384,     343,     258,     283,     190,     323,     404,
+	},
+	{
+		 263,     160,      22,      80,      11,     148,      11,      91,
+		  34,     137,      22,      91,      80,     137,      34,     171,
+		  22,      57,      22,     102,      45,     160,      80,      91,
+		  80,     171,      45,     114,      80,     137,     171,     286,
+		  57,     148,      80,     286,     125,     160,     125,     171,
+		 102,     102,      57,     217,     125,     160,      80,     286,
+		 114,     102,     148,     251,     217,     297,     137,     343,
+		 251,     514,     228,     400,     331,     526,     675,
+	},
+	{
+		 188,     256,      21,      68,      21,     108,      25,     119,
+		  32,     101,      21,      54,      10,      83,      61,     151,
+		  68,      94,      43,     126,      43,     108,      43,     151,
+		  65,     137,      68,     159,      86,     209,     144,     368,
+		 159,     188,      72,     155,      83,     188,      97,     137,
+		  65,     155,      97,     144,      57,     195,      97,     249,
+		 104,     213,      61,     169,     101,     278,     101,     209,
+		 144,     365,     173,     285,     216,     824,    1356,
+	}
+*/
+};
+
+
+HUFF_NODE YMvHuffTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS-1];   // per table: one node fewer than there are tokens; no initialiser here
+UINT32 YMvPatternTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS];     // NOTE(review): presumably the code bit pattern per token - confirm where it is filled
+UINT8  YMvBitsTables[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS];        // NOTE(review): presumably the code length in bits per token - confirm where it is filled
+
+static UINT32 VP5_YMvFrequencyCounts[MV_ENTROPY_TABLES][MV_ENTROPY_TOKENS] = 
+{
+ 19 , 12 , 31 , 74 , 10 , 11 , 33 , 66 , 10 , 14 , 26 , 82 , 14 , 27 , 22 ,118 , 27 , 28 , 49 ,170 , 32 , 54 , 70 ,313 , 94 ,101 ,190 ,999 ,326 ,643 ,695 ,1793 ,370 ,583 ,290 ,393 ,100 , 72 , 94 ,215 , 48 , 45 , 59 ,122 , 30 , 33 , 29 , 93 , 14 , 21 , 24 , 78 ,  6 , 19 , 20 , 43 ,  7 , 12 , 15 , 66 , 17 ,  8 , 15 ,  
+  4 ,  5 ,  2 , 11 ,  3 ,  2 ,  2 , 10 ,  5 ,  1 ,  4 , 17 ,  5 ,  8 ,  9 , 28 ,  6 , 20 , 26 , 68 , 22 , 43 , 51 , 94 , 63 , 75 , 84 ,179 ,203 ,308 ,352 ,1106 ,655 ,1312 ,535 ,643 ,219 ,117 ,108 ,153 , 74 , 53 , 48 , 77 , 31 , 31 , 23 , 39 , 16 , 12 ,  6 , 21 , 10 , 15 ,  9 ,  9 ,  5 ,  4 ,  2 , 17 ,  7 , 11 ,  6 ,  
+  6 ,  6 ,  2 , 10 ,  4 ,  9 ,  4 , 17 ,  5 , 10 ,  5 , 16 ,  9 , 13 , 11 , 44 , 16 , 31 , 17 , 46 , 24 , 39 , 39 , 89 , 55 , 48 , 62 ,139 ,126 ,276 ,210 ,715 ,742 ,1649 ,361 ,945 ,265 ,242 ,110 ,206 , 93 , 78 , 55 ,101 , 31 , 45 , 23 , 68 , 22 , 25 , 16 , 22 ,  6 , 14 ,  9 , 23 ,  3 , 17 ,  9 , 26 ,  2 , 10 ,  8 ,  
+  2 ,  5 ,  2 ,  6 ,  3 ,  3 ,  2 , 11 ,  3 ,  8 ,  3 , 12 ,  2 , 12 ,  7 , 23 , 11 , 13 ,  8 , 28 , 16 , 31 , 30 , 65 , 28 , 43 , 69 ,110 ,120 ,149 ,183 ,502 ,403 ,627 ,370 ,750 ,259 ,180 ,136 ,173 , 71 , 50 , 39 , 67 , 34 , 27 , 23 , 44 , 15 , 13 ,  6 , 21 ,  9 , 11 ,  3 , 12 ,  3 ,  2 ,  2 , 13 ,  2 ,  9 ,  4 ,  
+  6 ,  5 ,  3 , 22 ,  3 ,  6 ,  2 , 15 ,  4 ,  4 ,  3 , 24 ,  4 ,  8 , 10 , 32 , 13 , 12 , 12 , 48 , 15 , 20 , 26 ,117 , 33 , 45 , 58 ,181 , 87 ,146 ,146 ,891 ,341 ,639 ,524 ,1885 ,524 ,313 ,300 ,1314 ,194 ,108 , 74 ,252 , 48 , 53 , 30 ,140 , 31 , 42 , 14 , 58 , 13 , 15 , 10 , 39 ,  5 , 11 ,  4 , 31 ,  6 , 14 ,  7 ,  
+  1 ,  4 ,  1 ,  9 ,  0 ,  0 ,  6 , 10 ,  2 ,  2 ,  1 ,  9 ,  2 ,  1 ,  8 , 13 ,  4 ,  7 ,  4 , 13 , 10 ,  8 ,  8 , 32 , 19 , 21 , 24 , 58 , 54 , 52 , 57 ,163 , 87 ,164 ,199 ,599 ,183 ,127 ,168 ,557 ,119 ,104 , 59 ,108 , 49 , 24 , 20 , 53 , 19 , 11 ,  7 , 17 ,  6 ,  3 ,  3 , 10 ,  7 ,  4 ,  4 ,  6 ,  4 ,  3 ,  0 ,  
+  4 ,  5 ,  1 , 11 ,  1 ,  3 ,  5 ,  6 ,  2 ,  9 ,  3 ,  8 ,  3 ,  5 ,  4 , 19 ,  4 ,  9 ,  8 , 18 ,  6 , 12 , 10 , 35 , 16 , 29 , 26 , 60 , 47 , 82 , 64 ,120 , 70 ,185 ,145 ,335 ,155 ,118 ,104 ,405 ,100 , 85 , 57 ,192 , 49 , 81 , 45 , 83 , 22 , 20 , 16 , 42 , 15 , 17 ,  6 , 35 ,  7 , 14 ,  6 , 22 ,  9 ,  8 ,  5 ,  
+  6 ,  7 ,  1 , 12 ,  3 ,  8 ,  4 , 14 ,  7 ,  7 ,  3 , 23 ,  8 ,  5 ,  8 , 33 ,  6 , 15 ,  4 , 33 , 13 , 21 , 20 , 69 , 23 , 38 , 40 ,135 , 57 , 91 , 84 ,287 ,133 ,196 ,172 ,730 ,229 ,174 ,234 ,1521 ,194 ,140 ,124 ,895 ,130 ,115 , 87 ,443 , 73 , 77 , 36 ,109 , 32 , 34 , 19 , 82 , 16 , 16 ,  6 , 79 , 11 , 19 , 23 ,  
+  1 ,  2 ,  3 ,  3 ,  2 ,  3 ,  0 ,  3 ,  1 ,  2 ,  3 ,  4 ,  4 ,  3 ,  1 , 10 ,  2 ,  3 ,  3 , 11 ,  6 ,  5 ,  5 , 12 ,  8 , 12 , 18 , 31 , 24 , 39 , 39 , 68 , 50 , 61 , 45 , 74 , 36 , 43 , 32 ,133 , 25 , 37 , 16 ,101 , 15 , 29 , 20 , 50 , 10 , 26 , 15 , 50 , 12 , 23 , 12 , 21 ,  6 ,  8 ,  4 , 19 ,  8 ,  7 ,  4 ,  
+  2 ,  3 ,  2 ,  8 ,  2 ,  2 ,  2 ,  9 ,  2 ,  2 ,  2 , 10 ,  3 ,  4 ,  4 , 18 ,  2 , 10 ,  2 , 31 ,  6 , 12 ,  9 , 29 , 12 , 22 , 19 , 54 , 37 , 46 , 34 ,131 , 65 , 78 , 64 ,149 , 54 , 65 , 68 ,388 , 46 , 26 , 57 ,620 , 59 , 33 , 31 ,471 , 37 , 37 , 29 ,158 , 21 , 39 , 25 ,171 , 21 , 36 ,  8 , 70 , 14 , 12 , 14 ,  
+  0 ,  3 ,  1 ,  4 ,  1 ,  3 ,  1 ,  5 ,  1 ,  6 ,  2 ,  6 ,  3 ,  2 ,  2 ,  8 ,  2 ,  9 ,  4 ,  6 ,  2 ,  2 ,  3 , 18 ,  8 ,  4 ,  9 , 29 , 19 , 34 , 28 , 38 , 22 , 28 , 19 , 27 , 12 , 18 , 14 , 68 , 13 , 15 ,  8 , 36 ,  6 , 26 ,  3 , 34 ,  6 , 22 ,  3 , 16 ,  4 , 19 ,  3 , 11 ,  8 , 14 ,  4 , 48 , 15 , 23 ,  7 ,  
+  1 ,  3 ,  1 , 12 ,  1 ,  1 ,  3 , 12 ,  3 ,  4 ,  7 , 13 ,  2 ,  5 ,  4 , 21 ,  5 , 14 ,  6 , 20 ,  7 , 10 , 14 , 29 ,  8 , 25 , 22 , 50 , 27 , 54 , 33 ,115 , 57 , 89 , 43 ,121 , 33 , 56 , 32 ,149 , 19 , 30 , 22 ,295 , 22 , 30 , 26 ,411 , 28 , 21 , 16 ,267 , 25 , 28 , 15 , 99 , 12 , 32 , 17 ,128 , 29 , 61 , 52 ,  
+  2 ,  4 ,  4 , 12 ,  1 ,  4 ,  1 ,  7 ,  1 ,  1 ,  3 , 11 ,  2 ,  7 ,  5 , 13 ,  2 ,  9 ,  3 , 19 ,  5 ,  8 ,  9 , 17 , 11 ,  8 , 12 , 34 , 24 , 41 , 25 , 55 , 31 , 58 , 29 , 54 , 17 , 26 , 21 , 52 , 14 , 17 , 18 , 68 ,  4 , 18 , 13 ,170 , 13 , 22 , 14 ,179 , 18 , 15 ,  5 ,120 , 14 , 18 , 12 ,102 , 26 , 49 , 29 ,  
+  5 ,  3 ,  0 ,  8 ,  2 ,  2 ,  2 ,  4 ,  0 ,  5 ,  0 ,  5 ,  0 ,  2 ,  0 ,  8 ,  0 ,  3 ,  3 , 12 ,  1 ,  3 ,  3 , 23 ,  8 , 16 ,  7 , 26 , 13 , 20 , 18 , 45 , 22 , 28 , 22 , 41 , 14 , 16 , 16 , 49 , 11 , 15 , 11 , 73 ,  3 , 10 ,  7 , 61 ,  7 ,  9 ,  5 , 90 , 12 , 12 ,  7 , 63 , 14 ,  8 ,  4 , 83 , 13 , 19 , 15 ,  
+  3 ,  4 ,  3 ,  9 ,  0 ,  2 ,  0 ,  3 ,  0 ,  2 ,  3 ,  7 ,  0 ,  3 ,  1 ,  8 ,  2 ,  8 ,  1 ,  5 ,  3 ,  6 ,  2 , 18 ,  3 , 12 ,  8 , 34 , 15 , 19 , 12 , 47 , 19 , 25 , 21 , 35 , 15 , 16 , 10 , 33 ,  6 , 12 ,  6 , 39 ,  4 , 15 ,  2 , 48 ,  6 , 10 ,  8 , 42 ,  3 ,  5 ,  4 , 73 ,  4 ,  1 ,  5 ,152 , 11 , 15 , 18 ,  
+  0 ,  2 ,  1 ,  1 ,  0 ,  4 ,  0 ,  3 ,  0 ,  3 ,  0 ,  4 ,  0 ,  4 ,  1 ,  2 ,  1 ,  3 ,  0 ,  5 ,  0 ,  3 ,  0 ,  7 ,  4 ,  7 ,  6 , 15 ,  9 , 13 ,  8 , 20 ,  8 , 16 , 10 , 10 ,  5 ,  6 ,  2 , 12 ,  2 , 11 ,  1 , 10 ,  0 , 10 ,  2 , 11 ,  0 ,  2 ,  0 ,  7 ,  3 ,  2 ,  3 ,  9 ,  2 ,  5 ,  0 ,  6 ,  3 , 10 ,  4 ,  
+/*
+ 36 , 94 ,136 ,248 , 42 , 90 , 62 ,284 , 73 , 93 , 87 ,430 ,119 ,147 ,180 ,888 ,157 ,216 ,203 ,716 ,131 ,208 ,243 ,1216 ,272 ,346 ,457 ,2737 ,863 ,1978 ,2840 ,8045 ,1609 ,1342 ,538 ,1273 ,316 ,287 ,195 ,794 ,177 ,199 ,134 ,584 ,193 ,198 ,163 ,755 ,145 ,162 , 99 ,370 ,106 , 76 , 49 ,313 , 64 , 71 , 38 ,252 ,165 , 75 , 39 ,  
+ 23 , 53 , 71 ,150 , 34 , 48 , 45 ,176 , 57 , 65 , 76 ,251 , 75 ,115 ,138 ,556 ,130 ,153 ,128 ,428 , 94 ,122 ,172 ,606 ,188 ,219 ,287 ,917 ,407 ,804 ,1000 ,5395 ,2602 ,2741 ,1082 ,2329 ,428 ,261 ,215 ,727 ,174 ,154 ,128 ,431 ,149 ,138 ,143 ,593 ,155 ,108 , 95 ,263 , 85 , 62 , 46 ,193 , 49 , 60 , 43 ,168 , 81 , 65 , 33 ,  
+ 37 , 50 , 64 ,112 , 30 , 51 , 35 ,186 , 44 , 69 , 50 ,251 , 58 ,106 ,126 ,484 , 95 ,167 ,105 ,314 , 97 ,115 ,138 ,527 ,131 ,203 ,185 ,621 ,298 ,678 ,761 ,3375 ,2087 ,2890 ,1483 ,2215 ,482 ,432 ,260 ,745 ,180 ,208 ,128 ,411 ,152 ,134 ,136 ,568 ,124 ,152 , 78 ,226 , 72 , 92 , 51 ,181 , 39 , 66 , 28 ,149 ,102 , 71 , 22 ,  
+ 16 , 30 , 45 , 84 , 14 , 51 , 29 ,100 , 28 , 49 , 38 ,159 , 48 , 78 , 70 ,326 , 81 , 77 , 71 ,243 , 65 , 64 ,100 ,330 ,100 ,135 ,149 ,378 ,210 ,370 ,370 ,1619 ,907 ,1450 ,1128 ,2217 ,513 ,260 ,240 ,710 ,200 ,128 ,118 ,320 ,128 , 94 , 86 ,353 , 92 , 98 , 80 ,147 , 50 , 36 , 31 ,127 , 45 , 27 , 24 , 78 , 63 , 38 , 22 ,  
+ 30 , 76 ,122 ,219 , 38 , 74 , 58 ,308 , 50 , 97 ,116 ,409 , 89 ,168 ,143 ,810 ,146 ,169 ,189 ,626 ,150 ,194 ,199 ,809 ,177 ,230 ,261 ,1073 ,322 ,567 ,620 ,3684 ,1380 ,1976 ,1723 ,7569 ,1900 ,854 ,742 ,3046 ,468 ,334 ,267 ,990 ,273 ,249 ,215 ,1029 ,273 ,190 ,133 ,524 ,120 ,107 , 58 ,358 , 77 , 84 , 52 ,292 ,139 , 82 , 49 ,  
+  7 , 23 , 47 , 61 , 22 , 23 , 21 ,103 , 14 , 22 , 33 ,136 , 54 , 45 , 57 ,229 , 48 , 54 , 62 ,169 , 52 , 63 , 76 ,241 , 68 , 68 ,115 ,272 ,140 ,153 ,185 ,772 ,302 ,420 ,445 ,1755 ,642 ,242 ,319 ,1349 ,276 ,128 ,105 ,421 ,151 , 86 , 83 ,362 , 77 , 57 , 66 ,167 , 64 , 27 , 34 ,134 , 32 , 29 , 14 , 90 , 61 , 38 , 24 ,  
+ 23 , 17 , 46 , 77 , 18 , 40 , 28 ,109 , 20 , 37 , 34 ,157 , 36 , 72 , 47 ,242 , 50 , 60 , 69 ,205 , 63 , 75 , 77 ,240 , 77 , 98 ,118 ,363 ,181 ,266 ,269 ,889 ,305 ,457 ,344 ,1010 ,388 ,304 ,365 ,1372 ,317 ,167 ,131 ,572 ,191 ,109 ,110 ,438 , 98 , 93 , 75 ,213 , 83 , 58 , 57 ,157 , 45 , 46 , 32 , 96 , 63 , 78 , 31 ,  
+ 38 , 89 , 80 ,239 , 44 , 71 , 68 ,286 , 59 , 98 , 76 ,355 , 94 ,145 ,134 ,677 ,142 ,149 ,163 ,540 ,135 ,154 ,157 ,706 ,143 ,212 ,239 ,882 ,307 ,403 ,489 ,1903 ,579 ,795 ,614 ,2498 ,984 ,648 ,760 ,5794 ,1295 ,394 ,465 ,2504 ,467 ,394 ,289 ,1266 ,232 ,250 ,164 ,561 ,146 ,128 , 74 ,383 ,129 ,103 , 47 ,265 ,156 , 96 , 58 ,  
+ 11 , 31 , 34 , 51 , 12 , 19 , 22 , 67 , 14 , 25 , 46 , 78 , 32 , 41 , 44 ,145 , 28 , 37 , 39 ,130 , 26 , 37 , 46 ,159 , 67 , 47 , 61 ,206 ,102 ,138 ,116 ,429 ,131 ,207 ,121 ,325 ,119 , 98 ,106 ,347 ,108 , 77 , 62 ,330 , 83 , 72 , 51 ,242 , 56 , 44 , 28 ,145 , 32 , 34 , 27 , 78 , 31 , 24 , 15 , 81 , 52 , 27 , 15 ,  
+ 39 , 72 ,103 ,179 , 29 , 52 , 53 ,207 , 40 , 62 , 66 ,258 , 75 , 80 ,114 ,420 , 81 , 89 ,125 ,392 ,105 , 88 ,120 ,488 ,115 ,119 ,184 ,614 ,194 ,321 ,286 ,1271 ,327 ,367 ,316 ,1021 ,303 ,287 ,238 ,1478 ,452 ,159 ,231 ,2465 ,662 ,191 ,265 ,1859 ,292 ,161 ,158 ,523 ,132 , 82 , 76 ,326 ,108 , 82 , 60 ,247 ,182 , 79 , 44 ,  
+ 15 , 21 , 32 , 40 , 12 , 15 , 15 , 63 , 15 , 16 , 28 , 72 , 24 , 35 , 31 ,154 , 38 , 35 , 44 ,120 , 37 , 27 , 51 ,141 , 46 , 43 , 61 ,227 ,109 ,149 ,128 ,410 ,143 ,176 ,104 ,288 , 73 , 99 , 56 ,259 , 83 , 55 , 47 ,206 , 73 , 51 , 59 ,315 , 64 , 43 , 32 ,146 , 37 , 40 , 23 , 84 , 22 , 23 , 23 , 64 , 48 , 25 , 14 ,  
+ 34 , 80 , 79 ,191 , 41 , 62 , 73 ,272 , 41 , 73 , 67 ,295 , 64 ,116 ,137 ,579 ,129 ,141 ,166 ,443 ,102 ,132 ,132 ,535 ,151 ,183 ,209 ,800 ,282 ,476 ,461 ,1521 ,406 ,526 ,351 ,1005 ,285 ,248 ,223 ,1036 ,341 ,172 ,215 ,1113 ,427 ,204 ,254 ,2027 ,432 ,172 ,189 ,828 ,198 ,118 , 99 ,437 ,127 ,108 , 89 ,355 ,204 ,114 , 50 ,  
+ 13 , 70 , 76 ,116 , 28 , 41 , 57 ,139 , 32 , 37 , 48 ,203 , 57 , 66 , 74 ,308 , 60 , 62 ,131 ,273 , 89 , 66 , 83 ,322 , 79 , 81 ,138 ,423 ,180 ,250 ,246 ,760 ,214 ,276 ,174 ,540 ,165 ,102 , 86 ,610 ,155 , 82 ,109 ,509 ,164 , 87 ,121 ,723 ,212 , 81 ,101 ,637 ,168 , 46 , 69 ,296 , 76 , 54 , 61 ,209 ,182 , 78 , 66 ,  
+ 14 , 36 , 53 ,132 , 17 , 38 , 21 ,121 , 24 , 34 , 40 ,153 , 38 , 39 , 45 ,290 , 39 , 64 , 68 ,185 , 49 , 54 , 61 ,256 , 53 , 88 , 71 ,330 ,135 ,213 ,177 ,577 ,159 ,162 ,164 ,401 ,113 , 96 ,114 ,415 ,138 , 91 , 73 ,369 ,148 , 79 , 75 ,502 ,112 , 69 , 87 ,295 , 97 , 63 , 40 ,274 ,105 , 51 , 34 ,194 ,156 , 70 , 54 ,  
+ 14 , 46 , 63 , 89 , 14 , 50 , 17 , 97 , 21 , 24 , 30 ,116 , 22 , 41 , 44 ,181 , 35 , 37 , 43 ,185 , 32 , 55 , 62 ,201 , 39 , 47 , 82 ,273 ,112 ,159 ,131 ,512 ,110 ,163 ,117 ,303 ,103 , 67 , 86 ,322 , 90 , 65 , 46 ,304 , 94 , 55 , 88 ,376 , 89 , 50 , 61 ,248 , 89 , 36 , 58 ,234 , 85 , 44 , 40 ,210 ,293 , 73 , 44 ,  
+ 17 , 16 , 21 , 40 ,  8 , 10 , 14 , 45 ,  8 , 14 ,  7 , 42 , 13 , 13 , 15 , 70 , 17 , 29 , 20 , 64 , 19 , 10 , 29 , 69 , 23 , 16 , 20 , 90 , 25 , 64 , 58 ,167 , 72 , 75 , 41 ,121 , 35 , 29 , 15 ,118 , 25 , 30 , 25 , 90 , 28 , 29 , 20 , 95 , 19 , 25 , 16 , 73 , 17 , 18 , 12 , 54 , 17 , 31 , 14 , 66 , 43 , 27 , 23 ,  
+	{
+		  25,      61,      10,      20,       8,      25,       9,      24,
+		  10,      23,       9,      23,      13,      35,      19,      77,
+		  18,      56,      21,      38,      21,      49,      35,      65,
+		  45,      86,      80,     141,     197,     521,    1533,    2830,
+		1891,     605,     232,     160,      91,     100,      63,      71,
+		  51,      57,      26,      43,      27,      36,      23,      68,
+		  22,      39,      13,      25,      14,      25,      10,      21,
+		   6,      23,       5,      19,       3,      49,      29,
+	},
+	{
+		  17,      14,       2,      15,       4,      10,       3,      16,
+		   5,      11,       6,      17,       8,      23,      17,      38,
+		  20,      34,      14,      35,      17,      34,      29,      42,
+		  47,      75,      83,     115,     145,     372,     910,    2907,
+		2317,    1284,     375,     198,     116,      93,      52,      52,
+		  35,      46,      18,      33,      23,      22,      24,      34,
+		  10,      23,       9,      16,       9,      15,       8,      10,
+		   8,       7,       4,       9,       4,      17,      10,
+	},
+	{
+		  12,      34,       9,      24,       2,      26,       6,      15,
+		   7,      22,      18,      28,      11,      31,      17,      51,
+		  18,      29,      20,      35,      21,      51,      53,      68,
+		  53,      85,      68,     120,     151,     310,     525,    1339,
+		2210,    1761,     988,     498,     210,     167,      96,      93,
+		  76,      54,      32,      44,      30,      53,      25,      60,
+		  26,      40,      24,      30,       6,      34,      13,      22,
+		   4,      26,      10,      18,       7,      35,      17,
+	},
+	{
+		  27,      33,       5,      15,       6,      15,       5,      41,
+		  12,      24,      15,      19,      12,      48,      12,      24,
+		  17,      29,       8,      34,      26,      60,      47,      69,
+		  40,      95,      94,     121,     151,     303,     468,    1041,
+		1205,    1641,    1136,     986,     548,     304,     212,     195,
+		 123,      83,      55,      76,      27,      41,      33,      60,
+		  31,      36,      29,      24,      15,      27,      10,      17,
+		   6,      33,       8,      12,       3,      48,      26,
+	},
+	{
+		  42,      61,       6,      36,      16,      40,       8,      20,
+		  30,      20,       4,      30,      28,      40,      18,      47,
+		  26,      53,      22,      44,      26,      77,      53,      79,
+		  61,     128,      77,     108,     145,     265,     298,     776,
+		 670,    1079,    1128,    1269,     840,     603,     265,     226,
+		 165,     167,      63,      75,      49,      79,      42,      94,
+		  36,      44,      14,      59,      20,      30,      16,      28,
+		  12,      38,      22,      38,      12,      67,      34,
+	},
+	{
+		  22,      45,       8,      19,       5,      31,       8,      16,
+		  16,      28,      31,      14,      19,      36,      16,      56,
+		  31,      48,      59,      59,      22,      76,      50,      59,
+		  33,      96,      96,     127,     118,     266,     379,     651,
+		 577,     722,     835,    1152,    1019,     864,     535,     325,
+		 186,     184,     118,     101,      79,      73,      59,     127,
+		  50,      70,      45,      33,      25,      39,      22,      39,
+		   8,      28,      14,       8,       8,      45,      33,
+	},
+	{
+		  38,      53,      15,      34,       5,      25,       8,      44,
+		  20,      32,      19,      20,      19,      51,      25,      74,
+		  34,      55,      36,      76,      39,      67,      60,      74,
+		  36,      95,      82,     120,     131,     190,     352,     606,
+		 400,     509,     463,     769,     888,     966,     762,     570,
+		 330,     316,     162,     174,     120,     159,      86,     127,
+		  67,      86,      44,      53,      29,      70,      31,      38,
+		  24,      31,      10,      34,      12,      53,      53,
+	},
+	{
+		  46,      80,       7,      43,       4,      24,       7,      41,
+		   7,      48,      12,      43,      24,      51,      17,      73,
+		  38,      68,      29,      68,      34,      90,      14,     109,
+		  55,     126,      58,     146,     126,     221,     250,     630,
+		 370,     462,     370,     489,     430,     613,     737,     890,
+		 747,     535,     233,     197,     163,     185,     109,      99,
+		  65,      94,      29,      80,      51,      70,      31,      53,
+		  12,      48,      14,      41,      19,      87,      60,
+	},
+	{
+		  35,      86,       6,      41,      16,      25,       9,      38,
+		  45,      38,      28,      48,      22,      51,      28,      83,
+		  41,      73,      38,      41,      32,     112,      80,      80,
+		  70,     112,      96,     119,     141,     292,     337,     521,
+		 299,     347,     241,     340,     289,     488,     402,     649,
+		 572,     810,     440,     385,     295,     273,     167,     183,
+		  90,     102,      57,     102,      54,      73,      48,      61,
+		  32,      67,      25,      64,      12,      99,     115,
+	},
+	{
+		  47,      95,      21,      39,      13,      69,       8,      52,
+		  17,      65,      21,      60,      26,      43,      21,      95,
+		  82,     112,      21,      47,      43,     138,      52,      99,
+		  82,     125,      60,     130,     143,     273,     225,     555,
+		 286,     416,     251,     316,     234,     390,     268,     360,
+		 338,     507,     438,     646,     446,     386,     308,     281,
+		 182,     186,      86,     138,      26,     143,      34,      47,
+		  43,     121,      21,      69,      13,      30,      82,
+	},
+	{
+		  90,     129,      12,      60,       8,      51,      17,      17,
+		  30,     124,      38,      60,      34,      90,      47,     120,
+		  64,      99,      38,      94,      47,     163,      64,     112,
+		  73,     133,      77,     116,     133,     323,     262,     478,
+		 288,     353,     168,     185,     150,     336,     202,     232,
+		 232,     340,     284,     422,     469,     577,     478,     409,
+		 245,     189,     120,     163,      94,     155,      51,     112,
+		  64,      68,      51,      64,      21,     137,     107,
+	},
+	{
+		  67,     111,      29,      35,      24,      65,      16,      73,
+		   8,      51,      35,      70,      10,      97,      65,     189,
+		  59,     105,      40,      94,      51,      97,      62,      97,
+		  84,     127,      86,     151,     121,     311,     281,     512,
+		 289,     349,     178,     257,     151,     257,     214,     241,
+		 176,     273,     173,     252,     235,     344,     355,     650,
+		 376,     401,     214,     254,     140,     168,      84,     102,
+		  43,      97,      65,      97,      32,     140,     140,
+	},
+	{
+		  60,     174,      34,      60,      26,      87,      30,      43,
+		  17,     100,      26,      78,      47,     104,      52,     126,
+		  39,     108,      60,     113,      78,     113,      82,     143,
+		  60,     143,      69,     169,      91,     283,     239,     522,
+		 261,     274,     139,     217,     148,     174,     117,     226,
+		 156,     204,     143,     174,     191,     283,     248,     413,
+		 261,     331,     222,     383,     243,     296,     222,     239,
+		 100,     178,      74,     156,      34,     265,     222,
+	},
+	{
+		 160,     183,      41,      41,       5,      89,      29,     100,
+		  41,      89,      65,      47,      47,      77,      59,     136,
+		  53,     100,      47,     183,      47,      94,      35,     100,
+		  77,     142,      89,     142,     106,     272,     332,     522,
+		 249,     302,     178,     178,     178,     189,     106,     172,
+		 100,     261,     112,     178,      94,     290,     160,     284,
+		 136,     249,     106,     231,     172,     272,     201,     403,
+		 178,     308,     148,     213,     100,     338,     332,
+	},
+	{
+		 114,     158,       1,      86,      57,      86,      28,     100,
+		  86,      57,      28,     114,      57,      86,      43,     129,
+		  14,      86,      43,      57,      86,     114,      57,     158,
+		  57,     186,     129,     186,     158,     215,     215,     474,
+		 158,     316,     186,     172,     129,     316,     114,     114,
+		 201,     201,      71,     100,     186,     201,     114,     272,
+		 100,     172,      86,     143,      86,     330,     244,     301,
+		 158,     201,     129,     445,      86,     502,     675,
+	},
+	{
+		 173,     195,      21,      75,      37,      75,       5,     102,
+		  43,     113,      21,      86,      27,      92,      37,     173,
+		  37,     140,      54,     108,      48,     124,      48,     146,
+		  70,     173,     108,     195,     119,     227,     292,     596,
+		 238,     265,     151,     157,     130,     195,      54,     184,
+		  86,     249,      65,     130,      54,     178,     102,     254,
+		 124,     216,      54,     162,      86,     216,      92,     222,
+		 113,     205,      86,     216,     146,     807,     970,
+	}
+*/};
+
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_CreateMvTrees
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     XMvHuffTables and YMvHuffTables are cleared and rebuilt.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Builds the VP5 huffman trees used for decoding motion
+ *                      vectors, one X and one Y tree per entropy table.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_CreateMvTrees ()
+{
+    UINT32 Table = 0;
+	// Wipe the node storage of both vector components before rebuilding.
+	memset( XMvHuffTables, 0, (sizeof(HUFF_NODE) * MV_ENTROPY_TABLES * (MV_ENTROPY_TOKENS-1)));
+	memset( YMvHuffTables, 0, (sizeof(HUFF_NODE) * MV_ENTROPY_TABLES * (MV_ENTROPY_TOKENS-1)));
+    while ( Table < MV_ENTROPY_TABLES )
+    {
+		VP5_BuildHuffTree( XMvHuffTables[Table], VP5_XMvFrequencyCounts[Table], MV_ENTROPY_TOKENS );
+		VP5_BuildHuffTree( YMvHuffTables[Table], VP5_YMvFrequencyCounts[Table], MV_ENTROPY_TOKENS );
+		Table ++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_CreateMvCodeArrays
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :		Creates the VP5 Mv huffman code arrays from the VP5 
+ *						Mv huffman trees.
+ *
+ *  SPECIAL NOTES :     Compiled out: the whole routine sits inside #if 0.
+ *                      For each table it hands the X/Y tree plus the matching
+ *                      pattern and bits tables to VP5_CreateCodeArray.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+//sjlhack - Jim, is this code used???????????????
+#if 0
+void VP5_CreateMvCodeArrays()
+{
+    UINT32 i;
+
+	for ( i = 0; i < MV_ENTROPY_TABLES; i++ ) 
+	{
+        VP5_CreateCodeArray( XMvHuffTables[i], 0, XMvPatternTables[i], XMvBitsTables[i], 0, 0 );
+        VP5_CreateCodeArray( YMvHuffTables[i], 0, YMvPatternTables[i], YMvBitsTables[i], 0, 0 );
+	}
+}
+#endif
+
+
+/* Decoder specific functions */
+#ifdef PBDLL
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_ExtractMVectorComponent
+ *
+ *  INPUTS        :     pbi    - decoder instance; decisions are decoded from pbi->br
+ *                      hn     - node array of the tree to walk; hn[0] is the root
+ *                      Invert - when non-zero the decoded value is negated
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     (value of the leaf reached - 31), negated if Invert is set.
+ *
+ *  FUNCTION      :     Extracts a motion vector component for VP5
+ *
+ *  SPECIAL NOTES :     The walk ends when the branch taken has a non-zero
+ *                      selector; that branch's value is the decoded token.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+INT32 VP5_ExtractMVectorComponent(PB_INSTANCE *pbi, HUFF_NODE * hn, BOOL Invert )
+{
+    int nodeptr = 0;	// index of the current node in hn[], then the token value once a leaf is hit
+	int selector = 0;	// non-zero once the branch just taken was a leaf
+	INT32 MvComponent;
+    
+    // Loop searches down through tree based upon bits read from the bitstream 
+    // until it hits a leaf at which point we have decoded a token
+    do
+    {
+		int which =DecodeBool(&pbi->br, hn[nodeptr].freq); // one decision, using this node's freq as the probability argument
+		if(which)
+		{
+			selector = hn[nodeptr].rightunion.right.selector;
+			nodeptr = hn[nodeptr].rightunion.right.value;
+		}
+		else
+		{
+			selector = hn[nodeptr].leftunion.left.selector;
+			nodeptr = (int) hn[nodeptr].leftunion.left.value;
+		}
+    }
+	while ( !selector);
+
+	MvComponent = (INT32)(nodeptr - 31);	// tokens are biased by 31, so token 31 decodes to 0
+    return ( Invert ) ? (-MvComponent) : MvComponent;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ExtractMVectorComponentA
+ *
+ *  INPUTS        :     pbi    - decoder instance; bits are read from pbi->br
+ *                      hn     - tree root, not referenced by this routine
+ *                      Invert - invert sign flag, not referenced by this routine
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The decoded motion vector component.
+ *
+ *  FUNCTION      :     Extracts a motion vector component coded with method A.
+ *
+ *  SPECIAL NOTES :     A 3 bit group code selects 0, +1, -1, +/-2, +/-3, or a
+ *                      base of 4, 8 or 16 plus 2, 3 or 4 extra bits; every
+ *                      group from 3 upwards ends with a sign bit (1 = negative).
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+static INT32 ExtractMVectorComponentA(PB_INSTANCE *pbi, HUFF_NODE * hn, BOOL Invert )
+{
+    INT32   MVectComponent = 0; // decoded component; pre-set so no path can return an indeterminate value
+    UINT32  MVCode = 0;         // Temporary storage while decoding the MV
+    UINT32  ExtraBits = 0;
+
+    // Get group to which coded component belongs
+    MVCode = VP5_bitread( &pbi->br,  3 ); 
+
+    //  Now extract the appropriate number of bits to identify the component
+    switch ( MVCode )
+    {
+    case 0:
+        MVectComponent = 0;
+        break;
+    case 1:
+        MVectComponent = 1;
+        break;
+    case 2:
+        MVectComponent = -1;
+        break;
+    case 3:                                     // magnitude 2, one sign bit
+        if ( VP5_bitread1( &pbi->br ))
+            MVectComponent = -2;
+        else 
+            MVectComponent = 2;
+        break;
+    case 4:                                     // magnitude 3, one sign bit
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -3;
+        else 
+            MVectComponent = 3;
+        break;
+    case 5:                                     // magnitude 4 + 2 extra bits, then sign bit
+        ExtraBits = VP5_bitread( &pbi->br,  2 ); 
+        MVectComponent = 4 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -MVectComponent;
+        break;
+    case 6:                                     // magnitude 8 + 3 extra bits, then sign bit
+        ExtraBits = VP5_bitread( &pbi->br,  3 ); 
+        MVectComponent = 8 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ))
+            MVectComponent = -MVectComponent;
+        break;
+    case 7:                                     // magnitude 16 + 4 extra bits, then sign bit
+        ExtraBits = VP5_bitread( &pbi->br,  4 ); 
+        MVectComponent = 16 + ExtraBits;
+        if ( VP5_bitread1( &pbi->br ) )
+            MVectComponent = -MVectComponent;
+        break;
+    }
+
+    return MVectComponent;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ExtractMVectorComponentB
+ *
+ *  INPUTS        :     pbi       - decoder instance; bits are read from pbi->br
+ *                      MvNodePtr - tree root, not referenced by this routine
+ *                      Invert    - invert sign flag, not referenced by this routine
+ *                      
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     The decoded motion vector component: the 5 bit value,
+ *                      negated when the sign bit that follows it is set.
+ *
+ *  FUNCTION      :     Extracts an MV component coded using the fallback method:
+ *                      a 5 bit magnitude followed by a 1 bit sign flag.
+ *
+ *  SPECIAL NOTES :     The two reads must stay in this order so that they
+ *                      track the layout of the bitstream.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+static INT32 ExtractMVectorComponentB(PB_INSTANCE *pbi, HUFF_NODE * MvNodePtr, BOOL Invert )
+{
+    // Five magnitude bits come first in the stream...
+    INT32 Magnitude = VP5_bitread( &pbi->br,  5 );
+
+    // ...followed by a single sign bit: 1 selects the negative value.
+    return VP5_bitread1( &pbi->br ) ? -Magnitude : Magnitude;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c
new file mode 100644
index 00000000..c06fb611
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/TokenEntropy.c
@@ -0,0 +1,439 @@
+/****************************************************************************
+*
+*   Module Title :     TokenEntropy.c
+*
+*   Description  :     Video CODEC: Coefficient token entropy module.
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.01 PGW 27 Jun 01 Created
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Header Frames
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "type_aliases.h"
+#include "systemdependant.h"
+#include "codec_common.h"
+#include "codec_common_interface.h"
+#include "tokenentropy.h"
+#include "pbdll.h"
+
+
+/****************************************************************************
+*  Explicit Imports
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+#ifndef MAPCA
+
+// Cost in bits, scaled by 256, of coding an event of probability p/256 (index p in 0-255).
+// Entries track 256 * (8 - log2(p)), e.g. [1]=2047, [2]=1791, [128]=255; entry [0] repeats [1].
+const UINT32 ProbCost[256] = 
+{
+	2047,
+	2047,1791,1641,1535,1452,1385,1328,1279,1235,1196,
+	1161,1129,1099,1072,1046,1023,1000,979,959,940,
+	922,905,889,873,858,843,829,816,803,790,
+	778,767,755,744,733,723,713,703,693,684,
+	675,666,657,649,641,633,625,617,609,602,
+	594,587,580,573,567,560,553,547,541,534,
+	528,522,516,511,505,499,494,488,483,477,
+	472,467,462,457,452,447,442,437,433,428,
+	424,419,415,410,406,401,397,393,389,385,
+	381,377,373,369,365,361,357,353,349,346,
+	342,338,335,331,328,324,321,317,314,311,
+	307,304,301,297,294,291,288,285,281,278,
+	275,272,269,266,263,260,257,255,252,249,
+	246,243,240,238,235,232,229,227,224,221,
+	219,216,214,211,208,206,203,201,198,196,
+	194,191,189,186,184,181,179,177,174,172,
+	170,168,165,163,161,159,156,154,152,150,
+	148,145,143,141,139,137,135,133,131,129,
+	127,125,123,121,119,117,115,113,111,109,
+	107,105,103,101,99,97,95,93,92,90,
+	88,86,84,82,81,79,77,75,73,72,
+	70,68,66,65,63,61,60,58,56,55,
+	53,51,50,48,46,45,43,41,40,38,
+	37,35,33,32,30,29,27,25,24,22,
+	21,19,18,16,15,13,12,10,9,7,
+	6,4,3,1,
+	1,
+};
+#endif
+// Index categories for previous tokens in this block: token 0 -> 0, token 1 -> 1, tokens 2-10 -> 2, last entry -> 0
+const UINT8  PrevTokenIndex[MAX_ENTROPY_TOKENS] = { 0,1,2,2,2,2,2,2,2,2,2,0 };
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+const UINT8  ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 4, 5, 6, 12, 0 };	// extra bits per token
+const UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 19, 35, 67, 0 };	// for t in 1-9: [t+1] == [t] + (1 << (ExtraBitLengths_VP5[t]-1))
+
+const UINT8 DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1] = 	// 2 rows of 11 values; NOTE(review): presumably one row per plane -- confirm against the code that reads it
+{ 
+	{ 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+	{ 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 }
+};
+
+const UINT8 AcUpdateProbs[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1] =	// [preceded-by case][2][AC band][11 values]; NOTE(review): the [2] dimension is presumably the plane -- confirm
+{
+	{	// preceded by 0
+		{
+			{ 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 },
+			{ 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 },
+			{ 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 },
+			{ 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 },
+			{ 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+		{
+			{ 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 },
+			{ 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+	},
+	{	// preceded by 1
+		{
+			{ 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 },
+			{ 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 },
+			{ 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 },
+			{ 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 },
+			{ 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+		{
+			{ 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+	},
+	{	// preceded by > 1
+		{
+			{ 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 },
+			{ 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 },
+			{ 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 },
+			{ 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 },
+			{ 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+		{
+			{ 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 },
+			{ 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 },
+			{ 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+		},
+	},
+};
+
+/*
+{
+	{	// preceded by 0
+		{
+			{ 234, 246, 250, 249, 244, 254, 254, 254, 254, 254, 254 },
+			{ 225, 254, 242, 238, 234, 253, 252, 254, 254, 254, 254 },
+			{ 230, 254, 248, 243, 238, 254, 254, 254, 254, 254, 254 },
+			{ 244, 254, 254, 252, 247, 254, 254, 254, 254, 254, 254 },
+			{ 253, 254, 254, 254, 253, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+
+			 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254,
+			 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254,
+			 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254,
+			 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254,
+			 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254,
+			 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+		},
+		{
+			{ 251, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 243, 254, 254, 253, 253, 254, 254, 254, 254, 254, 254 },
+			{ 252, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },	
+		},
+	},
+	{	// preceded by 1
+		{
+			{ 211, 216, 233, 233, 234, 252, 251, 254, 254, 254, 254 },
+			{ 224, 219, 236, 237, 236, 252, 250, 254, 254, 253, 254 },
+			{ 227, 230, 245, 241, 238, 253, 254, 254, 254, 254, 254 },
+			{ 237, 235, 253, 252, 250, 254, 254, 254, 254, 254, 254 },
+			{ 252, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },	
+		},
+		{
+			{ 237, 242, 253, 253, 253, 254, 254, 254, 254, 254, 254 },
+			{ 248, 250, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },	
+		},
+	},
+	{	// preceded by > 1
+		{
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },	
+		},
+		{
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+			{ 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },	
+		},
+	},
+};
+*/
+// DC context line equations, indexed [Node][i][j]. ConfigureContexts evaluates ((DcProb * M + 128) >> 8) + C and clamps to 1..254.
+LINE_EQ DcNodeEqs[CONTEXT_NODES][TOKEN_CONTEXTS][TOKEN_CONTEXTS] =	// NOTE(review): each pair is presumably { M, C } -- confirm against the LINE_EQ declaration
+{
+  { // zero
+    { { 154,  61 },{ 141,  54 },{  90,  45 },{  54,  34 },{  54,  13 },{ 128, 109 }, },
+    { { 136,  54 },{ 148,  45 },{  92,  41 },{  54,  33 },{  51,  15 },{  87, 113 }, },
+    { {  87,  44 },{  97,  40 },{  67,  36 },{  46,  29 },{  41,  15 },{  64,  80 }, },
+    { {  59,  33 },{  61,  31 },{  51,  28 },{  44,  22 },{  33,  12 },{  49,  63 }, },
+    { {  69,  12 },{  59,  16 },{  46,  14 },{  31,  13 },{  26,   6 },{  92,  26 }, },
+    { { 128, 108 },{  77, 119 },{  54,  84 },{  26,  71 },{  87,  19 },{  95, 155 }, },
+  },
+  { // eob
+    { { 154,   4 },{ 182,   0 },{ 159,  -8 },{ 128,  -5 },{ 143,  -5 },{ 187,  55 }, },
+    { { 182,   0 },{ 228,  -3 },{ 187,  -7 },{ 174,  -9 },{ 189, -11 },{ 169,  79 }, },
+    { { 161,  -9 },{ 192,  -8 },{ 187,  -9 },{ 169, -10 },{ 136,  -9 },{ 184,  40 }, },
+    { { 164, -11 },{ 179, -10 },{ 174, -10 },{ 161, -10 },{ 115,  -7 },{ 197,  20 }, },
+    { { 195, -11 },{ 195, -11 },{ 146, -10 },{ 110,  -6 },{  95,  -4 },{ 195,  39 }, },
+    { { 182,  55 },{ 172,  77 },{ 177,  37 },{ 169,  29 },{ 172,  52 },{  92, 162 }, },
+  },
+  { // one
+    { { 174,  80 },{ 164,  80 },{  95,  80 },{  46,  66 },{  56,  24 },{  36, 193 }, },
+    { { 164,  80 },{ 166,  77 },{ 105,  76 },{  49,  68 },{  46,  31 },{  49, 186 }, },
+    { {  97,  78 },{ 110,  74 },{  72,  72 },{  44,  60 },{  33,  30 },{  69, 131 }, },
+    { {  61,  61 },{  69,  63 },{  51,  57 },{  31,  48 },{  26,  27 },{  64,  89 }, },
+    { {  67,  23 },{  51,  32 },{  36,  33 },{  26,  28 },{  20,  12 },{  44,  68 }, },
+    { {  26, 197 },{  41, 189 },{  61, 129 },{  28, 103 },{  49,  52 },{ -12, 245 }, },
+  },
+  { // low value
+    { { 102, 141 },{  79, 166 },{  72, 162 },{  97, 125 },{ 179,   4 },{ 307,   0 }, },
+    { {  72, 168 },{  69, 175 },{  84, 160 },{ 105, 127 },{ 148,  34 },{ 310,   0 }, },
+    { {  84, 151 },{  82, 161 },{  87, 153 },{  87, 135 },{ 115,  51 },{ 317,   0 }, },
+    { {  97, 125 },{ 102, 131 },{ 105, 125 },{  87, 122 },{  84,  64 },{  54, 184 }, },
+    { { 166,  18 },{ 146,  43 },{ 125,  51 },{  90,  64 },{  95,   7 },{  38, 154 }, },
+    { { 294,   0 },{  13, 225 },{  10, 225 },{  67, 168 },{   0, 167 },{ 161,  94 }, },
+  },
+  { // two 
+    { { 172,  76 },{ 172,  75 },{ 136,  80 },{  64,  98 },{  74,  67 },{ 315,   0 }, },
+    { { 169,  76 },{ 207,  56 },{ 164,  66 },{  97,  80 },{  67,  72 },{ 328,   0 }, },
+    { { 136,  80 },{ 187,  53 },{ 154,  62 },{  72,  85 },{  -2, 105 },{ 305,   0 }, },
+    { {  74,  91 },{ 128,  64 },{ 113,  64 },{  61,  77 },{  41,  75 },{ 259,   0 }, },
+    { {  46,  84 },{  51,  81 },{  28,  89 },{  31,  78 },{  23,  77 },{ 202,   0 }, },
+    { { 323,   0 },{ 323,   0 },{ 300,   0 },{ 236,   0 },{ 195,   0 },{ 328,   0 }, },
+  },
+};
+// AC context line equations, indexed [Prec][Band][Node][i]. ConfigureContexts evaluates ((AcProb * M + 128) >> 8) + C and clamps to 1..254.
+LINE_EQ AcNodeEqs[PREC_CASES][VP5_AC_BANDS-3][CONTEXT_NODES][TOKEN_CONTEXTS] =	// NOTE(review): each pair is presumably { M, C } -- confirm against the LINE_EQ declaration
+{
+  { // Preceded by 0 
+    { // Band 0
+      { { 276,   0 },{ 238,   0 },{ 195,   0 },{ 156,   0 },{ 113,   0 },{ 274,   0 }, },
+      { {   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 }, },
+      { { 192,  59 },{ 182,  50 },{ 141,  48 },{ 110,  40 },{  92,  19 },{ 125, 128 }, },
+      { { 169,  87 },{ 169,  83 },{ 184,  62 },{ 220,  16 },{ 184,   0 },{ 264,   0 }, },
+      { { 212,  40 },{ 212,  36 },{ 169,  49 },{ 174,  27 },{   8, 120 },{ 182,  71 }, },
+    },
+    { // Band 1
+      { { 259,  10 },{ 197,  19 },{ 143,  22 },{ 123,  16 },{ 110,   8 },{ 133,  88 }, },
+      { {   0,   1 },{ 256,   0 },{   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 }, },
+      { { 207,  46 },{ 187,  50 },{  97,  83 },{  23, 100 },{  41,  56 },{  56, 188 }, },
+      { { 166,  90 },{ 146, 108 },{ 161,  88 },{ 136,  95 },{ 174,   0 },{ 266,   0 }, },
+      { { 264,   7 },{ 243,  18 },{ 184,  43 },{ -14, 154 },{  20, 112 },{  20, 199 }, },
+    },
+    { // Band 2
+      { { 230,  26 },{ 197,  22 },{ 159,  20 },{ 146,  12 },{ 136,   4 },{  54, 162 }, },
+      { {   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 },{   0,   1 }, },
+      { { 192,  59 },{ 156,  72 },{  84, 101 },{  49, 101 },{  79,  47 },{  79, 167 }, },
+      { { 138, 115 },{ 136, 116 },{ 166,  80 },{ 238,   0 },{ 195,   0 },{ 261,   0 }, },
+      { { 225,  33 },{ 205,  42 },{ 159,  61 },{  79,  96 },{  92,  66 },{  28, 195 }, },
+    },
+  },
+  { // Preceded by 1 
+    { // Band 0
+      { { 200,  37 },{ 197,  18 },{ 159,  13 },{ 143,   7 },{ 102,   5 },{ 123, 126 }, },
+      { { 197,   3 },{ 220,  -9 },{ 210, -12 },{ 187,  -6 },{ 151,  -2 },{ 174,  80 }, },
+      { { 200,  53 },{ 187,  47 },{ 159,  40 },{ 118,  38 },{ 100,  18 },{ 141, 111 }, },
+      { { 179,  78 },{ 166,  86 },{ 197,  50 },{ 207,  27 },{ 187,   0 },{ 115, 139 }, },
+      { { 218,  34 },{ 220,  29 },{ 174,  46 },{ 128,  61 },{  54,  89 },{ 187,  65 }, },
+    },
+    { // Band 1
+      { { 238,  14 },{ 197,  18 },{ 125,  26 },{  90,  25 },{  82,  13 },{ 161,  86 }, },
+      { { 189,   1 },{ 205,  -2 },{ 156,  -4 },{ 143,  -4 },{ 146,  -4 },{ 172,  72 }, },
+      { { 230,  31 },{ 192,  45 },{ 102,  76 },{  38,  85 },{  56,  41 },{  64, 173 }, },
+      { { 166,  91 },{ 141, 111 },{ 128, 116 },{ 118, 109 },{ 177,   0 },{  23, 222 }, },
+      { { 253,  14 },{ 236,  21 },{ 174,  49 },{  33, 118 },{  44,  93 },{  23, 187 }, },
+    },
+    { // Band 2
+      { { 218,  28 },{ 179,  28 },{ 118,  35 },{  95,  30 },{  72,  24 },{ 128, 108 }, },
+      { { 187,   1 },{ 174,  -1 },{ 125,  -1 },{ 110,  -1 },{ 108,  -1 },{ 202,  52 }, },
+      { { 197,  53 },{ 146,  75 },{  46, 118 },{  33, 103 },{  64,  50 },{ 118, 126 }, },
+      { { 138, 114 },{ 128, 122 },{ 161,  86 },{ 243,  -6 },{ 195,   0 },{  38, 210 }, },
+      { { 215,  39 },{ 179,  58 },{  97, 101 },{  95,  85 },{  87,  70 },{  69, 152 }, },
+    },
+  },
+  { // Preceded by 2 
+    { // Band 0
+      { { 236,  24 },{ 205,  18 },{ 172,  12 },{ 154,   6 },{ 125,   1 },{ 169,  75 }, },
+      { { 187,   4 },{ 230,  -2 },{ 228,  -4 },{ 236,  -4 },{ 241,  -2 },{ 192,  66 }, },
+      { { 200,  46 },{ 187,  42 },{ 159,  34 },{ 136,  25 },{ 105,  10 },{ 179,  62 }, },
+      { { 207,  55 },{ 192,  63 },{ 192,  54 },{ 195,  36 },{ 177,   1 },{ 143,  98 }, },
+      { { 225,  27 },{ 207,  34 },{ 200,  30 },{ 131,  57 },{  97,  60 },{ 197,  45 }, },
+    },
+    { // Band 1
+      { { 271,   8 },{ 218,  13 },{ 133,  19 },{  90,  19 },{  72,   7 },{ 182,  51 }, },
+      { { 179,   1 },{ 225,  -1 },{ 154,  -2 },{ 110,  -1 },{  92,   0 },{ 195,  41 }, },
+      { { 241,  26 },{ 189,  40 },{  82,  64 },{  33,  60 },{  67,  17 },{ 120,  94 }, },
+      { { 192,  68 },{ 151,  94 },{ 146,  90 },{ 143,  72 },{ 161,   0 },{ 113, 128 }, },
+      { { 256,  12 },{ 218,  29 },{ 166,  48 },{  44,  99 },{  31,  87 },{ 148,  78 }, },
+    },
+    { // Band 2
+      { { 238,  20 },{ 184,  22 },{ 113,  27 },{  90,  22 },{  74,   9 },{ 192,  37 }, },
+      { { 184,   0 },{ 215,  -1 },{ 141,  -1 },{  97,   0 },{  49,   0 },{ 264,  13 }, },
+      { { 182,  51 },{ 138,  61 },{  95,  63 },{  54,  59 },{  64,  25 },{ 200,  45 }, },
+      { { 179,  75 },{ 156,  87 },{ 174,  65 },{ 177,  44 },{ 174,   0 },{ 164,  85 }, },
+      { { 195,  45 },{ 148,  65 },{ 105,  79 },{  95,  72 },{  87,  60 },{ 169,  63 }, },
+    },
+  },
+};
+
+/****************************************************************************
+*   Functions
+*****************************************************************************
+*/
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ConfigureContexts
+ *
+ *  INPUTS        :     pbi - decoder instance; reads pbi->DcProbs and
+ *                      pbi->AcProbs.
+ *                      
+ *  OUTPUTS       :     Fills pbi->DcNodeContexts and pbi->AcNodeContexts.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Configures the context dependent entropy probabilities.
+ *
+ *  SPECIAL NOTES :     Each entry is ((base prob * M + 128) >> 8) + C from the
+ *                      DcNodeEqs/AcNodeEqs line equations, clamped to 1..254.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ConfigureContexts(PB_INSTANCE *pbi)
+{
+	UINT32 i,j;
+
+	UINT32 Band;
+	UINT32 Node;
+	UINT32 Plane;
+	UINT32 Prec;
+	INT32  Temp;
+
+
+	// Clear MMX state so floating point can work again
+	#ifndef MACPPC
+#ifndef MAPCA
+	ClearSysState();
+#endif
+	#endif
+
+	// DC Node Probabilities: one value per (Plane, i, j, Node)
+	for ( Plane = 0; Plane < 2; Plane ++ )
+	{
+		for ( i = 0; i < TOKEN_CONTEXTS; i++ )
+		{
+			for ( j = 0; j < TOKEN_CONTEXTS; j++ )
+			{
+				// Tree Nodes. NOTE(review): some M entries are negative, so the >> 8 may act on a negative value -- confirm the operand types
+				for ( Node = 0; Node < CONTEXT_NODES; Node ++ )
+				{
+					Temp = ( ( pbi->DcProbs[DCProbOffset(Plane,Node)] * DcNodeEqs[Node][i][j].M + 128 ) >> 8) 
+						+ DcNodeEqs[Node][i][j].C;	
+                    Temp = (Temp > 254)? 254: Temp;	// upper clamp
+                    Temp = (Temp <   1)? 1  : Temp;	// lower clamp
+					pbi->DcNodeContexts[DCContextOffset(Plane,i,j,Node)] = (UINT8)Temp;
+
+				}
+
+			}
+		}
+	}
+
+
+	// AC  Node Probabilities: one value per (Plane, Prec, Band, i, Node)
+	for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+	{
+		for ( Plane = 0; Plane < 2; Plane ++ )
+		{
+			// Higher AC bands do not use contexts.
+			for ( Band = 0; Band < VP5_AC_BANDS-3; Band++ )
+			{
+				for ( i = 0; i < TOKEN_CONTEXTS; i++ )
+				{
+					// Tree Nodes: same scale, offset and clamp as the DC case, with the AC tables
+					for ( Node = 0; Node < CONTEXT_NODES; Node ++ )
+					{
+                        Temp = ( ( pbi->AcProbs[ACProbOffset(Plane,Prec,Band,Node)] 
+							       * AcNodeEqs[Prec][Band][Node][i].M + 128 ) >> 8) 
+						  	 + AcNodeEqs[Prec][Band][Node][i].C;	
+
+                        Temp = (Temp > 254)? 254: Temp;	// upper clamp
+                        Temp = (Temp <   1)? 1  : Temp;	// lower clamp
+                        pbi->AcNodeContexts[ACContextOffset(Plane,Prec,Band,i,Node)] = (UINT8)Temp;
+
+					}
+				}
+			}
+		}
+	}
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c
new file mode 100644
index 00000000..22d45ed6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/boolhuff.c
@@ -0,0 +1,815 @@
+
+/****************************************************************************
+*
+*   Module Title :     boolhuff.c
+*
+*   Description  :     Video CODEC
+*
+*    AUTHOR      :     James Bankoski
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.00 JBB 01JUN01  Configuration baseline
+*
+*****************************************************************************
+*/
+ 
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+#include "boolhuff.h"
+#ifdef MAPCA 
+#include "eti/mm.h"
+#endif
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+ 
+/****************************************************************************
+*  Forward references.
+*****************************************************************************
+*/       
+ 
+                      
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
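+// In the bool coder defined herein a probability of 4 means a 4/256 chance of a 0 and a 252/256 chance of a 1,
+// so the Shannon cost (in bits) of a 0 given prob x is 8 - log2(x), and the cost of a 1 given prob x is 8 - log2(256 - x).
+// The shannon64Cost tables hold the same costs multiplied by 64 and stored as integers.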
+#ifndef MAPCA
+double shannonCost0[256]=
+{
+8.000000000,8.000000000,7.000000000,6.415037499,6.000000000,5.678071905,5.415037499,5.192645078,5.000000000,4.830074999,4.678071905,4.540568381,4.415037499,4.299560282,4.192645078,4.093109404,
+4.000000000,3.912537159,3.830074999,3.752072487,3.678071905,3.607682577,3.540568381,3.476438044,3.415037499,3.356143810,3.299560282,3.245112498,3.192645078,3.142019005,3.093109404,3.045803690,
+3.000000000,2.955605881,2.912537159,2.870716983,2.830074999,2.790546634,2.752072487,2.714597781,2.678071905,2.642447995,2.607682577,2.573735245,2.540568381,2.508146904,2.476438044,2.445411148,
+2.415037499,2.385290156,2.356143810,2.327574658,2.299560282,2.272079545,2.245112498,2.218640286,2.192645078,2.167109986,2.142019005,2.117356951,2.093109404,2.069262662,2.045803690,2.022720077,
+2.000000000,1.977632187,1.955605881,1.933910810,1.912537159,1.891475543,1.870716983,1.850252880,1.830074999,1.810175441,1.790546634,1.771181310,1.752072487,1.733213459,1.714597781,1.696219252,
+1.678071905,1.660149997,1.642447995,1.624960569,1.607682577,1.590609064,1.573735245,1.557056504,1.540568381,1.524266569,1.508146904,1.492205360,1.476438044,1.460841189,1.445411148,1.430144392,
+1.415037499,1.400087158,1.385290156,1.370643380,1.356143810,1.341788517,1.327574658,1.313499473,1.299560282,1.285754482,1.272079545,1.258533014,1.245112498,1.231815675,1.218640286,1.205584134,
+1.192645078,1.179821038,1.167109986,1.154509949,1.142019005,1.129635280,1.117356951,1.105182237,1.093109404,1.081136763,1.069262662,1.057485495,1.045803690,1.034215715,1.022720077,1.011315313,
+1.000000000,0.988772745,0.977632187,0.966576998,0.955605881,0.944717564,0.933910810,0.923184403,0.912537159,0.901967917,0.891475543,0.881058927,0.870716983,0.860448648,0.850252880,0.840128663,
+0.830074999,0.820090910,0.810175441,0.800327655,0.790546634,0.780831480,0.771181310,0.761595261,0.752072487,0.742612157,0.733213459,0.723875595,0.714597781,0.705379251,0.696219252,0.687117045,
+0.678071905,0.669083122,0.660149997,0.651271846,0.642447995,0.633677786,0.624960569,0.616295708,0.607682577,0.599120564,0.590609064,0.582147485,0.573735245,0.565371772,0.557056504,0.548788888,
+0.540568381,0.532394450,0.524266569,0.516184223,0.508146904,0.500154113,0.492205360,0.484300162,0.476438044,0.468618539,0.460841189,0.453105540,0.445411148,0.437757576,0.430144392,0.422571172,
+0.415037499,0.407542963,0.400087158,0.392669686,0.385290156,0.377948181,0.370643380,0.363375379,0.356143810,0.348948309,0.341788517,0.334664083,0.327574658,0.320519900,0.313499473,0.306513043,
+0.299560282,0.292640868,0.285754482,0.278900811,0.272079545,0.265290380,0.258533014,0.251807150,0.245112498,0.238448768,0.231815675,0.225212940,0.218640286,0.212097441,0.205584134,0.199100100,
+0.192645078,0.186218809,0.179821038,0.173451513,0.167109986,0.160796212,0.154509949,0.148250959,0.142019005,0.135813855,0.129635280,0.123483053,0.117356951,0.111256751,0.105182237,0.099133192,
+0.093109404,0.087110664,0.081136763,0.075187496,0.069262662,0.063362061,0.057485495,0.051632768,0.045803690,0.039998068,0.034215715,0.028456446,0.022720077,0.017006425,0.011315313,0.005646563
+};
+double shannonCost1[256]=
+{
+0.000000000,0.005646563,0.011315313,0.017006425,0.022720077,0.028456446,0.034215715,0.039998068,0.045803690,0.051632768,0.057485495,0.063362061,0.069262662,0.075187496,0.081136763,0.087110664,
+0.093109404,0.099133192,0.105182237,0.111256751,0.117356951,0.123483053,0.129635280,0.135813855,0.142019005,0.148250959,0.154509949,0.160796212,0.167109986,0.173451513,0.179821038,0.186218809,
+0.192645078,0.199100100,0.205584134,0.212097441,0.218640286,0.225212940,0.231815675,0.238448768,0.245112498,0.251807150,0.258533014,0.265290380,0.272079545,0.278900811,0.285754482,0.292640868,
+0.299560282,0.306513043,0.313499473,0.320519900,0.327574658,0.334664083,0.341788517,0.348948309,0.356143810,0.363375379,0.370643380,0.377948181,0.385290156,0.392669686,0.400087158,0.407542963,
+0.415037499,0.422571172,0.430144392,0.437757576,0.445411148,0.453105540,0.460841189,0.468618539,0.476438044,0.484300162,0.492205360,0.500154113,0.508146904,0.516184223,0.524266569,0.532394450,
+0.540568381,0.548788888,0.557056504,0.565371772,0.573735245,0.582147485,0.590609064,0.599120564,0.607682577,0.616295708,0.624960569,0.633677786,0.642447995,0.651271846,0.660149997,0.669083122,
+0.678071905,0.687117045,0.696219252,0.705379251,0.714597781,0.723875595,0.733213459,0.742612157,0.752072487,0.761595261,0.771181310,0.780831480,0.790546634,0.800327655,0.810175441,0.820090910,
+0.830074999,0.840128663,0.850252880,0.860448648,0.870716983,0.881058927,0.891475543,0.901967917,0.912537159,0.923184403,0.933910810,0.944717564,0.955605881,0.966576998,0.977632187,0.988772745,
+1.000000000,1.011315313,1.022720077,1.034215715,1.045803690,1.057485495,1.069262662,1.081136763,1.093109404,1.105182237,1.117356951,1.129635280,1.142019005,1.154509949,1.167109986,1.179821038,
+1.192645078,1.205584134,1.218640286,1.231815675,1.245112498,1.258533014,1.272079545,1.285754482,1.299560282,1.313499473,1.327574658,1.341788517,1.356143810,1.370643380,1.385290156,1.400087158,
+1.415037499,1.430144392,1.445411148,1.460841189,1.476438044,1.492205360,1.508146904,1.524266569,1.540568381,1.557056504,1.573735245,1.590609064,1.607682577,1.624960569,1.642447995,1.660149997,
+1.678071905,1.696219252,1.714597781,1.733213459,1.752072487,1.771181310,1.790546634,1.810175441,1.830074999,1.850252880,1.870716983,1.891475543,1.912537159,1.933910810,1.955605881,1.977632187,
+2.000000000,2.022720077,2.045803690,2.069262662,2.093109404,2.117356951,2.142019005,2.167109986,2.192645078,2.218640286,2.245112498,2.272079545,2.299560282,2.327574658,2.356143810,2.385290156,
+2.415037499,2.445411148,2.476438044,2.508146904,2.540568381,2.573735245,2.607682577,2.642447995,2.678071905,2.714597781,2.752072487,2.790546634,2.830074999,2.870716983,2.912537159,2.955605881,
+3.000000000,3.045803690,3.093109404,3.142019005,3.192645078,3.245112498,3.299560282,3.356143810,3.415037499,3.476438044,3.540568381,3.607682577,3.678071905,3.752072487,3.830074999,3.912537159,
+4.000000000,4.093109404,4.192645078,4.299560282,4.415037499,4.540568381,4.678071905,4.830074999,5.000000000,5.192645078,5.415037499,5.678071905,6.000000000,6.415037499,7.000000000,8.000000000
+};
+
+unsigned int shannon64Cost0[256]={
+512,512,448,411,384,363,347,332,320,309,299,291,283,275,268,262,
+256,250,245,240,235,231,227,222,219,215,211,208,204,201,198,195,
+192,189,186,184,181,179,176,174,171,169,167,165,163,161,158,157,
+155,153,151,149,147,145,144,142,140,139,137,136,134,132,131,129,
+128,127,125,124,122,121,120,118,117,116,115,113,112,111,110,109,
+107,106,105,104,103,102,101,100,99,98,97,96,94,93,93,92,
+91,90,89,88,87,86,85,84,83,82,81,81,80,79,78,77,
+76,76,75,74,73,72,72,71,70,69,68,68,67,66,65,65,
+64,63,63,62,61,60,60,59,58,58,57,56,56,55,54,54,
+53,52,52,51,51,50,49,49,48,48,47,46,46,45,45,44,
+43,43,42,42,41,41,40,39,39,38,38,37,37,36,36,35,
+35,34,34,33,33,32,32,31,30,30,29,29,29,28,28,27,
+27,26,26,25,25,24,24,23,23,22,22,21,21,21,20,20,
+19,19,18,18,17,17,17,16,16,15,15,14,14,14,13,13,
+12,12,12,11,11,10,10,9,9,9,8,8,8,7,7,6,
+6,6,5,5,4,4,4,3,3,3,2,2,1,1,1,0,
+};
+unsigned int shannon64Cost1[256]={
+0,0,1,1,1,2,2,3,3,3,4,4,4,5,5,6,
+6,6,7,7,8,8,8,9,9,9,10,10,11,11,12,12,
+12,13,13,14,14,14,15,15,16,16,17,17,17,18,18,19,
+19,20,20,21,21,21,22,22,23,23,24,24,25,25,26,26,
+27,27,28,28,29,29,29,30,30,31,32,32,33,33,34,34,
+35,35,36,36,37,37,38,38,39,39,40,41,41,42,42,43,
+43,44,45,45,46,46,47,48,48,49,49,50,51,51,52,52,
+53,54,54,55,56,56,57,58,58,59,60,60,61,62,63,63,
+64,65,65,66,67,68,68,69,70,71,72,72,73,74,75,76,
+76,77,78,79,80,81,81,82,83,84,85,86,87,88,89,90,
+91,92,93,93,94,96,97,98,99,100,101,102,103,104,105,106,
+107,109,110,111,112,113,115,116,117,118,120,121,122,124,125,127,
+128,129,131,132,134,136,137,139,140,142,144,145,147,149,151,153,
+155,157,158,161,163,165,167,169,171,174,176,179,181,184,186,189,
+192,195,198,201,204,208,211,215,219,222,227,231,235,240,245,250,
+256,262,268,275,283,291,299,309,320,332,347,363,384,411,448,512,
+};
+#endif
+// TEMP STATS VARIABLES 
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/              
+
+#ifdef NOTNORMALIZED
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StartDecode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						buffer	ptr to data to start decoding
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function initializes the boolean coder for decoding
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StartDecode(BOOL_CODER *bc, unsigned char *buffer)
+{
+    // Attach the input stream and start reading at its first byte.
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    // Start with an empty range and a zero value; DecodeBool() reloads
+    // from the stream whenever the range is below 2.
+    bc->range = 0;
+    bc->value = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeBool
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						prob	probability of getting a 0 normalized to 8 bits 
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		0 or 1 
+ *
+ *  FUNCTION      :     This function determines the next value stored in the 
+ *						boolean coder based upon the probability passed in.
+ *						It uses a simple probability model to approximate 
+ *						an arithmetic coder.
+ *                           
+ *
+ *  SPECIAL NOTES :     The accuracy of this encoder gets worse as the range 
+ *						approaches 0.  This can be avoided with more complex 
+ *						normalization functions (as in a standard arithmetic)
+ *						coder.  I chose to avoid this for speed reasons.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+int DecodeBool(
+	BOOL_CODER *bc, 
+	int probability)
+{
+	unsigned int split;
+
+	// The range can no longer separate a 0 from a 1: fetch the next
+	// 3 bytes of the stream into bc->v[] and restart with a range of
+	// 0x01000000.
+	// NOTE(review): bc->value is not reloaded here, so bc->v[] presumably
+	// aliases the bytes of bc->value -- confirm against the BOOL_CODER
+	// declaration.
+	if( bc->range < 2 )
+	{
+		const unsigned char *src = bc->buffer + bc->pos;
+
+		bc->v[0] = src[0];
+		bc->v[1] = src[1];
+		bc->v[2] = src[2];
+
+		bc->pos  += 3;
+		bc->range = 0x01000000;
+	}
+
+	// Decision point: ((range - 1) * probability) / 256 + 1, so the split
+	// is always at least 1.
+	split = ((((unsigned int)bc->range - 1u) * (unsigned int)probability) >> 8) + 1u;
+
+	// Values at or above the split decode as 1 and use the upper part of
+	// the range.
+	if( bc->value >= split )
+	{
+		bc->range -= split;
+		bc->value -= split;
+		return 1;
+	}
+
+	// Values below the split decode as 0 and keep the lower part.
+	bc->range = split;
+	return 0;
+} 
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StopDecode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function does clean up for boolean decoder
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StopDecode(BOOL_CODER *bc)
+{
+    // This decoder has no clean-up work to do.
+    (void)bc;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StartEncode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						buffer	ptr to hold encoded data
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function initializes the boolean coder for encoding
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StartEncode(BOOL_CODER *bc, unsigned char *buffer)
+{
+    // Attach the output buffer and start writing at its first byte.
+    bc->buffer = buffer;
+    bc->pos    = 0;
+
+    // Begin with the range used after every flush (0x01000000) and a zero value.
+    bc->range = 0x01000000;
+    bc->value = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     EncodeBool
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						x		value to encode
+ *						prob	probability of getting a 0 normalized to 8 bits 
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		
+ *
+ *  FUNCTION      :     This function encodes a boolean value using the 
+ *						boolean coder.
+ *                           
+ *
+ *  SPECIAL NOTES :     The accuracy of this encoder gets worse as the range 
+ *						approaches 0.  This can be avoided with more complex 
+ *						normalization functions (as in a standard arithmetic)
+ *						coder.  I chose to avoid this for speed reasons.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void EncodeBool(BOOL_CODER *bc, int x, int probability)
+{
+	unsigned int split;
+
+	// The range can no longer separate a 0 from a 1: write out the 3 bytes
+	// held in bc->v[], then restart with a range of 0x01000000 and a
+	// zero value.
+	if( bc->range < 2 )
+	{
+		unsigned char *dst = bc->buffer + bc->pos;
+
+		dst[0] = bc->v[0];
+		dst[1] = bc->v[1];
+		dst[2] = bc->v[2];
+		bc->pos += 3;
+
+		bc->range = 0x01000000;
+		bc->value = 0;
+	}
+
+	// Decision point: ((range - 1) * probability) / 256 + 1, so the split
+	// is always at least 1.
+	split = ((((unsigned int)bc->range - 1u) * (unsigned int)probability) >> 8) + 1u;
+
+	// A 0 keeps the lower part of the range.
+	if( !x )
+	{
+		bc->range = split;
+		return;
+	}
+
+	// A 1 moves the value past the split and keeps the upper part.
+	bc->range -= split;
+	bc->value += split;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StopEncode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function does clean up for boolean encoder
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StopEncode(BOOL_CODER *bc)
+{
+	// Copy the first three bytes of bc->value, in memory order, to the
+	// end of the output and advance the write position past them.
+	const unsigned char *src = (const unsigned char *) &bc->value;
+	unsigned char *dst = bc->buffer + bc->pos;
+
+	dst[0] = src[0];
+	dst[1] = src[1];
+	dst[2] = src[2];
+
+	bc->pos += 3;
+}
+
+#else 
+
+#ifndef MAPCA
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StartEncode
+ *
+ *  INPUTS        :     br		ptr to instance of our boolean coder
+ *						source	ptr to the buffer that receives the encoded data
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function initializes the boolean coder
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StartEncode
+(
+	BOOL_CODER *br, 
+	unsigned char *source
+)
+{
+	// Output goes to 'source', starting at its first byte.
+	br->buffer = source;
+	br->pos    = 0;
+
+	// Coder state: empty low value, a range of 255, and a count of -24
+	// (EncodeBool emits a byte each time the count climbs to zero).
+	br->lowvalue = 0;
+	br->value    = 0;
+	br->range    = 255;
+	br->count    = -24;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StopEncode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function does clean up for boolean encoder
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StopEncode
+(
+	BOOL_CODER *br
+)
+{	
+	// Flush the encoder: shift the pending bits in lowvalue up, then
+	// append four bytes to the output.
+	//
+	// The shift amount depends on how far count has advanced (it starts
+	// at -24 in StartEncode and is reset to -8 after each byte written
+	// by EncodeBool): a base of 24, 16 or 8 minus (count & 7).
+	// NOTE(review): (count & 7) presumably is the number of bits already
+	// shifted since the last byte boundary -- confirm against EncodeBool.
+	if(br->count<-16)
+		br->lowvalue <<= (24-(br->count&7));
+	else if(br->count<-8)
+		br->lowvalue <<= (16-(br->count&7));
+	else 
+		br->lowvalue <<= (8-(br->count&7));
+
+	// Append lowvalue >> 24, >> 16, >> 8 and >> 0, in that order,
+	// advancing pos by four.
+	br->buffer[br->pos++]=(br->lowvalue>>24);
+	br->buffer[br->pos++]=(br->lowvalue>>16)& 0xff;
+	br->buffer[br->pos++]=(br->lowvalue>>8)& 0xff;
+	br->buffer[br->pos++]=(br->lowvalue)& 0xff;
+}
+	
+/****************************************************************************
+ * 
+ *  ROUTINE       :     EncodeBool
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						x		value to encode
+ *						prob	probability of getting a 0 normalized to 8 bits 
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		
+ *
+ *  FUNCTION      :     This function encodes a boolean value using the 
+ *						boolean coder.
+ *                           
+ *
+ *  SPECIAL NOTES :     This encoder uses normalizations, and is fairly accurate,
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void EncodeBool
+(
+	BOOL_CODER 	* br,
+	int bit,
+	int probability
+)
+{
+	unsigned int split;
+
+	// Split the current range in proportion to the probability of a 0:
+	// ((range - 1) * probability) / 256 + 1, so the split is at least 1.
+	split = 1 +  (((br->range-1) * probability) >> 8);
+	if(bit)
+	{
+		// A 1 takes the upper part of the range.
+		br->lowvalue += split;
+		br->range -= split;
+	}
+	else
+	{	
+		// A 0 takes the lower part of the range.
+		br->range = split;
+	}
+
+	// Renormalise: double the range (and shift lowvalue with it) until
+	// the range is at least 0x80 again.
+	while(br->range < 0x80)
+	{
+		br->range <<= 1;
+
+		// Bit 31 of lowvalue is about to be shifted out: it is a carry into
+		// the bytes already written. Reset trailing 0xff bytes to 0 and add
+		// one to the first byte that is not 0xff.
+		if((br->lowvalue & 0x80000000 ))
+		{
+			int x = br->pos-1;
+			while(x>=0 && br->buffer[x] == 0xff)
+			{
+				br->buffer[x] =(unsigned char)0;
+				x--;
+			}
+			// The scan can end with x == -1 (nothing written yet, or every
+			// written byte was 0xff); never write in front of the buffer.
+			if(x>=0)
+				br->buffer[x]+=1;
+		}
+		br->lowvalue  <<= 1;
+
+		// count climbs by one per shifted bit; when it reaches zero the
+		// byte at bits 31..24 of lowvalue is written out and removed, and
+		// the count restarts at -8 for the next byte.
+		if (!++br->count) 
+		{
+			br->count = -8;
+			br->buffer[br->pos++]=(br->lowvalue >> 24);
+			br->lowvalue &= 0xffffff;
+		}
+	}
+}
+
+
+
+
+// TEMP
+
+extern const unsigned long ProbCost[256];
+
+// Cost-only counterpart of EncodeBool(): nothing is written to the output.
+// br->BitCounter is advanced by the ProbCost table entry for coding 'bit'
+// with the given probability of a 0 (a 1 is looked up at 255-probability).
+void EncodeBool2
+(
+	BOOL_CODER 	* br,
+	int bit,
+	int probability
+)
+{
+	if (bit)
+		br->BitCounter += ProbCost[255-probability];
+	else
+		br->BitCounter += ProbCost[probability];
+}
+
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeBool
+ *
+ *  INPUTS        :     br		ptr to instance of our boolean coder
+ *						prob	probability of getting a 0 normalized to 8 bits 
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :		0 or 1 
+ *
+ *  FUNCTION      :     This function determines the next value stored in the 
+ *						boolean coder based upon the probability passed in.
+ *						It uses a simple probability model to approximate 
+ *						an arithmetic coder.
+ *                           
+ *
+ *  ERRORS        :     None.
+ *
+ *  SPECIAL NOTES :     DecodeBool128() is a special case of this
+ *                      function that assumes the input probability is 128
+ *
+ ****************************************************************************/
+#ifdef MAPCA
+
+int DecodeBool
+(
+	BOOL_CODER	* br,
+	int probability
+) 
+{
+
+    unsigned int bit;
+	unsigned int split;
+	unsigned int bigsplit;
+    unsigned int lmbdoffset;
+    // Work on local copies of the coder state; they are written back
+    // before returning.
+    int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+
+	// Decision point: ((range - 1) * probability) / 256 + 1.
+	// bigsplit is the same split moved up 24 bits for comparison with value.
+	split = 1 +  (((range-1) * probability) >> 8);	
+    bigsplit = (split<<24);
+
+	if(value >= bigsplit)
+	{
+		// Upper part of the range: the bit is 1.
+		range = range-split;
+		value = value-bigsplit;
+		bit = 1;
+	}
+	else
+	{	
+		// Lower part of the range: the bit is 0.
+		range = split;
+		bit = 0;
+	}
+    
+    
+    // Fast exit: no renormalisation needed, count is left untouched.
+    if(range>=0x80)
+    {
+        br->value = value;
+        br->range = range;
+        return bit;
+            
+    }
+
+    // Renormalise in a single step instead of looping one bit at a time.
+    // NOTE(review): hmpv_lmo_32 is not defined in this file; presumably it
+    // returns the bit index of the leftmost 1 in range, so that lmbdoffset
+    // is the shift that brings range back to >= 0x80 -- confirm.
+    lmbdoffset = 7 - hmpv_lmo_32(range);
+	value 	 <<= lmbdoffset;
+	range 	 <<= lmbdoffset;
+	count 	  -= lmbdoffset;	
+
+    // The count has run out: add 8 to it and OR the next stream byte into
+    // value, shifted left by (8 - count).
+    if(count<=0)
+	{
+		count +=8;
+		value |= ((unsigned int)br->buffer[br->pos]<<(8-count));				
+		br->pos++;
+		
+	}
+
+    br->count = count;
+    br->value = value;
+    br->range = range;
+	return bit;
+} 
+
+
+#else
+int DecodeBool
+(
+	BOOL_CODER	* br,
+	int probability
+) 
+{
+	unsigned int bit;
+	unsigned int split;
+	unsigned int bigsplit;
+	// Local copies of the coder state; written back before returning.
+	unsigned int count = br->count;
+	unsigned int range = br->range;
+	unsigned int value = br->value;
+
+	// Decision point: ((range - 1) * probability) / 256 + 1.
+	// bigsplit is the same split moved up 24 bits for comparison with value.
+	split    = 1 + (((range-1) * probability) >> 8);
+	bigsplit = split << 24;
+
+	if (value < bigsplit)
+	{
+		// Lower part of the range: the bit is 0.
+		range = split;
+		bit   = 0;
+	}
+	else
+	{
+		// Upper part of the range: the bit is 1.
+		range -= split;
+		value -= bigsplit;
+		bit    = 1;
+	}
+
+	// Renormalise one bit at a time until the range is at least 0x80,
+	// pulling in a new stream byte each time the bit count runs out.
+	// When no renormalisation is needed, br->count is left untouched.
+	if (range < 0x80)
+	{
+		do
+		{
+			range <<= 1;
+			value <<= 1;
+
+			count--;
+			if (count == 0)
+			{
+				count  = 8;
+				value |= br->buffer[br->pos++];
+			}
+		} while (range < 0x80);
+
+		br->count = count;
+	}
+
+	br->value = value;
+	br->range = range;
+	return bit;
+} 
+#endif
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeBool128
+ *
+ *  INPUTS        :     br		ptr to instance of our boolean coder
+ *
+ *  RETURNS       :		0 or 1 
+ *
+ *  FUNCTION      :     This function determines the next value stored in the 
+ *						boolean coder based upon the probability passed in.
+ *						It uses a simple probability model to approximate 
+ *						an arithmetic coder.
+ *
+ *  ERRORS        :     None.
+ *
+ *  SPECIAL NOTES :     DecodeBool128() is a special case of the DecodeBool()
+ *                      function and assumes the input probability is 128
+ *
+ ****************************************************************************/
+int DecodeBool128
+(
+	BOOL_CODER	* br
+) 
+{
+    unsigned int bit;
+	unsigned int split;
+	unsigned int bigsplit;
+    // Local copies of the coder state; written back before returning.
+    unsigned int count = br->count;
+    unsigned int range = br->range;
+    unsigned int value = br->value;
+    
+    // Split the range in half (rounding up); bigsplit is the same split
+    // moved up 24 bits for comparison with value.
+    split = ( range + 1) >> 1;
+    bigsplit = (split<<24);
+    
+    // Unlike DecodeBool(), range and value are always shifted left by
+    // exactly one bit here, with no check against 0x80.
+    // NOTE(review): with range == 255 and a 0 bit this leaves range == 256,
+    // where DecodeBool() would leave 128 unshifted -- confirm this matches
+    // what the encoder produces.
+	if(value >= bigsplit)
+	{
+		range = (range-split)<<1;
+		value = (value-bigsplit)<<1;
+		bit = 1;
+	}
+	else
+	{	
+		range = split<<1;
+		value = value<<1;
+		bit = 0;
+	}
+
+    // One bit was consumed; when the count runs out, OR in the next
+    // stream byte and restart the count at 8.
+    if(!--count)
+    {
+        count=8;
+        value |= br->buffer[br->pos];
+        br->pos++;        
+    }
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+        
+}    
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StartDecode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *						buffer	ptr to data to start decoding
+ *
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function initializes the boolean coder for decoding
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StartDecode
+(
+	BOOL_CODER *br,
+	unsigned char *source
+)
+{
+	br->lowvalue = 0;
+	br->range = 255;
+	br->count = 8;
+	br->buffer=source;
+	br->pos =0;
+
+	// Prime the value register with the first four stream bytes, first byte
+	// in the most significant position. Each byte is converted to unsigned
+	// int before shifting: shifting the promoted (signed) int left by 24
+	// is undefined behaviour when the byte is 0x80 or larger.
+	br->value = ((unsigned int)br->buffer[0]<<24)
+	          + ((unsigned int)br->buffer[1]<<16)
+	          + ((unsigned int)br->buffer[2]<<8)
+	          +  (unsigned int)br->buffer[3];
+	br->pos+=4;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     StopDecode
+ *
+ *  INPUTS        :     bc		ptr to instance of our boolean coder
+ *                      
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     
+ *
+ *  FUNCTION      :     This function does clean up for boolean decoder
+ *                           
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void StopDecode(BOOL_CODER *bc)
+{
+	// This decoder has no clean-up work to do.
+	(void)bc;
+}
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c
new file mode 100644
index 00000000..0c7c5192
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/debug.c
@@ -0,0 +1,410 @@
+#include "pbdll.h"
+#include "misc_common.h"
+
+
+//#define     OVERLAY_MOTION_VECTORS
+#include "xprintf.h"
+#if defined OVERLAY_MOTION_VECTORS
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DrawVector
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi
+ *						UINT8 *BlockPtr
+ *						INT32 x
+ *						INT32 y
+ *						UINT8 VectorColour
+ *						UINT8 DotColour
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None .
+ *
+ *  FUNCTION      :     Draws motion vector into reconstruction buffer
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DrawVector( PB_INSTANCE *pbi, UINT8 *BlockPtr, INT32 x, INT32 y, UINT8 VectorColour, UINT8 DotColour )
+{
+	// Draws the line from BlockPtr to BlockPtr + (x,y) in VectorColour by
+	// stepping one pixel at a time along the longer axis, then marks
+	// BlockPtr itself in DotColour. Rows are pbi->Configuration.YStride
+	// bytes apart.
+	// NOTE(review): when abs(x) == abs(y) (including x == y == 0) neither
+	// branch runs and only the dot is drawn -- confirm that is intended.
+	UINT8 *PixelPtr;
+	double Xpos, Ypos;
+	double Xdelta, Ydelta;
+	INT32 x0, x1, y0, y1;
+
+	if ( abs(x) > abs(y) )
+	{
+		// Step along x axis, always from the smaller x to the larger one;
+		// Ypos starts at the y that belongs to that end of the line.
+		if ( x < 0 )
+		{
+			x0 = x;
+			x1 = 0;
+			Ypos = (double)y;
+		}
+		else
+		{
+			x0 = 0;
+			x1 = x;
+			Ypos = 0.0;
+		}
+
+		// x is non-zero here because abs(x) > abs(y) >= 0.
+		Ydelta = (double)y / (double)x;
+
+		for ( x=x0; x<=x1; x++ )
+		{
+			// Round to nearest. Ypos can be negative, so convert to a signed
+			// integer: converting a negative double to an unsigned type is
+			// undefined behaviour.
+			y = (INT32)( Ypos<0.0 ? (Ypos-0.5) : (Ypos+0.5) );
+			PixelPtr = BlockPtr + y*pbi->Configuration.YStride + x;
+			*PixelPtr = VectorColour;
+			Ypos += Ydelta;
+		}
+	}
+	else if ( abs(y) > abs(x) )
+	{
+		// Step along y axis, always from the smaller y to the larger one;
+		// Xpos starts at the x that belongs to that end of the line.
+		if ( y < 0 )
+		{
+			y0 = y;
+			y1 = 0;
+			Xpos = (double)x;
+		}
+		else
+		{
+			y0 = 0;
+			y1 = y;
+			Xpos = 0.0;
+		}
+
+		// y is non-zero here because abs(y) > abs(x) >= 0.
+		Xdelta = (double)x / (double)y;
+
+		for ( y=y0; y<=y1; y++ )
+		{
+			// Round to nearest using a signed conversion (see above).
+			x = (INT32)( Xpos<0.0 ? (Xpos-0.5) : (Xpos+0.5) );
+			PixelPtr = BlockPtr + y*pbi->Configuration.YStride + x;
+			*PixelPtr = VectorColour;
+			Xpos += Xdelta;
+		}
+	}
+
+	// Indicate current position in specified colour
+	*BlockPtr = DotColour;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DisplayMotionVectors
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None .
+ *
+ *  FUNCTION      :     Overlays colour coded motion vectors into reconstruction buffer
+ *
+ *  SPECIAL NOTES :     This routine will only display motion vectors when Post-processing
+ *						is enabled since it draws into the PostProcessBuffer.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DisplayMotionVectors ( PB_INSTANCE *pbi )
+{
+	// Debug overlay: for every macroblock whose coding mode uses motion
+	// compensation, pick a colour scheme from the mode and call DrawVector()
+	// on pbi->PostProcessBuffer.
+	//
+	// NOTE(review): this routine is unfinished as it stands:
+	//   - FragIndex is read below but never assigned;
+	//   - x and y are passed to DrawVector() but their assignments are
+	//     commented out;
+	//   - BlockPtr always points at the start of PostProcessBuffer and
+	//     BlockOffset is no longer used.
+	INT32	FragIndex;			// Fragment number
+	UINT32	MB, B;		   		// Macro-Block, Block indices
+    UINT32  CodingMethod;       // Temp Storage for coding mode.
+	INT32	x, y;
+	UINT32	Blocks;
+	UINT32	BlockOffset[4] = {0, 1, pbi->HFragments, pbi->HFragments + 1};
+	UINT8	*BlockPtr;
+	UINT8	DotColour;
+	UINT8	VectorColour;
+
+	// Nothing to display if keyframe
+    if ( VP5_GetFrameType(pbi) == BASE_FRAME )
+    {
+        return;
+    }
+
+    // Traverse the quad-tree
+	for ( MB=0; MB<pbi->YMacroBlocks; MB++ )
+	{
+		// Is the Macro-Block coded:
+//		if ( pbi->MBCodedFlags[MB] )
+		{
+			// NOTE(review): FragIndex is uninitialised at this point.
+			CodingMethod = pbi->FragInfo[FragIndex].FragCodingMode;
+
+			if ( VP5_ModeUsesMC[CodingMethod] )
+			{
+				// Indicate previous/golden frame predictor
+				if ( CodingMethod == CODE_GOLDEN_MV )
+				{
+					DotColour    = 0x00;	// Black dot
+					VectorColour = 0x7F;	// Mid-Grey Vector
+				}
+				else if( (CodingMethod == CODE_INTER_LAST_MV) || (CodingMethod == CODE_INTER_PRIOR_LAST) )
+				{
+					DotColour    = 0xFF;	// White dot
+					VectorColour = 0x00;	// Black Vector	
+				}
+				else
+				{
+					DotColour    = 0x00;	// Black dot
+					VectorColour = 0xFF;	// White Vector
+				}
+
+				// Four-vector mode draws one vector per block, all other modes one per macroblock.
+				if ( CodingMethod == CODE_INTER_FOURMV )
+					Blocks = 4;
+				else
+					Blocks = 1;
+
+				for ( B=0; B<Blocks; B++ )
+				{
+					// Pointer to top LH-corner of block
+					BlockPtr = pbi->PostProcessBuffer ;// sorry adrian I'll fix it soon (removing getfragindex)
+						//+ ReconGetFragIndex(pbi->recon_pixel_index_table, FragIndex+BlockOffset[B]);
+
+					// Motion vector ( oops motion vectors only remembered at the macroblock level now!!
+					/*
+					x = pbi->FragInfo[FragIndex + BlockOffset[B]].MVectorX;
+					y = pbi->FragInfo[FragIndex + BlockOffset[B]].MVectorY;
+					*/
+					// NOTE(review): x and y are uninitialised here because the
+					// assignments above are commented out.
+					DrawVector( pbi, BlockPtr, x/2, y/2, VectorColour, DotColour );
+				}
+			}
+		}
+	}
+}
+#endif
+/****************************************************************************
+ Debugging Aid Only
+*/
+
+/****************************************************************************
+ Debugging Aid Only
+*/
+#ifdef _MSC_VER
+#include <stdio.h>
+// Debugging aid: writes ReconYPlaneSize + 2*ReconUVPlaneSize bytes starting
+// at 'address' to a new file named "y<x>.raw", where x is zero-padded to
+// four digits. Does nothing if the file cannot be created.
+void vp5_writeframe(PB_INSTANCE *pbi, char * address,int x)
+{
+	// write the frame
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"y%04d.raw",x);
+	yframe=fopen(filename,"wb");
+	if (yframe == NULL)
+		return;		// fopen failed: fwrite/fclose on NULL would crash
+	fwrite(address,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+	fclose(yframe);
+}
+// Debugging aid: writes YPlaneSize bytes starting at 'address' to a new
+// file named "y<x>.raw" (x is not zero-padded). Does nothing if the file
+// cannot be created.
+void vp5_writeframe2(PB_INSTANCE *pbi, char * address,int x)
+{
+	// write the frame
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"y%d.raw",x);
+	yframe=fopen(filename,"wb");
+	if (yframe == NULL)
+		return;		// fopen failed: fwrite/fclose on NULL would crash
+	fwrite(address,pbi->YPlaneSize,1,yframe);
+	fclose(yframe);
+}
+// Debugging aid: writes 'size' bytes starting at 'address' to a new file
+// named "<prefix><frame>.raw", where frame is zero-padded to four digits.
+// Does nothing if the file cannot be created.
+void vp5_draw(unsigned char *prefix, int frame, char * address,int size)
+{
+	// write the frame
+	FILE *yframe;
+	char filename[255];
+	sprintf(filename,"%s%04d.raw",prefix,frame);
+	yframe=fopen(filename,"wb");
+	if (yframe == NULL)
+		return;		// fopen failed: fwrite/fclose on NULL would crash
+	fwrite(address,size,1,yframe);
+	fclose(yframe);
+}
+// Debugging aid: writes a width x height window of an image to a new file
+// named "<prefix><frame>.raw" (frame zero-padded to four digits). Rows
+// start 'pitch' bytes apart in memory and are written back to back.
+// Does nothing if the file cannot be created.
+void vp5_drawb(unsigned char *prefix, int frame, char * address,int pitch,int width,int height)
+{
+	// write the frame
+	FILE *yframe;
+	int i;
+	char filename[255];
+	sprintf(filename,"%s%04d.raw",prefix,frame);
+	yframe=fopen(filename,"wb");
+	if (yframe == NULL)
+		return;		// fopen failed: fwrite/fclose on NULL would crash
+	for(i=0;i<height;i++)
+	{
+		fwrite(address,width,1,yframe);
+		address+=pitch;
+	}
+	fclose(yframe);
+}
+// Debugging aid: appends a width x height window of an image to the file
+// 'filename'. Rows start 'pitch' bytes apart in memory and are written
+// back to back. Does nothing if the file cannot be opened.
+void vp5_drawc(char *filename, char * address,int pitch,int width,int height)
+{
+	// write the frame
+	FILE *yframe;
+	int i;
+	yframe=fopen(filename,"ab");
+	if (yframe == NULL)
+		return;		// fopen failed: fwrite/fclose on NULL would crash
+	for(i=0;i<height;i++)
+	{
+		fwrite(address,width,1,yframe);
+		address+=pitch;
+	}
+	fclose(yframe);
+}
+/* Debug aid: hands a one-line status string to vp5_xprintf together with the
+ * offset YStride*32 + 32.  Values, in format order: FrameType, the current
+ * frame quantizer value, CurrentFrameSize, HFragments, VFragments,
+ * Vp3VersionNo, avgDecodeTime, avgBlitTime, avgPPTime[8] and
+ * PostProcessingLevel.
+ * NOTE(review): the offset is presumably an index into the post-processing
+ * buffer (the disabled code below indexes PostProcessBuffer the same way) --
+ * confirm against vp5_xprintf. */
+void vp5_showinfo2(PB_INSTANCE *pbi)
+{
+//	int i;
+//	for (i=0;i<pbi->PostProcessingLevel;i++)
+//		pbi->PostProcessBuffer[pbi->Configuration.YStride * 32 + 32 + +4 +4*i] = 255;
+
+	vp5_xprintf(pbi, 
+			pbi->Configuration.YStride * 32 + 32, 
+			"F:%d Q:%d S:%d W:%d H:%d V:%d Decode:%8d, Blit:%8d, PP:%8d, P:%d",
+			pbi->FrameType,
+			pbi->quantizer->ThisFrameQuantizerValue,
+			pbi->CurrentFrameSize,
+			pbi->HFragments,
+			pbi->VFragments,
+			pbi->Vp3VersionNo,
+			pbi->avgDecodeTime,
+			pbi->avgBlitTime,
+			pbi->avgPPTime[8],
+			pbi->PostProcessingLevel);
+
+}
+/* Debug aid: appends pbi->ReconYPlaneSize + 2*pbi->ReconUVPlaneSize bytes
+ * starting at pbi->LastFrameRecon to "test.raw" in the current directory.
+ * Best effort: if the file cannot be opened nothing is written. */
+void vp5_appendframe(PB_INSTANCE *pbi)
+{
+	FILE *yframe;
+
+	yframe=fopen("test.raw","ab");
+
+	// fopen returns NULL on failure; fwrite/fclose must not be handed NULL
+	if(yframe == NULL)
+		return;
+
+	fwrite(pbi->LastFrameRecon,pbi->ReconYPlaneSize+2*pbi->ReconUVPlaneSize,1,yframe);
+	fclose(yframe);
+}
+
+/* Debug aid: for every macroblock (row, col) in 0..MBRows-1 x 0..MBCols-1,
+ * passes pbi->predictionMode[MBOffset(row,col)] to vp5_xprintf with the
+ * format "%d" and the offset ((row+1)*16+5) * YStride + (col+1)*16+5. */
+void vp5_showinfo(PB_INSTANCE *pbi)
+{
+	const UINT32 RowCount = pbi->MBRows;
+	const UINT32 ColCount = pbi->MBCols;
+	UINT32 Row;
+	UINT32 Col;
+
+	for ( Row = 0; Row < RowCount; ++Row )
+	{
+		for ( Col = 0; Col < ColCount; ++Col )
+		{
+			vp5_xprintf(pbi,
+				((Row+1)* 16+5) * pbi->Configuration.YStride  + (Col+1)*16+5,
+				"%d",
+				pbi->predictionMode[MBOffset(Row,Col)]);
+		}
+	}
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictBlockToPostProcessBuffer
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : decoder instance; pbi->mbi supplies
+ *                                          Mode, Recon and CurrentReconStride.
+ *                      BLOCK_POSITION bp : block position, forwarded to
+ *                                          PredictFilteredBlock.
+ *
+ *  OUTPUTS       :     Block at pbi->PostProcessBuffer[pbi->mbi.Recon].
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Clears ReconDataBuffer (64 shorts) and then runs the
+ *                      reconstruction helper that matches the macroblock mode,
+ *                      with the post processing buffer as destination.
+ *
+ *                      Motion vectors and modes assumed to be defined at the MB level.
+ *
+ *  SPECIAL NOTES :     Mode tests are made in this order: CODE_INTER_NO_MV,
+ *                      any mode flagged in VP5_ModeUsesMC, CODE_USING_GOLDEN,
+ *                      otherwise intra.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void PredictBlockToPostProcessBuffer
+( 
+	PB_INSTANCE *pbi, 
+	BLOCK_POSITION bp
+)
+{
+	// the helpers below are always fed an all-zero ReconDataBuffer
+	memset(pbi->ReconDataBuffer,0,64*sizeof(short));
+
+	// Inter, no motion vector: reference is the same position in LastFrameRecon
+	if ( pbi->mbi.Mode == CODE_INTER_NO_MV )
+	{
+		ReconInter( pbi->TmpDataBuffer,
+			(UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+			(UINT8 *)&pbi->LastFrameRecon[pbi->mbi.Recon],
+			pbi->ReconDataBuffer,
+			pbi->mbi.CurrentReconStride);
+		return;
+	}
+
+	// Any mode that carries a motion vector: build the filtered prediction first
+	if ( VP5_ModeUsesMC[pbi->mbi.Mode] )
+	{
+		PredictFilteredBlock( pbi, pbi->TmpDataBuffer,bp);
+
+		ReconBlock( pbi->TmpDataBuffer,
+			pbi->ReconDataBuffer,
+			(UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+			pbi->mbi.CurrentReconStride );
+		return;
+	}
+
+	// Golden frame, same position
+	if ( pbi->mbi.Mode == CODE_USING_GOLDEN )
+	{
+		ReconInter( pbi->TmpDataBuffer,
+			(UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon],
+			(UINT8 *)&pbi->GoldenFrame[ pbi->mbi.Recon ],
+			pbi->ReconDataBuffer,
+			pbi->mbi.CurrentReconStride );
+		return;
+	}
+
+	// Everything else is treated as intra
+	ReconIntra( pbi->TmpDataBuffer, (UINT8 *)&pbi->PostProcessBuffer[pbi->mbi.Recon], (UINT16 *)pbi->ReconDataBuffer, pbi->mbi.CurrentReconStride );
+}
+
+ 
+/* Debug aid: appends one text block per call to "modes.txt".  For every
+ * macroblock row from 2 to MBRows-3 a line is written holding, for columns
+ * 2 to MBCols-3: the MBInterlaced flags (only when Configuration.Interlaced
+ * is 1), the prediction modes, and the motion vectors as "x:y" pairs.
+ * A function-local counter supplies the frame number.
+ * Best effort: if the file cannot be opened nothing is written, but the
+ * frame counter still advances so later entries keep their numbering. */
+void printmodes(PB_INSTANCE *pbi)
+{
+    static int nFrame = 0;  //  PB_INSTANCE doesn't provide a frame number, does it?
+    FILE *f=fopen("modes.txt","a");
+    unsigned int i,j;
+
+    // fopen returns NULL on failure; fprintf/fclose must not be handed NULL
+    if(f == NULL)
+    {
+        ++nFrame;
+        return;
+    }
+
+    fprintf(f, "Frame %d\n\n", nFrame);
+
+    for(i=2;i<pbi->MBRows-2;i++)
+    {
+        if(pbi->Configuration.Interlaced == 1)
+        {
+            for(j=2;j<pbi->MBCols-2;j++)
+            {
+                fprintf(f,"%d",pbi->MBInterlaced[MBOffset(i,j)]);
+            }
+            fprintf(f,"   ");
+        }
+        for(j=2;j<pbi->MBCols-2;j++)
+        {
+            fprintf(f,"%d",pbi->predictionMode[MBOffset(i,j)]);
+        }
+        fprintf(f,"   ");
+        for(j=2;j<pbi->MBCols-2;j++)
+        {
+            fprintf(f,"%3d:%-3d",pbi->MBMotionVector[MBOffset(i,j)].x,pbi->MBMotionVector[MBOffset(i,j)].y);
+        }
+        fprintf(f,"\n");
+    }
+
+    fprintf(f,"\n");
+    fprintf(f,"\n");
+    fclose(f);
+
+    ++nFrame;
+
+    return;
+}
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c
new file mode 100644
index 00000000..8347299d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodembs.c
@@ -0,0 +1,1071 @@
+/****************************************************************************
+*
+*   Module Title :     Decodembs.c
+*
+*   Description  :     Compressor functions for block order transmittal
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*   1.28 YWX 27-Dec-01 Rewrote ReadTokensPredictA()
+*   1.27 YWX 06-Nov-01 Removed Warning errors
+*   1.26 JBB 13 Jun 01 VP4 Code Clean Out
+*	1.25 AWG 08-JUN-01 Added support for DCT16.
+*	1.24 AWG 22-MAY-01 Removed HExtra/VExtra from call to QuadCodeComponent2
+*   1.23 JBB 01-MAY-01 VP5 Functionality
+*   1.22 JBB 09-Apr-01 first pass file clean up 
+*   1.21 JBB 23-Mar-01 New DC preidction
+*   1.20 JBB 30 NOV 00 Configuration BaseLine
+*****************************************************************************
+*/
+#define STRICT              /* Strict type checking. */
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+//#include "compdll.h"
+//#include "misc_common.h"
+#include "pbdll.h"
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "codec_common_interface.h"
+#include "tokenentropy.h"
+#include "decodemode.h"
+#include "decodemv.h"
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */     
+
+#define DCT_MAX_VALUE	2048
+
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+typedef struct 
+{    
+    UINT16  MinVal;     // starting magnitude for the token (ReadTokensPredictA seeds 'value' with it)
+    INT16   Length;     // highest extra-bit index: bits Length..0 are read; -1 on the ZERO and EOB rows
+    UINT8   Probs[11];  // Probs[b] = probability handed to nDecodeBool when reading extra bit b
+} TOKENEXTRABITS;
+/* Extra-bit description of every entropy token, indexed by token value.
+ * ReadTokensPredictA only looks up the DCT_VAL_CATEGORY rows. */
+const TOKENEXTRABITS TokenExtraBits2[ MAX_ENTROPY_TOKENS]=
+{
+    { 0, -1,{   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ZERO_TOKEN
+    { 1, 0, {   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
+    { 2, 0, {   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
+    { 3, 0, {   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
+    { 4, 0, {   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
+    { 5, 0, {   159,0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY1
+    { 7, 1, {   145,165,0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY2
+    { 11,2, {   140,148,173,0,  0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY3
+    { 19,3, {   135,140,155,176,0,  0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY4
+    { 35,4, {   130,134,141,157,180,0,  0,  0,  0,  0,  0   } },   //DCT_VAL_CATEGORY5
+    { 67,10,{   129,130,133,140,153,177,196,230,243,254,254 } },   //DCT_VAL_CATEGORY6
+    { 0, -1,{   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   // EOB TOKEN
+};
+
+const UINT32 LTIndex[MAX_ENTROPY_TOKENS] = { 0,1, 2, 3,3,4,4,4,4,4,4, 5 }; 	// per-token value; DecodeBlock fills Left->Tokens[] with the DCT_EOB_TOKEN entry
+const INT32 CoeffToBand[65] = 	// coefficient position -> AC band, looked up by ReadTokensPredictA
+{	-1,0,1,1,2,1,1,2,
+	2,1,1,2,2,2,1,2, 
+	2,2,2,2,1,1,2,2,
+	3,3,4,3,4,4,4,3,
+	3,3,3,3,4,3,3,3,
+	4,4,4,4,4,3,3,4,
+	4,4,3,4,4,4,4,4,
+	4,4,5,5,5,5,5,5,7
+};
+/* NOTE(review): ReadTokensPredictA increments its index before the lookup, so
+ * entry 0 (-1) is not read there; entry 64 gives a band >= 3, which routes
+ * that function to its BlockSize check. */
+
+const UINT32 toggleBand3[]= { 4,5,7,9,11,14,15,20,22 };	// not referenced in the visible part of this file
+
+const int VP5_Mode2Frame[] =	// coding mode -> reference frame id: 0 = none (intra), 1 = last frame, 2 = golden frame
+{
+	1,	// CODE_INTER_NO_MV		0 => Encoded diff from same MB last frame 
+	0,	// CODE_INTRA			1 => DCT Encoded Block
+	1,	// CODE_INTER_PLUS_MV	2 => Encoded diff from included MV MB last frame
+	1,	// CODE_INTER_LAST_MV	3 => Encoded diff from MRU MV MB last frame
+	1,	// CODE_INTER_PRIOR_MV	4 => Last frame reference (NOTE(review): old text duplicated the FOUR_MV row; MV source not shown here)
+	2,	// CODE_USING_GOLDEN	5 => Encoded diff from same MB golden frame
+	2,	// CODE_GOLDEN_MV		6 => Encoded diff from included MV MB golden frame
+	1,  // CODE_INTER_FOUR_MV	7 => Encoded diff from included 4 separate MV blocks
+	2,	// CODE_GOLD_NEAREST_MV 8 => Golden frame reference (NOTE(review): old text duplicated the LAST_MV row; MV source not shown here)
+	2,	// CODE_GOLD_NEAR_MV	9 => Golden frame reference (NOTE(review): old text duplicated the FOUR_MV row; MV source not shown here)
+};
+/****************************************************************************
+*  Explicit imports
+*****************************************************************************
+*/ 
+extern UINT32 LoopFilterLimitValuesV2[Q_TABLE_SIZE];
+extern void decodeModeAndMotionVector(PB_INSTANCE *pbi,UINT32 MBrow,UINT32 MBcol);
+
+
+/* Decodes one bit from the bool decoder using a fixed split at half the
+ * range, i.e. split = (range + 1) >> 1.  Returns the decoded bit (0 or 1).
+ * Value and range are always doubled exactly once per call; when the bit
+ * counter reaches zero it is reset to 8 and the next byte of br->buffer is
+ * OR-ed into the value. */
+INLINE
+int nDecodeBool128
+(
+	BOOL_CODER	* br
+)
+{
+	unsigned int BitsLeft = br->count;
+	unsigned int Range    = br->range;
+	unsigned int Value    = br->value;
+	const unsigned int Split    = ( Range + 1 ) >> 1;
+	const unsigned int BigSplit = Split << 24;
+	unsigned int Bit = 0;
+
+	if ( Value >= BigSplit )
+	{
+		Bit    = 1;
+		Range -= Split;
+		Value -= BigSplit;
+	}
+	else
+	{
+		Range = Split;
+	}
+
+	// one unconditional doubling per decoded bit
+	Value <<= 1;
+	Range <<= 1;
+
+	BitsLeft--;
+	if ( BitsLeft == 0 )
+	{
+		BitsLeft = 8;
+		Value |= br->buffer[br->pos];
+		br->pos++;
+	}
+
+	br->count = BitsLeft;
+	br->value = Value;
+	br->range = Range;
+
+	return Bit;
+}    
+
+/* Decodes one bit from the bool decoder.  'probability' sets the split point:
+ * split = 1 + (((range - 1) * probability) >> 8).  Returns 1 when the current
+ * value lies at or above the split (scaled by 2^24), otherwise 0.
+ * Afterwards range and value are doubled until range is at least 0x80; each
+ * time the bit counter reaches zero it is reset to 8 and the next byte of
+ * br->buffer is OR-ed into the value. */
+INLINE
+int nDecodeBool
+(
+	BOOL_CODER	* br,
+	int probability
+) 
+{
+	int          BitsLeft = br->count;
+	unsigned int Range    = br->range;
+	unsigned int Value    = br->value;
+	const unsigned int Split    = 1 +  (((Range-1) * probability) >> 8);
+	const unsigned int BigSplit = (Split<<24);
+	unsigned int Bit;
+
+	// decide which side of the split the value falls on
+	if ( Value < BigSplit )
+	{
+		Bit   = 0;
+		Range = Split;
+	}
+	else
+	{
+		Bit    = 1;
+		Range -= Split;
+		Value -= BigSplit;
+	}
+
+	// renormalise
+	while ( Range < 0x80 )
+	{
+		Range <<= 1;
+		Value <<= 1;
+
+		--BitsLeft;
+		if ( BitsLeft == 0 )
+		{
+			BitsLeft = 8;
+			Value |= br->buffer[br->pos];
+			br->pos++;
+		}
+	}
+
+	br->count = BitsLeft;
+	br->value = Value;
+	br->range = Range;
+
+	return Bit;
+} 
+
+
+/****************************************************************************
+* 
+*  ROUTINE       :     ConfigureEntropyDecoder
+*
+*  INPUTS        :     PB_INSTANCE *pbi : decoder instance (bool decoder pbi->br is read)
+*                      UINT8 FrameType  : BASE_FRAME forces every probability to be written
+*
+*  OUTPUTS       :     pbi->DcProbs and pbi->AcProbs
+*
+*  RETURNS       :     None.
+*
+*  FUNCTION      :     Reads the baseline DC and AC token probability updates from
+*                      the bitstream, then calls ConfigureContexts().
+*
+*  SPECIAL NOTES :     A node whose update flag is clear keeps its stored value,
+*                      except on a BASE_FRAME where it takes the most recent value
+*                      read for the same node index during this call (128 if none).
+*
+*  ERRORS        :     None.
+*
+****************************************************************************/
+void ConfigureEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+	UINT8   LastProb[MAX_ENTROPY_TOKENS-1];
+	UINT32  Plane;
+	UINT32  Band;
+	UINT32	Node;
+	INT32   Prec;
+	int     Updated;
+
+	// Every node starts from 128
+	memset( LastProb, 128, sizeof(LastProb) );
+
+	// Baseline DC probabilities, for each of the two planes
+	for ( Plane = 0; Plane < 2; Plane++ )
+	{
+		for ( Node = 0; Node < MAX_ENTROPY_TOKENS-1; Node++ )
+		{
+			Updated = nDecodeBool(&pbi->br, DcUpdateProbs[Plane][Node] );
+			if ( Updated )
+			{
+				// Sent at reduced resolution; 0 is not a legal value.
+				LastProb[Node] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( LastProb[Node] == 0 )
+					LastProb[Node] = 1;
+			}
+
+			if ( Updated || FrameType == BASE_FRAME )
+				pbi->DcProbs[DCProbOffset(Plane,Node)] = LastProb[Node];
+		}
+	}
+
+	// Baseline AC probabilities, per preceding-token case, plane and band
+	for ( Prec = 0; Prec < PREC_CASES; Prec++ )
+	{
+		for ( Plane = 0; Plane < 2; Plane++ )
+		{
+			for ( Band = 0; Band < VP5_AC_BANDS; Band++ )
+			{
+				for ( Node = 0; Node < MAX_ENTROPY_TOKENS-1; Node++ )
+				{
+					Updated = nDecodeBool(&pbi->br, AcUpdateProbs[Prec][Plane][Band][Node] );
+					if ( Updated )
+					{
+						// Sent at reduced resolution; 0 is not a legal value.
+						LastProb[Node] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+						if ( LastProb[Node] == 0 )
+							LastProb[Node] = 1;
+					}
+
+					if ( Updated || FrameType == BASE_FRAME )
+						pbi->AcProbs[ACProbOffset(Plane,Prec,Band,Node)] = LastProb[Node];
+				}
+			}
+		}
+	}
+
+	// Derive the context specific probabilities from the new baseline data
+	ConfigureContexts(pbi);
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ResetLeftContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : decoder instance
+ *
+ *  OUTPUTS       :     pbi->fc.LeftY[0], LeftY[1], LeftU and LeftV
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Re-initialises the four left block contexts: each one is
+ *                      zero filled, then Mode = -1, Frame = 4 and EOBPos = 24.
+ *
+ *  SPECIAL NOTES :     4 is not a value found in VP5_Mode2Frame, so a reset
+ *                      context does not match any frame in PredictDC.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ResetLeftContext
+( 
+	PB_INSTANCE *pbi
+)
+{
+	BLOCK_CONTEXT *Ctx[4];
+	UINT32 n;
+
+	Ctx[0] = &pbi->fc.LeftY[0];
+	Ctx[1] = &pbi->fc.LeftY[1];
+	Ctx[2] = &pbi->fc.LeftU;
+	Ctx[3] = &pbi->fc.LeftV;
+
+	for ( n = 0; n < 4; n++ )
+	{
+		memset((void *) Ctx[n], 0, sizeof(BLOCK_CONTEXT));
+
+		Ctx[n]->Mode   = (CODING_MODE)-1;
+		Ctx[n]->Frame  = 4;
+		Ctx[n]->EOBPos = 24;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ResetAboveContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : decoder instance
+ *
+ *  OUTPUTS       :     pbi->fc.AboveY/AboveU/AboveV and pbi->fc.LastDcY/U/V
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Re-initialises the above contexts (HFragments+8 luma
+ *                      entries, HFragments/2+8 entries for each chroma plane) to
+ *                      Mode = -1, Frame = 4, Dc = 0, Tokens[0] = 0, and resets the
+ *                      last-DC trackers (all 0 except LastDcU[0] = LastDcV[0] = 128).
+ *
+ *  SPECIAL NOTES :     When Vp3VersionNo < 6, entry 1 of AboveU and AboveV gets
+ *                      Mode = 0 and Frame = 0 instead.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ResetAboveContext
+( 
+	PB_INSTANCE *pbi
+)
+{
+	BLOCK_CONTEXTA *Ctx;
+	UINT32 i;
+
+	// luma row
+	for ( i = 0 ; i < pbi->HFragments+8;i++)
+	{
+		Ctx = &pbi->fc.AboveY[i];
+		Ctx->Mode      = -1;
+		Ctx->Frame     = 4;
+		Ctx->Dc        = 0;
+		Ctx->Tokens[0] = 0;
+	}
+
+	// both chroma rows
+	for ( i = 0 ; i < pbi->HFragments/2 + 8;i++)
+	{
+		Ctx = &pbi->fc.AboveU[i];
+		Ctx->Mode      = -1;
+		Ctx->Frame     = 4;
+		Ctx->Tokens[0] = 0;
+		Ctx->Dc        = 0;
+
+		Ctx = &pbi->fc.AboveV[i];
+		Ctx->Mode      = -1;
+		Ctx->Frame     = 4;
+		Ctx->Tokens[0] = 0;
+		Ctx->Dc        = 0;
+	}
+
+	if(pbi->Vp3VersionNo < 6)
+	{
+		pbi->fc.AboveU[1].Mode  = 0;
+		pbi->fc.AboveU[1].Frame = 0;
+		pbi->fc.AboveV[1].Mode  = 0;
+		pbi->fc.AboveV[1].Frame = 0;
+	}
+
+	// last-DC trackers: everything 0, then the two chroma entries at index 0
+	for ( i = 0 ; i < 3 ; i++)
+	{
+		pbi->fc.LastDcY[i] = 0;
+		pbi->fc.LastDcU[i] = 0;
+		pbi->fc.LastDcV[i] = 0;
+	}
+	pbi->fc.LastDcU[0] = 128;
+	pbi->fc.LastDcV[0] = 128;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UpdateContext
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : decoder instance (pbi->mbi is read)
+ *                      BLOCK_POSITION bp : block whose results are recorded
+ *
+ *  OUTPUTS       :     BLOCK_CONTEXT *c  : Mode, Dc and Frame are overwritten
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Records the block mode, the DC coefficient and the
+ *                      reference frame id of the block just decoded.
+ *
+ *  SPECIAL NOTES :     Frame is derived from the macroblock mode (mbi.Mode),
+ *                      Mode from the per-block mode (mbi.BlockMode[bp]).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void UpdateContext
+( 
+	PB_INSTANCE *pbi,
+	BLOCK_CONTEXT *c,
+	BLOCK_POSITION bp
+)
+{
+	c->Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+	c->Dc    = pbi->mbi.Coeffs[bp][0];
+	c->Mode  = pbi->mbi.BlockMode[bp];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UpdateContextA
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : decoder instance (pbi->mbi is read)
+ *                      BLOCK_POSITION bp : block whose results are recorded
+ *
+ *  OUTPUTS       :     BLOCK_CONTEXTA *c : Mode, Dc and Frame are overwritten
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Same as UpdateContext, for the BLOCK_CONTEXTA type used
+ *                      by the above-row contexts.
+ *
+ *  SPECIAL NOTES :     Frame is derived from the macroblock mode (mbi.Mode),
+ *                      Mode from the per-block mode (mbi.BlockMode[bp]).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void UpdateContextA
+( 
+	PB_INSTANCE *pbi,
+	BLOCK_CONTEXTA *c,
+	BLOCK_POSITION bp
+)
+{
+	c->Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+	c->Dc    = pbi->mbi.Coeffs[bp][0];
+	c->Mode  = pbi->mbi.BlockMode[bp];
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictDC
+ *
+ *  INPUTS        :     pbi    - decoder instance; pbi->mbi.Mode selects the frame id
+ *                               and pbi->mbi.Coeffs[bp][0] holds the decoded DC value
+ *                      bp     - block position inside the macroblock
+ *                      LastDC - last DC value per frame id (indexed by VP5_Mode2Frame[])
+ *                      Above  - context of the block above; Above[-1] and Above[+1]
+ *                               are read as well
+ *                      Left   - context of the block to the left
+ *
+ *  OUTPUTS       :     pbi->mbi.Coeffs[bp][0] and LastDC[Frame] are updated.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Predicts the DC coefficient of this block from the
+ *                      neighbouring contexts that use the same frame id and adds
+ *                      the prediction to the decoded value.
+ *
+ *  SPECIAL NOTES :     Candidates are tried in the order Left, Above, Above[-1],
+ *                      Above[+1].  At most two are used; two are averaged, one is
+ *                      used as is, and with none LastDC[Frame] is the prediction.
+ *                      The goto version below is the live one; the "#if 0" code
+ *                      is the straightforward form of the same rule.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define HIGHBITDUPPED(X) (((signed short) X)  >> 15)	// NOTE(review): relies on X fitting in 16 bits and on >> of a negative value being arithmetic
+void PredictDC
+( 
+	PB_INSTANCE *pbi,
+	BLOCK_POSITION bp,
+	Q_LIST_ENTRY *LastDC,
+	BLOCK_CONTEXTA *Above,
+	BLOCK_CONTEXT *Left
+)
+{
+	UINT8 Frame = VP5_Mode2Frame[pbi->mbi.Mode];
+	UINT8 Count = 0;	// only used by the disabled (#if 0) version
+#if 0
+	INT32 Avg = 0;
+	if( Frame==Left->Frame) 
+	{
+		Avg += Left->Dc;
+		Count ++;
+	}
+	if( Frame==Above->Frame) 
+	{
+		Avg += Above->Dc;
+		Count ++;
+	}
+
+	if( Count < 2 && Frame == Above[-1].Frame)
+	{
+		Avg += Above[-1].Dc;
+		Count ++;
+	}
+
+	if( Count < 2 && Frame == Above[+1].Frame)
+	{
+		Avg += Above[+1].Dc;
+		Count ++;
+	}
+	if(Count==0)
+	{
+		Avg = LastDC[Frame];
+	}
+	else if(Count==2)
+	{
+		// trick to determine when to add 1 if negative (for proper truncation)
+		Avg += (HIGHBITDUPPED(Avg)&1);
+		Avg >>= 1;
+	}
+
+#else
+INT32 Avg ;	// assigned on every path before state_done is reached
+//state_L:
+    if( Frame != Left->Frame) 
+		goto state_A0;	// Left does not match: nothing accumulated yet
+	Avg = Left->Dc;
+//	goto state_A1;
+
+//state_A1:
+	if(Frame != Above->Frame)
+		goto state_AM1;	// one candidate so far (Left)
+	Avg += Above->Dc;
+//	goto state_TWO;
+
+state_TWO:	// two candidates summed: halve, adding 1 first when the sum is negative
+    Avg += (HIGHBITDUPPED(Avg)&1);
+	Avg >>= 1;
+	goto state_done;
+
+state_A0:	// no candidate yet, try Above
+	if(Frame != Above->Frame)
+		goto state_AM0;
+	Avg = Above->Dc;
+//	goto state_AM1;
+
+state_AM1:	// one candidate held, try Above[-1]
+	if(Frame == Above[-1].Frame)
+	{
+		Avg += Above[-1].Dc;
+		goto state_TWO;
+	}
+//	goto state_AP1;
+
+state_AP1:	// one candidate held, try Above[+1]; without a match the single value is used
+	if(Frame != Above[+1].Frame)
+		goto state_done;
+	Avg += Above[+1].Dc;
+	goto state_TWO;
+
+
+state_AM0:	// no candidate yet, try Above[-1]
+	if(Frame == Above[-1].Frame)
+	{
+		Avg = Above[-1].Dc;
+		goto state_AP1;
+	}
+	//goto state_AP0;
+
+//state_AP0:
+	if(Frame != Above[+1].Frame)
+		Avg = LastDC[Frame];	// no neighbour matched at all
+	else
+		Avg = Above[+1].Dc;
+
+state_done:
+
+#endif 
+
+	pbi->mbi.Coeffs[bp][0] += Avg;	// decoded value plus prediction
+	LastDC[Frame] = pbi->mbi.Coeffs[bp][0];	// remembered as the fallback for this frame id
+
+	return ;
+}
+
+
+#define TI(x) (TransIndex[x]) 	// maps a decode-order position through the caller's local TransIndex table
+
+/****************************************************************************
+* 
+*  ROUTINE       :     ReadTokensPredictA
+*
+*  INPUTS        :     pbi       - decoder instance (bool decoder, probability tables,
+*                                  quantizer->transIndex)
+*                      BlockSize - number of coefficient positions in the block
+*                      Plane     - plane selector used to offset into the probability
+*                                  and context tables
+*                      Above     - context of the block above (only Tokens[0] is read)
+*                      Left      - context of the block to the left (Tokens[] is read
+*                                  and rewritten position by position)
+*                               
+*  OUTPUTS       :     CoeffData - non-zero coefficients are stored at
+*                                  TransIndex[position]; zero positions are not
+*                                  written, so the buffer must already be cleared.
+*
+*  RETURNS       :     Position at which end-of-block was decoded, or BlockSize-1
+*                      when the loop stops on the BlockSize check (that check is
+*                      only made for bands >= 3).
+*
+*  FUNCTION      :     Fills CoeffData with one blocks worth of coefficients
+*                      decoded from the bitstream.
+*
+*  SPECIAL NOTES :     An end-of-block can only be decoded directly after a
+*                      non-zero token or at the first position.
+*
+*
+*  ERRORS        :     None.
+*
+****************************************************************************/
+UINT8 ReadTokensPredictA(
+	PB_INSTANCE *pbi,
+	INT16 * CoeffData,
+	UINT32 BlockSize,
+	UINT32 Plane,
+	BLOCK_CONTEXTA *Above,
+	BLOCK_CONTEXT *Left
+)
+{
+	INT32		token;
+	BOOL_CODER	* br = &pbi->br;
+	UINT8		EncodedCoeffs = 0;
+	UINT8		LeftContext;
+	UINT8		AboveContext;
+	UINT8		*BaselineProbsPtr;
+	UINT8		*ContextProbsPtr;
+    BOOL        LastTokenNonZero;   // Was last token in this block non-zero
+    UINT8       PrecTokenIndex;		// Preceding token index: 0 = zero, 1 = one, 2 = larger
+    UINT32      Band;
+	INT32		SignBit;
+	INT32		BitsCount ;
+	UINT8		*AcProbsPtr = pbi->AcProbs + ACProbOffset(Plane,0,0,0);
+	UINT8		*AcContextPtr = pbi->AcNodeContexts + ACContextOffset(Plane,0,0,0,0);
+	BOOL        EOB = FALSE;	// never read in this function
+	UINT32      *TransIndex = pbi->quantizer->transIndex; 
+	INT32       value;
+
+
+	// determine the contexts for dc
+	LastTokenNonZero = TRUE;	// allows an end-of-block at the very first position
+	LeftContext  = Left->Tokens[EncodedCoeffs];
+	AboveContext = Above->Tokens[EncodedCoeffs];
+
+	BaselineProbsPtr = pbi->DcProbs+DCProbOffset(Plane,0);
+	ContextProbsPtr = pbi->DcNodeContexts+DCContextOffset(Plane,LeftContext,AboveContext,0);
+	
+	do
+	{
+		// First test for the ! ZeroContext
+		if ( !nDecodeBool(br, ContextProbsPtr[ZERO_CONTEXT_NODE] ) )	 		
+		{
+			// Zero or EOB
+			if ( LastTokenNonZero )	
+			{
+				if ( nDecodeBool(br, ContextProbsPtr[EOB_CONTEXT_NODE]) )	// 1 => explicit zero token, 0 => end of block
+				{
+					PrecTokenIndex = 0;
+					Left->Tokens[EncodedCoeffs] = 0;
+				}
+				else 
+				{
+					EncodedCoeffs++;	// balanced by the decrement after the loop
+					break;
+				}
+			}
+			else
+			{
+				PrecTokenIndex = 0;
+				Left->Tokens[EncodedCoeffs] = 0;
+			}
+			LastTokenNonZero = FALSE;
+		}
+		else
+        {													
+			
+			// Was the value a 1
+			if ( nDecodeBool(br, ContextProbsPtr[ONE_CONTEXT_NODE]) )
+			{
+				// Value token > 1
+				if ( nDecodeBool(br, ContextProbsPtr[LOW_VAL_CONTEXT_NODE]) )
+				{												
+					// High value (value category) token
+					Left->Tokens[EncodedCoeffs] = 4;
+					if ( nDecodeBool(br, BaselineProbsPtr[HIGH_LOW_CONTEXT_NODE]) )
+					{								
+						// Cat3,Cat4 or Cat5
+						if ( nDecodeBool(br, BaselineProbsPtr[CAT_THREEFOUR_CONTEXT_NODE]) )
+						{
+							token = DCT_VAL_CATEGORY5 + nDecodeBool(br, BaselineProbsPtr[CAT_FIVE_CONTEXT_NODE]);
+						}
+						else									
+						{
+							token = DCT_VAL_CATEGORY3 + nDecodeBool(br, BaselineProbsPtr[CAT_THREE_CONTEXT_NODE]);
+						}
+					}
+					else
+					{								
+						// Either Cat1 or Cat2
+						token = DCT_VAL_CATEGORY1 + nDecodeBool(br, BaselineProbsPtr[CAT_ONE_CONTEXT_NODE]);
+					}
+
+
+					// Get the Sign Bit
+					SignBit = nDecodeBool128(br);
+
+					value = TokenExtraBits2[token].MinVal;	
+
+					// Read the extra bits
+					BitsCount = TokenExtraBits2[token].Length;
+
+					do	// bits Length..0, most significant first
+					{
+						value += (nDecodeBool(br, TokenExtraBits2[token].Probs[BitsCount])<<BitsCount);
+						BitsCount -- ;
+					}
+					while( BitsCount >= 0);
+
+
+					// Combine the sign and value: (v ^ -s) + s negates v when s == 1
+					CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((value ^ -SignBit) + SignBit); 
+
+				}
+				else
+				{									
+					// Low value token
+					if ( nDecodeBool(br, ContextProbsPtr[TWO_CONTEXT_NODE]) )
+					{											
+						// Either a 3 or a 4
+						Left->Tokens[EncodedCoeffs] = 3;
+						token = THREE_TOKEN + nDecodeBool(br, BaselineProbsPtr[THREE_CONTEXT_NODE]);
+					}
+					else			
+					{											
+						// Is it a  2
+						token = TWO_TOKEN;	
+						Left->Tokens[EncodedCoeffs] = 2;
+					}
+
+					// Get the Sign Bit and store the result in our coeff array
+			        SignBit = nDecodeBool128(br);
+					CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((token ^ -SignBit) + SignBit); 
+
+				}
+				PrecTokenIndex = 2;
+			}
+			else
+			{
+				PrecTokenIndex = 1;
+				Left->Tokens[EncodedCoeffs] = 1;
+
+				// Get the Sign Bit
+		        SignBit = nDecodeBool128(br);
+
+			    // Combine the sign and value
+				CoeffData[TI(EncodedCoeffs)] =(Q_LIST_ENTRY)((1 ^ -SignBit) + SignBit); 
+			}
+			LastTokenNonZero = TRUE;
+
+		}
+		
+		// calculate the context for the next token. 
+        EncodedCoeffs ++;			
+        Band = CoeffToBand [ EncodedCoeffs ];
+        BaselineProbsPtr = AcProbsPtr + ACProbOffset(0,PrecTokenIndex,Band,0);
+		if(Band < 3)
+		{
+			ContextProbsPtr = AcContextPtr + ACContextOffset(0,PrecTokenIndex,Band,Left->Tokens[EncodedCoeffs],0);			
+		}
+		else
+		{
+			if(EncodedCoeffs >= BlockSize)
+				break;
+
+			ContextProbsPtr = BaselineProbsPtr;	// bands >= 3 use the baseline probabilities directly
+		}
+
+        
+	} while ( 1 );
+	EncodedCoeffs --;	// both exits leave the counter one past the value to report
+				
+	return EncodedCoeffs;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : decoder instance; pbi->mbi must already
+ *                                          point at the Above/Left contexts, LastDc
+ *                                          and plane of the block
+ *                      UINT32 MBrow      : not used
+ *                      UINT32 MBcol      : not used
+ *                      BLOCK_POSITION bp : block position inside the macroblock
+ *
+ *  OUTPUTS       :     Reconstruction via ReconstructBlock(); updated Above and
+ *                      Left contexts; pbi->mbi.Coeffs[bp] cleared again.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Decodes one block: tokens -> DC prediction -> inverse
+ *                      transform -> reconstruction -> context update.
+ *
+ *  SPECIAL NOTES :     The coefficient buffer is cleared after use; how much is
+ *                      cleared depends on the end-of-block position (1 entry,
+ *                      8+4+4+4 entries at offsets 0/8/16/24, or all 64).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DecodeBlock
+( 
+	PB_INSTANCE *pbi,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	BLOCK_POSITION bp
+)
+{
+	unsigned int Offset;
+	unsigned int PrevEOB = pbi->mbi.Left->EOBPos;
+
+	// the previous end-of-block position is capped at 24
+	if ( PrevEOB > 24 )
+	{
+		PrevEOB = 24;
+	}
+
+	// read tokens from the bitstream and convert to coefficients.
+	pbi->mbi.Left->EOBPos = ReadTokensPredictA(pbi, pbi->mbi.Coeffs[bp], 64, (pbi->mbi.Plane!=0), pbi->mbi.Above, pbi->mbi.Left);
+
+	// positions between the new and the previous end-of-block get the EOB value in the left context
+	if ( pbi->mbi.Left->EOBPos < PrevEOB )
+	{
+		memset (&pbi->mbi.Left->Tokens[pbi->mbi.Left->EOBPos], LTIndex[DCT_EOB_TOKEN], PrevEOB - pbi->mbi.Left->EOBPos);
+	}
+
+	pbi->mbi.Above->Tokens[0] = pbi->mbi.Left->Tokens[0];
+
+	// DC prediction from the surrounding contexts
+	PredictDC(pbi, bp, pbi->mbi.LastDc, pbi->mbi.Above, pbi->mbi.Left);
+
+	// inverse transform, selected by the end-of-block position
+	pbi->idct[pbi->mbi.Left->EOBPos]( pbi->mbi.Coeffs[bp], pbi->quantizer->dequant_coeffs[QTableSelect[bp]], pbi->ReconDataBuffer );
+
+	// write the block into the reconstruction buffer
+	ReconstructBlock(pbi,bp);
+
+	// record this block in both contexts for the blocks that follow
+	UpdateContextA(pbi,pbi->mbi.Above,bp);
+	UpdateContext(pbi,pbi->mbi.Left,bp);
+
+	// clear the coefficient buffer for the next block
+	if ( pbi->mbi.Left->EOBPos > 10 )
+	{
+		memset(pbi->mbi.Coeffs[bp], 0,64*sizeof(Q_LIST_ENTRY));
+	}
+	else if ( pbi->mbi.Left->EOBPos > 1 )
+	{
+		memset(pbi->mbi.Coeffs[bp], 0,8*sizeof(Q_LIST_ENTRY));
+		for ( Offset = 8; Offset <= 24; Offset += 8 )
+		{
+			memset(pbi->mbi.Coeffs[bp]+Offset, 0,4*sizeof(Q_LIST_ENTRY));
+		}
+	}
+	else
+	{
+		pbi->mbi.Coeffs[bp][0] = 0;
+	}
+}
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeMacroBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : decoder instance
+ *                      UINT32 MBrow     : macroblock row
+ *                      UINT32 MBcol     : macroblock column
+ *
+ *  OUTPUTS       :     pbi->mbi is set up for, and updated by, each of the six
+ *                      DecodeBlock() calls.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Decodes A MacroBlock: the interlaced flag (when enabled),
+ *                      the mode and motion vector (non base frames), then blocks
+ *                      0-3 with the Y settings and blocks 4 and 5 with the UV
+ *                      settings.
+ *
+ *  SPECIAL NOTES :     Statement order matters: every DecodeBlock() call reads
+ *                      the pbi->mbi fields assigned just before it.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DecodeMacroBlock
+(
+	PB_INSTANCE *pbi,
+	UINT32 MBrow,
+	UINT32 MBcol
+)
+{
+	UINT32 MBPointer;	// offset of the macroblock's first Y sample
+	INT32  NextBlock;	// row distance between the upper and lower Y block pair
+
+    //***********************************************************************
+    // Copy the existing structures into what we have now I'll fix this next.
+
+	// dumb way to encode the interlaced decision but it works!!!
+
+	if(pbi->Configuration.Interlaced)
+	{
+		UINT8 prob = pbi->probInterlaced;
+		// super simple context adjustment
+		if(MBcol>2)	// DecodeFrameMbs starts each row at column 2, so that column uses the unadjusted probability
+		{
+			// adjust the probability per the last one we did 
+			if(pbi->mbi.Interlaced)
+				prob=prob-(prob>>1);
+			else 
+				prob=prob+((256-prob)>>1);
+		}
+		pbi->mbi.Interlaced = nDecodeBool(	&pbi->br, prob);
+	}
+	else
+		pbi->mbi.Interlaced = 0;
+
+	if(pbi->FrameType == BASE_FRAME )
+	{
+		pbi->mbi.Mode = CODE_INTRA;
+	}
+	else
+	{
+		decodeModeAndMotionVector(pbi, MBrow, MBcol );
+	}
+
+	if(pbi->mbi.Interlaced == 0)
+	{
+		NextBlock = 8;
+		pbi->mbi.CurrentReconStride = pbi->Configuration.YStride ;
+	}
+	else
+	{
+		NextBlock = 1;	// lower blocks start one line down and every block uses a doubled stride
+		pbi->mbi.CurrentReconStride = pbi->Configuration.YStride * 2;
+	}
+
+	// y plane values
+	pbi->mbi.FrameReconStride = pbi->Configuration.YStride;
+	pbi->mbi.MvShift = 1;
+	pbi->mbi.MvModMask = 1;
+	pbi->mbi.LastDc = pbi->fc.LastDcY;
+	pbi->mbi.Plane = 0;
+	pbi->mbi.SourceY = MBrow * 16;
+	pbi->mbi.SourceX = MBcol * 16;
+	MBPointer = pbi->ReconYDataOffset 
+		+ pbi->mbi.SourceY * pbi->Configuration.YStride
+		+ pbi->mbi.SourceX;
+	
+	// Block 0 
+	pbi->mbi.Recon = MBPointer;
+	pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2];
+	pbi->mbi.Left  = &pbi->fc.LeftY[0];
+	DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)0);
+	
+	// Block 1 
+	pbi->mbi.Recon += 8;
+	pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2+1];
+	pbi->mbi.Left  = &pbi->fc.LeftY[0];
+	pbi->mbi.SourceX += 8;
+	DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)1);
+	
+	// Block 2 
+	pbi->mbi.Recon = MBPointer + NextBlock * pbi->Configuration.YStride;
+	pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2];
+	pbi->mbi.Left  = &pbi->fc.LeftY[1];
+	pbi->mbi.SourceX -= 8;
+	pbi->mbi.SourceY += NextBlock;
+	DecodeBlock(pbi, MBrow, MBcol, 2);
+	
+	// Block 3
+	pbi->mbi.Recon += 8;
+	pbi->mbi.Above = &pbi->fc.AboveY[MBcol*2+1];
+	pbi->mbi.Left  = &pbi->fc.LeftY[1];
+	pbi->mbi.SourceX += 8;
+	DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)3);
+	
+	// uv plane values
+	pbi->mbi.FrameReconStride = pbi->Configuration.UVStride;
+	pbi->mbi.CurrentReconStride = pbi->Configuration.UVStride;	// the doubled stride is not applied to the UV blocks
+	pbi->mbi.SourceY = MBrow * 8;
+	pbi->mbi.SourceX = MBcol * 8;
+	pbi->mbi.MvShift = 2;
+	pbi->mbi.MvModMask = 3;
+	
+	// Block 4
+	pbi->mbi.Recon = pbi->ReconUDataOffset + pbi->mbi.SourceY * pbi->mbi.CurrentReconStride + pbi->mbi.SourceX;
+	pbi->mbi.Above = &pbi->fc.AboveU[MBcol];
+	pbi->mbi.Left = &pbi->fc.LeftU;
+	pbi->mbi.LastDc = pbi->fc.LastDcU;
+	pbi->mbi.Plane = 1;
+	DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)4);
+	
+	// Block 5
+	pbi->mbi.Above = &pbi->fc.AboveV[MBcol];
+	pbi->mbi.Left = &pbi->fc.LeftV;
+	pbi->mbi.Recon = pbi->ReconVDataOffset + pbi->mbi.SourceY * pbi->mbi.CurrentReconStride + pbi->mbi.SourceX;
+	pbi->mbi.LastDc = pbi->fc.LastDcV;
+	pbi->mbi.Plane = 2;	// DecodeBlock only tests Plane != 0, so 1 and 2 select the same tables there
+	DecodeBlock(pbi, MBrow, MBcol, (BLOCK_POSITION)5);
+	
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeFrameMbs
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : decoder instance, positioned after the
+ *                                         frame header in the bitstream
+ *
+ *  OUTPUTS       :     Probability tables, contexts and, through
+ *                      DecodeMacroBlock(), the decoded macroblocks.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Decodes MacroBlocks of a Frame.  Non base frames first
+ *                      read the mode and motion vector probabilities; base
+ *                      frames reset them to their defaults.  Then the token
+ *                      probabilities are read and rows 2..MBRows-3, columns
+ *                      2..MBCols-3 are decoded in raster order.
+ *
+ *  SPECIAL NOTES :     The left contexts are reset at the start of every row,
+ *                      the above contexts once per frame.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DecodeFrameMbs
+( 
+	PB_INSTANCE *pbi
+)
+{
+	UINT32 MBrow, MBcol;
+	UINT32 MBRows = pbi->MBRows; 
+	UINT32 MBCols = pbi->MBCols;
+
+	if(pbi->FrameType != BASE_FRAME )
+	{
+		DecodeModeProbs(pbi);
+		ConfigureMvEntropyDecoder( pbi, pbi->FrameType );
+        pbi->LastMode = CODE_INTER_NO_MV;
+	}
+	else
+	{
+		memcpy ( pbi->probXmitted,BaselineXmittedProbs,sizeof(pbi->probXmitted));
+		// For now these are just 128
+		memset ( pbi->MvSignProbs, 128, sizeof(pbi->MvSignProbs) );
+		memset ( pbi->MvZeroProbs, 128, sizeof(pbi->MvZeroProbs) );
+		memset ( pbi->MvHalfPixelProbs, DEFAULT_HALF_PIXEL_PROB, sizeof(pbi->MvHalfPixelProbs) );
+		memset ( pbi->MvLowBitProbs, 128, sizeof(pbi->MvLowBitProbs) );
+		memset ( pbi->MvSizeProbs, 128, sizeof(pbi->MvSizeProbs) );
+		memset ( pbi->MBModeProb,128,sizeof(pbi->MBModeProb));
+		// Each table is sized by its own declaration so a size difference between
+		// the two can neither overrun nor under-fill BModeProb.
+		// NOTE(review): assumes BModeProb is an array member like MBModeProb -- confirm in pbdll.h
+		memset ( pbi->BModeProb,128,sizeof(pbi->BModeProb));
+		memset ( pbi->predictionMode,1,sizeof(char)*pbi->MacroBlocks );
+	}
+
+	ConfigureEntropyDecoder( pbi, pbi->FrameType ); 
+
+	if(pbi->Configuration.Interlaced == 1)
+		pbi->probInterlaced = ((UINT8)VP5_bitread( &pbi->br,   8 ));  
+
+	// since we are on a new frame reset the above contexts 
+	ResetAboveContext(pbi);
+
+	// Default clear data area down to 0s
+    memset(pbi->mbi.Coeffs, 0,6*72*sizeof(Q_LIST_ENTRY));
+
+	// for each row of macroblocks; "x+2 < limit" instead of "x < limit-2" so an
+	// unsigned limit below 2 gives an empty loop rather than wrapping around
+	for ( MBrow=2; MBrow+2<MBRows; MBrow++ )
+	{
+
+		ResetLeftContext(pbi);
+
+		// for each macroblock within a row of macroblocks
+		for ( MBcol=2; MBcol+2<MBCols; MBcol++ )
+		{
+
+			// Decode the macroblock
+			DecodeMacroBlock(pbi,MBrow,MBcol);
+            
+		} // mb col
+
+
+	} // mbrow
+
+//	printmodes(pbi);
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c
new file mode 100644
index 00000000..2f3a565b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemode.c
@@ -0,0 +1,799 @@
+/****************************************************************************
+*        
+*   Module Title :	   Decodemode.c     
+*
+*   Description  :     functions for decoding modes and motionvectors 
+*
+*   AUTHOR       :     James Bankoski
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 30OCT01  New Configuration baseline.
+*   1.01 JBB 04AP402  Reworked lower footprint mode compression scheme
+*
+*****************************************************************************
+*/ 
+//************************************************************************************
+// Decoding the Modes: 
+//
+//  Decode Mode Tree Looks like this :
+//
+//
+//
+//
+//                                            zz 
+//                                                             
+//                               0                        Mode Same As Last
+//                                                                
+//                    
+//              1                                       2
+//
+//       3             4                  5                          6
+//
+//  NoMV   +MV    Nest  Near        Intra   FourMV          7                 8
+//                                                      
+//                                                   00Gold   GoldMV    GNrst   GNear
+//
+//
+// 30 probability contexts are set up at each branch (in probMode) corresponding to 
+//
+//   3 for what situation we are in at the mode level ( all modes available, 
+//     no nearest mv found, and no near mv found) 
+//
+//  10 one for each possible last mode
+//
+// Note: if the last mode was near then the probability of getting near at position 4 
+// above is set to 0 (it would have been coded as same as last). Note also that the 
+// probability of getting near when no near mv is available is also always set to 0.
+//
+// These probs are created from the 20 that can be xmitted in the bitstream (probXmitted)
+//    For each mode 2 probabilities can be transmitted:
+//        probability that the mode will appear if the last mode was the same
+//        probability that the mode will appear if the last mode is not that mode
+//
+//************************************************************************************
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#include "pbdll.h"
+#include "decodemode.h"
+#include "decodemv.h"
+
+
+/****************************************************************************
+*  Implicit Imports
+*****************************************************************************
+*/        
+#define STRICT              /* Strict type checking. */
+
+#ifdef MAPCA
+    #include <eti/mm.h>
+#endif
+
+/****************************************************************************
+*  Exported data structures.
+*****************************************************************************
+*/        
+
+
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+//*****************************************************************************
+// ModeVQ: This structure holds a table of probability vectors for encoding modes
+// To build this table a number of clips were run through and allowed to 
+// select each of the probabilities that were best for them on each frame.  These 
+// choices were output and a vector quantizer was used to optimize the selection 
+// of 16 vectors for each MODETYPE (allmodes available, nonearest, and no near)
+//*****************************************************************************
+UINT8 ModeVq[MODETYPES][MODEVECTORS][MAX_MODES*2]=	// [mode type][vector picked by a 4 bit index][2 bytes per mode]
+{	// DecodeModeProbs copies byte 2*i of a vector to probXmitted[type][1][i] and byte 2*i+1 to probXmitted[type][0][i]
+9,15,32,25,7,19,9,21,1,12,14,12,3,18,14,23,3,10,0,4,
+48,39,1,2,11,27,29,44,7,27,1,4,0,3,1,6,1,2,0,0,
+21,32,1,2,4,10,32,43,6,23,2,3,1,19,1,6,12,21,0,7,
+69,83,0,0,0,2,10,29,3,12,0,1,0,3,0,3,2,2,0,0,
+11,20,1,4,18,36,43,48,13,35,0,2,0,5,3,12,1,2,0,0,
+70,44,0,1,2,10,37,46,8,26,0,2,0,2,0,2,0,1,0,0,
+8,15,0,1,8,21,74,53,22,42,0,1,0,2,0,3,1,2,0,0,
+141,42,0,0,1,4,11,24,1,11,0,1,0,1,0,2,0,0,0,0,
+8,19,4,10,24,45,21,37,9,29,0,3,1,7,11,25,0,2,0,1,
+46,42,0,1,2,10,54,51,10,30,0,2,0,2,0,1,0,1,0,0,
+28,32,0,0,3,10,75,51,14,33,0,1,0,2,0,1,1,2,0,0,
+100,46,0,1,3,9,21,37,5,20,0,1,0,2,1,2,0,1,0,0,
+27,29,0,1,9,25,53,51,12,34,0,1,0,3,1,5,0,2,0,0,
+80,38,0,0,1,4,69,33,5,16,0,1,0,1,0,0,0,1,0,0,
+16,20,0,0,2,8,104,49,15,33,0,1,0,1,0,1,1,1,0,0,
+194,16,0,0,1,1,1,9,1,3,0,0,0,1,0,1,0,0,0,0,
+// second group of 16 vectors: bytes 6..9 (modes 3 and 4) are zero in every row - presumably the "no nearest" mode type
+41,22,1,0,1,31,0,0,0,0,0,1,1,7,0,1,98,25,4,10,
+123,37,6,4,1,27,0,0,0,0,5,8,1,7,0,1,12,10,0,2,
+26,14,14,12,0,24,0,0,0,0,55,17,1,9,0,36,5,7,1,3,
+209,5,0,0,0,27,0,0,0,0,0,1,0,1,0,1,0,0,0,0,
+2,5,4,5,0,121,0,0,0,0,0,3,2,4,1,4,2,2,0,1,
+175,5,0,1,0,48,0,0,0,0,0,2,0,1,0,2,0,1,0,0,
+83,5,2,3,0,102,0,0,0,0,1,3,0,2,0,1,0,0,0,0,
+233,6,0,0,0,8,0,0,0,0,0,1,0,1,0,0,0,1,0,0,
+34,16,112,21,1,28,0,0,0,0,6,8,1,7,0,3,2,5,0,2,
+159,35,2,2,0,25,0,0,0,0,3,6,0,5,0,1,4,4,0,1,
+75,39,5,7,2,48,0,0,0,0,3,11,2,16,1,4,7,10,0,2,
+212,21,0,1,0,9,0,0,0,0,1,2,0,2,0,0,2,2,0,0,
+4,2,0,0,0,172,0,0,0,0,0,1,0,2,0,0,2,0,0,0,
+187,22,1,1,0,17,0,0,0,0,3,6,0,4,0,1,4,4,0,1,
+133,6,1,2,1,70,0,0,0,0,0,2,0,4,0,3,1,1,0,0,
+251,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+// third group of 16 vectors: bytes 8..9 (mode 4) are zero in every row - presumably the "no near" mode type
+2,3,2,3,0,2,0,2,0,0,11,4,1,4,0,2,3,2,0,4,
+49,46,3,4,7,31,42,41,0,0,2,6,1,7,1,4,2,4,0,1,
+26,25,1,1,2,10,67,39,0,0,1,1,0,14,0,2,31,26,1,6,
+103,46,1,2,2,10,33,42,0,0,1,4,0,3,0,1,1,3,0,0,
+14,31,9,13,14,54,22,29,0,0,2,6,4,18,6,13,1,5,0,1,
+85,39,0,0,1,9,69,40,0,0,0,1,0,3,0,1,2,3,0,0,
+31,28,0,0,3,14,130,34,0,0,0,1,0,3,0,1,3,3,0,1,
+171,25,0,0,1,5,25,21,0,0,0,1,0,1,0,0,0,0,0,0,
+17,21,68,29,6,15,13,22,0,0,6,12,3,14,4,10,1,7,0,3,
+51,39,0,1,2,12,91,44,0,0,0,2,0,3,0,1,2,3,0,1,
+81,25,0,0,2,9,106,26,0,0,0,1,0,1,0,1,1,1,0,0,
+140,37,0,1,1,8,24,33,0,0,1,2,0,2,0,1,1,2,0,0,
+14,23,1,3,11,53,90,31,0,0,0,3,1,5,2,6,1,2,0,0,
+123,29,0,0,1,7,57,30,0,0,0,1,0,1,0,1,0,1,0,0,
+13,14,0,0,4,20,175,20,0,0,0,1,0,1,0,1,1,1,0,0,
+202,23,0,0,1,3,2,9,0,0,0,1,0,1,0,1,0,0,0,0
+};
+
+// These are the probabilities that we reset to after each keyframe (DecodeFrameMbs copies them into pbi->probXmitted).
+// It was created as the average probabilities of the trees. Each row below: ten [n][0][..] values, then ten [n][1][..] values.
+UINT8 BaselineXmittedProbs[4][2][10]=
+{
+ 42,  2,  7, 42, 22,  3,  2,  5,  1,  0, 69,  1,  1, 44,  6,  1,  0,  1,  0,  0,
+  8,  1,  8,  0,  0,  2,  1,  0,  1,  0,229,  1,  0,  0,  0,  1,  0,  0,  1,  0,
+ 35,  1,  6, 34,  0,  2,  1,  1,  1,  0,122,  1,  1, 46,  0,  1,  0,  0,  1,  0,
+ 64,  0, 64, 64, 64,  0,  0,  0,  0,  0, 64,  0, 64, 64, 64,  0,  0,  0,  0,  0,
+};
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : BuildModeTree
+ *
+ *  INPUTS        : pbi -> decoder instance; pbi->probXmitted is read
+ *						
+ *  OUTPUTS       : pbi->probModeSame and pbi->probMode are filled in
+ *
+ *  RETURNS       : None.
+ *
+ *  FUNCTION      : Fills in probabilities at each branch of the mode tree
+ *                  based upon the frequencies transmitted in the bitstream 
+ *                  (probXmitted), for every mode type and every last mode.
+ *
+ *
+ *  ERRORS        : None.
+ *
+ ****************************************************************************/
+void BuildModeTree
+(	
+ PB_INSTANCE *pbi
+)
+{
+	int i,j,k;
+
+	// for each possible last mode i, build the branch probabilities of a tree that leaves mode i out (a repeat of i is signalled through probModeSame)
+	for(i=0;i<10;i++)
+	{
+		unsigned int Counts[MAX_MODES];
+		unsigned int total;
+
+		// set up the probabilities for each tree (one per mode type k)
+		for(k=0;k<MODETYPES;k++)
+		{
+			total=0;
+			for(j=0;j<10;j++)	// weight of every mode other than i, scaled by 100
+			{	
+				if(i==j)
+				{
+					Counts[j]=0;
+				}
+				else
+				{
+					Counts[j]=100*pbi->probXmitted[k][0][j];
+				}
+
+
+				total+=Counts[j];
+			}
+
+			// 255 minus the scaled share of weight [1][i] in ([1][i] + [0][i]); DecodeMode treats a decoded 1 on this as "same as last mode"
+			pbi->probModeSame[k][i] = 255-
+				255 * pbi->probXmitted[k][1][i] 
+				/
+				(	1 +
+					pbi->probXmitted[k][1][i] +	
+					pbi->probXmitted[k][0][i]
+				);
+
+			// each branch is basically calculated via 
+			// summing all possibilities at that branch.
+			pbi->probMode[k][i][0]= 1 + 255 *	// node 0: {NoMV,+MV,Nearest,Near} against all modes
+				(
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]+
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				) / 
+				(   1 +
+				    total
+				);
+
+			pbi->probMode[k][i][1]= 1 + 255 *	// node 1: {NoMV,+MV} within {NoMV,+MV,Nearest,Near}
+				(
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]
+				) / 
+				(
+					1 + 
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]+
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				);
+
+			pbi->probMode[k][i][2]= 1 + 255 *	// node 2: {Intra,FourMV} within {Intra,FourMV + the four golden modes}
+				(
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]
+				) / 
+				(
+					1 + 
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]+
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]+
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+			
+			pbi->probMode[k][i][3]= 1 + 255 *	// node 3: NoMV within {NoMV,+MV}
+				(
+					Counts[CODE_INTER_NO_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTER_NO_MV]+
+					Counts[CODE_INTER_PLUS_MV]
+				);
+
+			pbi->probMode[k][i][4]= 1 + 255 *	// node 4: Nearest within {Nearest,Near}
+				(
+					Counts[CODE_INTER_NEAREST_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTER_NEAREST_MV]+
+					Counts[CODE_INTER_NEAR_MV]
+				) ;
+
+			pbi->probMode[k][i][5]= 1 + 255 *	// node 5: Intra within {Intra,FourMV}
+				(
+					Counts[CODE_INTRA]
+				) / 
+				(
+					1 +
+					Counts[CODE_INTRA]+
+					Counts[CODE_INTER_FOURMV]
+				);
+
+			pbi->probMode[k][i][6]= 1 + 255 *	// node 6: {Golden,GoldenMV} within the four golden modes
+				(
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]+
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+
+			pbi->probMode[k][i][7]= 1 + 255 *	// node 7: Golden (no MV) within {Golden,GoldenMV}
+				(
+					Counts[CODE_USING_GOLDEN]
+				) / 
+				(
+					1 +
+					Counts[CODE_USING_GOLDEN]+
+					Counts[CODE_GOLDEN_MV]
+				);
+
+			pbi->probMode[k][i][8]= 1 + 255 *	// node 8: GoldNearest within {GoldNearest,GoldNear}
+				(
+					Counts[CODE_GOLD_NEAREST_MV]
+				) / 
+				(
+					1 +
+					Counts[CODE_GOLD_NEAREST_MV]+
+					Counts[CODE_GOLD_NEAR_MV]
+				);
+		}
+	}
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       : decodeModeDiff
+ *
+ *  INPUTS        : pbi -> decoder instance; bools and bits are read from pbi->br
+ *						
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : the signed probability difference decoded from the bitstream
+ *
+ *  FUNCTION      : this function returns a probability difference value 
+ *                  transmitted in the bitstream using a fixed tree and 
+ *                  hardcoded probabilities: 0, +/-4, +/-8, +/-12 .. +/-24,
+ *                  or an escape of +/-4 * (value of a 7 bit VP5_bitread)
+ *  SPECIAL NOTES : The hard coded probabilities for the difference tree
+ *                  were calculated by taking the average number of times a 
+ *                  branch was taken on some sample material ie 
+ *                  (bond,bike,beautifulmind)
+ *                  The small step is formed by multiplication: left shifting
+ *                  a negative sign value is undefined behaviour in C.
+ *
+ *  ERRORS        : None.
+ *
+ ****************************************************************************/
+int decodeModeDiff
+(
+   PB_INSTANCE *pbi
+)
+{
+
+	int sign;
+	if(DecodeBool(&pbi->br, 205)==0)	// first bool 0 -> no change to this probability
+	{
+		return 0;
+	}
+	
+	sign = 1 + -2 * DecodeBool128(&pbi->br);	// +1 when the bool is 0, -1 when it is 1
+	
+	if( !DecodeBool(&pbi->br,171))
+	{
+		// small step: magnitude 4 when the next bool is 1, 8 when it is 0
+		// (same values the old "sign<<(3-bool)" produced, without shifting
+		// a negative number)
+		if( DecodeBool(	&pbi->br,83))
+			return sign*4;
+		else
+			return sign*8;
+	}
+	else
+	{
+		if( !DecodeBool(	&pbi->br,199) ) 
+		{
+			if(DecodeBool(	&pbi->br,140))
+				return sign * 12;
+
+			if(DecodeBool(	&pbi->br,125))
+				return sign * 16;
+
+			if(DecodeBool(	&pbi->br,104))
+				return sign * 20;
+
+			return sign * 24;
+
+		}
+		else 
+		{
+			int diff =VP5_bitread(&pbi->br,7);	// escape: magnitude sent explicitly in units of 4
+			return sign *diff*4;
+		}
+	}
+	
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeModeProbs
+ *
+ *  INPUTS        :     pbi -> decoder instance; reads from pbi->br
+ *						
+ *  OUTPUTS       :     pbi->probXmitted updated; BuildModeTree(pbi) is then run
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     This function parses the probabilities xmitted in 
+ *                      the bitstream. The bitstream may either use the 
+ *                      lastframes baselines, or transmit a pointer to a
+ *                      vector of new probabilities. It may then also 
+ *                      contain updates to each of these probabilities.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void DecodeModeProbs
+(
+	PB_INSTANCE *pbi
+)
+{
+	int i,j;
+	// For each mode type (all modes available, no nearest, no near mode)
+	for(j=0;j<MODETYPES;j++)
+	{
+		// determine whether we are sending a vector for this mode byte
+		if(DecodeBool( &pbi->br, PROBVECTORXMIT) )
+		{
+			// figure out which vector we have encoded (4 bit index into ModeVq[j])
+			int whichVector = VP5_bitread(&pbi->br, 4);
+
+			// adjust the vector: even bytes go to probXmitted[j][1], odd bytes to probXmitted[j][0]
+			for(i=0;i<MAX_MODES;i++)
+			{
+				pbi->probXmitted[j][1][i] = ModeVq[j][whichVector][i*2];
+				pbi->probXmitted[j][0][i] = ModeVq[j][whichVector][i*2+1];
+			}
+		} 
+
+		// decode whether updates to bring it closer to ideal 
+		if( DecodeBool( &pbi->br, PROBIDEALXMIT) )
+		{
+			for(i=0;i<10;i++)	// two deltas per mode, read in the order [1][i] then [0][i]
+			{
+				int diff;
+
+				// determine difference 
+				diff = decodeModeDiff(pbi);
+				diff += pbi->probXmitted[j][1][i];
+				// clamp the adjusted value to 0..255 before storing
+				pbi->probXmitted[j][1][i] = (diff<0?0:(diff>255?255:diff));
+
+				// determine difference 
+				diff = decodeModeDiff(pbi);
+				diff += pbi->probXmitted[j][0][i];
+				// clamp the adjusted value to 0..255 before storing
+				pbi->probXmitted[j][0][i] = (diff<0?0:(diff>255?255:diff));
+
+			}
+		}
+	}
+	// rebuild the per-branch probabilities from the (possibly updated) probXmitted
+	BuildModeTree(pbi);
+}
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     decodeModeAndMotionVector
+ *
+ *  INPUTS        :     pbi -> decoder instance; MBrow -> row 
+						MBcol -> column
+						
+ *
+ *  OUTPUTS       :     pbi->LastMode, pbi->predictionMode[], pbi->MBMotionVector[], pbi->mbi
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     decodes a macroblock's mode and motion vectors from 
+                        the bitstream 
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void decodeModeAndMotionVector
+(
+ PB_INSTANCE *pbi,
+ UINT32 MBrow,
+ UINT32 MBcol
+ )
+{
+	CODING_MODE mode;//lastmode;
+	int type,type2;	// type2 receives the golden-frame search result and is not read afterwards
+	UINT32 k;
+	MOTION_VECTORA NearestInterMVect,NearInterMVect; 
+	MOTION_VECTORA NearestGoldMVect,NearGoldMVect;
+	MOTION_VECTOR mv;
+    int x, y;
+	// candidate vectors from neighbours predicted from frame 1; type selects the mode probability set
+	FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestInterMVect,&NearInterMVect,1,&type);
+
+	mode = 	DecodeMode(pbi,pbi->LastMode,type);
+    pbi->LastMode = mode; 
+	
+	pbi->predictionMode[MBOffset(MBrow,MBcol)] = mode;
+	pbi->mbi.Mode = mode;
+    if(mode ==CODE_INTER_FOURMV)
+    {
+		pbi->mbi.BlockMode[0] = DecodeBlockMode(pbi);	// all four block modes are read before any vector
+		pbi->mbi.BlockMode[1] = DecodeBlockMode(pbi);
+		pbi->mbi.BlockMode[2] = DecodeBlockMode(pbi);
+		pbi->mbi.BlockMode[3] = DecodeBlockMode(pbi);
+
+		pbi->mbi.BlockMode[4] = CODE_INTER_FOURMV;	// blocks 4 and 5 (presumably chroma) use the averaged vector below
+		pbi->mbi.BlockMode[5] = CODE_INTER_FOURMV;
+
+		x=0;	// x / y accumulate the four block vectors (NO_MV blocks add nothing)
+		y=0;
+		for(k=0;k<4;k++)
+		{
+			if(pbi->mbi.BlockMode[k]==CODE_INTER_NO_MV)
+            {
+				pbi->mbi.Mv[k].x = 0;
+                pbi->mbi.Mv[k].y = 0;
+            }
+			else if( pbi->mbi.BlockMode[k]==CODE_INTER_NEAREST_MV)
+            {
+				pbi->mbi.Mv[k].x = NearestInterMVect.x;
+                pbi->mbi.Mv[k].y = NearestInterMVect.y;                
+                x+=NearestInterMVect.x;
+				y+=NearestInterMVect.y;
+            }
+            else if( pbi->mbi.BlockMode[k]==CODE_INTER_NEAR_MV)
+            {
+				pbi->mbi.Mv[k].x = NearInterMVect.x;
+                pbi->mbi.Mv[k].y = NearInterMVect.y;                
+                x+=NearInterMVect.x;
+				y+=NearInterMVect.y;
+            }
+            else if ( pbi->mbi.BlockMode[k]==CODE_INTER_PLUS_MV)
+            {
+				decodeMotionVector(pbi,&mv,NULL);	// explicit vector read from the bitstream
+				pbi->mbi.Mv[k].x = mv.x;
+                pbi->mbi.Mv[k].y = mv.y;
+                x+=mv.x;
+				y+=mv.y;
+            }
+		}
+        x = (x+1+(x>=0))>>2;	// sum/4 with a rounding bias of 2 (>=0) or 1 (<0); NOTE(review): >> on a negative int is implementation-defined
+        y = (y+1+(y>=0))>>2;
+
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = pbi->mbi.Mv[3].x;	// block 3's vector is what gets stored for the macroblock
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = pbi->mbi.Mv[3].y;
+        
+        pbi->mbi.Mv[4].x = x; 
+        pbi->mbi.Mv[4].y = y;
+
+        pbi->mbi.Mv[5].x = x; 
+        pbi->mbi.Mv[5].y = y;
+        
+
+    }
+    else
+    {
+        if(mode == CODE_INTER_NEAREST_MV)
+        {
+            x = NearestInterMVect.x;
+            y = NearestInterMVect.y;            
+        }
+        else if(mode == CODE_INTER_NEAR_MV)
+        {
+            x = NearInterMVect.x;
+            y = NearInterMVect.y;
+        }
+        else 
+        {
+            switch(mode)
+            {
+            /* the two inter cases are handled by the if / else if above:
+            case CODE_INTER_NEAREST_MV:
+            x = NearestInterMVect.x;
+            y = NearestInterMVect.y;            
+            break;
+            case CODE_INTER_NEAR_MV:
+            x = NearInterMVect.x;
+            y = NearInterMVect.y;
+            break;
+                */
+            case CODE_GOLD_NEAREST_MV:	// golden candidates are only searched (frame 2) when a golden near/nearest mode was decoded
+                FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestGoldMVect,&NearGoldMVect,2,&type2);
+                x = NearestGoldMVect.x;
+                y = NearestGoldMVect.y;
+                break;
+            case CODE_GOLD_NEAR_MV:
+                FindNearestandNextNearest(pbi,MBrow,MBcol,&NearestGoldMVect,&NearGoldMVect,2,&type2);
+                x = NearGoldMVect.x;
+                y = NearGoldMVect.y;
+                break;
+            case CODE_INTER_PLUS_MV:
+            case CODE_GOLDEN_MV:
+                decodeMotionVector(pbi,&mv,NULL);	// explicit vector read from the bitstream
+                x = mv.x;
+                y = mv.y;
+                break;
+            default:	// every remaining mode gets a zero vector
+                x =0;
+                y =0;
+            }
+        }
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].x = x;
+        pbi->MBMotionVector[MBOffset(MBrow,MBcol)].y = y;
+		for(k=0;k<6;k++)	// all six blocks share the macroblock's mode and vector
+		{
+			
+            pbi->mbi.Mv[k].x = x;
+            pbi->mbi.Mv[k].y = y;
+			pbi->mbi.BlockMode[k] = mode;
+		}
+    }
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeBlockMode
+ *
+ *  INPUTS        :     pbi -> decoder instance; two bools are read from pbi->br
+ *						
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     the block's CODING_MODE (NO_MV, PLUS_MV, NEAREST_MV or NEAR_MV)
+ *
+ *  FUNCTION      :     decodes a block mode from the bitstream as 2 bits
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+CODING_MODE 
+DecodeBlockMode
+(
+	PB_INSTANCE *pbi
+)
+
+{
+	// the first bool read becomes the high bit of the 2 bit choice, the second the low bit
+	int choice = DecodeBool128(&pbi->br)<<1;
+	choice += DecodeBool128(&pbi->br);
+
+
+	switch(choice)
+	{
+	case 0:return CODE_INTER_NO_MV;//0
+	case 1:return CODE_INTER_PLUS_MV;//2
+	case 2:return CODE_INTER_NEAREST_MV;//3
+	case 3:return CODE_INTER_NEAR_MV;//4
+	}
+	return (CODING_MODE)0;	// fallback for a choice outside 0..3
+
+}   
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DecodeMode
+ *
+ *  INPUTS        :     pbi -> decoder instance (bool decoder and probability tables)
+ *						lastmode -> mode of the last coded macroblock
+ *						type -> MODE_TYPE (all modes available, nonearest 
+ *						        macroblock, no near macroblock)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     the decoded macroblock CODING_MODE
+ *
+ *  FUNCTION      :     decodes a MBmode from the bitstream: first one bool
+ *                      against probModeSame[type][lastmode] (1 means the mode
+ *                      is the same as lastmode), otherwise a walk down the
+ *                      mode tree using the branch probabilities stored in 
+ *                      probMode[type][lastmode]
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+CODING_MODE DecodeMode
+(
+	PB_INSTANCE *pbi,
+	CODING_MODE lastmode,
+	UINT32 type
+)
+{
+	CODING_MODE	mode;
+	if(DecodeBool(&pbi->br,pbi->probModeSame[type][lastmode]))
+	{
+		mode = lastmode;
+	}
+	else
+	{ // 0
+		UINT8 * Stats =pbi->probMode[type][lastmode]; 
+		if(DecodeBool(&pbi->br,Stats[0]))
+		{  // 2 : intra, four MV or one of the golden modes
+			if(DecodeBool(&pbi->br,Stats[2]))
+			{ //6 : one of the four golden modes
+				if(DecodeBool(&pbi->br,Stats[6]))
+				{ // 8
+					// bool 0 -> CODE_GOLD_NEAREST_MV, bool 1 -> the next mode value
+					mode = CODE_GOLD_NEAREST_MV + DecodeBool(&pbi->br,Stats[8]);
+					/*
+					if(DecodeBool(&pbi->br,Stats[8]))
+					{
+						mode = CODE_GOLD_NEAR_MV;
+					}
+					else
+					{
+						mode = CODE_GOLD_NEAREST_MV;
+					}
+					*/
+					
+				}
+				else
+				{ // 7 : bool 0 -> CODE_USING_GOLDEN, bool 1 -> the next mode value
+					mode = CODE_USING_GOLDEN + DecodeBool(&pbi->br,Stats[7]);
+					/*
+					if(DecodeBool(&pbi->br,Stats[7]))
+					{
+						mode = CODE_GOLDEN_MV;
+					}
+					else
+					{
+						mode = CODE_USING_GOLDEN;
+					}
+					*/
+				}
+
+			}
+			else
+			{ //5 : intra or four MV
+				//mode = CODE_INTRA + 6*DecodeBool(&pbi->br,Stats[5]);
+				
+				if(DecodeBool(&pbi->br,Stats[5]))
+				{
+					mode = CODE_INTER_FOURMV;
+				}
+				else
+				{
+					mode = CODE_INTRA;
+				}
+				
+			}
+		}
+		else
+		{ // 1 : one of the four inter modes
+			if(DecodeBool(&pbi->br,Stats[1]))
+			{ // 4 : bool 0 -> CODE_INTER_NEAREST_MV, bool 1 -> the next mode value
+				mode = CODE_INTER_NEAREST_MV + DecodeBool(&pbi->br,Stats[4]);
+				/*
+				if(DecodeBool(&pbi->br,Stats[4]))
+				{
+					mode = CODE_INTER_NEAR_MV;
+				}
+				else
+				{
+					mode = CODE_INTER_NEAREST_MV;
+				}
+				*/
+
+			}
+			else
+			{ // 3 : bool 0 -> CODE_INTER_NO_MV, bool 1 -> the mode value two above it
+				mode = CODE_INTER_NO_MV + 2 * DecodeBool(&pbi->br,Stats[3]);
+				/*
+				if(DecodeBool(&pbi->br,Stats[3]))
+				{
+					mode = CODE_INTER_PLUS_MV;
+				}
+				else
+				{
+					mode = CODE_INTER_NO_MV;
+				}
+				*/
+			}
+		}
+	}
+	return mode;
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c
new file mode 100644
index 00000000..37254267
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/decodemv.c
@@ -0,0 +1,366 @@
+/****************************************************************************
+*        
+*   Module Title :	   Decodemv.c     
+*
+*   Description  :     functions for decoding modes and motionvectors 
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 30OCT01  New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#include "pbdll.h"
+#include "boolhuff.h"
+#include "huffman.h"
+#include "stdio.h"
+#include "decodemode.h" 
+#include "decodemv.h"
+
+/****************************************************************************
+*  Implicit Imports
+*****************************************************************************
+*/        
+#define STRICT              /* Strict type checking. */
+
+#ifdef MAPCA
+    #include <eti/mm.h>
+#endif
+
+ 
+
+/****************************************************************************
+*  Exported data structures.
+*****************************************************************************
+*/        
+
+
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+
+UINT8 MvUpdateProbs[2][MV_NODES] = 	// bool probabilities for the "update follows" flags read by ConfigureMvEntropyDecoder
+{ 	// columns: 0 zero, 1 sign, 2 half pixel, 3 low bit, 4..10 MvSizeProbs[..][0..6]
+	{ 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 },	// component 0 (written to mv->x by decodeMotionVector)
+	{ 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 }	// component 1 (written to mv->y by decodeMotionVector)
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ConfigureMvEntropyDecoder
+ *
+ *  INPUTS        :     pbi -> decoder instance; FrameType is not used by the live code
+ *
+ *  OUTPUTS       :     pbi->MvZeroProbs, MvSignProbs, MvHalfPixelProbs, MvLowBitProbs, MvSizeProbs
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Build the MV entropy decoding tree
+ *
+ *  SPECIAL NOTES :     Each probability is preceded by one bool (MvUpdateProbs);
+ *                      when set, a new value is read, doubled, and 0 is replaced by 1.
+ *
+ *  ERRORS        :     None.
+ *
+***************************************************************************/
+void ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType )
+{
+	int i;
+    
+	//This function is not called at all if it is a BASE_FRAME
+    /*    
+	if ( FrameType == BASE_FRAME)
+	{
+		// Set up the default values for each of the MV probabilities
+		// For now these are just 128
+		memset ( pbi->MvSignProbs, 128, sizeof(pbi->MvSignProbs) );
+		memset ( pbi->MvZeroProbs, 128, sizeof(pbi->MvZeroProbs) );
+		memset ( pbi->MvHalfPixelProbs, DEFAULT_HALF_PIXEL_PROB, sizeof(pbi->MvHalfPixelProbs) );
+		memset ( pbi->MvLowBitProbs, 128, sizeof(pbi->MvLowBitProbs) );
+		memset ( pbi->MvSizeProbs, 128, sizeof(pbi->MvSizeProbs) );
+	}
+	else
+    */
+	{
+		// Read, where flagged, the Zero, sign, half pixel and Low order probabilities (both components, i = 0 then 1).
+		for ( i = 0; i < 2; i++ )
+		{
+			// Zero probability
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][0]) )
+			{
+				pbi->MvZeroProbs[i] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvZeroProbs[i] == 0 )
+					pbi->MvZeroProbs[i] = 1;
+			}
+
+			// Sign probability
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][1]) )
+			{
+				pbi->MvSignProbs[i] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSignProbs[i] == 0 )
+					pbi->MvSignProbs[i] = 1;
+			}
+
+			// Half pixel bit probability
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][2]) )
+			{
+				pbi->MvHalfPixelProbs[i] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvHalfPixelProbs[i] == 0 )
+					pbi->MvHalfPixelProbs[i] = 1;
+			}
+
+			// Low order magnitude bit Probability
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][3]) )
+			{
+				pbi->MvLowBitProbs[i] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvLowBitProbs[i] == 0 )
+					pbi->MvLowBitProbs[i] = 1;
+			}
+		}
+
+		// Now vector magnitude Probabilities (MvSizeProbs[i][n] is flagged by MvUpdateProbs[i][4+n])
+		for ( i = 0; i < 2; i++ )
+		{
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][4]) )
+			{
+				pbi->MvSizeProbs[i][0] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][0] == 0 )
+					pbi->MvSizeProbs[i][0] = 1;
+			}
+
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][5]) )
+			{
+				pbi->MvSizeProbs[i][1] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][1] == 0 )
+					pbi->MvSizeProbs[i][1] = 1;
+			}
+			
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][6]) )
+			{
+				pbi->MvSizeProbs[i][2] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][2] == 0 )
+					pbi->MvSizeProbs[i][2] = 1;
+			}
+			
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][7]) )
+			{
+				pbi->MvSizeProbs[i][3] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][3] == 0 )
+					pbi->MvSizeProbs[i][3] = 1;
+			}
+			
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][8]) )
+			{
+				pbi->MvSizeProbs[i][4] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][4] == 0 )
+					pbi->MvSizeProbs[i][4] = 1;
+			}
+
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][9]) )
+			{
+				pbi->MvSizeProbs[i][5] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][5] == 0 )
+					pbi->MvSizeProbs[i][5] = 1;
+			}
+
+			if ( DecodeBool(&pbi->br, MvUpdateProbs[i][10]) )
+			{
+				pbi->MvSizeProbs[i][6] = VP5_bitread( &pbi->br, PROB_UPDATE_BASELINE_COST ) << 1;
+				if ( pbi->MvSizeProbs[i][6] == 0 )
+					pbi->MvSizeProbs[i][6] = 1;
+			}
+		}
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     decodeMotionVector 
+ *
+ *  INPUTS        :     pbi -> decoder instance; *mv -> returned motion vector
+						*nearestMv -> passed in mv acting as context 
+						(not referenced by the body below)
+ *
+ *  OUTPUTS       :     mv->x and mv->y are both written
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     decodes a motion vector from the bitstream 
+ *
+ *  SPECIAL NOTES :     Each component is a 5 bit magnitude plus a sign.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void decodeMotionVector
+(
+	PB_INSTANCE *pbi,
+	MOTION_VECTOR *mv,
+	MOTION_VECTOR *nearestMv
+)
+{
+	UINT32 i;
+	INT32  Vector = 0;
+	INT32  SignBit;
+	INT32  HpBit;
+	INT32  LowBit;
+
+	for ( i = 0; i < 2; i++ )	// i == 0 decodes x, i == 1 decodes y
+	{
+		Vector = 0;
+
+		// Is the vector non-zero
+		if ( DecodeBool(&pbi->br, pbi->MvZeroProbs[i]) )
+		{
+			// Read the sign, half pixel and low order bits
+			SignBit = DecodeBool(&pbi->br, pbi->MvSignProbs[i]);
+
+			// Read half pixel and low order bits
+			HpBit = DecodeBool(&pbi->br, pbi->MvHalfPixelProbs[i]);
+			LowBit = DecodeBool(&pbi->br, pbi->MvLowBitProbs[i]);
+
+			// Now read the magnitude bits (bits 4, 3 and 2; each branch uses its own MvSizeProbs entry)
+			if ( DecodeBool(&pbi->br, pbi->MvSizeProbs[i][0] ) )
+			{
+				Vector = 1 << 4;
+				if ( DecodeBool(&pbi->br, pbi->MvSizeProbs[i][4]) )
+				{
+					Vector |= (1 << 3);
+					Vector |= DecodeBool(&pbi->br, pbi->MvSizeProbs[i][6]) << 2;
+				}
+				else
+				{
+					Vector |= DecodeBool(&pbi->br, pbi->MvSizeProbs[i][5]) << 2;
+				}
+			}
+			else
+			{
+				if ( DecodeBool(&pbi->br, pbi->MvSizeProbs[i][1]) )
+				{
+					Vector |= (1 << 3);
+					Vector |= DecodeBool(&pbi->br, pbi->MvSizeProbs[i][3]) << 2;
+				}
+				else
+				{
+					Vector |= DecodeBool(&pbi->br, pbi->MvSizeProbs[i][2]) << 2;
+				}
+			}
+		
+			// Now Add in the low order and sign bits (half pixel bit is bit 0, low bit is bit 1)
+			Vector |= HpBit;
+			Vector |= (LowBit << 1);
+			if ( SignBit )
+				Vector = -Vector;
+		}
+
+		if ( i )
+			mv->y = Vector;
+		else
+			mv->x = Vector;
+
+    }
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FindNearestandNextNearest
+ *
+ *  INPUTS        :     
+						MBrow row of macroblock to check
+						MBcol col of macroblock to check
+						*nearest returns nearest motion vector if found 0,0 otherwise
+						*near returns next nearest motion vector if found 0,0 otherwise
+						frame which frame motion vector should come from (gold or last)
+ *
+ *  OUTPUTS       :     *type -> NONEAREST_MACROBLOCK, NONEAR_MACROBLOCK or MACROBLOCK
+ *
+ *  RETURNS       :     None (void); results come back through nearest,
+                        nextnearest and type
+ *
+ *  FUNCTION      :     search through the existing motion vectors for two different MVs
+ *
+ *  SPECIAL NOTES :     Vectors are compared and copied as a single unsigned int. 
+ *                      NOTE(review): assumes MOTION_VECTORA has the size and
+ *                      alignment of unsigned int - confirm against its declaration.
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void FindNearestandNextNearest
+(
+	PB_INSTANCE *pbi,
+	UINT32 MBrow,
+	UINT32 MBcol,
+	MOTION_VECTORA *nearest,
+	MOTION_VECTORA *nextnearest,	
+	UINT8 Frame,
+	int *type
+)
+{
+	UINT32 BaseMB = MBOffset(MBrow,MBcol);
+	UINT32 OffsetMB;
+	int i;
+
+	nearest->x=0;
+	nearest->y=0;
+	nextnearest->x=0;
+	nextnearest->y=0;
+	*type = NONEAREST_MACROBLOCK;
+	// pass 1: first of the 12 candidate neighbours that uses the requested frame and has a non-zero vector
+	for(i=0;i<12;i++)
+	{
+
+		OffsetMB = pbi->mvNearOffset[i]+BaseMB;
+		// skip neighbours whose mode maps to a different frame
+		if(VP5_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame)
+			continue;
+		// skip neighbours whose stored vector is all zero bits
+		if(*((unsigned int *) &pbi->MBMotionVector[OffsetMB]) == 0) 
+			continue;
+
+		*((unsigned int *) nearest) = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+		*type = NONEAR_MACROBLOCK;
+
+		break;
+
+	}
+	// pass 2 (only when a nearest vector was found): continue from the next candidate
+	if(*((unsigned int *) nearest))
+	{
+		for(i=i+1;i<12;i++)
+		{
+
+			OffsetMB = pbi->mvNearOffset[i]+BaseMB;
+
+			if(VP5_Mode2Frame[pbi->predictionMode[OffsetMB]] != Frame)
+				continue;
+			// must differ from the nearest vector already chosen
+			if(    *((unsigned int *) &pbi->MBMotionVector[OffsetMB])
+				== *((unsigned int *) nearest) )
+				continue;
+			
+			if(*((unsigned int *) &pbi->MBMotionVector[OffsetMB]) == 0) 
+				continue;
+			
+			*((unsigned int *) nextnearest) = *((unsigned int *) &pbi->MBMotionVector[OffsetMB]);
+			*type = MACROBLOCK;
+
+			break;
+		}
+	
+	}
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c
new file mode 100644
index 00000000..52a36eae
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/dxv2_vp50.c
@@ -0,0 +1,438 @@
+/****************************************************************************
+*
+*   Module Title :     vp5dxv.c
+*
+*   Description  :     VP50 interface to DXV.
+*
+*    AUTHOR      :     SJL
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.03 SJL 17/10/02  Up the version to 1.0.0.3, added new dxv interface
+*   1.02 YWX 30/09/02  Up the version to 1.0.0.2, added support of scaling
+*   1.01 YWX 19/09/02  Fixed bug in blit and up the version to 1.0.0.1
+*	1.00 SJL 17/06/02  Base
+*
+*****************************************************************************
+*/
+//#include <stdlib.h> 
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_plugin.h" /* interface to dxv */
+
+#include "pbdll.h"
+
+
+/* Human-readable identification string for this library build. */
+const char* VP5LIBVERSION="ON2 VP5 Decode Library for MAC Version 1.0.0.3";
+
+/* A four-character codec code held in an unsigned int. */
+typedef unsigned int FourCC;
+ 
+/* FourCC under which this decoder registers itself with DXL (see vp50_Init). */
+#define VP50_FOURCC DXL_MKFOURCC( 'V', 'P', '5', '0')
+
+
+/* NOTE(review): this table is not referenced anywhere else in this file.   */
+/* DXRGBNULL presumably terminates the list -- confirm before relying on it. */
+static dxvBitDepth bitDepths[] = 
+{
+	DXYV12,DXRGBNULL
+};
+
+
+/* Forward declaration: registered as a callback in vp50_xImageReCreate and */
+/* defined at the end of this file.                                         */
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned int Parameter );
+
+/* Library-wide initialisation / shutdown, called from vp50_Init and vp50_Exit. */
+extern void VP5_VPInitLibrary(void);
+extern void VP5_VPDeInitLibrary(void);
+
+#include "duck_dxl.h"
+
+/* Header fields pulled out of the first bytes of a compressed frame by vp50_GetInfo. */
+typedef struct tFrameInfo
+{
+    int KeyFrame;   /* 1 when byte 0 is <= 0x7f, 0 otherwise */
+    int Version;    /* bits 3..7 of byte 2 on key frames, 0 on other frames */
+    int Quality;    /* byte 0 shifted right by two */
+    int vp30Flag;   /* byte 1, unmodified */
+} FrameInfo;
+
+/* Fills frameInfo from the first bytes of the compressed frame at source.  */
+/* Bytes 0 and 1 are always read; byte 2 is read only for key frames.       */
+void 
+vp50_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+    const unsigned char firstByte = source[0];
+
+    // Is the frame an inter frame or a key frame: top bit clear means key frame
+    const int isKeyFrame = (firstByte <= 0x7f);
+
+    frameInfo->KeyFrame = isKeyFrame;
+    frameInfo->Quality = firstByte >> 2;
+
+    // the version field only exists in key frames
+    frameInfo->Version = isKeyFrame ? ((source[2] >> 3) & 0x1f) : 0;
+
+    frameInfo->vp30Flag = (int)source[1];
+}
+
+
+// YUV buffer configuration structure
+// NOTE(review): vp50_decompress passes a pointer to this struct as
+// (YUV_BUFFER_CONFIG *), so the leading members presumably have to match that
+// type's layout -- confirm against its declaration before reordering anything.
+typedef struct
+{
+    // luma plane geometry
+    int     YWidth;
+    int     YHeight;
+    int     YStride;
+
+    // chroma plane geometry (one set of values is used for both U and V)
+    int     UVWidth;
+    int     UVHeight;
+    int     UVStride;
+
+    // plane pointers; vp50_decompress moves them to the last row of each plane
+    char *  YBuffer;
+    char *  UBuffer;
+    char *  VBuffer;
+
+    // filled in by vp50_decompress when blitting to DXYV12 / DXI420 surfaces
+	char *  uvStart;
+    int uvDstArea;
+    int uvUsedArea;
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an algorithm base container */
+/* One of these is allocated per xImage by vp50_xImageReCreate and released */
+/* by vp50_xImageDestroy.                                                   */
+typedef struct tXImageCODEC
+{
+	FourCC myFourCC;                    /* set to VP50_FOURCC on creation */
+	DXV_YUV_BUFFER_CONFIG FrameBuffer;  /* refreshed from the decoder on every blit */
+	PB_INSTANCE *myPBI;                 /* decoder instance from VP5_StartDecoder */
+} vp50_XIMAGE, *vp50_XIMAGE_HANDLE;
+
+
+/* Blitter signature as called by vp50_decompress: (destination pointer, pitch, source YUV frame). */
+typedef void ((*VP5BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+//typedef void ((*vp5_VSCREEN_FUNC)(void));
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_decompress
+ *
+ *  INPUTS        :     src     - xImage handle carrying the compressed frame
+ *                                and the algorithm base (vp50_XIMAGE)
+ *                      vScreen - destination surface, or NULL to decode
+ *                                without blitting
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK on success,
+ *                      DXL_VERSION_CONFLICT if the decoder returns -1,
+ *                      DXL_BAD_DATA for any other decoder failure,
+ *                      DXL_INVALID_BLIT if VPX_GetBlitter has no blitter for
+ *                      the requested blit quality / bit depth.
+ *
+ *  FUNCTION      :     Decodes the compressed frame attached to src (if any)
+ *                      and blits the decoder's current YUV frame to vScreen.
+ *
+ *  SPECIAL NOTES :     The image width and height are fetched once, up front.
+ *                      They used to be read into block-local variables that
+ *                      shadowed the function-level w/h, which left the h used
+ *                      for the planar (YV12/I420) chroma layout uninitialised.
+ *
+ ****************************************************************************/
+static int 
+vp50_decompress(DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+
+	int retVal;
+	vp50_XIMAGE_HANDLE thisAlgorithmBase = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+	unsigned char *cAddr;
+	int cSize;
+	int w = 0, h = 0;
+
+	/* image dimensions: needed by the decoder and by the planar blit setup below */
+	DXL_GetXImageXYWH(src, NULL, NULL, &w, &h);
+
+	// if we have a compressed frame decompress it ( otherwise we'll just redo
+	// the scaling and postprocessing from the last frame )
+	cAddr = DXL_GetXImageCDataAddr(src);
+	cSize = DXL_GetXImageCSize(src);
+	
+	if(cAddr)
+	{
+		/* skip empty frames and frames whose first three bytes are all zero */
+		if((cSize != 0) && (cAddr[0]>=1 || cAddr[1]>=1 || cAddr[2] >=1))
+		{
+			// decode the frame 
+			retVal = VP5_DecodeFrameToYUV(thisAlgorithmBase->myPBI, (char *)cAddr, cSize, w, h);
+			if(retVal != 0 )
+			{
+				if(retVal == -1)
+					return DXL_VERSION_CONFLICT;
+				else
+					return DXL_BAD_DATA;
+			}
+		}
+	}
+
+
+	if (vScreen) /* if there is a vScreen, blit to it */
+	{
+		unsigned char * ptrScrn;
+		short thisPitch, vsHeight;
+		dxvBlitQuality bq; 
+		dxvBitDepth bd;
+		VP5BLIT_FUNC blitter;
+        
+		DXL_GetVScreenAttributes(vScreen, (void **)&ptrScrn, &bq, &bd, &thisPitch, &vsHeight);
+
+		if(ptrScrn)
+		{ 
+			int x, y, pSize;
+			int viewX, viewY;
+
+			DXL_GetVScreenView(vScreen, &viewX, &viewY, NULL, NULL);
+
+			/* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+			VP5_GetYUVConfig(thisAlgorithmBase->myPBI, (YUV_BUFFER_CONFIG *) &(thisAlgorithmBase->FrameBuffer));
+			
+			pSize = VPX_GetSizeOfPixel(bd);
+
+			DXL_GetXImageXYWH(src, &x, &y, NULL, NULL);
+
+			/* remember to offset if requested */
+			y += viewY;
+			x += viewX; 
+
+			ptrScrn += (x * pSize) + (y * thisPitch);
+
+			/* setup ptrs so we can work backwards through Paul's frame buffers: */
+			/* each plane pointer is moved to the start of its last row          */
+			#if 1
+			thisAlgorithmBase->FrameBuffer.YBuffer = thisAlgorithmBase->FrameBuffer.YBuffer + 
+					((thisAlgorithmBase->FrameBuffer.YHeight - 1) * 
+					 (thisAlgorithmBase->FrameBuffer.YStride));
+
+			thisAlgorithmBase->FrameBuffer.UBuffer = thisAlgorithmBase->FrameBuffer.UBuffer +
+					((thisAlgorithmBase->FrameBuffer.UVHeight - 1) * 
+					 (thisAlgorithmBase->FrameBuffer.UVStride));
+			
+			thisAlgorithmBase->FrameBuffer.VBuffer = thisAlgorithmBase->FrameBuffer.VBuffer +
+					((thisAlgorithmBase->FrameBuffer.UVHeight - 1) * 
+					 (thisAlgorithmBase->FrameBuffer.UVStride));
+			#endif
+            
+			/* stretch blits to anything other than YUY2 / YV12 use a doubled pitch */
+			if((bd != DXYUY2) && (bd != DXYV12))
+			{
+				if(bq == DXBLIT_STRETCH)
+				{
+					thisPitch *= 2;
+				}
+			}
+
+			/* planar destinations: tell the blitter where the chroma data goes, */
+			/* derived from the destination pitch and the image height h         */
+			if(bd == DXYV12 || bd == DXI420)
+			{
+				if(thisPitch < 0)
+				{
+					thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 );
+					thisAlgorithmBase->FrameBuffer.uvDstArea = abs((thisPitch * h)/4);
+					thisAlgorithmBase->FrameBuffer.uvUsedArea = 0;
+				}
+				else
+				{
+					thisAlgorithmBase->FrameBuffer.uvStart = (char *) (ptrScrn + (thisPitch * h));
+					thisAlgorithmBase->FrameBuffer.uvDstArea = ((thisPitch * h)/4);
+					thisAlgorithmBase->FrameBuffer.uvUsedArea = ((thisPitch * thisAlgorithmBase->FrameBuffer.UVHeight)/2);
+				}
+
+			}
+
+			blitter = (VP5BLIT_FUNC)VPX_GetBlitter(bq, bd);
+			
+			/* a value of -1 from VPX_GetBlitter means "no blitter available" */
+			if ((void *)blitter != (void *)-1) 
+			{
+				blitter(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(&thisAlgorithmBase->FrameBuffer));
+			}
+			else
+			{
+				return DXL_INVALID_BLIT;
+			}
+
+
+		}
+	}
+
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_xImageDestroy
+ *
+ *  INPUTS        :     src - xImage handle whose algorithm base is released
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     close down a decompressor: stops the decoder instance
+ *                      held in the algorithm base container and frees the
+ *                      container itself
+ *
+ *  SPECIAL NOTES :     Does nothing when src has no algorithm base.
+ *
+ ****************************************************************************/
+static int 
+vp50_xImageDestroy(DXL_XIMAGE_HANDLE src)
+{
+	vp50_XIMAGE_HANDLE codec = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src);
+
+	/* nothing was ever created for this xImage */
+	if(!codec)
+		return DXL_OK;
+
+	VP5_StopDecoder(&codec->myPBI);
+	duck_free(codec);
+
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_xImageReCreate
+ *
+ *  INPUTS        :     src      - xImage handle to (re)initialise
+ *                      data     - not used by this function
+ *                      type     - FourCC of the stream; must be VP50_FOURCC
+ *                      bitDepth - not used by this function
+ *                      w, h     - dimensions handed to VP5_StartDecoder
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     The new algorithm base container, or NULL when the
+ *                      type is not VP50, the allocation fails or the decoder
+ *                      cannot be started.
+ *
+ *  FUNCTION      :     Replaces any existing algorithm base container with a
+ *                      fresh one, registers this codec's callbacks on src and
+ *                      starts a new decoder instance.
+ *
+ *  SPECIAL NOTES : 
+ *                  called during initialization and/or when xImage (decompressor)
+ *                  attributes change
+ *
+ *                  !!!!!!
+ *                  This function should be prepared to get data that is NOT of the 
+ *                  type native to the decoder.  It should do its best to verify it 
+ *                  as valid data and should clean up after itself and return NULL
+ *                  if it doesn't recognize the format of the data
+ *
+ ****************************************************************************/
+static void * 
+vp50_xImageReCreate(DXL_XIMAGE_HANDLE src, unsigned char *data, int type, enum BITDEPTH bitDepth, int w, int h)
+{  
+	vp50_XIMAGE_HANDLE codec = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+
+	(void) bitDepth;
+
+	/* only VP50 streams are handled here */
+	if(VP50_FOURCC != type) 
+		return NULL;
+
+	/* throw away any container left over from a previous create */
+	if(codec)
+	{	
+		VP5_StopDecoder(&codec->myPBI);
+		duck_free(codec);
+	}
+	
+	/* start again from a zero-filled container */
+	codec = (vp50_XIMAGE_HANDLE)duck_calloc(1, sizeof(*codec), DMEM_GENERAL);
+	if(!codec) 
+		return NULL;
+
+	/* hook this codec's entry points into the xImage */
+	DXL_RegisterXImageRecreate(src, (RECREATE_FUNC) vp50_xImageReCreate);
+	DXL_RegisterXImageDestroy(src, (DESTROY_FUNC) vp50_xImageDestroy);
+	DXL_RegisterXImageDx(src, (DX_FUNC) vp50_decompress);
+	DXL_RegisterXImageSetParameter(src, (SET_PARAMETER_FUNC) vp50_SetParameter);
+
+	codec->myFourCC = VP50_FOURCC;
+  
+	/* create new PBI; hand the container back only if the decoder started */
+	if(VP5_StartDecoder(&codec->myPBI, w, h))
+		return (DXL_HANDLE) codec;
+
+	duck_free(codec);
+	return (DXL_HANDLE) NULL;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_xImageCreate
+ *
+ *  INPUTS        :     src  - xImage handle being created
+ *                      data - passed straight through to vp50_xImageReCreate
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     Whatever vp50_xImageReCreate returns.
+ *
+ *  FUNCTION      :     Creates a VP50 decompressor with the default size.
+ *
+ *  SPECIAL NOTES :     in this "glue" case, just calls through to the
+ *                      recreate function. 
+ *
+ ****************************************************************************/
+static DXL_HANDLE
+vp50_xImageCreate(DXL_XIMAGE_HANDLE src, unsigned char *data)
+{
+	/* our default wxh is always 320x240 */
+	const int defaultWidth  = 320;
+	const int defaultHeight = 240;
+
+	/* the bit depth argument is ignored by vp50_xImageReCreate */
+	const enum BITDEPTH noBitDepth = (enum BITDEPTH ) 0;
+
+	return vp50_xImageReCreate(src, data, VP50_FOURCC, noBitDepth, defaultWidth, defaultHeight);
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_Init
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     Registers the VP50 create function with DXL under
+ *                      VP50_FOURCC, calls vp3SetBlit() and initialises the
+ *                      VP5 library globals.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+int 
+vp50_Init(void)
+{
+    /* make vp50_xImageCreate the factory for VP50 streams */
+    DXL_RegisterXImage((CREATE_FUNC) vp50_xImageCreate, VP50_FOURCC);
+
+
+    /* NOTE(review): presumably selects the blitters later returned by VPX_GetBlitter -- confirm */
+    vp3SetBlit();
+
+	/* initialize all the global variables */
+	VP5_VPInitLibrary();
+	
+	return DXL_OK;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_Exit
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     DXL_OK (always).
+ *
+ *  FUNCTION      :     main exit routine, called during DXL_ExitVideo() 
+ *                      clean up any global information if necessary
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+int 
+vp50_Exit(void)
+{
+	/* counterpart of the VP5_VPInitLibrary() call made in vp50_Init */
+	VP5_VPDeInitLibrary();
+
+	return DXL_OK;
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     vp50_SetParameter
+ *
+ *  INPUTS        :     src       - xImage handle whose decoder is configured
+ *                      Command   - passed to the decoder as a PB_COMMAND_TYPE
+ *                      Parameter - value for that command
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Forwards a parameter change to the decoder instance
+ *                      attached to src via VP5_SetPbParam.
+ *
+ *  SPECIAL NOTES :     Silently ignored when src has no algorithm base, in
+ *                      line with the NULL checks in vp50_xImageDestroy and
+ *                      vp50_xImageReCreate.
+ *
+ ****************************************************************************/
+void 
+vp50_SetParameter(DXL_XIMAGE_HANDLE src, int Command, unsigned int Parameter)
+{
+	vp50_XIMAGE_HANDLE thisAlgorithmBase = (vp50_XIMAGE_HANDLE)DXL_GetAlgorithmBasePtr(src); 
+
+	/* no container means no decoder to configure; avoid dereferencing NULL */
+	if(thisAlgorithmBase == NULL)
+		return;
+
+	VP5_SetPbParam(thisAlgorithmBase->myPBI, (PB_COMMAND_TYPE) Command, (UINT32) Parameter );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c
new file mode 100644
index 00000000..ac683b0d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/pb_globals.c
@@ -0,0 +1,389 @@
+/****************************************************************************
+*
+*   Module Title :     PB_Globals.c
+*
+*   Description  :     Video CODEC Demo: playback dll global declarations
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.21 JBB 13 Jun 01 VP4 Code Clean Out
+*	1.20 AWG 08-Jun-01 Added support for DCT16
+*   1.19 JBB 01-MAY-01 VP5 Functionality (set up coefftoband array)
+*   1.18 YWX 26-Apr 01 Added global "CPUFrequency" and its initializing
+* 			in VPInitlibrary()		
+*   1.17 JBB 06 Apr 01 new cpu free variable initialized
+*   1.16 SJL 30 Mar 01 Added #if defined(POSTPROCESS) around InitPostProcessing();
+*   1.15 PGW 25 Jan 01 Add code to create and destroy MV huffman trees.
+*   1.15 JBB 26 Jan 01 No need to destroy huffman trees
+*   1.14 JBB 22 Aug 00 Ansi C conversion
+*   1.13 JBB 21 Aug 00 New More Blurry in high variance area deringer
+*	1.12 YWX 2  Aug 00 Removed redundant kernel modifiers
+*   1.11 JBB 27 Jul 00 Moved kernel modifiers to pbi mallocs -> duck_malloc
+*                      for scott added malloc checks
+*	1.10 YWX 15/05/00  change the initialization of PostProcessLevel
+*	1.09 JBB 27/01/99  Globals Removed, use of PB_INSTANCE, added PB_Instance
+*                      allocation and deletion funcitons
+*   1.08 PGW 17/12/99  Draw dib functionality removed.
+*   1.07 PGW 16/12/99  Added support for VP3 version id.
+*   1.06 PGW 15/12/99  Added key frame type variable
+*   1.05 PGW 22/11/99  Changes relating to restructuring of block map stuff.
+*   1.04 PGW 14/10/99  Changes to reduce uneccessary dependancies. 
+*   1.05 PGW 06/09/99  DivBySix changed to UINT8 [].
+*   1.04 PGW 24/08/99  Removed of EOF token and assosciated data sturctures etc.
+*                      Deleted COrderList[].
+*   1.03 PGW 15/07/99  Added bit extraction variables.
+*   1.02 PGW 14/07/99  Changes to interface to idct and reconstruction functions.
+*                      Added ModeUsesMC[] truth table. Added (*ReconIntra) funtion
+*                      pointer.
+*   1.01 PGW 09/07/99  Added code to support profile timing
+*   1.00 PGW 22/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "pbdll.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+*  Explicit imports
+*****************************************************************************
+*/ 
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+ 
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+/* Called from VP5_VPInitLibrary to fill CPUFrequency (skipped when __POWERPC__ is defined). */
+extern unsigned long VP5_GetProcessorFrequency();
+                
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+//extern Q_LIST_ENTRY VP5_DcScaleFactorTableV1[ Q_TABLE_SIZE ] ; 
+extern Q_LIST_ENTRY VP5_DcQuant[ Q_TABLE_SIZE ];
+
+// Filled by VP5_VPInitLibrary with VP5_DcQuant[i]/2 and handed to InitPostProcessing
+UINT32 DCQuantScaleP[Q_TABLE_SIZE];
+
+//****************************************************************
+// Function Pointers now library globals!
+//****************************************************************
+
+// Processor frequency, as returned by VP5_GetProcessorFrequency()
+// NOTE(review): the getter returns unsigned long but this is unsigned int
+unsigned int CPUFrequency;
+
+// Truth table to indicate if the given mode uses motion estimation
+// NOTE(review): ten initialisers are given -- assumes MAX_MODES is 10, confirm
+BOOL VP5_ModeUsesMC[MAX_MODES] = { FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE };
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeleteTmpBuffers
+ *
+ *
+ *  INPUTS        :     Instance of PB whose temporary buffers are released
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :    
+ * 
+ *
+ *  FUNCTION      :     Frees every temporary buffer that is currently
+ *                      allocated and zeroes both the allocation pointers and
+ *                      the working pointers derived from them.
+ *
+ *  SPECIAL NOTES :     Safe to call on an instance with no buffers allocated.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeleteTmpBuffers(PB_INSTANCE * pbi)
+{
+	/* reconstruction data buffer */
+	if(pbi->ReconDataBufferAlloc != 0)
+		duck_free(pbi->ReconDataBufferAlloc);
+	pbi->ReconDataBufferAlloc = 0;
+	pbi->ReconDataBuffer = 0;
+
+	/* loop filtered block */
+	if(pbi->LoopFilteredBlockAlloc != 0)
+		duck_free(pbi->LoopFilteredBlockAlloc);
+	pbi->LoopFilteredBlockAlloc = 0;
+	pbi->LoopFilteredBlock = 0;
+
+	/* temporary data buffer */
+	if(pbi->TmpDataBufferAlloc != 0)
+		duck_free(pbi->TmpDataBufferAlloc);
+	pbi->TmpDataBufferAlloc = 0;
+	pbi->TmpDataBuffer = 0;
+
+	/* temporary reconstruction buffer */
+	if(pbi->TmpReconBufferAlloc != 0)
+		duck_free(pbi->TmpReconBufferAlloc);
+	pbi->TmpReconBufferAlloc = 0;
+	pbi->TmpReconBuffer = 0;
+
+	/* scale buffer */
+	if(pbi->ScaleBufferAlloc != 0)
+		duck_free(pbi->ScaleBufferAlloc);
+	pbi->ScaleBufferAlloc = 0;
+	pbi->ScaleBuffer = 0;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AllocateTmpBuffers
+ *
+ *
+ *  INPUTS        :     Instance of PB that receives the temporary buffers
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :     TRUE when every buffer was allocated, FALSE otherwise
+ * 
+ *
+ *  FUNCTION      :     Releases any existing temporary buffers, then
+ *                      allocates the recon data, tmp data, loop filtered
+ *                      block and tmp recon buffers.  Each allocation carries
+ *                      32 spare bytes so that the working pointer can be
+ *                      rounded up to a 32 byte boundary.
+ *
+ *  SPECIAL NOTES :     On failure everything allocated so far is freed again.
+ *                      The scale buffer is not allocated here.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+/* Round an address up to the next multiple of 32.  The arithmetic is done in */
+/* size_t so that the upper half of a 64-bit pointer survives; the previous   */
+/* 0xFFFFFFE0 mask cleared it.  The argument is parenthesised so expressions  */
+/* can be passed safely.                                                      */
+#define ROUNDUP32(X) ( ( ( (size_t) (X) ) + 31 ) & ~( (size_t) 31 ) )
+BOOL VP5_AllocateTmpBuffers(PB_INSTANCE * pbi)
+{
+
+	// clear any existing info
+	VP5_DeleteTmpBuffers(pbi);
+#ifdef MAPCA
+	// MAPCA builds keep six 64-coefficient blocks in the recon data buffer
+	pbi->ReconDataBufferAlloc      = (INT16 (*)[64])duck_malloc(32+64*sizeof(INT16)*6, DMEM_GENERAL);
+	if(!pbi->ReconDataBufferAlloc)      { VP5_DeleteTmpBuffers(pbi); return FALSE; }
+	pbi->ReconDataBuffer           = (INT16 (*)[64])ROUNDUP32(pbi->ReconDataBufferAlloc);
+#else
+	// one 64-coefficient block, aligned like all the other temporary buffers
+	pbi->ReconDataBufferAlloc      = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+	if(!pbi->ReconDataBufferAlloc)      { VP5_DeleteTmpBuffers(pbi); return FALSE; }
+	pbi->ReconDataBuffer           = (INT16 *)ROUNDUP32(pbi->ReconDataBufferAlloc);
+#endif
+    
+	pbi->TmpDataBufferAlloc        = (INT16 *)duck_malloc(32 + 64 * sizeof(INT16), DMEM_GENERAL);
+	if(!pbi->TmpDataBufferAlloc)        { VP5_DeleteTmpBuffers(pbi); return FALSE; }
+	pbi->TmpDataBuffer             = (INT16 *)ROUNDUP32(pbi->TmpDataBufferAlloc);
+
+	pbi->LoopFilteredBlockAlloc    = (UINT8 *)duck_malloc(32 + 256 * sizeof(UINT8), DMEM_GENERAL);
+	if(!pbi->LoopFilteredBlockAlloc)    { VP5_DeleteTmpBuffers(pbi); return FALSE; }
+	pbi->LoopFilteredBlock         = (UINT8 *)ROUNDUP32(pbi->LoopFilteredBlockAlloc);
+
+	pbi->TmpReconBufferAlloc       = (INT16 *)duck_malloc(32 + 64 * sizeof(INT16), DMEM_GENERAL);
+	if(!pbi->TmpReconBufferAlloc)       { VP5_DeleteTmpBuffers(pbi); return FALSE; }
+	pbi->TmpReconBuffer            = (INT16 *)ROUNDUP32(pbi->TmpReconBufferAlloc);
+
+
+	return TRUE;
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeletePBInstance
+ *
+ *
+ *  INPUTS        :     Address of the PB instance pointer to be deleted
+ *
+ *  OUTPUTS       :     *pbi is set to 0
+ *
+ *  RETURNS       :    
+ * 
+ *
+ *  FUNCTION      :     Releases the temporary buffers, the quantizer and
+ *                      (except on MAPCA) the post processor owned by the
+ *                      instance, then frees the instance itself.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeletePBInstance(PB_INSTANCE **pbi)
+{
+	PB_INSTANCE *instance = *pbi;
+
+	// tear down whatever the instance owns before letting go of it
+	if(instance != 0)
+	{
+		// the motion vector huffman trees no longer need destroying
+
+		// dynamically allocated temporary buffers
+		VP5_DeleteTmpBuffers(instance);
+
+		VP5_DeleteQuantizer(&instance->quantizer);
+#ifndef MAPCA
+		DeletePostProcInstance(&instance->postproc);
+#endif
+	}
+
+	duck_free(instance);
+	*pbi = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CreatePBInstance
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :     Newly allocated PB instance, or 0 if the instance or
+ *                      its temporary buffers could not be allocated
+ * 
+ *
+ *  FUNCTION      :     Allocates a zero-filled Playback instance, installs
+ *                      the default configuration, allocates the temporary
+ *                      buffers and sets the initial field values.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+PB_INSTANCE * VP5_CreatePBInstance(void)
+{
+	CONFIG_TYPE defaultConfig = 
+	{
+		0,0,0,0,
+	    8,8,
+	};
+	const int instanceSize = sizeof(PB_INSTANCE);
+	PB_INSTANCE *instance = (PB_INSTANCE *) duck_malloc(instanceSize, DMEM_GENERAL);
+
+	if(instance == 0)
+		return 0;
+
+	// start from an all-zero structure, then drop in the default configuration
+	memset((unsigned char *) instance, 0, sizeof(PB_INSTANCE));
+	memcpy((void *) &instance->Configuration, (void *) &defaultConfig, sizeof(CONFIG_TYPE));
+
+	// VP5_AllocateTmpBuffers cleans up after itself, so only the instance is left to free
+	if(!VP5_AllocateTmpBuffers(instance))
+	{
+		duck_free(instance);
+		return 0;
+	}
+
+	instance->KeyFrameType = DCT_KEY_FRAME;
+	instance->CPUFree = 70;
+#ifndef MAPCA
+	instance->idct = idct;
+#endif
+
+	// Initialise Entropy related data structures.
+	memset( instance->DcProbs, 0, sizeof(instance->DcProbs) );
+	memset( instance->AcProbs, 0, sizeof(instance->AcProbs) );
+
+	return instance;
+}
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VPInitLibrary
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     CPUFrequency (except on __POWERPC__) and
+ *                      DCQuantScaleP[] are filled in
+ *
+ *  RETURNS       :    
+ * 
+ *
+ *  FUNCTION      :     Fully initializes the playback library
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_VPInitLibrary(void)
+{
+	int i;
+#if !defined(__POWERPC__)
+    CPUFrequency = VP5_GetProcessorFrequency();
+#endif
+
+
+    VP5_DMachineSpecificConfig();
+
+    // Build the DC scale table handed to the post processor: half of each DC quantizer
+    for( i = 0 ; i < Q_TABLE_SIZE; i++)
+    {
+		INT32 dcScale;
+
+//		if(i<4)
+//			dcScale = ((6-i) * VP5_DcQuant[i]/4);
+//		else 
+			dcScale = VP5_DcQuant[i]/2;
+
+		DCQuantScaleP[i] =  dcScale;
+
+    }
+
+#ifndef MAPCA
+    // the same table is passed for all three table arguments
+    InitPostProcessing(
+		DCQuantScaleP,
+		DCQuantScaleP,
+		DCQuantScaleP,
+		CURRENT_DECODE_VERSION);
+	InitVPUtil(); 
+#else
+    VP5_InitPostProcess();
+#endif
+}
+
+/*********************************************************/
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VPDeinitLibrary
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     
+ *
+ *  RETURNS       :    
+ * 
+ *
+ *  FUNCTION      :     Shuts the playback library down.  Only MAPCA builds
+ *                      have anything to do here (VP5_ClosePostProcess);
+ *                      otherwise the function is empty.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_VPDeInitLibrary(void)
+{
+#ifdef MAPCA
+    VP5_ClosePostProcess();
+#endif
+
+
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c
new file mode 100644
index 00000000..15df1f30
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/quantize.c
@@ -0,0 +1,845 @@
+/****************************************************************************
+*
+*   Module Title :     Quantise
+*
+*   Description  :     Quantisation and dequanitsation of an 8x8 dct block. .
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*
+*   1.18 PGW 03 Dec 01 Changes to available Q values.
+*   1.17 PGW 14 Sep 01 Added support for ZB varying on zero-run.	
+*   1.16 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+*   1.15 PGW 19 Oct 00 Added select_InterUV_quantiser and related data structures
+*	  				   to support use of different DC  behaviour for UV.
+*   1.11 PGW 18 Sep 00 QThreshTable[] and Inter_coeffs[] made instacne specific.
+*	1.10 PGW 14 Sep 00 Added support for different Q, ZB and Rounding tables 
+*					   in different encoder versions.
+*   1.09 PGW 04 Sep 00 Fixed bugs in code to set up rounding and zero bins
+*					   Added support for ZB to change with Q and coefficient.
+*   1.08 PGW 29 Aug 00 Correction to UpdateQ() and UpdateQC() re. Q limits.
+*					   Changes to rounding and ZBF.
+*   1.08 JBB 22 Aug 00 Ansi C conversion
+*   1.07 SJL 14/04/00  Added the BuildQuantIndex function.
+*   1.06 PGW 18/02/00  Rate targeting changes.
+*	1.05 JBB 27/01/99  Globals Removed, use of QUANTIZER, Dequant no longer 
+*                      used
+*   1.04 PGW 05/11/99  Changes to support AC range entropy tables
+*   1.03 PGW 12/10/99  Removal of spurious windows dependancies.
+*   1.02 PGW 14/09/99  Removal of some floating point code.
+*   1.01 PGW 13/07/99  Changes to keep dequant output to 16 bit
+*   1.01 PGW 07/07/99  Tweaks to baseline matrix.
+*   1.00 PGW 18/06/99  Configuration baseline
+*
+*****************************************************************************
+*/						
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#define STRICT              /* Strict type checking. */
+#include <string.h>  
+#include "quantize.h"
+#include "duck_mem.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/ 
+// NOTE(review): despite the name this evaluates to 65535, i.e. (1<<16)-1
+#define MIN16 ((1<<16)-1)
+      
+// DC quantizer characteristics
+#define VP5_MIN_QUANT		1
+	
+#define UV_Q_ADJUSTMENT		0
+
+// Scale factors used to improve precision of DCT/IDCT
+#define IDCT_SCALE_FACTOR       2       // Shift left bits to improve IDCT precision
+
+/****************************************************************************
+*  Imported Functions
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Imported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+// Function pointer through which the quant index builder is called
+// (VP5_BuildQuantIndex_Generic is declared below as one implementation)
+void  (*VP5_BuildQuantIndex)( QUANTIZER * pbi);
+
+// Six entries: the first four select table set 0, the last two select table set 1
+UINT8 QTableSelect[6] = { 0,0,0,0,1,1 };	// Controls selection of Q Table,rounding,zero bin etc for Y, U & V blocks
+
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/    
+void VP5_InitQTables( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi);
+void VP5_UpdateQ( QUANTIZER *pbi, UINT8 Vp3VersionNo  );
+void VP5_UpdateQC( QUANTIZER *pbi,UINT8 Vp3VersionNo  );
+void VP5_init_quantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+/* note: this one is a function pointer definition, not a prototype */
+void (*VP5_quantize)( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp );
+void VP5_init_dequantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo );
+QUANTIZER * VP5_CreateQuantizer(void);
+void VP5_DeleteQuantizer(QUANTIZER **pbi);          
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/      
+
+// AC Quantizer Tables
+// Each table lists 64 values (NOTE(review): assumes Q_TABLE_SIZE is 64).
+// The UV table currently holds exactly the same values as the Y table.
+static UINT32 VP5_QThreshTable[Q_TABLE_SIZE] = 
+{   94, 92, 90, 88, 86, 82, 78, 74,
+    70, 66, 62, 58, 54, 53, 52, 51,
+	50, 49, 48, 47, 46, 45, 44, 43,
+	42,	40, 39, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 
+    24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  
+    8,   7,  6,  5,  4,  3,  2,  1
+};
+static UINT32 VP5_UvQThreshTable[Q_TABLE_SIZE] = 
+{   94, 92, 90, 88, 86, 82, 78, 74,
+    70, 66, 62, 58, 54, 53, 52, 51,
+	50, 49, 48, 47, 46, 45, 44, 43,
+	42,	40, 39, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 
+    24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  
+    8,   7,  6,  5,  4,  3,  2,  1
+};
+
+// AC Zero Bin and Rounding Tables (include fdct normalisation)
+// In both pairs below the UV table holds the same values as the Y table.
+UINT32 VP5_ZBinTable[Q_TABLE_SIZE] = 
+{
+	330,314,298,284,264,246,228,213,
+	201,190,178,167,156,153,149,146,
+	144,141,138,135,132,130,127,124,
+	121,115,110,104,99, 96, 94, 90,
+	85, 82, 79, 76, 74, 71, 69, 66,
+	63, 61, 58, 55, 53, 50, 47, 45,
+	43, 40, 38, 36, 33, 31, 28, 24,
+	21, 18, 16, 13, 10, 7,  4,  2,
+};
+UINT32 VP5_UvZBinTable[Q_TABLE_SIZE] = 
+{
+	330,314,298,284,264,246,228,213,
+	201,190,178,167,156,153,149,146,
+	144,141,138,135,132,130,127,124,
+	121,115,110,104,99, 96, 94, 90,
+	85, 82, 79, 76, 74, 71, 69, 66,
+	63, 61, 58, 55, 53, 50, 47, 45,
+	43, 40, 38, 36, 33, 31, 28, 24,
+	21, 18, 16, 13, 10, 7,  4,  2,
+};
+// AC rounding values
+UINT32 VP5_RTable[Q_TABLE_SIZE] = 
+{
+	48, 56, 64, 70, 78, 82, 86, 88,
+	91, 92, 94, 94, 99,103,102,100,
+	99, 97,	95, 93, 91, 89, 87, 85, 
+	83, 79, 77, 73, 71, 69, 67, 65,
+	64, 62, 60, 58, 56, 54, 52, 50,
+	48, 46, 44, 42, 40, 38, 36, 34,
+	32, 30, 28, 26, 24, 22, 20, 18, 
+	16, 14, 12, 10,  8,  6,  4,  2,
+};
+UINT32 VP5_UvRTable[Q_TABLE_SIZE] = 
+{
+	48, 56, 64, 70, 78, 82, 86, 88,
+	91, 92, 94, 94, 99,103,102,100,
+	99, 97,	95, 93, 91, 89, 87, 85, 
+	83, 79, 77, 73, 71, 69, 67, 65,
+	64, 62, 60, 58, 56, 54, 52, 50,
+	48, 46, 44, 42, 40, 38, 36, 34,
+	32, 30, 28, 26, 24, 22, 20, 18, 
+	16, 14, 12, 10,  8,  6,  4,  2,
+};
+
+// DC Quantizer tables
+// The UV table currently holds exactly the same values as the Y table.
+Q_LIST_ENTRY VP5_DcQuant[ Q_TABLE_SIZE ] = 
+{
+	47, 47, 47, 47, 45, 43, 43, 43, 
+	43, 43,	42, 41, 41, 40, 40, 40, 
+	40, 35,	35, 35, 35, 33, 33, 33, 
+	33, 32, 32, 32, 27, 27, 26, 26,
+	25, 25, 24, 24, 23, 23,	19, 19,
+    19, 19, 18, 18, 17, 16, 16, 16,
+    16, 16, 15, 11, 11, 11, 10, 10,
+     9,  8,  7,  5,  3,  3,  2,  2,    
+};
+Q_LIST_ENTRY VP5_UvDcQuant[ Q_TABLE_SIZE ] = 
+{
+	47, 47, 47, 47, 45, 43, 43, 43, 
+	43, 43,	42, 41, 41, 40, 40, 40, 
+	40, 35,	35, 35, 35, 33, 33, 33, 
+	33,	32, 32, 32, 27, 27, 26, 26,
+	25, 25, 24, 24, 23, 23,	19, 19,
+    19, 19, 18, 18, 17, 16, 16, 16,
+    16, 16, 15, 11, 11, 11, 10, 10,
+     9,  8,  7,  5,  3,  3,  2,  2,    
+};
+// DC Zero Bin and Rounding Tables (include fdct normalisation)
+UINT32 VP5_DcZBinTable[Q_TABLE_SIZE] = 
+{
+	170,162,152,150,140,130,125,121,
+	121,118,113,111,110,108,108,106,
+	105,96, 93, 87, 86, 83, 83, 83, 
+	83, 78, 78, 78, 66, 66, 63, 63,
+	61, 61, 58, 58, 56, 56, 46, 46,
+	46, 46, 43, 43, 41, 38, 38, 38,
+	38, 38, 35, 24, 24, 24, 23, 23, 
+	20, 19, 16, 13,  6,  6,  4,  4,
+};
+UINT32 VP5_UvDcZBinTable[Q_TABLE_SIZE] = 
+{
+	170,162,152,150,140,130,125,121,
+	121,118,113,111,110,108,108,106,
+	105,96, 93, 87, 86, 83, 83, 83, 
+	83, 78, 78, 78, 66, 66, 63, 63,
+	61, 61, 58, 58, 56, 56, 46, 46,
+	46, 46, 43, 43, 41, 38, 38, 38,
+	38, 38, 35, 24, 24, 24, 23, 23, 
+	20, 19, 16, 13,  6,  6,  4,  4,
+};
+
+UINT32 VP5_DcRTable[Q_TABLE_SIZE] = 
+{	/* Y DC rounding term per FrameQIndex: stored in QuantRound[0][0] by VP5_init_quantizer; VP5_quantize_c also uses it as the DC threshold. */
+	20, 28, 38, 40, 44, 46, 50, 50, 
+	51, 57,	59, 61, 62, 64, 66, 67, 
+	67, 62,	63, 64, 64, 62, 62, 62, 
+	62,	62, 62, 62, 54, 54, 52, 52,
+	50, 50, 48, 48, 46, 46, 38, 38,
+	38, 38, 36, 36, 34, 32, 32, 32,
+	32, 32, 30, 22, 22, 22, 20, 20, 
+	18, 16, 14, 10,  6,  6,  4,  4,
+};
+UINT32 VP5_UvDcRTable[Q_TABLE_SIZE] = 
+{	/* U/V DC rounding term per FrameQIndex: stored in QuantRound[1][0]. Differs from VP5_DcRTable only at index 1 (30 vs 28) - NOTE(review): confirm that difference is intentional. */
+	20, 30, 38, 40, 44, 46, 50, 50, 
+	51, 57,	59, 61, 62, 64, 66, 67, 
+	67, 62,	63, 64, 64, 62, 62, 62, 
+	62,	62, 62, 62, 54, 54, 52, 52,
+	50, 50, 48, 48, 46, 46, 38, 38,
+	38, 38, 36, 36, 34, 32, 32, 32,
+	32, 32, 30, 22, 22, 22, 20, 20, 
+	18, 16, 14, 10,  6,  6,  4,  4,
+};
+
+/*	Inverse fast DCT index											*/
+/*	This contains the offsets needed to convert zigzag order into	*/
+/*	x, y order for decoding. It is generated from the input zigzag	*/
+/*	index at run time.												*/
+
+/*	For maximum speed during both quantisation and dequantisation	*/
+/*	we maintain separate quantisation and zigzag tables for each	*/
+/*	operation.														*/
+
+/*	pbi->quant_index:	the zigzag index used during quantisation			*/
+/*	dequant_index:	zigzag index used during dequantisation					*/
+/*					the pbi->quant_index is the inverse of dequant_index	*/
+/*					and is calculated during initialisation					*/
+
+static UINT32 dequant_index[64] = 	/* dequant_index[zig-zag position] = position in x,y (row-major) order; inverted by VP5_BuildQuantIndex_Generic and walked by VP5_quantize_c */
+{	0,  1,  8,  16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static UINT32 transIndexC[64] = 
+{	/* Identity mapping 0..63; VP5_BuildQuantIndex_Generic points pbi->transIndex at this table. */
+	 0,	 1,	 2,	 3,	   4,  5,  6,  7,
+	 8,	 9, 10,	11,	  12, 13, 14, 15,
+	16, 17, 18, 19,   20, 21, 22, 23,
+	24, 25, 26, 27,   28, 29, 30, 31,
+
+	32, 33, 34, 35,   36, 37, 38, 39,
+	40, 41, 42, 43,   44, 45, 46, 47,
+	48, 49, 50, 51,   52, 53, 54, 55, 
+	56, 57, 58, 59,   60, 61, 62, 63
+};
+
+static UINT32 quant_indexC[64] = 
+{	/* quant_indexC[x,y position] = zig-zag position; appears to be the precomputed inverse of dequant_index (spot-checked). Not referenced in the visible part of this file - TODO confirm it is still needed. */
+    0,  1,   5,  6, 14, 15, 27, 28,
+    2,  4,   7, 13, 16, 26, 29, 42,
+    3,  8,  12, 17, 25, 30, 41, 43,
+    9,  11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54, 
+    20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61,
+    35, 36, 48, 49, 57, 58, 62, 63
+};
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_InitQTables
+ *
+ *  INPUTS        :     QUANTIZER *pbi     : Quantizer instance to set up.
+ *                      UINT8 Vp3VersionNo : Not referenced by this routine.
+ *
+ *  OUTPUTS       :     pbi->QThreshTable is overwritten.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Copies the file-level VP5_QThreshTable into the
+ *                      instance's own QThreshTable.
+ *
+ *  SPECIAL NOTES :     The copy length is sizeof(pbi->QThreshTable); this
+ *                      assumes VP5_QThreshTable is at least that large
+ *                      (its definition is not visible here - TODO confirm).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_InitQTables( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{  
+	// Only one threshold table exists here, so the version number is not consulted.
+	memcpy ( pbi->QThreshTable, VP5_QThreshTable, sizeof( pbi->QThreshTable ) );
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_BuildQuantIndex_Generic
+ *
+ *  INPUTS        :     QUANTIZER *pbi : Quantizer instance to update.
+ *
+ *  OUTPUTS       :     pbi->transIndex and pbi->quant_index.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Points pbi->transIndex at the transIndexC table and
+ *                      fills pbi->quant_index with the inverse of the
+ *                      dequant_index permutation.
+ *
+ *  SPECIAL NOTES :     dequant_index holds 64 entries, so BLOCK_SIZE is
+ *                      presumably 64 (defined elsewhere - TODO confirm).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_BuildQuantIndex_Generic(QUANTIZER *pbi)
+{
+	INT32 ZigZagPos;
+
+	pbi->transIndex = transIndexC;
+
+	// dequant_index[zig-zag position] gives the x,y position, so writing
+	// the zig-zag position at that x,y slot yields the inverse mapping.
+	for ( ZigZagPos = 0; ZigZagPos < BLOCK_SIZE; ZigZagPos++ )
+		pbi->quant_index[ dequant_index[ZigZagPos] ] = ZigZagPos;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_UpdateQ
+ *
+ *  INPUTS        :     QUANTIZER *pbi     : Quantizer instance; FrameQIndex
+ *                                           and ThisFrameQuantizerValue are
+ *                                           read from it.
+ *                      UINT8 Vp3VersionNo : Passed on to VP5_init_dequantizer.
+ *
+ *  OUTPUTS       :     pbi->LastQuantizerValue, plus whatever
+ *                      VP5_BuildQuantIndex and VP5_init_dequantizer update.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Rebuilds the quant index and the dequantizer tables
+ *                      when QThreshTable[FrameQIndex] differs from the last
+ *                      recorded quantizer value; otherwise does nothing.
+ *
+ *  SPECIAL NOTES :     NOTE(review): the test compares against
+ *                      QThreshTable[FrameQIndex] but the value recorded is
+ *                      ThisFrameQuantizerValue - presumably the caller keeps
+ *                      the two equal; confirm.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_UpdateQ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{  
+	if ( pbi->QThreshTable[pbi->FrameQIndex] != pbi->LastQuantizerValue )
+	{
+		// Remember the quantizer the tables are about to be built for.
+		pbi->LastQuantizerValue = pbi->ThisFrameQuantizerValue;
+
+		// The decoder may use a different coefficient order than the
+		// compressor, so the index is rebuilt via VP5_BuildQuantIndex.
+		VP5_BuildQuantIndex(pbi);
+
+		// Decoder side only needs the dequantizer tables.
+		VP5_init_dequantizer ( pbi, Vp3VersionNo );
+	}
+}
+
+/********************* COMPRESSOR SPECIFIC **********************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_UpdateQC (compressor's update q)
+ *
+ *  INPUTS        :     QUANTIZER *pbi     : Quantizer instance; FrameQIndex
+ *                                           and ThisFrameQuantizerValue are
+ *                                           read from it.
+ *                      UINT8 Vp3VersionNo : Passed on to the table builders.
+ *
+ *  OUTPUTS       :     pbi->LastQuantizerValue, the quant index and both the
+ *                      forward and inverse quantizer tables.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Same early-out test as VP5_UpdateQ, but always uses
+ *                      the generic C index builder and re-initialises the
+ *                      forward quantizer as well as the dequantizer.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_UpdateQC( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{  
+	if ( pbi->QThreshTable[pbi->FrameQIndex] != pbi->LastQuantizerValue )
+	{
+		// Remember the quantizer the tables are about to be built for.
+		pbi->LastQuantizerValue = pbi->ThisFrameQuantizerValue;
+
+		// The compressor always works with the generic (C) ordering.
+		VP5_BuildQuantIndex_Generic(pbi);
+
+		// Forward tables first, then the inverse ones.
+		VP5_init_quantizer ( pbi, Vp3VersionNo );
+		VP5_init_dequantizer ( pbi, Vp3VersionNo );
+	}
+}
+
+/****************************************************************************
+* 
+*   Routine:	VP5_init_quantizer
+*
+*   Purpose:    Fills the forward-quantisation tables of a QUANTIZER for the
+*               current pbi->FrameQIndex: 16.16 fixed-point reciprocals
+*               (QuantCoeffs), rounding terms (QuantRound) and zero-bin sizes
+*               (ZeroBinSize) for Y (plane 0) and UV (plane 1), plus the
+*               8-entry round/mult/zbin arrays taken from the Y AC values.
+*
+*   Parameters :
+*       Input :
+*           QUANTIZER *pbi      :: Instance to initialise (FrameQIndex is read).
+*           UINT8 Vp3VersionNo  :: Not referenced by this routine.
+*
+*   Return value :
+*       None.
+*
+****************************************************************************
+*/
+#define SHIFT16 (1<<16)
+void VP5_init_quantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+	int    Coef;            // Coefficient / lane counter
+	double Reciprocal;      // 1 / (quantizer * 4); the * 4 is the FDCT normalisation
+	INT32  AcCoeff;         // 16.16 reciprocal shared by all 63 AC positions of a plane
+
+	// ******************* Y *********************
+
+	// DC: reciprocal, rounding term and zero bin
+	Reciprocal = 1.0 / ( VP5_DcQuant[pbi->FrameQIndex] * 4 );
+	pbi->QuantCoeffs[0][0] = (INT32) (0.5 + SHIFT16 * Reciprocal);
+	pbi->QuantRound[0][0]  = VP5_DcRTable[pbi->FrameQIndex];
+	pbi->ZeroBinSize[0][0] = VP5_DcZBinTable[pbi->FrameQIndex];
+
+	// AC: every position gets the same values, so the reciprocal is worked out once
+	Reciprocal = 1.0 / (double)(VP5_QThreshTable[pbi->FrameQIndex] * 4);
+	AcCoeff    = (INT32) (0.5 + SHIFT16 * Reciprocal);
+	for ( Coef = 1; Coef < 64; Coef++ )
+	{
+		pbi->QuantCoeffs[0][Coef] = AcCoeff;
+		pbi->QuantRound[0][Coef]  = VP5_RTable[pbi->FrameQIndex];
+		pbi->ZeroBinSize[0][Coef] = VP5_ZBinTable[pbi->FrameQIndex];
+	}
+
+	// ******************* UV *********************
+
+	// DC: reciprocal, rounding term and zero bin
+	Reciprocal = 1.0 / ( VP5_UvDcQuant[pbi->FrameQIndex] * 4 );
+	pbi->QuantCoeffs[1][0] = (INT32) (0.5 + SHIFT16 * Reciprocal);
+	pbi->QuantRound[1][0]  = VP5_UvDcRTable[pbi->FrameQIndex];
+	pbi->ZeroBinSize[1][0] = VP5_UvDcZBinTable[pbi->FrameQIndex];
+
+	// AC
+	Reciprocal = 1.0 / (double)(VP5_UvQThreshTable[pbi->FrameQIndex] * 4);
+	AcCoeff    = (INT32) (0.5 + SHIFT16 * Reciprocal);
+	for ( Coef = 1; Coef < 64; Coef++ )
+	{
+		pbi->QuantCoeffs[1][Coef] = AcCoeff;
+		pbi->QuantRound[1][Coef]  = VP5_UvRTable[pbi->FrameQIndex];
+		pbi->ZeroBinSize[1][Coef] = VP5_UvZBinTable[pbi->FrameQIndex];
+	}
+
+	// Eight copies of the Y AC values (zero bin less one); presumably for a
+	// vectorised quantizer - the consumer is not visible in this file.
+	for ( Coef = 0; Coef < 8; Coef++ )
+	{
+		pbi->round[Coef] = pbi->QuantRound[0][1];
+		pbi->mult[Coef]  = pbi->QuantCoeffs[0][1];
+		pbi->zbin[Coef]  = pbi->ZeroBinSize[0][1]-1;
+	}
+}
+
+
+/***************************************************************************
+* 
+*   Routine:    VP5_quantize_c
+*
+*   Purpose:    Quantizes one 64-coefficient DCT block by multiplying each
+*               coefficient (plus or minus a rounding term) by the 16.16
+*               reciprocal set up in VP5_init_quantizer. Output is written
+*               in zig-zag order.
+*
+*   Parameters :
+*       Input :
+*           pbi              -- Quantizer instance holding the tables
+*           DCT_block        -- The 64 coefficients to be quantized
+*           bp               -- Block position; QTableSelect[bp] picks the
+*                               table set to use
+*       Output :
+*           quantized_list   -- 64 quantized values in zig-zag order
+*
+*   Return value :
+*       None.
+*
+*   Persistent data referenced :
+*       dequant_index        Module static array read
+*       QTableSelect, MIN16  Defined outside this file section
+* 
+****************************************************************************
+*/
+#define HIGHBITDUPPED(X) (((signed short) X)  >> 15)
+void VP5_quantize_c( QUANTIZER *pbi, INT16 * DCT_block, Q_LIST_ENTRY * quantized_list, UINT8 bp )
+{
+	UINT32  i, j;
+	INT32   temp;
+
+	INT32 * QuantRoundPtr  = pbi->QuantRound[QTableSelect[bp]];
+	INT32 * QuantCoeffsPtr = pbi->QuantCoeffs[QTableSelect[bp]];
+	INT32 * ZBinPtr        = pbi->ZeroBinSize[QTableSelect[bp]];
+
+	// Anything that falls inside its dead zone stays at 0.
+	memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
+
+	// DC: the rounding term doubles as the threshold (zero-binning is
+	// deliberately not applied to DC).
+	temp = 0;
+	if ( DCT_block[0] >= QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_block[0] + QuantRoundPtr[0] );
+	else if ( DCT_block[0] <= -QuantRoundPtr[0] )
+		temp = QuantCoeffsPtr[0] * ( DCT_block[0] - QuantRoundPtr[0] ) + MIN16;
+	quantized_list[0] = (Q_LIST_ENTRY) (temp>>16);
+
+	// AC: walk the output in zig-zag order; j is the matching x,y position.
+	// The rounding term is applied away from zero before the multiply, and
+	// MIN16 is added on the negative side before the >> 16.
+	for ( i = 1; i < 64; i++ )
+	{
+		j = dequant_index[i];
+
+		if ( DCT_block[j] >= ZBinPtr[j] )
+		{
+			temp = QuantCoeffsPtr[j] * ( DCT_block[j] + QuantRoundPtr[j] );
+			quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+		}
+		else if ( DCT_block[j] <= -ZBinPtr[j] )
+		{
+			temp = QuantCoeffsPtr[j] * ( DCT_block[j] - QuantRoundPtr[j] ) + MIN16;
+			quantized_list[i] = (Q_LIST_ENTRY) (temp>>16);
+		}
+	}
+}
+/**************************** END COMPRESSOR SPECIFIC **********************************/
+/***************************************************************************************
+*  Dequantiser code for decode loop
+****************************************************************************************/
+
+/****************************************************************************
+* 
+*   Routine:	VP5_init_dequantizer
+*
+*   Purpose:    Fills pbi->dequant_coeffs for Y (plane 0) and UV (plane 1)
+*               from the quantizer tables at pbi->FrameQIndex, each value
+*               shifted left by IDCT_SCALE_FACTOR. AC values are placed at
+*               the positions given by pbi->quant_index.
+*
+*   Parameters :
+*       Input :
+*           QUANTIZER *pbi      :: Instance to initialise (FrameQIndex and
+*                                  quant_index are read).
+*           UINT8 Vp3VersionNo  :: Not referenced by this routine.
+*
+*   Return value :
+*       None.
+*
+****************************************************************************
+*/
+
+void VP5_init_dequantizer ( QUANTIZER *pbi, UINT8 Vp3VersionNo )
+{
+	int Coef;       // Coefficient counter (AC positions 1..63)
+	int Slot;       // Destination slot taken from pbi->quant_index
+
+	// AC: one value per plane, stored in zig-zag/transposed order as
+	// dictated by quant_index.
+	for ( Coef = 1; Coef < 64; Coef++ )
+	{
+		Slot = pbi->quant_index[Coef];
+
+		pbi->dequant_coeffs[0][Slot] = VP5_QThreshTable[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+		pbi->dequant_coeffs[1][Slot] = VP5_UvQThreshTable[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+	}
+
+	// DC is written last so that slot 0 always ends up holding the DC value.
+	pbi->dequant_coeffs[0][0] = VP5_DcQuant[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+	pbi->dequant_coeffs[1][0] = VP5_UvDcQuant[pbi->FrameQIndex] << IDCT_SCALE_FACTOR;
+}
+
+/****************************************************************************/
+/*																			*/
+/*		Select Quantisation Parameters										*/
+/*																			*/
+/*		void select_Y_dequantiser ( void )									*/
+/*			sets dequantiser to use for intra Y         					*/
+/*																			*/
+/*		void select_Inter_dequantiser ( void )								*/
+/*			sets dequantiser to use for inter Y         					*/
+/*																			*/
+/*		void select_UV_dequantiser ( void )									*/
+/*			sets dequantiser to use UV compression constants				*/
+/*																			*/
+/****************************************************************************/
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeleteQuantizerBuffers
+ *
+ *
+ *  INPUTS        :     QUANTIZER *pbi : Instance whose buffers are released
+ *
+ *  OUTPUTS       :     dequant_coeffsAlloc[] and dequant_coeffs[] are zeroed
+ *
+ *  RETURNS       :     None
+ * 
+ *
+ *  FUNCTION      :     Frees both dequantizer coefficient allocations (if
+ *                      present) and clears the raw and aligned pointers.
+ *
+ *  SPECIAL NOTES :     Safe to call on an instance with no buffers allocated.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+static void DeleteQuantizerBuffers(QUANTIZER *pbi)
+{
+	int Plane;      // 0 and 1, the two entries of dequant_coeffs[]
+
+	for ( Plane = 0; Plane < 2; Plane++ )
+	{
+		if ( pbi->dequant_coeffsAlloc[Plane] )
+			duck_free(pbi->dequant_coeffsAlloc[Plane]);
+
+		// dequant_coeffs[] points into the allocation, so it is cleared too.
+		pbi->dequant_coeffsAlloc[Plane] = 0;
+		pbi->dequant_coeffs[Plane]      = 0;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AllocateQuantizerBuffers
+ *
+ *
+ *  INPUTS        :     QUANTIZER *pbi : Instance to receive the buffers
+ *
+ *  OUTPUTS       :     dequant_coeffsAlloc[] (raw allocations) and
+ *                      dequant_coeffs[] (32-byte aligned views of them)
+ *
+ *  RETURNS       :     TRUE on success, FALSE if an allocation failed (in
+ *                      which case nothing is left allocated).
+ * 
+ *
+ *  FUNCTION      :     Releases any existing buffers, then allocates two
+ *                      buffers of 64 INT16 coefficients, each with 32 spare
+ *                      bytes so the working pointer can be rounded up to a
+ *                      32-byte boundary.
+ *
+ *  SPECIAL NOTES :     ROUNDUP32 advances the pointer itself rather than
+ *                      masking an integer copy of it: the previous form
+ *                      masked with the 32-bit constant 0xFFFFFFE0, which
+ *                      drops the upper half of a 64-bit address. Only the
+ *                      low five address bits are inspected, so a narrowing
+ *                      cast to unsigned long is harmless here.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#define ROUNDUP32(X) ( (unsigned char *)(X) + ( ( 32 - ( ( (unsigned long)(X) ) & 31 ) ) & 31 ) )
+static INT32 AllocateQuantizerBuffers(QUANTIZER *pbi)
+{
+	int Plane;      // 0 and 1, the two entries of dequant_coeffs[]
+
+	// Start from a clean slate so a repeated call cannot leak.
+	DeleteQuantizerBuffers(pbi);
+
+	for ( Plane = 0; Plane < 2; Plane++ )
+	{
+		pbi->dequant_coeffsAlloc[Plane] = (INT16 *)duck_malloc(32+64*sizeof(INT16), DMEM_GENERAL);
+		if ( !pbi->dequant_coeffsAlloc[Plane] )
+		{
+			// Undo a possibly successful earlier allocation.
+			DeleteQuantizerBuffers(pbi);
+			return FALSE;
+		}
+
+		pbi->dequant_coeffs[Plane] = (INT16 *)ROUNDUP32(pbi->dequant_coeffsAlloc[Plane]);
+	}
+
+	return TRUE;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_DeleteQuantizer
+ *
+ *
+ *  INPUTS        :     QUANTIZER **pbi : Address of the instance pointer
+ *
+ *  OUTPUTS       :     *pbi is set to 0 once the instance has been freed
+ *
+ *  RETURNS       :     None
+ * 
+ *
+ *  FUNCTION      :     Frees the instance's coefficient buffers and then the
+ *                      instance itself. Does nothing when *pbi is already 0.
+ *
+ *  SPECIAL NOTES :     pbi itself is dereferenced unconditionally and must
+ *                      not be null.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void VP5_DeleteQuantizer(QUANTIZER **pbi)
+{
+	QUANTIZER *Instance = *pbi;
+
+	// Nothing to release
+	if ( !Instance )
+		return;
+
+	// Owned buffers first, then the structure that holds their pointers.
+	DeleteQuantizerBuffers(Instance);
+	duck_free(Instance);
+	*pbi = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_CreateQuantizer
+ *
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     Pointer to a new zero-initialised QUANTIZER with its
+ *                      coefficient buffers allocated, or 0 if any allocation
+ *                      failed.
+ * 
+ *
+ *  FUNCTION      :     Allocates a QUANTIZER, clears it and attaches the
+ *                      dequantizer coefficient buffers.
+ *
+ *  SPECIAL NOTES :     Release the result with VP5_DeleteQuantizer.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+QUANTIZER * VP5_CreateQuantizer(void)
+{
+	int InstanceSize = sizeof(QUANTIZER);
+	QUANTIZER *pbi = (QUANTIZER *) duck_malloc(InstanceSize, DMEM_GENERAL);
+
+	if ( pbi )
+	{
+		// Zero everything so the buffer pointers start out as 0.
+		memset((unsigned char *) pbi, 0, sizeof(QUANTIZER));
+
+		// On failure the instance is destroyed, which also resets pbi to 0.
+		if ( !AllocateQuantizerBuffers(pbi) )
+			VP5_DeleteQuantizer(&pbi);
+	}
+
+	return pbi;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c
new file mode 100644
index 00000000..c863af9f
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/recon.c
@@ -0,0 +1,338 @@
+/****************************************************************************
+*
+*   Module Title :     recon.c
+*
+*   Description  :     reconstruction code
+*
+*    AUTHOR      :     jimb b
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.19 JBB 18 Mar 01 Reorganized code created this file
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#include "pbdll.h"
+#include "codec_common_interface.h"
+#include <string.h>
+
+/****************************************************************************
+*  Explicit imports
+*****************************************************************************
+*/        
+extern void AverageBlockBicubic_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void NewAverageBlock( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void UvAverageBlock( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT8 ModX, INT8 ModY );
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Exported Functions
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/              
+#define MIN(a, b) ( ( (a) < (b) ) ? (a) : (b) )	/* arguments parenthesized so operator expressions expand correctly; each argument may still be evaluated twice */
+#define Mod8(a) ( ((a) & 7))
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictFiltered
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi : Decoder instance (mbi, quantizer
+ *                                         and LoopFilteredBlock are used)
+ *                      UINT8 *SrcPtr    : Reference-frame position of the
+ *                                         block before motion is applied
+ *                      INT32 mx, my     : Motion vector components
+ *                      INT32 MvShift    : Right shift that turns a vector
+ *                                         component into whole pixels
+ *                      
+ *  OUTPUTS       :     pbi->LoopFilteredBlock (stride 16) receives the
+ *                      fetched region.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Fetches the region around the motion-compensated
+ *                      block, including a 2 pixel border on every side, into
+ *                      the temp buffer. If the fetched block straddles an
+ *                      8-pixel block boundary of the reference frame, the
+ *                      loop filter is applied across that boundary inside
+ *                      the temp buffer.
+ *
+ *  SPECIAL NOTES :     The source offset is kept signed: it is negative for
+ *                      up/left motion, and an unsigned 32-bit offset added to
+ *                      a 64-bit pointer would land about 4GB ahead instead.
+ *                      NOTE(review): the motion offset uses FrameReconStride
+ *                      while the border and the copy use CurrentReconStride -
+ *                      confirm these agree for the plane being processed.
+ *
+ *  ERRORS        :     None.
+ *
+ *****************************************************************************/
+void PredictFiltered
+(
+	PB_INSTANCE *pbi,
+	UINT8 *SrcPtr,
+	INT32 mx,
+	INT32 my,
+	INT32 MvShift
+) 
+{
+	INT32  BoundaryX, BoundaryY; 
+	INT32  mVx, mVy;
+	INT32  ReconIndex;
+	MACROBLOCK_INFO *mbi=&pbi->mbi;
+	UINT8 *TempBuffer = pbi->LoopFilteredBlock;
+
+	// Calculate full pixel motion vector position (truncating toward zero)
+	if(mx > 0 )
+		mVx = (mx >> MvShift);
+	else 
+		mVx = -((-mx) >> MvShift);
+
+	if(my > 0 )
+		mVy = (my >> MvShift);
+	else
+		mVy = -((-my) >> MvShift);
+
+	// calculate offset in last frame matching motion vector
+	ReconIndex = (INT32)mbi->FrameReconStride * mVy + mVx;
+
+	// give our selves a border of 2 extra pixel on all sides (for loop filter and half pixel moves)
+	ReconIndex -= 2 * (INT32)mbi->CurrentReconStride;
+	ReconIndex -= 2;
+
+	// copy the 12x12 region starting from reconpixel index into our temp buffer.
+	Copy12x12( SrcPtr + ReconIndex, TempBuffer, mbi->CurrentReconStride, 16);
+
+	// calculate block border position for x
+	BoundaryX = (8 - Mod8(mVx))&7;
+
+	// calculate block border position for y
+	BoundaryY = (8 - Mod8(mVy))&7;
+
+	// apply the loop filter at the horizontal boundary we selected
+	// (the + 2 skips the border columns)
+	if(BoundaryX)
+		FilteringHoriz_12(
+			pbi->quantizer->FrameQIndex, 
+			TempBuffer + 2 + BoundaryX, 
+			16);
+
+	// apply the loop filter at the vertical boundary we selected
+	// (the 2 * 16 skips the border rows)
+	if(BoundaryY)
+		FilteringVert_12(
+			pbi->quantizer->FrameQIndex, 
+			TempBuffer + 2 * 16 + BoundaryY * 16, 
+			16);
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     PredictFilteredBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Decoder instance; the mode and
+ *                                          motion vectors come from pbi->mbi
+ *                      BLOCK_POSITION bp : Block within the macroblock
+ *                                          (values below 4 take the
+ *                                          AVERAGE_ROUTINE path)
+ *                      
+ *  OUTPUTS       :     INT16 *OutputPtr  : Receives the 8x8 prediction
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Builds the 8x8 motion prediction for one block. The
+ *                      region is first fetched and loop-filtered into
+ *                      pbi->LoopFilteredBlock by PredictFiltered. A vector
+ *                      with a fractional part is then handled by averaging
+ *                      the full-pixel block with its neighbour in the
+ *                      direction of motion; otherwise the block is simply
+ *                      unpacked.
+ *
+ *  SPECIAL NOTES :     The golden frame is used when VP5_Mode2Frame maps the
+ *                      macroblock mode to 2, the last frame otherwise.
+ *
+ *  ERRORS        :     None.
+ *
+ *****************************************************************************/
+#define AVERAGE_ROUTINE AverageBlock
+//#define AVERAGE_ROUTINE AverageBlockBicubic_C
+//#define AVERAGE_ROUTINE NewAverageBlock
+
+//#define UV_AVERAGE_ROUTINE AverageBlock
+#define UV_AVERAGE_ROUTINE UvAverageBlock
+
+void PredictFilteredBlock
+(
+	PB_INSTANCE *pbi,
+	INT16* OutputPtr,
+	BLOCK_POSITION bp 
+) 
+{
+	MACROBLOCK_INFO *mbi=&pbi->mbi;
+	UINT8 *TempBuffer = pbi->LoopFilteredBlock;
+	UINT8 *SrcPtr;
+
+	// Offsets into the stride-16 temp buffer: the block proper starts
+	// after the 2 row / 2 column border.
+	UINT32 FullPelOffset = 2*16+2;
+	UINT32 FracPelOffset = FullPelOffset;
+
+	// Which buffer are we working on?
+	if ( VP5_Mode2Frame[mbi->Mode] == 2 ) 
+		SrcPtr = pbi->GoldenFrame;
+	else
+		SrcPtr = pbi->LastFrameRecon;
+
+	PredictFiltered( pbi, SrcPtr+mbi->Recon, mbi->Mv[bp].x, mbi->Mv[bp].y, mbi->MvShift );
+
+	// fractional move in x: second sample is one pixel toward the motion
+	if ( mbi->Mv[bp].x & mbi->MvModMask )
+	{
+		if ( mbi->Mv[bp].x > 0 )
+			FracPelOffset += 1;
+		else
+			FracPelOffset -= 1;
+	}
+
+	// fractional move in y: second sample is one row toward the motion
+	if ( mbi->Mv[bp].y & mbi->MvModMask )
+	{
+		if ( mbi->Mv[bp].y > 0 )
+			FracPelOffset += 16;
+		else
+			FracPelOffset -= 16;
+	}
+
+	// write the prediction into the caller's output block
+	if ( FullPelOffset == FracPelOffset )
+	{
+		UnpackBlock(&TempBuffer[FullPelOffset], OutputPtr, 16);
+	}
+	else if ( bp < 4 )
+	{
+		AVERAGE_ROUTINE(&TempBuffer[FullPelOffset], &TempBuffer[FracPelOffset], (unsigned short *)OutputPtr, 16);
+	}
+	else
+	{
+		// UV_AVERAGE_ROUTINE (which would also take the vector's low two
+		// bits in x and y) is not enabled; plain averaging is used instead.
+		AverageBlock(&TempBuffer[FullPelOffset], &TempBuffer[FracPelOffset], (unsigned short *)OutputPtr, 16);
+	}
+
+}
+
+#ifndef RECONSTRUCTMBATONCE
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ReconstructBlock
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi  : Decoder instance; the mode, recon
+ *                                          offset and stride come from
+ *                                          pbi->mbi
+ *                      BLOCK_POSITION bp : Block within the macroblock
+ *
+ *  OUTPUTS       :     The block at pbi->ThisFrameRecon[pbi->mbi.Recon]
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Reconstructs one block into the current frame using
+ *                      the path selected by the macroblock mode: inter from
+ *                      the last frame without motion, motion-compensated
+ *                      prediction, inter from the golden frame, or intra.
+ *
+ *                      Motion vectors and modes asumed to be defined at the MB level.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ReconstructBlock
+( 
+	PB_INSTANCE *pbi, 
+	BLOCK_POSITION bp
+)
+{
+	MACROBLOCK_INFO *mbi = &pbi->mbi;
+
+	// Where the reconstructed block lands in the current frame
+	UINT8 *ThisBlock = (UINT8 *)&pbi->ThisFrameRecon[mbi->Recon];
+
+	if ( mbi->Mode == CODE_INTER_NO_MV )
+	{
+		// Inter with no motion vector: co-located block of the last frame
+		ReconInter( pbi->TmpDataBuffer, ThisBlock, 
+			(UINT8 *)&pbi->LastFrameRecon[mbi->Recon], 
+			pbi->ReconDataBuffer, mbi->CurrentReconStride);
+	}
+	else if ( VP5_ModeUsesMC[mbi->Mode] )
+	{
+		// The mode uses a motion vector: build the prediction in
+		// TmpDataBuffer first (the compressor has already done this -
+		// possible optimization).
+		PredictFilteredBlock( pbi, pbi->TmpDataBuffer, bp);
+
+		ReconBlock( 
+			pbi->TmpDataBuffer,
+			pbi->ReconDataBuffer,
+			ThisBlock,
+			mbi->CurrentReconStride );
+	}
+	else if ( mbi->Mode == CODE_USING_GOLDEN )
+	{
+		// Co-located block of the golden frame plus the change data
+		ReconInter( pbi->TmpDataBuffer, ThisBlock, 
+			(UINT8 *)&pbi->GoldenFrame[ mbi->Recon ], 
+			pbi->ReconDataBuffer, mbi->CurrentReconStride );
+	}
+	else
+	{
+		// Simple Intra coding
+		ReconIntra( pbi->TmpDataBuffer, ThisBlock, (UINT16 *)pbi->ReconDataBuffer, mbi->CurrentReconStride );
+	}
+}
+
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CopyBlockC
+ *
+ *  INPUTS        :     unsigned char *src     : Top-left byte of the source
+ *                      unsigned int srcstride : Bytes between successive
+ *                                               rows; applied to BOTH the
+ *                                               source and the destination
+ *
+ *  OUTPUTS       :     unsigned char *dest    : Receives 8 rows of 8 bytes
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies an 8x8 block of bytes from source to destination
+ *
+ *  SPECIAL NOTES :     Rows are copied with memcpy rather than through
+ *                      UINT32 pointers, so neither pointer needs 4-byte
+ *                      alignment and no byte buffer is accessed through an
+ *                      incompatible type. Source and destination rows must
+ *                      not overlap.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void CopyBlockC(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+	int Row;
+
+	for ( Row = 0; Row < 8; Row++ )
+	{
+		memcpy( dest, src, 8 );
+
+		// the same stride advances both buffers
+		src  += srcstride;
+		dest += srcstride;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c
new file mode 100644
index 00000000..e18b8e93
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vfwpbdll_if.c
@@ -0,0 +1,750 @@
+/****************************************************************************
+*        
+*   Module Title :     vfwpbdll_if.c
+*
+*   Description  :     Video codec demo playback dll interface
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.29 YWX 17/dec/02 Added support of deinterlace
+*   1.28 YWX 05/08/02  Changed postprocess level setup for interlaced material
+*   1.27 AWG 20 Jun 01 Added code to overlay Motion Vectors onto display
+*   1.26 JBB 13 Jun 01 VP4 Code Clean Out
+*   1.25 YWX 26-Apr-01 Removed call of SetPbParam() in StartDecoder()
+*                      And set CPUFree as 70 when PostProcessingLevel=9 
+*   1.24 JBB 25-apr-01 clear sysstate added at end of frame blit
+*   1.23 JBB 06-Apr-01 CPU Free variable respond
+*   1.22 SJL 22-Mar-01 Fixed MAC compile errors
+*   1.21 JBX 22-Mar-01 Merged with new vp4-mapca bitstream  
+*   1.20 SJL 01 Dec 00 Fixed MAC compile errors
+*   1.19 JBB 30 Nov 00 Version number changes 
+*   1.18 JBB 14 Nov 00 Added version information function and pragma and cleaned
+*                      out unused code
+*   1.17 JBB 17-oct-00 Ifdefs around version information
+*   1.16 SJL 25 Aug 00 Fixed Mac compile error
+*   1.15 JBB 25 Aug 00 Better versioning
+*   1.14 JBB 22 Aug 00 Ansi C conversion
+*   1.13 SJL 14 Aug00  Moved SetPbParam into another file for the MAC 
+*   1.12 YWX 2 Aug00   Changed Postprocessing level initialization 
+*   1.11 JBB 31Jul00   Changed requirements for postprocessing due to new 
+*                      optimiztions
+*   1.10 JBB 27Jul00   Added malloc checks 
+*   1.09 YWX 15/05/00  Check Processor and Frame size to enable/disable 
+*                      postprocessor
+*   1.08 YWX 08/05/00  Added #if defined directives for postprocess
+*   1.07 JBB 05/05/00  Added PostProcessing Parameter
+*   1.06 JBB 27/01/99  Globals Removed, use of PB_INSTANCE, must be created
+*   1.05 PGW 05/11/99  Changes to support AC range entropy tables and to output
+*                      the appropriate stats to tune them.
+*   1.04 PGW 01/09/99  Modified to simulate Tim's DxReference interface.
+*   1.03 PGW 30/07/99  Added exception handlers and some code to try and insure
+*                      decoder is initialised before any frames are decoded.
+*   1.02 PGW 09/07/99  Added code to support profile timing
+*   1.01 PGW 29/06/99  Changes in DecodeFrame() to handle inversion of DIB when 
+*                      requested plus offsets into and pitch of the output image 
+*                      buffer.
+*   1.00 PGW 28/06/99  New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+  
+#define STRICT              /* Strict type checking. */
+#include <stdio.h> 
+
+#ifndef _MSC_VER
+
+#define __try  
+
+#endif
+
+#include "huffman.h"
+#include "pbdll.h"
+#include <math.h>
+#include "vp50dversion.h"
+#include "decodemode.h"
+#include "postproc_if.h"
+
+#ifndef MAPCA
+    #define CommentString "\nON2.COM VERSION VP50D " VP50DVERSION "\n"
+    #pragma comment(exestr,CommentString)
+#endif
+/****************************************************************************
+ *  Explicit Imports
+ *****************************************************************************
+ */ 
+
+extern void DecodeFrameMbs(PB_INSTANCE *pbi);
+extern unsigned int CPUFrequency;
+
+/****************************************************************************
+*  Module statics.
+*****************************************************************************
+*/        
+
+
+#ifdef PBSTATS1
+INT32  TotQ = 0;
+INT32  PBFrameNumber = 0;
+#endif
+static const char vp31dVersion[] = VP50DVERSION;
+
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+#if defined(_MSC_VER) 
+#if defined(POSTPROCESS)
+static const unsigned long PP_MACHINE_LOWLIMIT = 350; //Lowest CPU (MHz) to enable PostProcess
+static const unsigned long PP_MACHINE_MIDLIMIT = 400; //Lowest CPU (MHz) to enable PostProcess
+static const unsigned long PP_MACHINE_TOPLIMIT = 590; //Lowest CPU (MHz) to enable PostProcess
+#endif
+#endif
+
+extern void VP5_InitialiseConfiguration(PB_INSTANCE *pbi);
+#ifdef PBSTATS1
+// TEMP diagnostic variables
+INT32  TotBlocksCoded;
+#endif
+
+
+/****************************************************************************
+*  Foreward references
+*****************************************************************************
+*/
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP50D_GetVersionNumber
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     Pointer to the static version string (VP50DVERSION).
+ *
+ *  FUNCTION      :     Returns a pointer to the version string
+ *
+ *  SPECIAL NOTES :     The backing array is still named vp31dVersion.
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+const char * CCONV VP50D_GetVersionNumber(void)
+{
+    return vp31dVersion;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_StartDecoder
+ *
+ *  INPUTS        :     PB_INSTANCE **pbi : receives the new decoder instance
+ *                      UINT32 ImageWidth, ImageHeight : frame dimensions
+ *  OUTPUTS       :     *pbi is set to the created instance (see notes).
+ *
+ *  RETURNS       :     TRUE if succeeds else FALSE.
+ *
+ *  FUNCTION      :     Creates and initialises a playback (decoder) instance
+ *
+ *  SPECIAL NOTES :     Sub-objects created here are released again when
+ *                      initialisation fails, mirroring VP5_StopDecoder().
+ *
+ *  ERRORS        :     FALSE on allocation failure, on VP5_InitFrameDetails()
+ *                      failure, or (MSVC only) when an exception is trapped.
+ ****************************************************************************/
+
+BOOL CCONV VP5_StartDecoder( PB_INSTANCE **pbi, UINT32 ImageWidth, UINT32 ImageHeight )
+{ 
+    __try
+    {
+        // set up our structure holding all formerly global information about a playback instance
+        *pbi = VP5_CreatePBInstance();
+        if( !*pbi )
+            return FALSE;
+        // Record the requested picture dimensions
+        (*pbi)->ScaleWidth = ImageWidth;
+        (*pbi)->ScaleHeight = ImageHeight;
+        (*pbi)->OutputWidth = ImageWidth;
+        (*pbi)->OutputHeight = ImageHeight;
+		(*pbi)->OutputStride = ImageWidth + 32; 
+
+        // These are the dimensions VP5_InitFrameDetails() works from below.
+        (*pbi)->Configuration.VideoFrameWidth = ImageWidth;
+        (*pbi)->Configuration.VideoFrameHeight = ImageHeight;
+
+#ifndef MAPCA
+        (*pbi)->postproc = CreatePostProcInstance(&(*pbi)->Configuration);
+#endif
+        (*pbi)->quantizer = VP5_CreateQuantizer();
+
+        (*pbi)->ProcessorFrequency = CPUFrequency;
+        (*pbi)->DeInterlaceMode = 1;
+        // Fills in fragment counts as well. The quantizer is dereferenced below, so a failed creation counts as a setup failure too.
+        if( !(*pbi)->quantizer || !VP5_InitFrameDetails(*pbi) )
+        {
+            // Release what was created above (same order as VP5_StopDecoder) before the instance goes away.
+            if( (*pbi)->quantizer ) VP5_DeleteQuantizer(&(*pbi)->quantizer);
+#ifndef MAPCA
+            if( (*pbi)->postproc ) DeletePostProcInstance(&(*pbi)->postproc);
+#endif
+            VP5_DeletePBInstance(pbi);
+            return FALSE;
+        }
+
+        /* Set last_dct_thresh to an illegal value to make sure the
+        *  Q tables are initialised for the new video sequence. 
+        */
+        (*pbi)->quantizer->LastQuantizerValue = -1;
+
+        // Set up various configuration parameters.
+        VP5_InitialiseConfiguration(*pbi);
+
+        #ifdef MAPCA
+        InitDMAWriteReconDS(*pbi);
+        InitDMAReadReferenceDS(*pbi);
+        #endif
+        return TRUE;
+    }
+
+#if defined(_MSC_VER)
+    __except( TRUE )
+    {
+        VP5_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif
+
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_GetPbParam
+ *
+ *  INPUTS        :     PB_INSTANCE *   pbi        Decoder instance to query
+ *                      PB_COMMAND_TYPE Command    Which parameter to read
+ *
+ *  OUTPUTS       :     UINT32 *        Parameter  Receives the value.
+ *
+ *  RETURNS       :     None
+ *  
+ *  FUNCTION      :     Generalised query interface to decoder.
+ *
+ *  SPECIAL NOTES :     Only PBC_SET_POSTPROC is recognised (POSTPROCESS builds);
+ *                      for any other command *Parameter is left untouched.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CCONV VP5_GetPbParam( PB_INSTANCE *pbi, PB_COMMAND_TYPE Command, UINT32 *Parameter )
+{
+    switch ( Command )
+    {
+#if defined(POSTPROCESS)
+    case PBC_SET_POSTPROC:
+        *Parameter =pbi->PostProcessingLevel;
+        break;      // explicit: do not rely on falling through into default
+#endif
+    default:
+        break;
+    }
+}
+
+
+// Per-frame time budgets, scaled by the CPUFree percentage. Units are those of the
+// avg*Time fields (presumably microseconds, 1/30 s being ~33333 - TODO confirm).
+// NOTE: these macros expand in place and need a variable named pbi in scope.
+#define CRITICALWATERMARK (int) (31000 * pbi->CPUFree / 100)
+#define DOWNWATERMARK (int) (30000 * pbi->CPUFree / 100)
+#define UPWATERMARK   (int) (28000 * pbi->CPUFree / 100)
+// Returns the postprocessing level to use next (6, 5, 4, 8 or 0), or the current
+// level unchanged when CPUFree is 0 or the timing sits between the watermarks.
+int PickPostProcessingLevel(PB_INSTANCE *pbi)
+{
+	int minimumTime = pbi->thisDecodeTime + pbi->avgBlitTime + pbi->avgPPTime[8];
+	int thisTime = minimumTime + pbi->avgPPTime[pbi->PostProcessingLevel%10];	// %10: same slot VP5_GetYUVConfig() stores into (levels >100 / >200 exist)
+	int avgTime = pbi->avgDecodeTime + pbi->avgBlitTime;
+	
+	// estimate the times of all of our unknown postprocessors
+	if(pbi->avgPPTime[6]==0)
+		pbi->avgPPTime[6] = avgTime>>1;
+	
+	if(pbi->avgPPTime[5]==0)
+		pbi->avgPPTime[5] = avgTime>>1;
+
+	if(pbi->avgPPTime[4]==0)
+		pbi->avgPPTime[4] = (avgTime ) >> 2;
+
+	if(pbi->avgPPTime[8]==0)
+		pbi->avgPPTime[8] = avgTime>>3;
+
+	if(pbi->CPUFree == 0 )
+		return pbi->PostProcessingLevel;
+
+	// automatically select a postprocessing level based on the amount 
+	// of time taken to decode blit and postprocess etc
+	// more than 1/30 of a second no postprocessing at all (its better to show an 
+	// ugly frame than none at all). We use 1/30th of a second because nothing 
+	// tells us the actual framerate
+	if(thisTime > (int) (CRITICALWATERMARK))
+	{
+		// this frame's taking to long try to make up time on the subsequent frames
+		pbi->avgDecodeTime = pbi->thisDecodeTime; 
+
+		// pick the first level (tried in the order 6, 5, 4, 8) that fits the critical budget
+		if(pbi->avgPPTime[6] + minimumTime < CRITICALWATERMARK )
+			return 6;
+		
+		if(pbi->avgPPTime[5] + minimumTime < CRITICALWATERMARK )
+			return 5;
+		
+		if(pbi->avgPPTime[4] + minimumTime < CRITICALWATERMARK )
+			return 4;
+		
+		if(pbi->avgPPTime[8] + minimumTime < CRITICALWATERMARK )
+			return 8;
+
+		return 0;
+	}
+
+	if(thisTime < DOWNWATERMARK && thisTime > UPWATERMARK)
+		return pbi->PostProcessingLevel;
+
+	// pick the first level (tried in the order 6, 5, 4, 8) that fits under the up watermark
+	if(pbi->avgPPTime[6] + avgTime < UPWATERMARK )
+		return 6;
+
+	if(pbi->avgPPTime[5] + avgTime < UPWATERMARK )
+		return 5;
+
+	if(pbi->avgPPTime[4] + avgTime < UPWATERMARK )
+		return 4;
+
+	if(pbi->avgPPTime[8] + avgTime < UPWATERMARK )
+		return 8;
+
+	return 0;
+}
+#ifndef MAPCA
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_GetYUVConfig
+ *
+ *  INPUTS        :     YUV_BUFFER_CONFIG  * YuvConfig
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *  
+ *  FUNCTION      :     Gets details of the reconstruction buffer
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void CCONV VP5_GetYUVConfig( PB_INSTANCE (*pbi), YUV_BUFFER_CONFIG * YuvConfig )
+{
+    __try 
+    {
+#ifdef _MSC_VER
+		unsigned int duration;
+		unsigned int starttsc,endtsc;
+		VP5_readTSC(&starttsc);
+		endtsc = starttsc;	// when no postprocessing pass runs the measured time is 0, not an uninitialised value
+		pbi->PostProcessingLevel = PickPostProcessingLevel(pbi);
+#endif
+        if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+        {
+#ifdef _MSC_VER
+            extern void vp5_showinfo2(PB_INSTANCE *pbi);
+            extern void vp5_showinfo(PB_INSTANCE *pbi);
+			
+            if(pbi->PostProcessingLevel > 200 )	// levels above 200: postprocess at (level-200), then vp5_showinfo()
+            {
+                PostProcess
+					(
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel-200,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,        
+                    sizeof(FRAG_INFO),
+                    0x0001
+                    );
+				VP5_readTSC(&endtsc);
+                vp5_showinfo(pbi);
+            }
+            else if(pbi->PostProcessingLevel > 100 )	// levels above 100: postprocess at (level-100), then vp5_showinfo2()
+            {
+				
+				PostProcess
+                    (
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel-100,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,                
+                    sizeof(FRAG_INFO),
+                    0x0001
+                    );
+				VP5_readTSC(&endtsc);
+				vp5_showinfo2(pbi);
+            }
+            else
+#endif
+			{
+				pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;
+				
+                PostProcess
+                    (
+                    pbi->postproc,                  
+                    pbi->Vp3VersionNo,          
+                    pbi->FrameType,             
+                    pbi->PostProcessingLevel,   
+                    pbi->AvgFrameQIndex,            
+                    pbi->LastFrameRecon,        
+                    pbi->PostProcessBuffer,     
+                    (unsigned char *) pbi->FragInfo,                
+                    sizeof(FRAG_INFO),
+                    0x0001
+					);
+#ifdef _MSC_VER
+                VP5_readTSC(&endtsc);
+#endif
+			}
+			
+        }
+
+        if(pbi->BlackClamp)
+        {
+            ClampLevels( pbi->postproc,pbi->BlackClamp,pbi->WhiteClamp,pbi->PostProcessBuffer,	pbi->PostProcessBuffer);
+        }
+        if( pbi->Configuration.VideoFrameWidth < pbi->OutputWidth ||
+            pbi->Configuration.VideoFrameHeight < pbi->OutputHeight )
+        {
+            // Output is larger than the coded frame: describe ScaleBuffer with a 32 pixel margin first
+            YuvConfig->YWidth = pbi->OutputWidth+32; 
+            YuvConfig->YHeight = pbi->OutputHeight+32;
+            YuvConfig->YStride = YuvConfig->YWidth;
+            
+            YuvConfig->UVWidth = YuvConfig->YWidth / 2;
+            YuvConfig->UVHeight = YuvConfig->YHeight / 2;
+            YuvConfig->UVStride = YuvConfig->YStride / 2;
+			
+            YuvConfig->YBuffer = (char *)pbi->ScaleBuffer;
+            YuvConfig->UBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight;
+            YuvConfig->VBuffer = (char *)pbi->ScaleBuffer+YuvConfig->YWidth*YuvConfig->YHeight+YuvConfig->UVWidth*YuvConfig->UVHeight;
+
+			if(pbi->PostProcessingLevel)	// NOTE(review): unlike the unscaled path below, a deinterlace-only pass is not picked up here - confirm
+			{
+	            ScaleOrCenter( pbi->postproc, pbi->PostProcessBuffer, YuvConfig  );
+			}
+			else
+			{
+	            ScaleOrCenter( pbi->postproc, pbi->LastFrameRecon, YuvConfig  );
+			}
+
+			YuvConfig->YBuffer += 
+				(YuvConfig->YHeight - pbi->OutputHeight ) / 2 * YuvConfig->YStride 
+				+(YuvConfig->YWidth - pbi->OutputWidth) / 2;
+            YuvConfig->YWidth = pbi->OutputWidth; 
+            YuvConfig->YHeight = pbi->OutputHeight;
+            
+			YuvConfig->UBuffer += 
+				(YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride 
+				+(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+			YuvConfig->VBuffer += 
+				(YuvConfig->UVHeight - pbi->OutputHeight/2 ) / 2 * YuvConfig->UVStride 
+				+(YuvConfig->UVWidth - pbi->OutputWidth/2) / 2;
+
+            YuvConfig->UVWidth = pbi->OutputWidth / 2; 
+            YuvConfig->UVHeight = pbi->OutputHeight / 2;
+            //YuvConfig->UVStride = pbi->OutputWidth / 2;
+        }
+        else
+        {
+            YuvConfig->YWidth = pbi->Configuration.VideoFrameWidth;
+            YuvConfig->YHeight = pbi->Configuration.VideoFrameHeight;
+            YuvConfig->YStride = pbi->Configuration.YStride;
+            
+            YuvConfig->UVWidth = pbi->Configuration.VideoFrameWidth / 2;
+            YuvConfig->UVHeight = pbi->Configuration.VideoFrameHeight / 2;
+            YuvConfig->UVStride = pbi->Configuration.UVStride;
+
+            //if(pbi->PostProcessingLevel && (pbi->quantizer->FrameQIndex < PPROC_QTHRESH))
+            if( pbi->PostProcessingLevel ||(pbi->Configuration.Interlaced && pbi->DeInterlaceMode))
+            { 
+                YuvConfig->YBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconYDataOffset+(pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->PostProcessBuffer[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+            else
+            {
+                YuvConfig->YBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconYDataOffset+ (pbi->Configuration.YStride * UMV_BORDER) + UMV_BORDER];
+                YuvConfig->UBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconUDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+                YuvConfig->VBuffer = (char *)&pbi->LastFrameRecon[pbi->ReconVDataOffset+ (pbi->Configuration.UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2)];
+            }
+        }
+
+#if defined(_MSC_VER)   
+		duration = ( endtsc - starttsc )/ pbi->ProcessorFrequency ;
+
+		if( pbi->avgPPTime[pbi->PostProcessingLevel%10] == 0)
+		{
+			pbi->avgPPTime[pbi->PostProcessingLevel%10] = duration;
+		}
+		else
+		{
+			pbi->avgPPTime[pbi->PostProcessingLevel%10] = ( 7 * pbi->avgPPTime[pbi->PostProcessingLevel%10] + duration ) >> 3;
+		}
+#endif
+    }
+#if defined(_MSC_VER)   
+    __except ( TRUE )
+    {
+        VP5_ErrorTrap( pbi, GEN_EXCEPTIONS );
+    }    
+#endif
+}
+#endif
+/****************************************************************************  
+Debugging Aid Only: dumps pbi->ReconYPlaneSize bytes starting at address to
+"MapYF<x>.raw" (MAPCA) or "PcYF<x>.raw"; does nothing if the file cannot be opened */ 
+void writeframeYX(PB_INSTANCE *pbi, char * address,int x) 
+{ 	// write the frame 	
+	FILE *yframe; 	
+	char filename[255]; 	
+#ifdef MAPCA
+    sprintf(filename,"MapYF%d.raw",x); 	
+#else
+    sprintf(filename,"PcYF%d.raw",x); 	
+#endif
+	yframe=fopen(filename,"wb"); 	
+	if(!yframe) return;	// fopen failed: passing NULL to fwrite/fclose is undefined
+	fwrite(address,pbi->ReconYPlaneSize,1,yframe); 	
+	fclose(yframe); 
+} 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_DecodeFrameToYUV
+ *
+ *  INPUTS        :     PB_INSTANCE * pbi
+ *                              Decoder instance
+ *                      char *  VideoBufferPtr
+ *                              Compressed input video data
+ *                      unsigned int  ByteCount 
+ *                              Number of bytes compressed data in buffer.
+ *                      UINT32  ImageWidth, ImageHeight
+ *                              Ignored (cast to void).
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     0 on success, -1 if VP5_LoadFrame() rejects the frame
+ *                      header, -2 if an exception is trapped (MSVC builds only).
+ *  FUNCTION      :     Decodes a frame into the internal YUV reconstruction buffer.
+ *                      Details of this buffer can be obtained by calling GetYUVConfig().
+ *
+ *  SPECIAL NOTES :     The reconstruction buffer pointers are swapped after decoding,
+ *                      so the new picture is found in pbi->LastFrameRecon. Frames whose
+ *                      FrameType is BASE_FRAME are also copied to pbi->GoldenFrame.
+ *
+ *  ERRORS        :     See RETURNS.
+ *
+ ****************************************************************************/
+int CCONV VP5_DecodeFrameToYUV( PB_INSTANCE (*pbi), char * VideoBufferPtr, unsigned int ByteCount,
+                             UINT32 ImageWidth,     UINT32 ImageHeight )
+{
+    unsigned char *tmp;
+    (void) ImageHeight;
+    (void) ImageWidth;
+    __try
+    {
+#ifdef _MSC_VER
+		unsigned int duration;
+		unsigned int starttsc,endtsc;
+        VP5_readTSC(&starttsc);
+#endif
+		pbi->CurrentFrameSize = ByteCount;
+
+        //  start the boolean decoder
+        StartDecode(&pbi->br, (unsigned char*)VideoBufferPtr);
+
+        // decode the frame header
+        if ( !VP5_LoadFrame(pbi) )
+            return -1;
+
+        
+        // decode and reconstruct frame
+        DecodeFrameMbs(pbi);
+
+		// switch pointers so lastframe recon is this frame
+        tmp = pbi->LastFrameRecon;
+        pbi->LastFrameRecon = pbi->ThisFrameRecon;
+        pbi->ThisFrameRecon = tmp;
+
+
+#ifndef MAPCA
+        // update the border 
+        UpdateUMVBorder(pbi->postproc, pbi->LastFrameRecon);
+#else
+        VP5_UpdateUMVBorder(pbi, pbi->LastFrameRecon);
+#endif
+
+        
+		if( pbi->FrameType == BASE_FRAME )
+    	{
+            memcpy(pbi->GoldenFrame, pbi->LastFrameRecon, pbi->ReconYPlaneSize + 2* pbi->ReconUVPlaneSize); 	// Y plane plus both chroma planes
+		}
+
+#ifdef MAPCA
+		//if(debugme<1)
+		{
+			//EtiSysDcFlushDcache();
+            //writeframeYX(pbi,pbi->LastFrameRecon,debugme);
+			//debugme++;
+		}
+#endif
+		// If appropriate clear the MMX state.
+        ClearSysState();
+
+		//temp
+		//vp5_appendframe(pbi);
+
+        #ifdef PBSTATS1
+        // Update PB stats
+        TotQ += pbi->quantizer->ThisFrameQualityValue;
+        PBFrameNumber += 1;
+        #endif
+
+	    if(pbi->FrameType == BASE_FRAME )
+			pbi->AvgFrameQIndex = pbi->quantizer->FrameQIndex;	// BASE_FRAME restarts the running average
+		else
+			pbi->AvgFrameQIndex = (2 + 3 * pbi->AvgFrameQIndex + pbi->quantizer->FrameQIndex) / 4 ;	// 3:1 weighted average, +2 rounds
+
+#ifdef _MSC_VER
+        VP5_readTSC(&endtsc);
+
+		duration = (endtsc-starttsc)/ (pbi->ProcessorFrequency) ;	// NOTE(review): a zero ProcessorFrequency would fault here and end in the -2 handler below
+
+		pbi->thisDecodeTime = duration;
+
+		if( pbi->avgDecodeTime == 0)
+		{
+			pbi->avgDecodeTime = duration;
+		}
+		else
+		{
+			pbi->avgDecodeTime = (7*pbi->avgDecodeTime + duration)>>3;	// 7/8 old + 1/8 new
+		}
+
+#endif
+
+
+    }
+#if defined(_MSC_VER) 
+    __except ( TRUE )
+    {
+        VP5_ErrorTrap( pbi, GEN_EXCEPTIONS );
+        return -2;
+    }
+#endif    
+    return 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_StopDecoder
+ *
+ *  INPUTS        :     PB_INSTANCE **pbi : instance created by VP5_StartDecoder
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     TRUE (also when *pbi is NULL); FALSE only from the MSVC exception handler.
+ *
+ *  FUNCTION      :     Releases the decoder instance and the objects it owns
+ *
+ *  SPECIAL NOTES :     MAPCA builds close the DMA descriptors first and have
+ *                      no postprocessor instance to delete.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+int CCONV VP5_StopDecoder(PB_INSTANCE **pbi)
+{
+
+#ifdef MAPCA
+    CloseDMAReadReferenceDS();
+    CloseDMAWriteReconDS();
+#endif
+    
+    __try
+    {
+        if(*pbi)
+        {
+            // Release the sub-objects first, then the instance itself
+            VP5_DeleteQuantizer(&(*pbi)->quantizer);
+#ifndef MAPCA
+            DeletePostProcInstance(&(*pbi)->postproc);
+#endif
+            VP5_DeleteFragmentInfo(*pbi);
+            VP5_DeleteFrameInfo(*pbi);
+
+
+            VP5_DeletePBInstance(pbi);
+        
+            return TRUE;
+        }
+    }
+
+#if defined(_MSC_VER)        
+    __except ( TRUE )
+    {
+        VP5_ErrorTrap( *pbi, GEN_EXCEPTIONS );
+        return FALSE;
+    }
+#endif    
+    return TRUE;
+}
+
+#ifndef MAPCA
+/****************************************************************************
+ * 
+ *  ROUTINE       :     VP5_ErrorTrap
+ *
+ *  INPUTS        :     PB_INSTANCE *pbi, int ErrorCode (both currently unused)
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Called when a fatal error is detected.
+ *                      Currently a no-op: the body is empty, so no error flag
+ *                      is set and the call returns immediately.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+ static void VP5_ErrorTrap( PB_INSTANCE *pbi, int ErrorCode )
+ {
+ }
+#endif
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c
new file mode 100644
index 00000000..13e2bc55
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/dx/generic/vp50dxv.c
@@ -0,0 +1,429 @@
+/* 
+ dxvmpg.cpp : Defines the entry point for the console application.
+*/ 
+#include <stdlib.h> 
+
+#include "dkpltfrm.h" /* platform specifics */
+#include "duktypes.h" /* predefined general types used at duck */
+
+#include "duck_mem.h" /* interface to memory manager */
+#include "dxl_main.h" /* interface to dxv */
+#include "pbdll.h"
+
+typedef unsigned long FourCC;
+ 
+#define VP50_FOURCC DXL_MKFOURCC( 'V', 'P', '5', '0')
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned long Parameter );
+
+extern void vp3SetBlit(void);
+extern void VP5_VPInitLibrary(void);
+extern void VP5_VPDeInitLibrary(void);
+#ifdef _MSC_VER 
+#pragma warning(disable:4055)
+#endif
+
+#include "duck_dxl.h"
+extern void VP5_readTSC(unsigned long *tsc);
+
+void vp50_GetInfo(unsigned char * source, FrameInfo * frameInfo)
+{
+    // Byte 0: a value of 0x7f or less (top bit clear) marks a key frame
+    const unsigned char firstByte = source[0];
+
+    frameInfo->KeyFrame = (firstByte <= 0x7f);
+    frameInfo->Quality = firstByte >> 2;
+
+    // The version bits are only read (from byte 2) for key frames
+    frameInfo->Version = frameInfo->KeyFrame ? ((source[2]>>3) & 0x1f) : 0;
+
+    // Byte 1 is passed through unchanged
+    frameInfo->vp30Flag = (int)source[1];
+}
+
+
+// YUV buffer configuration structure. Pointers to it are cast to YUV_BUFFER_CONFIG * in vp50_blit() and vp50_decompress(), so the leading members presumably mirror that type - confirm before reordering
+typedef struct
+{
+    int     YWidth;
+    int     YHeight;
+    int     YStride;
+
+    int     UVWidth;
+    int     UVHeight;
+    int     UVStride;
+
+    char *  YBuffer;
+    char *  UBuffer;
+    char *  VBuffer;
+	// The three members below are written by vp50_blit() for DXYV12 / DXI420 destinations only
+	char *  uvStart;
+    int uvDstArea;
+    int uvUsedArea;
+
+} DXV_YUV_BUFFER_CONFIG;
+
+/* define an xImage structure based on the core xImage struct */
+typedef struct tXImageCODEC
+{
+	xImageBaseStruct;	/* presumably a macro supplying the common xImage members (addr, fSize, imWidth, ...) - confirm */
+	FourCC myFourCC;
+	DXV_YUV_BUFFER_CONFIG FrameBuffer;	/* refreshed by VP5_GetYUVConfig() in vp50_decompress() */
+	PB_INSTANCE *myPBI;	/* decoder instance handed to VP5_DecodeFrameToYUV() */
+	int owned;	/* non-zero: vp50_xImageDestroy() calls VP5_StopDecoder() on myPBI */
+	
+} vp50_XIMAGE,*vp50_XIMAGE_HANDLE;
+
+static dxvBitDepth bitDepths[] = 
+{
+	DXRGB32,DXRGB24,DXRGB16,DXRGBNULL	/* installed as bdPrefs by vp50_xImageReCreate(); DXRGBNULL presumably ends the list - confirm */
+};
+
+/* Function-pointer types vp50_blit() casts vScreen->blitter and vScreen->blitSetup / blitExit to before calling them */
+typedef void ((*vp5BLIT_FUNC)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+typedef void ((*vp5_VSCREEN_FUNC)(void));
+
+
+DXL_INTERNAL_FORMAT vp50_GetXImageInternalFormat(DXL_XIMAGE_HANDLE xImage,
+												DXL_VSCREEN_HANDLE vScreen)
+{	/* the answer is always YV12; neither argument is consulted */
+	(void) xImage;
+	(void) vScreen;
+	return YV12;
+}
+/* Blits the current frame of pbi to vScreen at offset (x,y) using the blitter
+   registered for src/vScreen. Returns DXL_INVALID_BLIT when no blitter is
+   available, otherwise DXL_OK (also when vScreen or vScreen->addr is NULL and
+   nothing is drawn). Updates pbi->avgBlitTime as a 7/8 running average. */
+int vp50_blit(PB_INSTANCE *pbi,DXL_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen,DXV_YUV_BUFFER_CONFIG *FrameBuffer,int x, int y )
+{
+    if(vScreen && ((void *)(src->internalFormat) != NULL)) {
+        /* get your hamdy damdy((c)1997 Duck North) registered blitter setup */
+        vScreen->blitSetup = DXL_GetBlitSetupFunc(src,vScreen);
+        vScreen->blitExit = DXL_GetBlitExitFunc(src,vScreen);
+        vScreen->blitter = DXL_GetBlitFunc(src, vScreen); 
+
+        if (vScreen->blitter ==  (void *) -1)
+            return DXL_INVALID_BLIT;
+    }
+
+	if (vScreen) /* if there is a vScreen, blit to it */
+	{
+		if (vScreen->addr)
+        { 
+    		int pSize;
+            int w,h;
+            unsigned char *ptrScrn;
+            int thisPitch = vScreen->pitch;
+			unsigned int duration;
+			unsigned long starttsc = 0,endtsc = 0;	/* type matches the VP5_readTSC() prototype declared in this file */
+
+			/* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+		    VP5_GetYUVConfig(pbi, (YUV_BUFFER_CONFIG *) FrameBuffer);
+
+            pSize = DXL_GetVScreenSizeOfPixel(vScreen);
+
+		    /* remember to offset if requested */
+		    y += vScreen->viewY;           
+		    x += vScreen->viewX ;
+
+            /* for planar destinations */
+            w = vScreen->viewW;//pitch;
+            h = vScreen->height;
+
+			if(w != FrameBuffer->YWidth)
+			{
+				FrameBuffer->YWidth = w;
+				FrameBuffer->UVWidth = (w+1)/2;
+			}
+			if(h != FrameBuffer->YHeight)
+			{
+				FrameBuffer->YHeight = h;
+				FrameBuffer->UVHeight = (h+1)/2;
+			}
+		    ptrScrn = vScreen->addr;
+	        ptrScrn += (x * pSize) + (y * thisPitch);
+
+            /* setup ptrs so we can work backwards through Paul's frame buffers */
+            FrameBuffer->YBuffer = FrameBuffer->YBuffer + 
+                    ((FrameBuffer->YHeight - 1) * 
+                     (FrameBuffer->YStride));
+
+			FrameBuffer->UBuffer = FrameBuffer->UBuffer +
+                    ((FrameBuffer->UVHeight - 1) * 
+                     (FrameBuffer->UVStride));
+			
+            FrameBuffer->VBuffer = FrameBuffer->VBuffer +
+                    ((FrameBuffer->UVHeight - 1) * 
+                     (FrameBuffer->UVStride));
+
+            if((vScreen->bd != DXYUY2) && (vScreen->bd != DXYV12))
+            {
+                if(vScreen->bq == DXBLIT_STRETCH)
+                {
+                    thisPitch *= 2;
+                }
+            }
+
+            if(vScreen->bd == DXYV12||vScreen->bd == DXI420)
+            {
+				if(thisPitch < 0)
+				{
+					FrameBuffer->uvStart = (char *) (ptrScrn + abs(thisPitch) + abs(thisPitch) * h/4 + thisPitch/2 );
+					FrameBuffer->uvDstArea = abs((thisPitch * h)/4);
+					FrameBuffer->uvUsedArea = 0;
+				}
+				else
+				{
+					FrameBuffer->uvStart = (char *) (ptrScrn + (thisPitch * h));
+					FrameBuffer->uvDstArea = (((thisPitch+1)/2) * (( h+1)/2));
+					FrameBuffer->uvUsedArea = (((thisPitch+1)/2) * FrameBuffer->UVHeight);
+				}
+
+				// Temporary fix for Scott Kludge Kludge Kludge !!!!!!!!!
+				// ptrScrn -= thisPitch; // fixes a bug in assembly code for some reason the buttnutt is adding pitch to Y buffer
+            }
+
+			/* if a blitter hasn't been set up set one up ! */
+            if (vScreen->blitSetup != (void *)-1) 
+                ((vp5_VSCREEN_FUNC)vScreen->blitSetup)();
+
+			/* if its still not set up return that it failed */
+            if ((vp5BLIT_FUNC)vScreen->blitter == (vp5BLIT_FUNC)-1)
+                return DXL_INVALID_BLIT;
+
+			/* blit the screen */
+			VP5_readTSC(&starttsc);
+			if(pbi->Configuration.Interlaced==1 && (vScreen->bd != DXYV12 && vScreen->bd != DXI420))
+			{
+				/* interlaced source: blit in two passes, each at doubled source stride and doubled destination pitch */
+				int ypitch = FrameBuffer->YStride;
+				int uvpitch = FrameBuffer->UVStride; 
+		
+				FrameBuffer->YStride <<= 1;
+				FrameBuffer->YHeight >>= 1;
+				FrameBuffer->UVStride <<= 1;
+				FrameBuffer->UVHeight >>= 1;
+
+				ptrScrn+=thisPitch;
+				FrameBuffer->YBuffer -= ypitch;
+				FrameBuffer->UBuffer -= uvpitch;
+				FrameBuffer->VBuffer -= uvpitch;
+	            ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch*2, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+
+				ptrScrn-=thisPitch;
+				FrameBuffer->YBuffer += ypitch;
+				FrameBuffer->UBuffer += uvpitch;
+				FrameBuffer->VBuffer += uvpitch;
+	            ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch*2, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+			}
+			else
+			{
+	            ((vp5BLIT_FUNC)vScreen->blitter)(ptrScrn, thisPitch, (YUV_BUFFER_CONFIG *)(FrameBuffer));
+			}
+			VP5_readTSC(&endtsc);
+
+			/* no exception handler in this function: guard the division against an unset frequency */
+			duration = pbi->ProcessorFrequency ? (unsigned int)(( endtsc - starttsc ) / (pbi->ProcessorFrequency)) : 0;
+			if( pbi->avgBlitTime == 0)
+			{
+				pbi->avgBlitTime = duration;
+			}
+			else
+			{
+				pbi->avgBlitTime = (7*pbi->avgBlitTime + duration)>>3;
+			}
+
+			/* blitter cleanup ?*/
+            if ((vp5BLIT_FUNC)vScreen->blitExit != (vp5BLIT_FUNC)-1) 
+                ((vp5_VSCREEN_FUNC)vScreen->blitExit)();
+
+        }
+	}
+	return DXL_OK;
+}
+
+
+static int vp50_decompress(vp50_XIMAGE_HANDLE src, DXL_VSCREEN_HANDLE vScreen)
+{
+	int status = 0;
+	int haveFrame = 0;
+
+	// A frame counts as present when there is a buffer, a non-zero size and at
+	// least one of its first three bytes compares >= 1. vScreen is not used here.
+	if (src->addr && src->fSize != 0)
+	{
+		haveFrame = (src->addr[0]>=1 || src->addr[1]>=1 || src->addr[2] >=1);
+	}
+
+	// Without a frame we only redo scaling/postprocessing of the last picture.
+	if (haveFrame)
+	{
+		status = VP5_DecodeFrameToYUV(src->myPBI, (char *)src->addr, src->fSize,
+									  src->imWidth, src->imHeight);
+	}
+
+	// -1 is reported as a version conflict, any other failure as bad data
+	if (status == -1)
+		return DXL_VERSION_CONFLICT;
+	if (status != 0)
+		return DXL_BAD_DATA;
+
+	// Refresh src->FrameBuffer from the decoder
+	VP5_GetYUVConfig(src->myPBI, (YUV_BUFFER_CONFIG *) &src->FrameBuffer);
+
+	return DXL_OK;
+}
+
+/* 
+  close down a decompressor, releasing the wilk decompressor, 
+  the xImage (decompressor), and the intermediate vScreen (surface)
+*/
+
+static int vp50_xImageDestroy(vp50_XIMAGE_HANDLE xThis)
+{
+	// Nothing to release for a null handle; this still counts as success
+	if (!xThis)
+		return DXL_OK;
+
+	// The decoder is only stopped when this xImage owns it
+	if (xThis->owned)
+		VP5_StopDecoder(&(xThis->myPBI));
+	duck_free(xThis);
+	return DXL_OK;
+}
+
+/* 
+  called during initialization and/or when xImage (decompressor)
+  attributes change: destroys src (if non-NULL), allocates a new
+  xImage, fills in its interface pointers and starts a VP5 decoder.
+  w or h of 0 selects 320 / 240; bitDepth is ignored.  Returns NULL if
+  type is not VP50_FOURCC, allocation fails or the decoder won't start.
+  !!!!!!
+  This function should be prepared to get data that is NOT of the 
+  type native to the decoder,  It should do its best to verify it 
+  as valid data and should clean up after itself and return NULL
+  if it doesn't recognize the format of the data
+*/
+static DXL_XIMAGE_HANDLE vp50_xImageCreate(unsigned char *data);
+static DXL_XIMAGE_HANDLE vp50_xImageReCreate(vp50_XIMAGE_HANDLE src,unsigned char *data,
+	int type,enum BITDEPTH bitDepth,int w,int h)
+{  
+	(void) bitDepth;
+    if (type != VP50_FOURCC) 
+		return NULL;
+
+	if (src != NULL)	/* if an xImage/decompressor already exists, destroy it */
+		vp50_xImageDestroy(src);
+
+	/* create a new xImage, specific to this type of decoder, 
+        (see "vp50_XIMAGE" struct above and dxl_main.h) */
+
+	src = (vp50_XIMAGE_HANDLE)duck_calloc(1,sizeof(vp50_XIMAGE),DMEM_GENERAL);
+
+	if (!src) 
+        return NULL;
+
+	/* assumes duck_calloc zero-fills like calloc, so owned is 0 until set below - TODO confirm */
+
+	/* set up the "vtable" of interface calls */
+    src->create =  (DXL_XIMAGE_HANDLE (*)(void *)) vp50_xImageCreate;
+    src->recreate =  (DXL_XIMAGE_HANDLE (*)(DXL_XIMAGE_HANDLE,void *,int,int,int,int)) vp50_xImageReCreate;
+
+	src->destroy = (int (*)(DXL_XIMAGE_HANDLE))vp50_xImageDestroy;
+	src->dx = (int (*)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE)) vp50_decompress;
+	src->blit = NULL; /* there is no interleaved blitter for vp5x files */
+	src->setParameter = vp50_SetParameter;
+
+#if !KLUDGE_FOR_NEIL
+    src->internalFormat = (int (*)(DXL_XIMAGE_HANDLE, DXL_VSCREEN_HANDLE)) vp50_GetXImageInternalFormat; 
+#endif
+	src->bdPrefs = bitDepths; /* plug in the list of preferred bit depths */
+
+    src->addr = data;
+    src->dkFlags.inUse = 1;
+
+	src->imWidth = src->w = (short) (w ? w : 320);
+	src->imHeight = src->h = (short) (h ? h : 240);
+
+	src->myFourCC = VP50_FOURCC;
+  
+    /* create new PBI */
+    if(!VP5_StartDecoder( &(src->myPBI), src->imWidth, src->imHeight ))
+    {
+		vp50_xImageDestroy(src);	/* owned has not been set yet, so only the xImage is freed */
+        return NULL;			/* must not fall through to the src->owned write below */
+    }
+	src->owned = 1;	/* decoder started: vp50_xImageDestroy must now stop it */
+
+    return (DXL_XIMAGE_HANDLE ) src;
+}
+
+/* in this "glue" case, just calls through to vp50_xImageReCreate with no existing */
+/* xImage, type VP50_FOURCC, bit depth 0 and w/h of 0 (ReCreate substitutes its defaults) */
+static DXL_XIMAGE_HANDLE vp50_xImageCreate(unsigned char *data)
+{
+	return vp50_xImageReCreate(NULL, data, VP50_FOURCC, (enum BITDEPTH ) 0,0,0);
+}
+
+int vp50_Init(void)
+{
+	/* register vp50_xImageCreate as the xImage creator for VP50_FOURCC, passing YV12 as the third argument */
+    DXL_RegisterXImage( 
+		(DXL_XIMAGE_HANDLE (*)(unsigned char *)) vp50_xImageCreate,
+		VP50_FOURCC, 
+        YV12 
+		);
+
+	/* NOTE(review): any result of DXL_RegisterXImage is not checked; DXL_OK is returned regardless */
+	/* initialize all the global variables */
+	VP5_VPInitLibrary();
+
+	return DXL_OK;
+}
+
+/* 
+    main exit routine, called during DXL_ExitVideo() 
+    clean up any global information if necessary
+*/
+/* calls VP5_VPDeInitLibrary() (vp50_Init calls VP5_VPInitLibrary()); always returns DXL_OK */
+int vp50_Exit(void)
+{
+	VP5_VPDeInitLibrary();
+
+	return DXL_OK;
+}
+
+void vp50_SetParameter(DXL_XIMAGE_HANDLE src,int Command, unsigned long Parameter )
+{	/* src is used as a vp50_XIMAGE_HANDLE and is not checked for NULL */
+	if(Command == PBC_SET_PBSTRUCT)
+	{
+		/* PBC_SET_PBSTRUCT: Parameter carries a PB_INSTANCE pointer that replaces this xImage's decoder */
+		if(((vp50_XIMAGE_HANDLE) src)->owned)
+	        VP5_StopDecoder(&(((vp50_XIMAGE_HANDLE) src)->myPBI));
+		/* the replacement is marked not owned, so vp50_xImageDestroy will not stop it */
+		((vp50_XIMAGE_HANDLE) src)->owned = 0;
+		((vp50_XIMAGE_HANDLE) src)->myPBI= (PB_INSTANCE *) Parameter;
+		/* NOTE(review): a pointer carried in unsigned long is truncated where long is narrower than a pointer (e.g. 64-bit Windows) - confirm targets */
+	}
+	else	/* every other command is forwarded to the decoder with Parameter cast to UINT32 */
+		VP5_SetPbParam( ((vp50_XIMAGE_HANDLE) src)->myPBI, (PB_COMMAND_TYPE) Command, (UINT32) Parameter );
+}
+
+typedef	struct {
+	unsigned char*	baseAddr;	/* GetImageBufs stores the plane's buffer pointer here */
+	long			rowBytes;	/* GetImageBufs stores the plane's stride here */
+} YV12_PLANE;
+/* the three planes (y, u, v) filled in by GetImageBufs */
+typedef	struct {
+	YV12_PLANE	y;
+	YV12_PLANE	u;
+	YV12_PLANE	v;
+} YV12_PLANES;
+
+void GetImageBufs(DXL_XIMAGE_HANDLE x, YV12_PLANES *p)
+{ /* copies the plane pointers and strides from x's FrameBuffer into *p; neither argument is checked for NULL */
+  vp50_XIMAGE_HANDLE xim=(vp50_XIMAGE_HANDLE)x;
+  p->y.baseAddr=(unsigned char *)xim->FrameBuffer.YBuffer;
+  p->u.baseAddr=(unsigned char *)xim->FrameBuffer.UBuffer;
+  p->v.baseAddr=(unsigned char *)xim->FrameBuffer.VBuffer;
+  p->y.rowBytes=xim->FrameBuffer.YStride;
+  p->u.rowBytes=xim->FrameBuffer.UVStride; /* u and v both take UVStride */
+  p->v.rowBytes=xim->FrameBuffer.UVStride;
+}
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h b/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h
new file mode 100644
index 00000000..eccbbc67
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/HuffTables.h
@@ -0,0 +1,33 @@
+/****************************************************************************
+*
+*   Module Title :     HuffTables.h
+*
+*   Description  :     Video CODEC
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.01 JBB 26 Jan 01 New Huffman Code
+*   1.00 PGW 12/10/00  Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef HUFFTAB_H
+#define HUFFTAB_H
+
+#include "type_aliases.h"
+#include "huffman.h"
+
+/****************************************************************************
+*  Huffman tables
+*****************************************************************************
+*/
+// NOTE(review): these are definitions, not extern declarations; including this header from more than one source file would define the symbols more than once
+// For details of tokens and extra bit breakdown see token definitions in huffman.h
+UINT8  ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS] = { 0, 1, 1, 1, 1, 2, 3, 5, 6, 12, 0 };
+UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS] = { 0, 1, 2, 3, 4, 5, 7, 11, 27, 59, 0 };
+
+#endif
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h b/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h
new file mode 100644
index 00000000..4c1a53b1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/boolhuff.h
@@ -0,0 +1,78 @@
+/****************************************************************************
+*
+*   Module Title :     boolhuff.H
+*
+*   Description  :     Video CODEC
+*
+*    AUTHOR      :     James Bankoski
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.00 JBB 01JUN01  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#ifndef boolhuff_h 
+
+#define boolhuff_h
+// NOTNORMALIZED selects which of the two BOOL_CODER layouts below is compiled
+#ifdef NOTNORMALIZED
+typedef struct _boolcoder
+{
+    unsigned char *buffer;
+    unsigned int pos;
+	union	// anonymous union: value and v[4] share the same storage
+	{
+		unsigned int value;
+		unsigned char v[4];
+	};
+    unsigned int range;
+} BOOL_CODER;
+#else 
+typedef struct 
+{ 
+	unsigned int  bits;
+	unsigned int  bitpos;
+	unsigned int *source;
+	unsigned int  pos;
+} bitpump;
+typedef struct 
+{
+	unsigned int lowvalue;
+	unsigned int range;
+	unsigned int value;
+	         int count;
+	unsigned int pos;
+    unsigned char *buffer;
+
+	// Variables used to track bit costs without outputting to the bitstream
+	unsigned int  MeasureCost;
+	unsigned long BitCounter;
+} BOOL_CODER;
+#endif 
+// decoder entry points; all operate on a caller-supplied BOOL_CODER
+extern void StartDecode(BOOL_CODER *bc, unsigned char *buffer);
+
+extern int DecodeBool(BOOL_CODER *bc, int context);
+extern int DecodeBool128(BOOL_CODER *bc);
+
+extern void StopDecode(BOOL_CODER *bc);
+// encoder entry points
+extern void StartEncode(BOOL_CODER *bc, unsigned char *buffer);
+
+extern void EncodeBool(BOOL_CODER *bc, int x, int context);
+extern void EncodeBool2(BOOL_CODER *bc, int x, int context);
+extern void StopEncode(BOOL_CODER *bc);
+// cost tables with 256 entries each, in double and unsigned int form; only declared here
+extern double shannonCost0[256];
+extern double shannonCost1[256];
+extern unsigned int shannon64Cost0[256];
+extern unsigned int shannon64Cost1[256];
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h b/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h
new file mode 100644
index 00000000..b4b8d78b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/compdll.h
@@ -0,0 +1,562 @@
+/****************************************************************************
+*
+*   Module Title :     COMPDLL.H
+*
+*   Description  :     Video CODEC demo compression DLL main header
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.34 YWX 09-Dec-02 Added Function pointers for frame/field variances calculation
+*   1.33 YWX 30-Oct-02 Added EncoderLoopFilterOff flag
+*   1.32 YWX 28-Oct-02 Added function pointer for 5 region diamond search 
+*   1.31 YWX 28-Oct-02 Added above and left token context and 5 region
+*                      diamond motion search sites
+*   1.30 YWX 02-Jul-02 Added new function pointers for motion search
+*   1.31 JBB 04 JUL-02 Added preprocessor code
+*   1.29 AWG 20-Jun-01 Removed QuadCodeComponent function prototype & HExtra/VExtra
+*   1.29 AWG 22-May-01 Added support for DCT16
+*   1.28 JBB 05-May-01 Changes for VP5 (new entropytablebits and tokenextra chgs
+*   1.27 JBB 23-Mar-01 Changed QuickCompress datatype from BOOL to INT32
+*   1.26 JBB 11 Feb 01 Merged in: added vars for map ca move ac choice to right after dc
+*   1.25 PGW 31 Jan 01 Added some stats variables and VP5 Mv entropy tables.
+*   1.24 JBB 30 Nov 00 Version number changes
+*   1.23 JBB 15 Nov 00 Cleaned out ifdefs
+*   1.22 JBB 15 Oct 00 Added First Pass Function
+*   1.21 JBB 11 Sep 00 new function pointers for subtract removed transxquant
+*   1.20 JBB 07 Sep 00 Changed error metrics to Unsigned int
+*   1.19 JBB 24 Aug 00 Ansi C compatible
+*   1.18 JBB 27Jul00   added checks on Mallocs
+*   1.17 JBB 24Jul00   Changed error functions to return INT32 instead of double
+*   1.16 PGW 12 Jul 00 Removed CompAutoKeyFrameThreshold.
+*   1.15 PGW 29 Jun 00 Removed instance variable CarryOverAdaptionEnabled.
+*   1.14 PGW 27 Jun 00 Added QTargetModifier[]. Changes to CONFIG_TYPE2.
+*   1.13 JBB 30/05/00  Removed hard coded size limits
+*   1.12 JBB 22/05/00  Added OriginalDC support to remove max_fragments depends
+*   1.11 YX  13/04/00  Add function pointers for new optimizations
+*   1.10 YX  06/04/00  More buffers alligned MMX Fdct
+*   1.09 YX  20/03/00  32 Byte alligned buffers, Back to Integer Forward DCT
+*                      Additional Function pointers for optimized code
+*   1.08 PGW 17/03/00  Changes to support seperate Y and UV entropy tables.
+*                      Added PreProcFilterLevel to allow control of preprecessor
+*                      filter level.
+*   1.07 YX  09/03/00  Change to use floating point forward DCT
+*   1.06 PGW 17/12/99  Draw dib functionality removed.
+*   1.05 PGW 05/10/99  Remove some Windows dependancies for VFW compressor.
+*   1.04 PGW 20/07/99  Rate targeting corrections for VFW version of codec
+*   1.03 PGW 15/07/99  Added QuickCompress flag.
+*   1.02 PGW 05/07/99  Added GetFOURMVExhaustiveSearch() function
+*   1.01 PGW 29/06/99  Added GetMBMVExhaustiveSearch() function.
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef __INC_COMPDLL_H
+#define __INC_COMPDLL_H
+
+#define MIN_BPB_FACTOR          0.1
+#define MAX_BPB_FACTOR          10.0
+
+#define KEY_FRAME_CONTEXT       5
+
+#include "codec_common.h"
+#include "preprocif.h"
+#include "preproc.h"
+#include "pbdll.h"
+#include "vp50_comp_interface.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/
+
+// Debug/stats code
+//#define PSNR_ON
+
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/
+
+typedef struct CONFIG_TYPE2
+{	// bandwidth, frame-rate and Q settings (per the field names); CP_INSTANCE holds one as its Configuration member
+    UINT32 TargetBandwidth;
+    UINT32 OutputFrameRate;
+
+    UINT32 FirstFrameQ;
+    UINT32 BaseQ;
+    UINT32 MaxQ;				// Absolute Max Q allowed.
+    UINT32 ActiveWorstQuality;	// Reflects worst quality Currently allowed (specified as an index where 0 is worst quality)
+    UINT32 ActiveBestQuality;	// Reflects best quality currently allowed (specified as an index where 0 is worst quality)
+
+} CONFIG_TYPE2;
+
+
+/* Defines the largest positive integer expressable with a standard int type */
+/****************************************************************************
+* *     Type declarations
+****************************************************************************
+*/
+
+typedef enum
+{
+    DCT_COEF_TOKEN,
+    MODE_TOKEN,
+    BLOCKMAP_TOKEN,
+    MV_TOKEN
+} TOKENTYPE;
+// a token value plus an extra word; CP_INSTANCE's CoeffTokens / CoeffTokenPtr members point at these
+typedef struct _TOKENEXTRA
+{
+    INT32  Token;
+    UINT32 Extra;
+} TOKENEXTRA;
+
+// presumably the slope (M) and intercept (C) of a line - TODO confirm against users of LINE_EQ2
+typedef struct LineEq2
+{
+    double  M;
+    double  C;
+
+} LINE_EQ2;
+// each context is held as a pointer plus a by-value copy of the same type; CP_INSTANCE keeps a [MAX_MODES][6] array of these (MbDcContexts)
+typedef struct
+{
+	BLOCK_CONTEXTA *  AbovePtr;
+	BLOCK_CONTEXTA    Above;
+	BLOCK_CONTEXT *  LeftPtr;
+	BLOCK_CONTEXT    Left;
+	Q_LIST_ENTRY  *  LastDcPtr;
+	Q_LIST_ENTRY     LastDc;
+
+} MB_DC_CONTEXT;
+
+/****************************************************************************
+*  MACROS
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Global Variables
+*****************************************************************************
+*/
+
+//****************************************************************
+// Function Pointers now library globals! (extern: only declared in this header)
+extern UINT32 (*GetSAD16)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32);
+extern UINT32 (*GetSadHalfPixel16)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32);
+extern void   (*fdct_short) ( INT16 * InputData, INT16 * OutputData );
+extern void   (*idctc[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData );	// array of 65 function pointers
+extern UINT32 (*GetSAD)(UINT8 *, INT32, UINT8 *, INT32, UINT32, UINT32) ;
+//extern UINT32 (*GetNextSAD)(UINT8 *, INT32, UINT8 *, UINT32, UINT32 );
+extern UINT32 (*GetSadHalfPixel)(UINT8 *, INT32, UINT8 *, UINT8 *, INT32, UINT32, UINT32  );
+extern UINT32 (*GetInterError)( UINT8 *, INT32, UINT8 *,  UINT8 *, INT32 );
+extern UINT32 (*GetIntraError)( UINT8 *, INT32);
+extern void   (*Sub8)( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+extern void   (*Sub8_128)( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride );
+extern void   (*Sub8Av2)( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1, INT32 SourceStride, INT32 ReconStride );
+
+//****************************************************************
+
+
+
+
+#define HUGE_ERROR              (1<<28)  //  Out of range test value
+
+#define MAX_SEARCH_SITES        33      //  Number of search sites for a 4-step search (at pixel accuracy)
+
+typedef struct CP_INSTANCE * xCP_INST;
+typedef struct CP_INSTANCE
+{	// compressor instance; the playback instance (pb) is embedded as the first member
+    PB_INSTANCE pb; // playback
+    INT32  DropCounter;
+
+    //****************************************************************************************************
+    // Compressor Configuration
+    CONFIG_TYPE2 Configuration;
+
+    YUV_BUFFER_CONFIG InputConfig;
+    YUV_BUFFER_CONFIG YuvInputData;
+    INT32  SizeStep;
+
+    INT32  QuickCompress;
+    BOOL   GoldenFrameEnabled;
+    BOOL   InterPrediction;
+    BOOL   MotionCompensation;
+    BOOL   AutoKeyFrameEnabled ;
+    INT32  ForceKeyFrameEvery ;
+    INT32  AutoKeyFrameThreshold ;
+    UINT32 LastKeyFrame ;
+    UINT32 MinimumDistanceToKeyFrame ;
+    INT32  KeyFrameDataTargetOrig ;        // Data rate target for key frames
+    INT32  KeyFrameDataTarget ;        // Data rate target for key frames
+    UINT32 KeyFrameFrequency ;
+    BOOL   DropFramesAllowed ;
+	BOOL   DropFrame;
+    INT32  DropCount ;
+    UINT32 QualitySetting;
+    UINT32 PreProcFilterLevel;
+    BOOL   AllowSpatialResampling;
+	UINT8  RdOpt;		// 0 - off, 1 - basic rd on, 2 - all rd options on
+
+
+    // Compressor Statistics
+    double TotErrScore;
+    UINT32 InterError;
+    UINT32 MVErrorPerBit; 
+    UINT32 ErrorPerBit; 
+    UINT32 IntraError;
+    INT64  KeyFrameCount ;                          // Count of key frames.
+    INT64  TotKeyFrameBytes ;
+    UINT32 LastKeyFrameSize ;
+    UINT32 PriorKeyFrameSize[KEY_FRAME_CONTEXT];
+    UINT32 PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
+    INT32  FrameQuality[6];
+    int    DecoderErrorCode;        // Decoder error flag.
+    INT32  ThreshMapThreshold;
+    INT32  TotalMotionScore;
+    INT64  TotalByteCount;
+    INT32  FixedQ;
+
+    // Frame Statistics
+    INT64  CurrentFrame;
+    UINT32 LastFrameSize;
+    UINT32 ThisFrameSize;
+    BOOL   ThisIsFirstFrame;
+    BOOL   ThisIsKeyFrame;
+	BOOL   GfRecoveryFrame;
+
+    INT32  MotionScore;
+	UINT32  FirstSixthBoundary;		// Macro block index marking the first sixth of the image
+	UINT32  LastSixthBoundary;		// Macro block index marking the last sixth of the image
+
+    /* Rate Targeting variables (PGW 08/05/96). */
+    double BpbCorrectionFactor;
+	double KeyFrameBpbCorrectionFactor;
+
+    // Controlling Block Selection
+    UINT32 MVChangeFactor;
+    UINT32 FourMvChangeFactor;
+    UINT32 ExhaustiveSearchThresh;
+    UINT32 MinImprovementForFourMV;
+    UINT32 FourMVThreshold;
+    UINT32 IntraThresh;
+
+	UINT32 MinErrorForMacroBlockMVSearch;
+	UINT32 MinErrorForBlockMVSearch;
+	UINT32 MinErrorForGoldenMVSearch;
+
+
+    //****************************************************************************************************
+
+
+    //****************************************************************************************************
+    // Frames
+    // Used in the selective convolution filtering of the Y plane.
+    YUV_BUFFER_ENTRY *yuv1ptr;
+    YUV_BUFFER_ENTRY *yuv1ptrAlloc;
+    //****************************************************************************************************
+
+    //****************************************************************************************************
+    // Token Buffers
+    TOKENEXTRA *CoeffTokens;
+    TOKENEXTRA *CoeffTokensAlloc;
+    TOKENEXTRA *CoeffTokenPtr;
+
+    INT16  LastDC[3];
+
+    BOOL_CODER bc;
+
+    //****************************************************************************************************
+
+    //****************************************************************************************************
+    // SuperBlock, MacroBLock and Fragment Information
+    // Coded flag arrays and counters for them
+
+    //****************************************************************************************************
+    // Live Codec Variables
+
+    UINT8  *DataOutputBuffer;
+    //****************************************************************************************************
+
+    //****************************************************************************************
+    // STATICS COPIED FROM C FILES (USED IN MULTIPLE FUNCTIONS BUT ARE NOT REALLY INSTANCE GLOBALS )
+    // copied from cencode.c
+    UINT8  MBCodingMode;        // Coding mode flags
+
+    // copied from mcomp.c
+    INT32  MVPixelOffsetY[MAX_SEARCH_SITES];
+    UINT32 InterTripOutThresh;
+    INT32  MVSearchSteps;
+    INT32  MVOffsetX[MAX_SEARCH_SITES];
+    INT32  MVOffsetY[MAX_SEARCH_SITES];
+    INT32  HalfPixelRef2Offset[9];    // Offsets for half pixel compensation
+    INT8   HalfPixelXOffset[9];       // Half pixel MV offsets for X
+    INT8   HalfPixelYOffset[9];       // Half pixel MV offsets for Y
+
+
+    Q_LIST_ENTRY    *quantized_list;
+    Q_LIST_ENTRY    *quantized_listAlloc;
+
+    MOTION_VECTOR   MVector;
+    INT16  *DCT_codes;          //Buffer that stores the result of Forward DCT
+    INT16  *DCTDataBuffer;      //Input data buffer for Forward DCT
+    INT16  *DCT_codesAlloc;
+    INT16  *DCTDataBufferAlloc;
+
+
+    // Motion compensation related variables
+    UINT32  MvMaxExtent;
+
+    INT32  byte_bit_offset;
+
+    // copied from cbitman.c
+    UINT32 NearestError[4];
+    UINT32 NearError[4];
+    UINT32 ZeroError[4];
+    UINT32 BestError[4];
+
+	UINT32 ErrorBins[128];
+
+    //****************************************************************
+    // instances (used for reconstructing buffers and to hold tokens etc.)
+    xPP_INST pp;    // preprocessor
+
+#if defined PSNR_ON
+    double TotPsnr;
+    double MinPsnr;
+    double MaxPsnr;
+    double TotYPsnr;
+    double MinYPsnr;
+    double MaxYPsnr;
+    double TotUPsnr;
+    double MinUPsnr;
+    double MaxUPsnr;
+    double TotVPsnr;
+    double MinVPsnr;
+    double MaxVPsnr;
+#endif
+
+    // Structures for entropy contexts
+    UINT32 FrameDcTokenDist[2][MAX_ENTROPY_TOKENS];
+    UINT32 FrameAcTokenDist[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+	// Storage for the first frame entropy probabilities.
+	// These are re-used for all subsequent key frames when we are operating in
+	// error (drop frame) resilient mode.
+	UINT8 FirstFrameDcProbs[2*(MAX_ENTROPY_TOKENS-1)];
+	UINT8 FirstFrameAcProbs[2*PREC_CASES*VP5_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];
+
+    // The Plane Y or UV to which the current block belongs (0 = Y 1 = UV)
+    UINT8  EncoderPlane;
+
+    // Last token coded this block.
+    UINT8  ThisBlockLastToken;
+    UINT8  ZeroCount;
+    //UINT32 MBModeCount[MAX_MODES+1];
+    UINT32 MBModeCount[4][MAX_MODES+1];
+    UINT32 BModeCount[MAX_MODES+1];
+	UINT32 CountModeSameAsLast[4][MAX_MODES+1];
+	UINT32 CountModeDiffFrLast[4][MAX_MODES+1];
+
+    UINT32 ModeCodeArray[4][MAX_MODES+1][MAX_MODES+1];
+    UINT8  ModeLengthArray[4][MAX_MODES+1][MAX_MODES+1];
+
+    // TEMP 
+    UINT32 ModeBitCount[2];
+    INT64  ModeComplexity[2];
+    UINT32 ModeBlocks[2];
+
+	UINT32 MBModeCostBoth[11];
+	UINT32 MBModeCostNoNear[11];
+	UINT32 MBModeCostNoNearest[11];
+	UINT32 BModeCost[11];
+	UINT32 MvBaselineDist[2][MV_ENTROPY_TOKENS];
+	UINT32 FrameMvCount;
+	UINT32 EstMVCost[2][MV_ENTROPY_TOKENS];
+	UINT32 EstModeCost[2][MAX_MODES];
+	
+    UINT32 nExperimentals;
+    INT32 Experimental[C_SET_EXPERIMENTAL_MAX - C_SET_EXPERIMENTAL_MIN + 1];
+
+	// Bandwidth and buffer control variables
+	INT32  PerFrameBandwidth;				// Target for average bandwidth per frame.
+    INT32  InterFrameTarget;				// Average "inter" frame bit target corrected for key frame costs
+    INT32  ThisFrameTarget;					// Modified rate target for this frame
+
+	BOOL   BufferedMode;					// FALSE = Tight buffering (Video Conferencing mode); TRUE = normal buffered/streaming mode.
+	BOOL   ErrorResilliantMode;				// A mode used for VC etc. to make the codec more resilient to dropped frames.
+	INT32  StartingBufferLevel;             // The initial encoder buffer level
+	INT32  CurrentBufferLevel;				// Current decoder buffer fullness state 
+	INT32  OptimalBufferLevel;				// The buffer level target we strive to reach / maintain.
+	INT32  DropFramesWaterMark;				// Buffer fullness watermark for forced drop frames.
+	INT32  ResampleDownWaterMark;			// Buffer fullness watermark for downwards spatial re-sampling
+	INT32  ResampleUpWaterMark;				// Buffer fullness watermark where returning to larger image size is considered
+	INT32  LastKeyFrameBufferLevel;			// Used to monitor changes in buffer level when considering re-sampling.
+
+	INT32  Speed;
+	INT32  CPUUsed;
+
+	UINT32 ModeMvCostEstimate;				// Running total of cost estimates for modes and MVs in this frame.
+
+	// Variables used in regulating cost of new motion vectors based upon an estimate of new MV frequency.
+	UINT32 FrameNewMvCounter;
+	UINT32 FrameModeCounter;
+	UINT32 MvEpbCorrection;
+	UINT32 LastFrameNewMvUsage;				// 0 = Low 9 = High
+
+	UINT32 * MbBestErr;
+	UINT32 * MbBestErrAlloc;
+
+    UINT32 EstDcTokenCosts[2][MAX_ENTROPY_TOKENS];
+    UINT32 EstAcTokenCosts[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS];
+
+	// Data structures used to save and restore MB and DC contexts during rate distortion
+	MACROBLOCK_INFO CopyMbi;
+	BLOCK_CONTEXTA AboveCopyY[2];		
+	BLOCK_CONTEXTA AboveCopyU;		
+	BLOCK_CONTEXTA AboveCopyV;		
+	BLOCK_CONTEXT LeftYCopy[2];
+	BLOCK_CONTEXT LeftUCopy;
+	BLOCK_CONTEXT LeftVCopy;
+	Q_LIST_ENTRY  LastDcYCopy[3];
+	Q_LIST_ENTRY  LastDcUCopy[3];
+	Q_LIST_ENTRY  LastDcVCopy[3];
+
+    // Above and left context for encoding
+    UINT8  *aboveDcTokensAlloc[3];       // 0 for y, 1 for u and 2 for v
+    UINT8  *aboveDcTokens[3];            // 0 for y, 1 for u and 2 for v
+    UINT8  leftTokens[4][64];            // 0 1 for y 2 for u and 3 for v
+
+
+	MB_DC_CONTEXT MbDcContexts[MAX_MODES][6];		// Per mode, per block position data structure for an MB
+
+	UINT32 avgPickModeTime;
+	UINT32 avgEncodeTime;
+	UINT32 avgPackVideoTime;
+
+	UINT32 ForceHScale;
+	UINT32 ForceHRatio;
+	UINT32 ForceVScale;
+	UINT32 ForceVRatio;
+	BOOL   ForceInternalSize;
+
+	PreProcInstance preproc;
+    INT32  FrameRateInput;
+    INT32  FrameRateDropFrames;
+    INT32  FrameRateDropCount;
+
+    
+    //
+    UINT32 EncoderLoopFilterOff;
+    // variables for 5 region diamond MV search
+    INT32  DSMVSearchSteps;
+    INT32  DSMVPixelOffsetY[MAX_SEARCH_SITES];
+    INT32  DSMVOffsetX[MAX_SEARCH_SITES];
+    INT32  DSMVOffsetY[MAX_SEARCH_SITES];
+
+    // per-instance motion search function pointers; both take the owning instance as their first argument
+    UINT32 (*FindMvViaSearch)(  xCP_INST cpi,
+                                UINT8 *SrcPtr,
+                                INT32  SourceStride,    
+                                UINT8 *RefPtr,
+                                INT32 ReconStride,
+                                MOTION_VECTOR *MV,
+                                UINT8 **BestBlockPtr,
+                                UINT32 BlockSize);
+
+
+    void (*FindBestHalfPixelMv)(xCP_INST cpi,
+                                UINT8 *SrcPtr,
+                                INT32 SourceStride,
+                                UINT8 *RefPtr,
+                                INT32 ReconStride,
+                                MOTION_VECTOR *MV,
+                                UINT8 **BestBlockPtr,
+                                UINT32 BlockSize,
+                                UINT32 MinError);
+
+
+
+} CP_INSTANCE;
+
+// NOTE(review): unlike the function pointers declared extern earlier in this header, these two lack extern, so every including file gets a tentative definition - confirm intent
+UINT32 (*GetMBFrameVertVar)(CP_INSTANCE *cpi);
+UINT32 (*GetMBFieldVertVar)(CP_INSTANCE *cpi);
+
+/****************************************************************************
+*  Functions.
+*****************************************************************************
+*/
+
+// All prototypes below are declarations only; most take the compressor instance as their first argument.
+extern void UpdateFrame(CP_INSTANCE *cpi);
+
+extern UINT32 QuadCodeDisplayFragments (CP_INSTANCE *cpi);
+// NOTE(review): the revision history in this header (entry 1.29) says the QuadCodeComponent prototype was removed, yet it is still declared here
+extern UINT32 QuadCodeComponent ( CP_INSTANCE *cpi, UINT32 FirstSB, UINT32 SBRows, UINT32 SBCols, UINT32 HExtra, UINT32 VExtra, INT32 SourceStride );
+
+extern void AcquireSingleFrame( CP_INSTANCE *cpi, UINT32  CurrFrame );
+extern void AcquireFirstFrame(CP_INSTANCE *cpi);
+extern void AcquireNextFrame( CP_INSTANCE *cpi, UINT32 CurrFrame );
+
+extern void InitFrameTimer( CP_INSTANCE *cpi);
+
+extern UINT32 EncodeData(CP_INSTANCE *cpi);
+
+// Loop optimizations
+extern void InitMapArrays();	// empty parentheses: declared without a prototype in C
+
+// Codec
+extern UINT32 DPCMTokenizeBlock  ( CP_INSTANCE *cpi, INT32 FragIndex, INT32 SourceStride );
+extern void SUB8( UINT8 *FiltPtr, UINT8 *ReconPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+               INT32 SourceStride, INT32 ReconStride );
+extern void SUB8_128( UINT8 *FiltPtr, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+               INT32 SourceStride );
+extern void SUB8AV2( UINT8 *FiltPtr, UINT8 *ReconPtr1, UINT8 *ReconPtr2, INT16 *DctInputPtr, UINT8 *old_ptr1, UINT8 *new_ptr1,
+              INT32 SourceStride, INT32 ReconStride );
+
+// SUB8 / SUB8_128 / SUB8AV2 above have the same parameter lists as the Sub8 / Sub8_128 / Sub8Av2 function pointers declared earlier in this header
+
+extern void  PackEOBRun(CP_INSTANCE *cpi);
+extern void ConvertBmpToYUV( PB_INSTANCE *pbi, UINT8 * BmpDataPtr, UINT8 * YuvBufferPtr );
+extern CP_INSTANCE * CreateCPInstance(void);
+extern void DeleteCPInstance(CP_INSTANCE **cpi);
+extern void CMachineSpecificConfig(void);
+// extern void fdct_slow16 ( INT16 * InputData, INT16 * OutputData );
+extern void fdct_slowf ( INT16 * InputData, INT16 * OutputData );
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );	// duplicate of the declaration on the previous line
+
+extern BOOL EAllocateFragmentInfo(CP_INSTANCE *cpi);
+extern BOOL EAllocateFrameInfo(CP_INSTANCE *cpi);
+extern void EDeleteFragmentInfo(CP_INSTANCE *cpi);
+extern void EDeleteFrameInfo(CP_INSTANCE *cpi);
+extern UINT32 PickIntra( CP_INSTANCE *cpi );
+extern UINT32 PickModes( CP_INSTANCE *cpi, UINT32 *InterError, UINT32 *IntraError);
+
+extern INT32  GetSpeckSumAbsDiffs( UINT8 * NewDataPtr, UINT8 * RefDataPtr,
+                              INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+extern INT32  GetNextSpeckSumAbsDiffs( UINT8 * NewDataPtr, UINT8 * RefDataPtr,
+                              INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+
+extern INT32  GetHalfPixelSpeckSumAbsDiffs( UINT8 * SrcData, UINT8 * RefDataPtr1, UINT8 * RefDataPtr2,
+                              INT32 SourceStride, INT32 ErrorSoFar, INT32 BestSoFar );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex) ;
+
+//  cx\generic\encodembs.c
+extern void EncodeFrameMbs(CP_INSTANCE *cpi);
+
+
+//  cx\generic\vfw_comp_if.c
+extern void CCONV ChangeEncoderSize(CP_INSTANCE* cpi, UINT32 Width, UINT32 Height);
+extern void CopyOrResize(CP_INSTANCE* cpi);
+
+//  cx\generic\tokenize.c
+extern UINT16 TokenizeFrag(CP_INSTANCE* cpi, INT16*  RawData, UINT16 BlockSize, UINT32 Plane, BLOCK_CONTEXTA* Above, BLOCK_CONTEXT* Left);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h
new file mode 100644
index 00000000..c2dfea9a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemode.h
@@ -0,0 +1,100 @@
+/****************************************************************************
+*        
+*   Module Title :     decodemode.h
+*
+*   Description  :     functions for decoding modes and motionvectors 
+*
+*   AUTHOR       :     James Bankoski
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 30OCT01  New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking. */
+#endif
+
+/****************************************************************************
+*  Implicit Imports
+*****************************************************************************
+*/        
+extern UINT8 Stats[9][4][4][4];
+extern UINT8 NNStats[7][4][4][4];
+extern UINT8 NN2Stats[7][4][4][4];
+extern UINT8 blockStats[3][4][4][4];
+
+#define MODETYPES 3
+#define MODEVECTORS 16
+#define PROBVECTORXMIT 174
+#define PROBIDEALXMIT 254
+
+
+/****************************************************************************
+*  Exported data structures.
+*****************************************************************************
+*/        
+
+// NOTE(review): this header has no include guard, so including it twice in one source file would redefine the types below
+typedef struct _modeContext
+{
+	UINT8 left;
+	UINT8 above;
+	UINT8 last;
+} MODE_CONTEXT;
+// one branch of an HNODE: selector says whether value is a pointer or a token
+typedef struct _htorp
+{
+    unsigned char selector : 1;   // 1 bit selector 0->ptr, 1->token
+    unsigned char value : 7;      // remaining 7 bits; bit-fields of type unsigned char are implementation-defined in C
+} torp;
+// a two-way node (left / right); the *CodingMode arrays declared in this header are arrays of these
+typedef struct _hnode
+{
+	torp left;
+	torp right;
+} HNODE;
+
+typedef enum _MODETYPE 
+{
+	MACROBLOCK,
+	NONEAREST_MACROBLOCK,
+	NONEAR_MACROBLOCK,
+	BLOCK
+} MODETYPE;
+
+
+#ifndef MAPCA	// __inline is applied to the declaration only when MAPCA is not defined
+__inline
+#endif
+    int mbClass(int i);	// declaration only; no body is provided in this header
+
+
+/****************************************************************************
+*  Imports
+*****************************************************************************
+*/
+extern HNODE MBCodingMode[9];
+extern HNODE NN2MBCodingMode[8];
+extern HNODE NNMBCodingMode[7];
+extern HNODE BlockCodingMode[3];
+extern UINT8 BaselineXmittedProbs[4][2][MAX_MODES];
+
+/****************************************************************************
+*  Function Prototypes
+*****************************************************************************
+*/
+void DecodeModeProbs(PB_INSTANCE *pbi);
+
+extern void FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol,
+    MOTION_VECTORA* nearest, MOTION_VECTORA* nextnearest, UINT8 Frame,int *type);
+
+extern void	BuildModeTree(PB_INSTANCE *pbi);
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h
new file mode 100644
index 00000000..2dcab158
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/decodemv.h
@@ -0,0 +1,45 @@
+/****************************************************************************
+*        
+*   Module Title :     decodemv.h
+*
+*   Description  :     functions for decoding modes and motionvectors 
+*
+*   AUTHOR       :     James Bankoski
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.00 JBB 30OCT01  New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#ifndef STRICT
+#define STRICT              /* Strict type checking. */
+#endif
+
+
+#define MV_NODES	11
+
+/****************************************************************************
+*  Implicit Imports
+*****************************************************************************
+*/        
+/****************************************************************************
+*  Exported data structures and functions
+*****************************************************************************
+*/        
+
+extern void FindNearestandNextNearest(PB_INSTANCE* pbi, UINT32 MBrow, UINT32 MBcol,	// duplicates the declaration in decodemode.h (same signature)
+    MOTION_VECTORA* nearest, MOTION_VECTORA* nextnearest, UINT8 Frame, int *type);
+
+extern void ConfigureMvEntropyDecoder( PB_INSTANCE *pbi, UINT8 FrameType );
+
+extern void decodeMotionVector(	PB_INSTANCE *pbi,	MOTION_VECTOR *mv,	MOTION_VECTOR *nearestMv);	// note: takes MOTION_VECTOR, not MOTION_VECTORA
+
+extern UINT8 MvUpdateProbs[2][MV_NODES];	// 2 x 11 table (MV_NODES is 11)
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h b/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h
new file mode 100644
index 00000000..7c4995a6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/huffman.h
@@ -0,0 +1,93 @@
+/****************************************************************************
+*
+*   Module Title :     Huffman.h
+*
+*   Description  :     Video CODEC
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.04 YWX 06-Nov-01 Changed for compatibility with Equator C compiler
+*   1.03 JBB 26 Jan 01 New Huffman Code
+*	1.02 PGW 11 Oct 00 Deleted reference to FrequencyCounts[].
+*   1.01 PGW 15/03/00  Changes re. updated entropy tables.
+*   1.00 PGW 12/10/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+#ifndef HUFFMAN_H
+#define HUFFMAN_H
+
+#include "type_aliases.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+/****************************************************************************/
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/  
+
+typedef struct _tokenorptr	// Tagged 7-bit value: a token or a pointer, chosen by selector
+{
+    unsigned int selector : 1;   // 1 bit selector 0->ptr, 1->token
+    unsigned int value : 7;      // 7-bit payload, interpreted according to selector
+} tokenorptr;
+
+
+typedef struct _huffnode	// Tree node: a left and a right branch plus a frequency byte
+{
+	union	// left branch, viewable either as a raw char or as a tagged tokenorptr
+	{
+		char l;	// raw view of the same storage as 'left'
+		tokenorptr left;	// tagged view: selector + 7-bit value
+    } leftunion; 
+	union	// right branch, same two views as leftunion
+	{
+		char r;	// raw view of the same storage as 'right'
+		tokenorptr right;	// tagged view: selector + 7-bit value
+    } rightunion; 
+	unsigned char freq;	// NOTE(review): presumably the node's frequency/weight used while building the tree -- confirm
+
+} HUFF_NODE;
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+extern void VP5_BuildHuffTree(	// NOTE(review): presumably fills hn from the counts array -- confirm in the implementation
+    HUFF_NODE *hn, 
+    unsigned int *counts, 
+    int values );
+
+extern void VP5_CreateCodeArray( HUFF_NODE *hn,	// takes a node index plus a running code value and length
+                      int node,
+                      unsigned int *codearray,
+                      unsigned char *lengtharray,
+					  int codevalue, 
+                      int codelength );
+
+extern void VP5_EncodeValue(	// takes the BOOL_CODER to operate on, a HUFF_NODE array, and a value/length pair
+    BOOL_CODER *bc,
+    HUFF_NODE *hn,
+    int value,
+    int length);
+
+
+
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h b/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h
new file mode 100644
index 00000000..f72453d3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/misc_common.h
@@ -0,0 +1,53 @@
+/****************************************************************************
+*
+*   Module Title :     MiscCommon.h
+*
+*   Description  :     Miscellaneous common routines header file
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.00 PGW 15/10/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+
+#ifndef MISCCOMP_H
+#define MISCCOMP_H
+
+#include "type_aliases.h"
+#include "compdll.h"
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Function Prototypes
+*****************************************************************************
+*/
+extern double GetEstimatedBpb( CP_INSTANCE *cpi, UINT32 TargetQIndex );	// the only prototype here that returns a value (double)
+extern void UpdateBpbCorrectionFactor( CP_INSTANCE *cpi, UINT32 FrameSize );
+extern void UpRegulateMB( CP_INSTANCE *cpi, UINT32 RegulationQ, UINT32 SB, UINT32 MB, BOOL NoCheck );
+extern void ClampAndUpdateQ ( CP_INSTANCE *cpi, UINT32 QIndex );
+extern void RegulateQ( CP_INSTANCE *cpi, INT32 TargetBits );	// TargetBits is signed (INT32)
+extern void ConfigureQuality( CP_INSTANCE *cpi, UINT32 QualityValue );
+extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
+
+extern void PredictFilteredBlock(PB_INSTANCE* pbi, INT16* OutputPtr, BLOCK_POSITION bp);	// takes PB_INSTANCE (declared in pbdll.h), not CP_INSTANCE; this header does not include pbdll.h directly
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h b/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h
new file mode 100644
index 00000000..bea27745
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/pbdll.h
@@ -0,0 +1,535 @@
+/****************************************************************************
+*
+*   Module Title :     PBDLL
+*
+*   Description  :     Video CODEC DEMO playback dll header
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.40 YWX 17-Dec-02 Added DeInterlaceMode
+*   1.39 YWX 06-Nov-01 Changed to align the MB coeffs buffer memory
+*   1.38 AWG 22-MAY-01 Added support for DCT16
+*   1.37 JBB 01-MAY-01 Added features to support vp5
+*   1.36 JBB 06-Apr-01 Added cpufree variable
+*   1.35 JBB 23-Mar-01 New data structure defined for DC prediction
+*   1.34 JBX 22-Mar-01 Merged with vp4-mapca bitstream
+*   1.33.PGW 08 Feb 01 Added LastFrameQIndex.
+*   1.32 PGW 25 Jan 01 Changes to support new motion vector entropy coding in VP5.
+*   1.31 JBB 26-JAN-01 Fixes for New Huffman Strategy
+*   1.30 YWX 27-Nov-00 Added function Pointers for simple deblocker, i.e.
+*                      Deblocking filter for low end machines
+*   1.29 YWX 02-Nov-00 Added function pointers for new loopfilter
+*   1.28 PGW 16 Nov 00 Deleted redundant data structures.
+*                      Added BlockPatternPredictor.
+*   1.27 YWX 02-Nov-00 Added function pointers for new loopfilter
+*   1.26 YWX 19_Oct-00 Added function pointers for 1-2 scaling
+*   1.25 JBB 17-oct-00 Ifdefs around version information
+*   1.24 YWX 17-Oct-00 Added *FragCoordinates for new loop filter strategy
+*   1.23 PGW 15 Oct 00 Added select_InterUV_quantiser() and related data structures.
+*   1.22 PGW 11 Oct 00 Added CreateHuffmanTrees() and DestroyHuffmanTrees()
+*                      Added void SelectHuffmanSet() and Huffman selector variables.
+*   1.23 YWX 11-Oct-00 Added LastFrameNoMvRecon and LastFrameNoMvReconAlloc
+*   1.22 YWX 04 Oct 00 Merged scaling and new loop filtering code
+*   1.21 YWX 06 Sep 00 Added new deringing functions pointers
+*   1.21 PGW 18 Sep 00 QThreshTable[] made instance specific.
+*                      Added InitQTables().
+*   1.20 JBB 25 Aug 00 Versioning differences
+*   1.19 JBB 21 Aug 00 New More Blurry in high variance area deringer
+*   1.18 YWX 2  Aug 00 Added function pointers for Postproc
+*   1.17 JBB 28 Jul 00 Added Fragment Variance Value for eliminating deringer
+*                      in some cases...
+*   1.16 JBB 27 Jul 00 Moved kernel modifiers to pbi, malloc checks
+*   1.15 SJL 24Jul00   Changes for Mac
+*   1.14 YWX 15/05/00  More variable and function pointers for postprocessor
+*   1.13 YWX 08/05/00  Added #ifdef s and function pointers for postprocessor
+*   1.12 JYX 05/05/00  Added PostProcessing (PostProcessBuffer + PostProcessLevel)
+*   1.11 SJL 20/04/00  Added ability to enable new dequant code for the dxer.
+*   1.10 JYX 06/04/00  Aligned Small Buffers & Live Codec Reordering
+*   1.09 SJL 22/03/00  Added func ptr for the loop filter.
+*   1.08 JBB 20/03/00  32 Byte aligned buffers, Back to Integer Forward DCT
+*                      Additional function pointers for optimized code
+*   1.07 PGW 20/03/00  Removed InterIntra.
+*   1.06 PGW 17/03/00  Changes to support separate Y and UV entropy tables.
+*   1.05 JBB 29/01/00  Removed Globals added Playback only function externs !
+*   1.04 PGW 17/12/99  Draw dib functionality removed.
+*   1.03 PGW 22/11/99  Changes relating to restructuring of block map stuff.
+*   1.02 PGW 15/07/99  Added bit extraction variables.
+*   1.01 PGW 09/07/99  Added code to support profile timing
+*   1.00 PGW 28/06/99  New Configuration baseline.
+*
+*****************************************************************************
+*/
+
+#ifndef __INC_PBDLL_H
+#define __INC_PBDLL_H
+
+
+#define VAL_RANGE   256
+
+
+#include "codec_common.h"
+#include "huffman.h"
+#include "tokenentropy.h"
+#include "vfw_pb_interface.h"
+#include "postproc_if.h"
+#include "vputil_if.h"
+#include "quantize.h"
+#include "boolhuff.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/
+
+#ifdef MAPCA	// everything in this block applies to MAPCA builds only
+// switch to turn on the Data streamer
+#define DMAREADREFERENCE	// enables the Offset/BoundaryX/BoundaryY and ReferenceBlocks/mvX/mvY fields later in this header
+#define DMAWRITERECON	// enables ThisBandReconPtr, ReconstructedMBs and FillMem in PB_INSTANCE
+#define RECONSTRUCTMBATONCE	// enables ReconIndex[6] and switches ReconDataBuffer to a pointer-to-array-of-64
+
+#define __inline	// makes __inline expand to nothing; NOTE(review): __inline is a reserved identifier, so this relies on the toolchain tolerating it
+#endif
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/
+
+typedef enum	// Coding modes: ten real modes 0x0..0x9 plus the DO_NOT_CODE marker
+{
+    CODE_INTER_NO_MV        = 0x0,      // INTER prediction, (0,0) motion vector implied.
+    CODE_INTRA              = 0x1,      // INTRA i.e. no prediction.
+    CODE_INTER_PLUS_MV      = 0x2,      // INTER prediction, non zero motion vector.
+    CODE_INTER_NEAREST_MV   = 0x3,      // Use Last Motion vector
+    CODE_INTER_NEAR_MV      = 0x4,      // Prior last motion vector
+    CODE_USING_GOLDEN       = 0x5,      // 'Golden frame' prediction (no MV).
+    CODE_GOLDEN_MV          = 0x6,      // 'Golden frame' prediction plus MV.
+    CODE_INTER_FOURMV       = 0x7,      // Inter prediction 4MV per macro block.
+    CODE_GOLD_NEAREST_MV    = 0x8,      // Use Last Motion vector
+    CODE_GOLD_NEAR_MV       = 0x9,      // Prior last motion vector
+    DO_NOT_CODE             = 0x10       // Fake Mode. NOTE(review): 0x10 is 16, not 10 -- it skips 0xA..0xF and sizes VP5_Mode2Frame[DO_NOT_CODE] at 16 entries; confirm intended
+} CODING_MODE;
+
+typedef struct	// Flag, coding mode and motion vector packed into bit-fields (1 + 4 + 6 + 6 bits)
+{
+    unsigned int DisplayFragment  :  1;	// single-bit flag
+    unsigned int FragCodingMode   :  4;	// unsigned 4 bits hold 0..15: CODING_MODE 0x0..0x9 fit, DO_NOT_CODE (0x10) would not
+    int          MVectorX         :  6;	// NOTE(review): signedness of a plain-int bit-field is implementation-defined in C; if signed the range is -32..31
+    int          MVectorY         :  6;	// same caveat as MVectorX
+} FRAG_INFO;
+
+typedef struct _DCINFO	// Pairs a Q_LIST_ENTRY DC value with a short frame field
+{
+    Q_LIST_ENTRY dc;	// NOTE(review): presumably a DC coefficient value -- confirm
+    short frame;	// NOTE(review): presumably identifies the frame the DC value belongs to -- confirm
+} DCINFO;
+
+// defined so i don't have to remember which block goes where
+typedef enum	// Index (0..5) of each block within a macroblock: four Y blocks, then U, then V
+{
+    TOP_LEFT_Y_BLOCK        = 0,
+    TOP_RIGHT_Y_BLOCK       = 1,
+    BOTTOM_LEFT_Y_BLOCK     = 2,
+    BOTTOM_RIGHT_Y_BLOCK    = 3,
+    U_BLOCK                 = 4,
+    V_BLOCK                 = 5	// highest value; the [6]-sized per-block arrays in MACROBLOCK_INFO match this 0..5 range
+} BLOCK_POSITION;
+
+
+// all the information gathered from a block to be used as context in the next block
+typedef struct	// Full-size block context (64 token slots); FRAME_CONTEXT uses it for the Left* entries
+{
+    UINT8        Tokens[64];	// NOTE(review): presumably one token per coefficient position -- confirm
+    CODING_MODE  Mode;	// coding mode recorded for the block
+    UINT16       Frame;	// NOTE(review): presumably which reference frame the block used -- confirm
+    Q_LIST_ENTRY Dc;	// DC value recorded for the block
+    UINT32       EOBPos;	// NOTE(review): presumably the end-of-block position -- confirm
+    UINT32       unused;	// not used, per its name
+}  BLOCK_CONTEXT;
+
+typedef struct	// Compact variant of BLOCK_CONTEXT (one token slot, different field order); FRAME_CONTEXT uses it for the Above* pointers
+{
+    UINT32       EOBPos;	// NOTE(review): presumably the end-of-block position -- confirm
+    CODING_MODE  Mode;	// coding mode recorded for the block
+    UINT16       Frame;	// NOTE(review): presumably which reference frame the block used -- confirm
+    Q_LIST_ENTRY Dc;	// DC value recorded for the block
+    UINT8        Tokens[1];	// a single token slot, versus 64 in BLOCK_CONTEXT
+    UINT8        unused[3];	// NOTE(review): presumably padding so Tokens + unused occupy 4 bytes -- confirm
+}  BLOCK_CONTEXTA;
+
+typedef struct	// Motion vector with 16-bit signed components (distinct from MOTION_VECTOR, which is declared elsewhere)
+{
+	INT16		x;	// horizontal component
+	INT16		y;	// vertical component
+
+} MOTION_VECTORA;
+
+// all the contexts maintained for a frame
+typedef struct	// Left contexts held by value, above contexts held through pointers, plus last-DC values per plane
+{
+    BLOCK_CONTEXT    LeftY[2];   // 1 for each block row in a macroblock
+    BLOCK_CONTEXT    LeftU;
+    BLOCK_CONTEXT    LeftV;
+
+    BLOCK_CONTEXTA   *AboveY;	// NOTE(review): presumably an array with one entry per block column -- confirm at the allocation site
+    BLOCK_CONTEXTA   *AboveU;
+    BLOCK_CONTEXTA   *AboveV;
+
+    BLOCK_CONTEXTA   *AboveYAlloc;	// NOTE(review): presumably the raw allocation backing AboveY (the pointer to free) -- confirm
+    BLOCK_CONTEXTA   *AboveUAlloc;	// as AboveYAlloc, for U
+    BLOCK_CONTEXTA   *AboveVAlloc;	// as AboveYAlloc, for V
+
+    Q_LIST_ENTRY     LastDcY[3]; // 1 for each frame 
+    Q_LIST_ENTRY     LastDcU[3];	// as LastDcY, for U
+    Q_LIST_ENTRY     LastDcV[3];	// as LastDcY, for V
+
+} FRAME_CONTEXT;
+
+// Structure to hold last token values at each position in block
+typedef UINT8 TOKENBUFFER[256];
+
+//#define BIT_STATS				1
+#ifdef BIT_STATS
+#define BIT_STAT_CATEGORIES		8
+
+extern	UINT32 BitStats[BIT_STAT_CATEGORIES];
+extern 	UINT8  BitStatCategory;
+#endif
+
+typedef struct	// Working state for one macroblock; embedded in PB_INSTANCE as the member 'mbi'
+{
+    Q_LIST_ENTRY  (*CoeffsAlloc)[72];   // coefficients 64 per frag 4 y in raster order, u then v (NOTE(review): presumably the raw allocation behind Coeffs -- confirm)
+    Q_LIST_ENTRY  (*Coeffs)[72];    // coefficients 64 per frag 4 y in raster order, u then v (each row is declared with 72 entries, not 64)
+    CODING_MODE   Mode;             // mode macroblock coded as
+    CODING_MODE   BlockMode[6];     // one mode per block (6 entries); NOTE(review): original comment repeated Mode's text -- confirm meaning
+    MOTION_VECTOR Mv[6];            // one motion vector per block u and v calculated from rest
+
+    MOTION_VECTOR NearestInterMVect;// nearest mv in last frame
+    MOTION_VECTOR NearInterMVect;   // near mv in last frame
+    MOTION_VECTOR NearestGoldMVect; // nearest mv in gold frame
+    MOTION_VECTOR NearGoldMVect;    // near mv in gold frame
+    UINT32 MBrow;                   // mb row
+    UINT32 MBcol;                   // mb col
+
+    BLOCK_POSITION bp;              // block number 0 - 5
+    UINT32 Source;                  // address for source (compressor only); NOTE(review): a 32-bit integer cannot hold a 64-bit pointer
+    UINT32 SourceY;                 // starting row
+    UINT32 SourceX;                 // starting column
+    INT32  CurrentSourceStride;     // pitch of source (compressor only)
+    UINT32 Recon;                   // address in reconstruction buffer of block
+    INT32  CurrentReconStride;      // pitch of reconstruction
+    UINT32 Plane;                   // plane block is from
+    INT32  MvShift;                 // motion vector shift value
+    INT32  MvModMask;               // motion vector mod mask
+	INT32  FrameSourceStride;		// Stride of the frame
+	INT32  FrameReconStride;		// Stride of the frame
+
+#ifdef RECONSTRUCTMBATONCE
+    UINT32 ReconIndex[6];           // ReconIndex for each block
+#endif
+
+	UINT32 SourcePtr[6];			// address for source (compressor only)
+	UINT32 ReconPtr[6];				// NOTE(review): original comment repeated SourcePtr's text; presumably the per-block reconstruction address -- confirm
+	UINT32 StripPtr[6];	// one entry per block; purpose not documented in this header
+#ifdef DMAREADREFERENCE
+    INT32 Offset[6];	// one entry per block, DMAREADREFERENCE builds only
+    UINT32 BoundaryX[6];	// one entry per block, DMAREADREFERENCE builds only
+    UINT32 BoundaryY[6];	// one entry per block, DMAREADREFERENCE builds only
+#endif
+    BLOCK_CONTEXTA  *Above;          // above block context
+    BLOCK_CONTEXT  *Left;           // left block context
+    Q_LIST_ENTRY   *LastDc;         // last dc value seen
+
+	INT32  Interlaced;				// is the macroblock interlaced?
+
+} MACROBLOCK_INFO;
+
+/****************************************************************************
+*  MACROS
+*****************************************************************************
+*/
+
+// Enumeration of how block is coded
+#define CURRENT_ENCODE_VERSION  5
+#define CURRENT_DECODE_VERSION  5
+
+#define UMV_BORDER              32
+#define STRIDE_EXTRA            (UMV_BORDER * 2)	// evaluates to 64
+
+
+#define MAX_MV_EXTENT           31      //  Max search distance in half pixel increments
+#define MV_ENTROPY_TABLES       16
+#define MV_ENTROPY_TOKENS       ((MAX_MV_EXTENT * 2) + 1)	// evaluates to 63
+
+#define PPROC_QTHRESH           64
+
+#define MAX_MODES               10	// equals the number of CODING_MODE values 0x0..0x9 (DO_NOT_CODE excluded)
+
+#define DCT_KEY_FRAME           0
+
+#define DEFAULT_HALF_PIXEL_PROB 85
+
+
+#define DCProbOffset(A,B) \
+	( (A) * (MAX_ENTROPY_TOKENS-1) \
+    + (B) )	// flat index into a [.][MAX_ENTROPY_TOKENS-1] layout, the shape of PB_INSTANCE.DcProbs
+
+#define DCContextOffset(A,B,C,D) \
+	( (A) * TOKEN_CONTEXTS * TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (B) * TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (C) * CONTEXT_NODES \
+	+ (D) )	// flat index into a [.][TOKEN_CONTEXTS][TOKEN_CONTEXTS][CONTEXT_NODES] layout, the shape of DcNodeContexts
+
+#define ACProbOffset(A,B,C,D) \
+	( (A) * PREC_CASES * VP5_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+	+ (B) * VP5_AC_BANDS * (MAX_ENTROPY_TOKENS-1) \
+	+ (C) * (MAX_ENTROPY_TOKENS-1) \
+	+ (D) ) 	// flat index into a [.][PREC_CASES][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1] layout, the shape of AcProbs
+
+
+#define ACContextOffset(A,B,C,D,E) \
+	( (A) * PREC_CASES * (VP5_AC_BANDS-3) * TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (B) * (VP5_AC_BANDS-3) * TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (C) * TOKEN_CONTEXTS * CONTEXT_NODES \
+	+ (D) * CONTEXT_NODES \
+	+ (E) )	// flat index into a [.][PREC_CASES][VP5_AC_BANDS-3][TOKEN_CONTEXTS][CONTEXT_NODES] layout, the shape of AcNodeContexts
+
+#define MBOffset(row,col) ( (row) * pbi->MBCols + (col) )	// row-major macroblock index; uses a variable named 'pbi' from the caller's scope
+
+/****************************************************************************
+*  Global Variables
+*****************************************************************************
+*/
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3];	// 768 entries (VAL_RANGE is 256)
+
+extern BOOL VP5_ModeUsesMC[MAX_MODES]; // table to indicate if the given mode uses motion estimation
+
+extern const int VP5_Mode2Frame[DO_NOT_CODE];	// 16 entries, because DO_NOT_CODE is 0x10; only ten real modes (0x0..0x9) exist
+
+extern const INT32  CoeffToBand[65];	// 65 entries, one more than the 64 coefficients in a block
+
+//****************************************************************
+// Function Pointers some probably could be library globals!
+// all the information we ever need about a macroblock
+
+typedef struct PB_INSTANCE	// Decoder instance state; passed as 'pbi' to the functions declared in this header
+{
+	// Should be able to delete these entries when VP5 complete
+	INT32   CodedBlockIndex;		   
+	UINT8	  *DataOutputInPtr;		  
+    FRAG_INFO *FragInfo;
+    FRAG_INFO *FragInfoAlloc;
+
+
+    /* Current access points for input and output buffers */
+    BOOL_CODER br;
+
+	//****************************************************************************************
+	// Decoder and Frame Type Information
+	UINT8   Vp3VersionNo;
+    UINT32  DeInterlaceMode;
+	UINT32  PostProcessingLevel;	   /* Perform post processing */
+	UINT32  ProcessorFrequency;	   /* CPU frequency	*/
+	UINT32  CPUFree;
+	UINT8   FrameType;       
+	UINT8   KeyFrameType;
+	//****************************************************************************************
+
+	//****************************************************************************************
+	// Frame Size & Index Information
+
+	CONFIG_TYPE Configuration;	// frame configuration
+
+	UINT32  CurrentFrameSize;
+
+	UINT32  YPlaneSize;  
+	UINT32  UVPlaneSize;  
+	UINT32  VFragments;
+	UINT32  HFragments;
+	UINT32  UnitFragments;
+	UINT32  YPlaneFragments;
+	UINT32  UVPlaneFragments;
+	
+	UINT32  ReconYPlaneSize;
+	UINT32  ReconUVPlaneSize;
+	
+	UINT32  YDataOffset;
+	UINT32  UDataOffset;
+	UINT32  VDataOffset;
+	UINT32  ReconYDataOffset;
+	UINT32  ReconUDataOffset;
+	UINT32  ReconVDataOffset;
+
+	UINT32  MacroBlocks;	// Number of Macro-Blocks in Y component
+	UINT32  MBRows;			// Number of rows of MacroBlocks in a Y frame
+	UINT32  MBCols;			// Number of cols of MacroBlocks in a Y frame
+    UINT32	ScaleWidth;
+    UINT32	ScaleHeight;
+    UINT32	OutputWidth;
+    UINT32	OutputHeight;
+	UINT32  OutputStride;
+	
+	//****************************************************************************************
+
+	//****************************************************************************************
+	// Frames 
+	YUV_BUFFER_ENTRY *ThisFrameRecon;
+	YUV_BUFFER_ENTRY *ThisFrameReconAlloc;	// NOTE(review): each *Alloc member presumably keeps the raw allocation behind its aligned twin -- confirm at the allocation site
+	YUV_BUFFER_ENTRY *GoldenFrame; 
+	YUV_BUFFER_ENTRY *GoldenFrameAlloc; 
+	YUV_BUFFER_ENTRY *LastFrameRecon;
+	YUV_BUFFER_ENTRY *LastFrameReconAlloc;
+	YUV_BUFFER_ENTRY *PostProcessBuffer;
+	YUV_BUFFER_ENTRY *PostProcessBufferAlloc;
+	YUV_BUFFER_ENTRY *ScaleBuffer;     /* new buffer for testing new loop filtering scheme */
+	YUV_BUFFER_ENTRY *ScaleBufferAlloc; 	
+	//****************************************************************************************
+
+	//****************************************************************************************
+	Q_LIST_ENTRY *quantized_list;  
+#ifdef RECONSTRUCTMBATONCE
+    INT16		 (*ReconDataBuffer)[64];	// pointer to rows of 64 INT16 when the whole macroblock is reconstructed at once
+	INT16		 (*ReconDataBufferAlloc)[64];
+#else
+    INT16		 *ReconDataBuffer;
+	INT16		 *ReconDataBufferAlloc;
+#endif
+	UINT8         FragCoefEOB;	   // Position of last non 0 coef within QFragData
+	INT16		 *TmpReconBuffer;
+	INT16		 *TmpReconBufferAlloc;
+	INT16		 *TmpDataBuffer;
+	INT16		 *TmpDataBufferAlloc;
+    
+	UINT8		 *LoopFilteredBlockAlloc;
+	UINT8		 *LoopFilteredBlock;
+
+#ifdef DMAREADREFERENCE
+    UINT8  (*ReferenceBlocksAlloc)[192];
+    UINT8  (*ReferenceBlocks)[192]; // Six Reference Blocks
+    UINT32  mvX[6], mvY[6];    
+#endif
+
+#ifdef DMAWRITERECON    
+    #ifdef RECONSTRUCTMBATONCE
+        UINT8		 *ThisBandReconPtr[6];			//Current Band to write to
+    #else
+        UINT8		 *ThisBandReconPtr;				//Current Band to write to
+    #endif
+	UINT8 		 *ReconstructedMBs;			    //bandbuffer for DMA reconstructed MB row.
+    UINT8        *ReconstructedMBsAlloc;       
+	UINT8        *FillMem;
+#endif    
+    //****************************************************************
+
+	void (**idct)(INT16 *InputData, INT16 *QuantMatrix, INT16 * OutputData );	// pointer to a table of IDCT function pointers
+
+	POSTPROC_INST    postproc;
+	QUANTIZER	    *quantizer;
+	MACROBLOCK_INFO  mbi;		// all the information needed for one macroblock
+	FRAME_CONTEXT    fc;		// all of the context information needed for a frame
+
+	TOKENBUFFER LastToken;			// LTIndex of tokens at each position in block
+
+    CODING_MODE      LastMode;      // Last Mode decoded;
+
+	UINT8 DcProbs[2*(MAX_ENTROPY_TOKENS-1)];	// flat [2][MAX_ENTROPY_TOKENS-1]; index with DCProbOffset
+	UINT8 AcProbs[2*PREC_CASES*VP5_AC_BANDS*(MAX_ENTROPY_TOKENS-1)];	// flat [2][PREC_CASES][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1]; index with ACProbOffset
+	UINT8 DcNodeContexts[2*TOKEN_CONTEXTS*TOKEN_CONTEXTS*CONTEXT_NODES];									// flat [2][TOKEN_CONTEXTS][TOKEN_CONTEXTS][CONTEXT_NODES] per DCContextOffset, i.e. Plane, Context, Context, Node
+	UINT8 AcNodeContexts[2*PREC_CASES*(VP5_AC_BANDS-3)*TOKEN_CONTEXTS*CONTEXT_NODES];						// flat [2][PREC_CASES][VP5_AC_BANDS-3][TOKEN_CONTEXTS][CONTEXT_NODES] per ACContextOffset (original note read: Prec, Plane, AcBand, Context, Node)
+	
+	UINT8 ZeroCount;
+    UINT8 MBModeProb[11];
+    UINT8 BModeProb[11];
+
+	UINT8 Inter00Prob;
+	UINT32 AvgFrameQIndex;
+
+	BOOL testMode;
+
+    UINT32 mvNearOffset[16];
+	
+	int probInterlaced;
+	char *MBInterlaced;
+	char *predictionMode;
+	MOTION_VECTORA *MBMotionVector;
+	char *MBInterlacedAlloc;
+	char *predictionModeAlloc;
+	MOTION_VECTORA *MBMotionVectorAlloc;
+
+	UINT8  MvSignProbs[2];
+	UINT8  MvZeroProbs[2];
+	UINT8  MvHalfPixelProbs[2];
+	UINT8  MvLowBitProbs[2];
+	UINT8  MvSizeProbs[2][((MAX_MV_EXTENT+1) >> 2) - 1];	// second dimension evaluates to 7 (MAX_MV_EXTENT is 31)
+
+	UINT8 probXmitted[4][2][MAX_MODES];
+	UINT8 probModeSame[4][MAX_MODES];
+	UINT8 probMode[4][MAX_MODES][MAX_MODES-1]; // nearest+near,nearest only, nonearest+nonear, 10 preceding modes, 9 nodes (NOTE(review): three cases are listed but the first dimension is 4)
+
+	UINT32 maxTimePerFrame;
+	UINT32 thisDecodeTime;
+	UINT32 avgDecodeTime;
+	UINT32 avgPPTime[10];
+	UINT32 avgBlitTime;
+    UINT32 BlackClamp;
+    UINT32 WhiteClamp;
+
+} PB_INSTANCE;
+
+/****************************************************************************
+*  Functions.
+*****************************************************************************
+*/
+//****************************************************************
+// Function Pointers now library globals!
+//extern void (*ReadTokens)( xPB_INST pbi, UINT32 BlockSize, UINT32 Hpos );
+
+//****************************************************************
+extern PB_INSTANCE * VP5_CreatePBInstance(void);	// returns a PB_INSTANCE pointer; NOTE(review): failure value (presumably NULL) not visible here -- confirm
+extern void			 VP5_DeletePBInstance(PB_INSTANCE **);	// takes the address of the instance pointer (NOTE(review): presumably so it can be cleared -- confirm)
+extern BOOL			 VP5_LoadFrame(PB_INSTANCE *pbi);
+extern void			 VP5_SetFrameType(PB_INSTANCE *pbi, UINT8 FrType );
+extern UINT8		 VP5_GetFrameType(PB_INSTANCE *pbi);
+extern BOOL			 VP5_InitFrameDetails(PB_INSTANCE *pbi);
+extern void			 VP5_ErrorTrap( PB_INSTANCE *pbi, int ErrorCode );
+extern BOOL			 VP5_AllocateFragmentInfo(PB_INSTANCE * pbi);	// BOOL result; paired by name with VP5_DeleteFragmentInfo
+extern BOOL			 VP5_AllocateFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize);	// BOOL result; paired by name with VP5_DeleteFrameInfo
+extern void			 VP5_DeleteFragmentInfo(PB_INSTANCE * pbi);
+extern void			 VP5_DeleteFrameInfo(PB_INSTANCE * pbi);
+extern void			 VP5_DMachineSpecificConfig(void);
+
+INLINE UINT32 VP5_bitread1(BOOL_CODER *br)	// Decode exactly one bit: forwards br to DecodeBool128 and returns its result as UINT32
+{
+	return DecodeBool128(br);
+}
+INLINE UINT32 VP5_bitread(BOOL_CODER *br, int bits)	// Read 'bits' bits, most significant first; bits <= 0 reads nothing and returns 0; callers must keep bits <= 32
+{
+	UINT32 z = 0;
+	int bit;
+	for(bit=bits-1;bit>=0;bit--)	// the first decoded bit lands in position bits-1, the last in position 0
+	{
+		z|=((UINT32)DecodeBool128(br)<<bit);	// widen to UINT32 before shifting so a shift by 31 cannot overflow a signed int
+	}
+	return z;
+}
+extern void          vp5_appendframe(PB_INSTANCE *pbi);	// note the lower-case vp5_ prefix, unlike the VP5_ functions above
+extern void			 VP5_readTSC(unsigned long *tsc);	// writes through an unsigned long pointer (NOTE(review): presumably a timestamp counter reading -- confirm)
+extern void ConfigureContexts(PB_INSTANCE *pbi);
+
+//  dx\generic\decodembs.c
+extern void ResetAboveContext(PB_INSTANCE* pbi);
+extern void ResetLeftContext(PB_INSTANCE* pbi);
+extern void UpdateContext(PB_INSTANCE* pbi, BLOCK_CONTEXT* c, BLOCK_POSITION bp);	// BLOCK_CONTEXT flavour (the type used for left contexts)
+extern void UpdateContextA(PB_INSTANCE* pbi, BLOCK_CONTEXTA* c, BLOCK_POSITION bp);	// BLOCK_CONTEXTA flavour (the type used for above contexts)
+extern void PredictDC(PB_INSTANCE* pbi, BLOCK_POSITION bp, Q_LIST_ENTRY* LastDC, BLOCK_CONTEXTA* Above, BLOCK_CONTEXT* Left);	// parameter types mirror the LastDc/Above/Left members of MACROBLOCK_INFO
+
+//  dx\generic\recon.c
+extern void ReconstructBlock(PB_INSTANCE* pbi, BLOCK_POSITION bp);
+
+//  dx\generic\decodemode.c
+extern CODING_MODE DecodeBlockMode(PB_INSTANCE *pbi);
+extern CODING_MODE DecodeMode(PB_INSTANCE *pbi, CODING_MODE lastmode, UINT32 type);
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h b/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h
new file mode 100644
index 00000000..d2e1a314
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/quantize.h
@@ -0,0 +1,89 @@
+#ifndef QUANTIZE_H
+#define QUANTIZE_H
+#include "codec_common.h"
+#include "codec_common_interface.h"
+
+/****************************************************************************
+*  Structures
+*****************************************************************************
+*/
+typedef struct 	// Quantizer/dequantizer state; PB_INSTANCE holds a pointer to one
+{
+	UINT32 FrameQIndex;							// Quality specified as a table index 
+	UINT32 ThisFrameQuantizerValue;				// Quality value for this frame  
+	short round[8];
+	short mult[8];
+	short zbin[8];
+	UINT32 LastQuantizerValue;					// NOTE(review): original comment repeated "Quality value for this frame"; presumably the previous frame's value -- confirm
+	UINT32 QThreshTable[Q_TABLE_SIZE];			// ac quantizer scale values
+
+    UINT32 *transIndex;							// array to reorder zig zag to idct's ordering
+	UINT8   quant_index[64];					// array to reorder from raster to zig zag
+
+	// used by the dequantizer 
+	Q_LIST_ENTRY * dequant_coeffs[2];			// pointer to current dequantization tables
+	Q_LIST_ENTRY * dequant_coeffsAlloc[2];		// alloc so we can keep aligned
+
+	INT32 QuantCoeffs[2][64];					// Quantizer values table
+	INT32 QuantRound[2][64];					// Quantizer rounding table
+	INT32 ZeroBinSize[2][64];					// Quantizer zero bin table
+
+
+} QUANTIZER;
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+extern void VP5_InitQTables
+(
+ QUANTIZER *pbi,	// despite the name, this is a QUANTIZER pointer, not a PB_INSTANCE
+ UINT8 Vp3VersionNo 
+);
+
+extern void VP5_UpdateQ
+(
+ QUANTIZER *pbi,	// QUANTIZER pointer (named pbi)
+ UINT8 Vp3VersionNo  
+);
+
+extern void VP5_UpdateQC
+(
+ QUANTIZER *pbi,	// QUANTIZER pointer (named pbi)
+ UINT8 Vp3VersionNo  
+);
+
+extern void VP5_init_quantizer 
+(
+ QUANTIZER *pbi,	// QUANTIZER pointer (named pbi)
+ UINT8 Vp3VersionNo 
+);
+
+extern void (*VP5_quantize)	// a function POINTER variable, not a function: it must be assigned before it is called
+(
+ QUANTIZER *pbi,
+ INT16 * DCT_block,
+ Q_LIST_ENTRY * quantized_list,
+ UINT8 bp
+);
+
+extern void VP5_init_dequantizer 
+(
+ QUANTIZER *pbi,	// QUANTIZER pointer (named pbi)
+ UINT8 Vp3VersionNo 
+);
+
+extern QUANTIZER * VP5_CreateQuantizer	// returns a QUANTIZER pointer; NOTE(review): failure value not visible here -- confirm
+(
+ void
+);
+
+extern void VP5_DeleteQuantizer
+(
+ QUANTIZER **pbi	// address of the QUANTIZER pointer (NOTE(review): presumably so it can be cleared -- confirm)
+);          
+
+extern UINT8 QTableSelect[6];	// six entries, the same count as the blocks in a macroblock
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h b/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h
new file mode 100644
index 00000000..9408a09e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/systemdependant.h
@@ -0,0 +1,52 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.h
+*
+*   Description  :     Miscellaneous system dependant functions header
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*  
+*   1.00 PGW 12/10/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+// Test machine config
+
+// Misc.
+extern void VP5_IssueWarning( char * WarningMessage );	// takes a non-const char pointer
+extern void PauseProcess( unsigned int SleepMs );
+
+// System dynamic memory allocation
+char * SytemGlobalAlloc( unsigned int Size );   // NOTE(review): spelled "Sytem" (missing 's'), unlike SystemGlobalFree; renaming requires updating the definition and every caller
+void SystemGlobalFree( char * MemPtr );	// NOTE(review): presumably releases memory obtained from SytemGlobalAlloc -- confirm
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h b/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h
new file mode 100644
index 00000000..8052f942
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/tokenentropy.h
@@ -0,0 +1,129 @@
+/****************************************************************************
+*
+*   Module Title :     TokenEntropy.h
+*
+*   Description  :     Video CODEC: Coefficient token entropy header.
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.01 PGW 27 Jun 01  Module created.
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Header Frames
+*****************************************************************************
+*/
+
+
+#ifndef TOKEN_ENTROPY_H
+#define TOKEN_ENTROPY_H
+
+#include "type_aliases.h"
+#include "boolhuff.h"
+#include "codec_common.h"
+#include "huffman.h"
+
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+// VP5 Huffman table AC bands
+#define VP5_AC_BANDS			6
+
+// Tokens								Value		Extra Bits (range + sign)
+#define ZERO_TOKEN              0		//0			Extra Bits 0+0
+#define ONE_TOKEN               1		//1			Extra Bits 0+1       
+#define TWO_TOKEN               2		//2			Extra Bits 0+1 
+#define THREE_TOKEN             3		//3			Extra Bits 0+1
+#define FOUR_TOKEN              4		//4			Extra Bits 0+1
+#define DCT_VAL_CATEGORY1		5		//5-6		Extra Bits 1+1
+#define DCT_VAL_CATEGORY2		6		//7-10		Extra Bits 2+1
+#define DCT_VAL_CATEGORY3		7		//11-26		Extra Bits 4+1
+#define DCT_VAL_CATEGORY4		8		//11-26		Extra Bits 5+1
+#define DCT_VAL_CATEGORY5		9		//27-58		Extra Bits 5+1
+#define DCT_VAL_CATEGORY6		10		//59+		Extra Bits 11+1	
+#define DCT_EOB_TOKEN           11		//EOB		Extra Bits 0+0
+#define MAX_ENTROPY_TOKENS      (DCT_EOB_TOKEN + 1)  
+#define ILLEGAL_TOKEN			255
+
+
+#define TOKEN_CONTEXTS			6 // EOB, 0, 1, 2, 3-4, x
+#define CONTEXT_NODES			(MAX_ENTROPY_TOKENS-7)
+
+#define PREC_CASES				3
+
+#define DC_PROBABILITY_UPDATE_THRESH	100
+
+#define ZERO_CONTEXT_NODE		0
+#define EOB_CONTEXT_NODE		1
+#define ONE_CONTEXT_NODE		2
+#define LOW_VAL_CONTEXT_NODE	3
+#define TWO_CONTEXT_NODE		4
+#define THREE_CONTEXT_NODE		5
+#define HIGH_LOW_CONTEXT_NODE	6
+#define CAT_ONE_CONTEXT_NODE	7
+#define CAT_THREEFOUR_CONTEXT_NODE	8
+#define CAT_THREE_CONTEXT_NODE	9
+#define CAT_FIVE_CONTEXT_NODE	10
+
+#define PROB_UPDATE_BASELINE_COST	7
+
+#define MAX_PROB				254
+#define DCT_MAX_VALUE			2048
+
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+extern const UINT32 ProbCost[256];
+
+extern const UINT8  ExtraBitLengths_VP5[MAX_ENTROPY_TOKENS];
+extern const UINT32 DctRangeMinVals[MAX_ENTROPY_TOKENS];
+
+typedef struct LineEq    // Pair of 32-bit coefficients; presumably a straight line y = M*x + C -- TODO confirm against users of LINE_EQ
+{
+    INT32	M;           // presumably the gradient term -- confirm
+    INT32	C;           // presumably the constant (intercept) term -- confirm
+} LINE_EQ;
+
+
+extern const UINT8 DcUpdateProbs[2][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 AcUpdateProbs[PREC_CASES][2][VP5_AC_BANDS][MAX_ENTROPY_TOKENS-1];
+extern const UINT8 PrevTokenIndex[MAX_ENTROPY_TOKENS];
+
+extern UINT8 PrecZeroRunLength[BLOCK_SIZE];
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+
+
+// These tables contain the normalized probabilities required to traverse the
+// entropy tree for DC and AC value tokens representing values >= 2.
+// Probabilities are normalized to 8 bits and represent the likelihood of a zero branch.
+
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h b/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h
new file mode 100644
index 00000000..3d746395
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/include/xprintf.h
@@ -0,0 +1,37 @@
+#ifndef XPRINTF_H
+#define XPRINTF_H
+//____________________________________________________________________________
+//
+//  File:  xprintf.h
+//
+//  Description:  Display a printf style message on the current video frame
+//
+//  Author:  Keith Looney
+//
+//____________________________________________________________________________
+//  Revision History
+//
+
+//____________________________________________________________________________
+//  Includes
+
+#include "pbdll.h"
+
+//____________________________________________________________________________
+//  Defines
+
+//____________________________________________________________________________
+//  Declarations
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+// Draws a printf style message into ppbi->PostProcessBuffer starting at index 'pixel'; returns the text length (0 on failure).
+extern int vp5_xprintf(const PB_INSTANCE* ppbi, long pixel, const char* format, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  //  XPRINTF_H
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj
new file mode 100644
index 00000000..c9f9a51e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj
@@ -0,0 +1,326 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{FC9F7C6D-C0BF-4265-B7BD-C184573C3C8A}</ProjectGuid>
+    <RootNamespace>vp5d</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>16.0.32002.118</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vp5d\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vp5d\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Vcpkg">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Vcpkg">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;NDEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+    <Bscmake>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>.\..\..\Lib\Win32\Release/vp5d.bsc</OutputFile>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;NDEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+    <Bscmake>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>.\..\..\Lib\Win32\Release/vp5d.bsc</OutputFile>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;_DEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+    <Bscmake>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>.\..\..\Lib\Win32\Debug/vp5d.bsc</OutputFile>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>Include;..\include;..\..\Include;..\..\..\Include;..\..\..\Include\VP50;.\include;..\..\include;..\..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;..\..\..\..\libvp6\include;..\..\..\..\libvp6\include\vp50;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_WINDOWS;_USRDLL;vp5D_EXPORTS;PREDICT_2D;PBDLL;VFW_PB;USE_DRAWDIB;POSTPROCESS;NORMALIZED;WIN32;_DEBUG;INLINE=__inline;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>false</MinimalRebuild>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+    <Bscmake>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <OutputFile>.\..\..\Lib\Win32\Debug/vp5d.bsc</OutputFile>
+    </Bscmake>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="dx\Generic\boolhuff.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\debug.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodembs.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemode.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemv.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DFrameR.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DSystemDependant.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\FrameIni.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\Huffman.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\pb_globals.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\quantize.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\recon.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\TokenEntropy.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\vp50dxv.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\dsystemdependant.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\quantindexmmx.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="xprintf\xprintf.cpp">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters
new file mode 100644
index 00000000..cfe106b5
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/vp5d.vcxproj.filters
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Decompress">
+      <UniqueIdentifier>{2c04083d-6bcf-4b0c-94ce-55f89142c8dc}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Win32">
+      <UniqueIdentifier>{aad98e01-f672-4f5a-8d90-0d8c9eeab331}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dx\Generic\boolhuff.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\debug.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodembs.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemode.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\decodemv.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DFrameR.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\DSystemDependant.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\FrameIni.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\Huffman.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\pb_globals.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\quantize.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\recon.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\TokenEntropy.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="DX\Generic\vfwpbdll_if.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Generic\vp50dxv.c">
+      <Filter>Decompress</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\dsystemdependant.c">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="dx\Win32\quantindexmmx.c">
+      <Filter>Win32</Filter>
+    </ClCompile>
+    <ClCompile Include="xprintf\xprintf.cpp">
+      <Filter>Win32</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp b/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp
new file mode 100644
index 00000000..29826461
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vp50/xprintf/xprintf.cpp
@@ -0,0 +1,169 @@
+//____________________________________________________________________________
+//
+//  File:  xprintf.cpp
+//
+//  Description:  Display a printf style message on the current video frame
+//
+//  Author:  Keith Looney
+//
+//____________________________________________________________________________
+//  Revision History
+//
+
+//____________________________________________________________________________
+//  Includes
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <windows.h>
+ 
+#include "xprintf.h"
+ 
+//____________________________________________________________________________
+//  Defines
+
+//____________________________________________________________________________
+//  Declarations
+
+//____________________________________________________________________________
+//  Definitions
+
+/****************************************************************************
+ *
+ *  Function    :  vp5_xprintf
+ *
+ *  Description :  Display a printf style message on the current video frame
+ *
+ *  INPUTS      :  ppbi   - instance whose PostProcessBuffer is drawn into,
+ *                          one row every Configuration.YStride bytes.
+ *                 nPixel - index in PostProcessBuffer where drawing starts.
+ *                 format - printf style format string and its arguments.
+ *
+ *  OUTPUTS     :  Bytes of PostProcessBuffer under the text are set to 255.
+ *
+ *  RETURNS     :  Length of the formatted text, or 0 if a GDI call failed.
+ *
+ *  Notes       :  Text is cut to 255 chars. Up to 8 rows of 8 bytes per char
+ *                 may be written; no bounds check is made on the buffer.
+ ****************************************************************************/
+
+int vp5_xprintf(const PB_INSTANCE* ppbi, long nPixel, const char* format, ...)
+{
+    //  All GDI handles start as NULL so the cleanup at Exit is safe on any path
+    HDC hdcMemory = NULL;
+    HBITMAP hbmTemp = NULL;
+    HBITMAP hbmOrig = NULL;
+    HFONT hfont = NULL;
+    HFONT hfonto = NULL;
+    va_list arglist;
+    char szFormatted[256] = "";
+    UINT8* pDest = &ppbi->PostProcessBuffer[nPixel];
+    long nStride = ppbi->Configuration.YStride;
+    BOOL bRC;
+    int nChars;
+    int rc = 0;
+    RECT rect;
+
+    //  Format text.  _vsnprintf does not terminate the buffer when the output
+    //  is truncated, so the last (zero initialised) byte is kept out of reach.
+    va_start(arglist, format);
+    _vsnprintf(szFormatted, sizeof(szFormatted) - 1, format, arglist);
+    va_end(arglist);
+    nChars = static_cast<int>(strlen(szFormatted));
+
+    //  Set up temporary bitmap: 1 bit per pixel, 8x8 pixels per character
+    rect.left = 0;
+    rect.top = 0;
+    rect.right = 8 * nChars;
+    rect.bottom = 8;
+
+    hdcMemory = CreateCompatibleDC(NULL);
+    if (hdcMemory == NULL)
+    {
+        goto Exit;
+    }
+
+    hbmTemp = CreateBitmap(rect.right, rect.bottom, 1, 1, NULL);
+    if (hbmTemp == NULL)
+    {
+        goto Exit;
+    }
+    hbmOrig = static_cast<HBITMAP>(SelectObject(hdcMemory, hbmTemp));
+    if (hbmOrig == NULL)
+    {
+        goto Exit;
+    }
+
+    //  Select an 8 pixel high font, keeping the DC's previous font so that it
+    //  can be put back before ours is deleted
+    hfont = CreateFont(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "");
+    if (hfont == NULL)
+    {
+        goto Exit;
+    }
+    hfonto = static_cast<HFONT>(SelectObject(hdcMemory, hfont));
+    if (hfonto == NULL)
+    {
+        goto Exit;
+    }
+
+    SetTextColor(hdcMemory, 1);
+    SetBkColor(hdcMemory, 0);
+    SetBkMode(hdcMemory, TRANSPARENT);
+
+    //  Clear the bitmap, then write text into it
+    bRC = BitBlt(hdcMemory, rect.left, rect.top, rect.right, rect.bottom, hdcMemory, rect.left, rect.top, BLACKNESS);
+    if (!bRC)
+    {
+        goto Exit;
+    }
+
+    bRC = ExtTextOut(hdcMemory, 0, 0, ETO_CLIPPED, &rect, szFormatted, nChars, NULL);
+    if (!bRC)
+    {
+        goto Exit;
+    }
+
+    //  Copy bitmap to video frame: every non-zero bitmap pixel becomes 255.
+    //  Bitmap rows are read bottom-up while the destination advances by one
+    //  stride per row.
+    for (long y = rect.top; y < rect.bottom; ++y)
+    {
+        for (long x = rect.left; x < rect.right; ++x)
+        {
+            if (GetPixel(hdcMemory, x, rect.bottom - 1 - y))
+            {
+                pDest[x] = 255;
+            }
+        }
+        pDest += nStride;
+    }
+
+    rc = nChars;
+
+Exit:
+    //  Deselect our objects before deleting them; GDI objects should not be
+    //  deleted while they are still selected into a DC.
+    if (hfonto != NULL)
+    {
+        SelectObject(hdcMemory, hfonto);
+    }
+    if (hfont != NULL)
+    {
+        DeleteObject(hfont);
+    }
+    if (hbmOrig != NULL)
+    {
+        SelectObject(hdcMemory, hbmOrig);
+    }
+    if (hbmTemp != NULL)
+    {
+        DeleteObject(hbmTemp);
+    }
+    if (hdcMemory != NULL)
+    {
+        DeleteDC(hdcMemory);
+    }
+
+    return rc;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/Makefile b/Src/libvpShared/corelibs/cdxv/vppp/Makefile
new file mode 100644
index 00000000..f8a75930
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/Makefile
@@ -0,0 +1,64 @@
+## Name of the library to build; the archive produced is ${TARGET}.a
+
+TARGET 		=libvppp
+
+## Tools: CC and LD both name ecc, ar creates the archive
+CC      	= ecc
+LD      	= ecc
+AR      	= ar
+OBJDUMP 	= objdump
+RM      	= rm -f
+
+## Directories: Windows-style paths, all rooted at TOPDIR
+TOPDIR  	=C:\DuckSoft
+PRIVATEINCLUDE  =${TOPDIR}\private\include
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include 
+VPPPINCLUDE     =${TOPDIR}\private\corelibs\cdxv\vppp\include 
+
+CURRENTDIR 	=${TOPDIR}\private\corelibs\cdxv\vppp
+LIBDIR		=${TOPDIR}\private\corelibs\lib\mapca 
+
+## Compile flags: include paths, preprocessor defines, then compiler options
+ALLINCLUDES     =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
+VP6DEFINES		=-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES      =-DMAPCA
+ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
+
+DEBUG			=-O2
+CFLAGS 			=-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+				-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+				-magen_interroutine_padding
+ALLFLAGS 		=$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+## Files: objects that go into the archive; SRCS maps each .o name to .c
+## NOTE(review): borders.o is listed under bsp\ while every other object is under generic\ - confirm
+OBJS		= 	bsp\borders.o	 \
+			generic\clamp.o	 \
+			generic\deblock.o	 \
+			generic\DeInterlace.o	 \
+			generic\Dering.o	 \
+			generic\loopfilter.o	 \
+			generic\postproc.o	 \
+			generic\scale.o	 \
+			generic\simpledeblocker.o \
+			generic\doptsystemdependant.o
+			
+
+SRCS		= $(OBJS:.o=.c)
+
+ARTARGET	= ${TARGET}.a
+
+# First rule in the file: archive the objects, then move the library into LIBDIR
+# NOTE(review): the target below is the literal name ARTARGET, not ${ARTARGET} - confirm intent
+ARTARGET:${OBJS}
+	${AR} -cr ${ARTARGET} ${OBJS}
+	mv ${ARTARGET} ${LIBDIR}
+# Compile rule: every object lists all of ${SRCS} as prerequisites
+${OBJS} : ${SRCS}
+	$(CC) $(ALLFLAGS) -c $*.c -o $*.o
+# Remove the objects and any ${ARTARGET} left in the current directory
+clean:
+	${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c
new file mode 100644
index 00000000..8609121d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/DeInterlace.c
@@ -0,0 +1,76 @@
+/**************************************************************************** 
+ *
+ *   Module Title :     DeInterlace.c
+ *
+ *   Description  :     De-Interlace routines.
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <memory.h>
+#include "type_aliases.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : CFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr : Pointer to input image.
+ *                  UINT8 *DstPtr : Pointer to output image.
+ *                  INT32 Width   : Image width (bytes processed per row).
+ *                  INT32 Height  : Image height (number of rows).
+ *                  INT32 Stride  : Image stride (bytes between row starts).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3-tap filter vertically to remove interlacing
+ *                  artifacts.
+ *
+ *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
+ *                  vertically in an interlaced frame; the first and last rows
+ *                  are copied unfiltered. Nothing is done when Width or
+ *                  Height is not positive. This function assumes:
+ *                  1) SrcPtr & DstPtr buffers have the same geometry.
+ *                  2) SrcPtr != DstPtr.
+ *
+ ****************************************************************************/
+void CFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32  i, j;
+    UINT8 *PrevSrcPtr, *NextSrcPtr;
+    UINT8 *CurrentSrcPtr = SrcPtr;
+    UINT8 *CurrentDstPtr = DstPtr;
+
+    // An empty image has no rows to copy or filter
+    if ( Width <= 0 || Height <= 0 )
+        return;
+
+    // Always copy the first line
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    for ( i=1; i<Height-1; i++ )
+    {
+        PrevSrcPtr     = CurrentSrcPtr;
+        CurrentSrcPtr += Stride;
+        NextSrcPtr     = CurrentSrcPtr + Stride;
+        CurrentDstPtr += Stride;
+
+        // [1, 2, 1] weighted sum, +2 so the divide by 4 rounds to nearest
+        for ( j=0; j<Width; j++ )
+            CurrentDstPtr[j] = (UINT8)( (PrevSrcPtr[j] + (CurrentSrcPtr[j]<<1) + NextSrcPtr[j] + 2)>>2 );
+    }
+
+    // Copy the last line; a one-row image has none beyond the first
+    if ( Height > 1 )
+        memcpy ( CurrentDstPtr + Stride, CurrentSrcPtr + Stride, Width );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c
new file mode 100644
index 00000000..2cf04ad0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/borders.c
@@ -0,0 +1,303 @@
+/**************************************************************************** 
+*
+*   Module Title :     borders.c
+*
+*   Description  :     
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+#ifdef MAPCA
+#include "eti/mm.h"
+#include "eti_loopdir.h"
+#endif
+
+#ifdef MAPCA
+void CopyYLeftRightBorder  /* MAPCA build: fills the Y plane's left and right borders */
+(
+    UINT8 *restrict SrcPtr1,    /* row 0 pixel replicated into the left border  */
+    UINT8 *restrict SrcPtr2,    /* row 0 pixel replicated into the right border */
+    UINT8 *restrict DestPtr1,   /* row 0 start of the left border               */
+    UINT8 *restrict DestPtr2,   /* row 0 start of the right border              */
+    UINT32 PlaneHeight,         /* number of rows to fill                       */
+    UINT32 PlaneStride          /* bytes between the starts of successive rows  */
+)
+{
+	n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
+    n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
+    n32  PlaneStride64 = (PlaneStride>>3);   /* row step for the n64 pointers: stride / 8 */
+    n32  Left, Right;
+    n64  Left64, Right64;
+	int i;
+	/* NOTE(review): borders are written as whole n64 words - presumably DestPtr1/2 and the stride must be 8-byte aligned; confirm */
+    loop_directives ( ELD_SWP_IVDEP );
+    for ( i=0; i<PlaneHeight; i++ )
+	{
+    	Left  = SrcPtr1[0];
+        Right = SrcPtr2[0];
+        /* presumably spreads the pixel value across the whole 64-bit word - confirm against the MAPCA intrinsic reference */
+        Left64  = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
+        Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
+        /* four n64 stores per side per row; the count is fixed here, not taken from a parameter */
+    	DstPtr64_1[0] = Left64;
+        DstPtr64_2[0] = Right64;
+
+    	DstPtr64_1[1] = Left64;
+        DstPtr64_2[1] = Right64;
+
+        DstPtr64_1[2] = Left64;
+        DstPtr64_2[2] = Right64;
+
+        DstPtr64_1[3] = Left64;
+        DstPtr64_2[3] = Right64;
+        /* step all four pointers down to the next row */
+        SrcPtr1 += PlaneStride;
+        SrcPtr2 += PlaneStride;
+        DstPtr64_1 += PlaneStride64;
+        DstPtr64_2 += PlaneStride64;		
+	}
+}
+
+void CopyUVLeftRightBorder  /* MAPCA build: fills a chroma plane's left and right borders */
+(
+        UINT8 *restrict SrcPtr1,    /* row 0 pixel replicated into the left border  */
+        UINT8 *restrict SrcPtr2,    /* row 0 pixel replicated into the right border */
+        UINT8 *restrict DestPtr1,   /* row 0 start of the left border               */
+        UINT8 *restrict DestPtr2,   /* row 0 start of the right border              */
+        UINT32 PlaneHeight,         /* number of rows to fill                       */
+        UINT32 PlaneStride          /* bytes between the starts of successive rows  */
+)
+{
+	n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
+    n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
+    n32  PlaneStride64 = (PlaneStride>>3);   /* row step for the n64 pointers: stride / 8 */
+    n32  Left, Right;
+    n64  Left64, Right64;
+	int  i;
+    /* NOTE(review): borders are written as whole n64 words - presumably DestPtr1/2 and the stride must be 8-byte aligned; confirm */
+    loop_directives ( ELD_SWP_IVDEP );
+	for ( i=0; i<PlaneHeight; i++ )
+	{
+    	Left  = SrcPtr1[0];
+        Right = SrcPtr2[0];
+        /* presumably spreads the pixel value across the whole 64-bit word - confirm against the MAPCA intrinsic reference */
+        Left64  = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
+        Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
+        /* two n64 stores per side per row (half as many as the Y variant); the count is fixed here */
+    	DstPtr64_1[0] = Left64;
+        DstPtr64_2[0] = Right64;
+
+    	DstPtr64_1[1] = Left64;
+        DstPtr64_2[1] = Right64;
+        /* step all four pointers down to the next row */
+        SrcPtr1 += PlaneStride;
+        SrcPtr2 += PlaneStride;
+        DstPtr64_1 += PlaneStride64;
+        DstPtr64_2 += PlaneStride64;		
+	}
+}
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateUMVBorder
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *DestReconPtr    : Pointer to reconstructed image.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies pixel values in first/last rows/columns of the
+ *                  image into the UMV border in the specified reconstructed
+ *                  image.
+ *
+ *  SPECIAL NOTES : The Y border is pbi->MVBorder wide; U and V use half of it.
+ *                  Columns are filled first so the row copies pick up corners.
+ ****************************************************************************/
+void UpdateUMVBorder ( POSTPROC_INSTANCE *pbi, UINT8 *DestReconPtr )
+{
+	
+	INT32 i;
+	INT32 PlaneHeight;
+	UINT8 *SrcPtr1, *SrcPtr2;
+	UINT8 *DestPtr1, *DestPtr2;
+
+    UINT32 Border = pbi->MVBorder;
+	INT32 PlaneStride = pbi->YStride;
+
+    /***********/
+    /* Y Plane */
+    /***********/
+	PlaneStride = pbi->YStride;
+	PlaneHeight = pbi->VFragments * 8;   /* plane height in rows: 8 per fragment row */
+
+    // Replicate the left-most and right-most pixel of every row into the side borders
+	SrcPtr1 = DestReconPtr + pbi->ReconYDataOffset;   /* first pixel of the first row   */
+	SrcPtr2 = SrcPtr1 + 8 * pbi->HFragments - 1;      /* last pixel of the first row    */
+	DestPtr1= SrcPtr1 - Border;                       /* left border of the first row   */
+	DestPtr2= SrcPtr2 + 1;                            /* right border of the first row  */
+
+#ifdef MAPCA
+    CopyYLeftRightBorder ( SrcPtr1, SrcPtr2, DestPtr1,DestPtr2, PlaneHeight, PlaneStride );   /* NOTE(review): helper is not given Border - confirm its fixed width matches */
+#else
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        memset ( DestPtr1, SrcPtr1[0], Border );
+        memset ( DestPtr2, SrcPtr2[0], Border );
+        SrcPtr1  += PlaneStride;
+        SrcPtr2  += PlaneStride;
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+#endif
+
+    // Now copy the top and bottom source lines (full stride, side borders included) into each line of the respective borders
+	SrcPtr1 = DestReconPtr + Border * PlaneStride;   /* NOTE(review): presumably the first image row including its left border - assumes the Y plane starts at DestReconPtr; confirm */
+	SrcPtr2 = SrcPtr1 + (pbi->VFragments * 8 * PlaneStride)- PlaneStride;   /* same column, last image row */
+	DestPtr1= DestReconPtr;                          /* top border starts at the buffer base */
+	DestPtr2= SrcPtr2 + PlaneStride;                 /* bottom border starts one row below the last image row */
+    for ( i=0; i<(INT32)Border; i++ )
+    {
+        memcpy ( DestPtr1, SrcPtr1, PlaneStride );
+        memcpy ( DestPtr2, SrcPtr2, PlaneStride );
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+
+	PlaneStride = pbi->UVStride;
+	PlaneHeight = pbi->VFragments * 4;   /* chroma planes: 4 rows per fragment row */
+
+    /***********/
+    /* U Plane */
+    /***********/
+
+    // Replicate the left-most and right-most pixel of every row into the side borders (Border/2 wide)
+	SrcPtr1 = DestReconPtr + pbi->ReconUDataOffset;
+	SrcPtr2 = SrcPtr1 + 4 * pbi->HFragments - 1;
+	DestPtr1= SrcPtr1 - Border/2;
+	DestPtr2= SrcPtr2 + 1;
+
+#ifdef MAPCA
+    CopyUVLeftRightBorder ( SrcPtr1, SrcPtr2, DestPtr1,DestPtr2, PlaneHeight, PlaneStride );
+#else
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        memset ( DestPtr1, SrcPtr1[0], Border/2 );
+        memset ( DestPtr2, SrcPtr2[0], Border/2 );
+        SrcPtr1  += PlaneStride;
+        SrcPtr2  += PlaneStride;
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+#endif
+
+    // Now copy the top and bottom source lines (full stride, side borders included) into each line of the respective borders
+	SrcPtr1 = DestReconPtr + pbi->ReconUDataOffset - Border/2;   /* first image row, from its left border */
+	SrcPtr2 = SrcPtr1 + (pbi->VFragments * 4 * PlaneStride)- PlaneStride;   /* last image row */
+	DestPtr1= SrcPtr1 - Border/2*PlaneStride;                    /* Border/2 rows above the first image row */
+	DestPtr2= SrcPtr2 + PlaneStride;
+    for ( i=0; i<(INT32)(Border/2); i++ )
+    {
+        memcpy ( DestPtr1, SrcPtr1, PlaneStride );
+        memcpy ( DestPtr2, SrcPtr2, PlaneStride );
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+
+    /***********/
+    /* V Plane */
+    /***********/
+    
+    // Same steps as for the U plane, starting from ReconVDataOffset
+	SrcPtr1 = DestReconPtr + pbi->ReconVDataOffset;
+	SrcPtr2 = SrcPtr1 + 4 * pbi->HFragments - 1;
+	DestPtr1= SrcPtr1 - Border/2;
+	DestPtr2= SrcPtr2 + 1;
+
+#ifdef MAPCA
+    CopyUVLeftRightBorder ( SrcPtr1, SrcPtr2, DestPtr1,DestPtr2, PlaneHeight, PlaneStride );
+#else
+    for ( i=0; i<PlaneHeight; i++ )
+    {
+        memset ( DestPtr1, SrcPtr1[0], Border/2 );
+        memset ( DestPtr2, SrcPtr2[0], Border/2 );
+        SrcPtr1  += PlaneStride;
+        SrcPtr2  += PlaneStride;
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+#endif
+
+    // Now copy the top and bottom source lines (full stride, side borders included) into each line of the respective borders
+	SrcPtr1 = DestReconPtr + pbi->ReconVDataOffset - Border/2;
+	SrcPtr2 = SrcPtr1 + (pbi->VFragments * 4 * PlaneStride)- PlaneStride;
+	DestPtr1= SrcPtr1 - Border/2*PlaneStride;
+	DestPtr2= SrcPtr2 + PlaneStride;
+    for ( i=0; i<(INT32)(Border/2); i++ )
+    {
+        memcpy ( DestPtr1, SrcPtr1, PlaneStride );
+        memcpy ( DestPtr2, SrcPtr2, PlaneStride );
+        DestPtr1 += PlaneStride;
+        DestPtr2 += PlaneStride;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : CopyFrame
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Post-processor instance; supplies
+ *                                           the destination plane offsets
+ *                                           and strides.
+ *                  YUV_BUFFER_CONFIG *b   : Source image; supplies the plane
+ *                                           pointers, sizes and strides.
+ *                  UINT8 *DestReconPtr    : Base of the destination image.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies the Y, U and V planes of the source image row by
+ *                  row into the destination image, then calls
+ *                  UpdateUMVBorder to refresh the destination's borders.
+ *
+ *  SPECIAL NOTES : Only the plane width is copied from each row; source and
+ *                  destination rows are stepped by their own strides.
+ *
+ ****************************************************************************/
+void CopyFrame ( POSTPROC_INSTANCE *pbi, YUV_BUFFER_CONFIG *b, UINT8 *DestReconPtr )
+{
+    unsigned char *From;
+    unsigned char *To;
+    int Line;
+
+    // Rows are copied one at a time; each image is stepped by its own stride
+    // Y plane
+    From = (unsigned char *) b->YBuffer;
+    To   = DestReconPtr + pbi->ReconYDataOffset;
+    for ( Line=0; Line<b->YHeight; Line++, From+=b->YStride, To+=pbi->YStride )
+        memcpy ( To, From, b->YWidth );
+
+    // U plane
+    From = (unsigned char *) b->UBuffer;
+    To   = DestReconPtr + pbi->ReconUDataOffset;
+    for ( Line=0; Line<b->UVHeight; Line++, From+=b->UVStride, To+=pbi->UVStride )
+        memcpy ( To, From, b->UVWidth );
+
+    // V plane
+    From = (unsigned char *)  b->VBuffer;
+    To   = DestReconPtr + pbi->ReconVDataOffset;
+    for ( Line=0; Line<b->UVHeight; Line++, From+=b->UVStride, To+=pbi->UVStride )
+        memcpy ( To, From, b->UVWidth );
+
+    // With all three planes in place, extend their edge pixels
+    // into the surrounding borders
+    UpdateUMVBorder ( pbi, DestReconPtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c
new file mode 100644
index 00000000..8f863382
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/clamp.c
@@ -0,0 +1,75 @@
+/****************************************************************************
+ *        
+ *   Module Title :     clamp.c
+ *
+ *   Description  :     Image pixel value clamping routines.
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ClampLevels_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  INT32  BlackClamp      : Number of levels to clamp up from 0.
+ *                  INT32  WhiteClamp      : Number of levels to clamp down from 255.
+ *                  UINT8 *Src             : Pointer to input image to be clamped.
+ *                  UINT8 *Dst             : Pointer to clamped image.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads the Y pixels from Src, raises low values to BlackClamp
+ *                  and lowers high values to 255-WhiteClamp, and writes to Dst.
+ *
+ *  SPECIAL NOTES : BlackClamp/WhiteClamp are the number of levels to
+ *                  clamp at either end of the range; WhiteClamp is _not_
+ *                  the level to clamp to at the high end of the range.
+ *                  Src and Dst may be the same buffer. U and V are untouched.
+ *
+ ****************************************************************************/
+void ClampLevels_C
+( 
+    POSTPROC_INSTANCE *pbi,
+    INT32  BlackClamp,
+    INT32  WhiteClamp,
+    UINT8 *Src,
+    UINT8 *Dst
+)
+{
+    int i;
+    int row, col;
+    unsigned char clamped[256];
+
+    int    width  = pbi->HFragments*8;
+    int    height = pbi->VFragments*8;
+    UINT8 *SrcPtr  = Src + pbi->ReconYDataOffset;
+    UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
+    UINT32 LineLength = pbi->YStride;
+
+    // set up clamping table so we can avoid ifs while clamping
+    for ( i=0; i<256; i++ )
+    {
+        clamped[i] = (unsigned char)i;
+        if ( i<BlackClamp )
+            clamped[i] = (unsigned char)BlackClamp;
+
+        if ( i>(255-WhiteClamp) )
+            clamped[i] = (unsigned char)(255-WhiteClamp);
+    }
+
+    // clamping is for Y only! Read from Src, write to Dst (they may be the same)
+    for ( row=0 ; row<height; row++ )
+    {
+        for ( col=0; col<width; col++ )
+            DestPtr[col] = clamped[SrcPtr[col]];
+        SrcPtr  += LineLength;
+        DestPtr += LineLength;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c
new file mode 100644
index 00000000..e9604510
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/deblock.c
@@ -0,0 +1,1491 @@
+/****************************************************************************
+ *
+ *   Module Title :     deblock.c
+ *
+ *   Description  :     Post-processing deblocker functions.
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+ *  Header Files
+ ***************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Macros: abs(x) evaluates x more than once - pass no side-effecting arguments
+****************************************************************************/        
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )
+#endif
+
+/****************************************************************************
+*  Exports: deblocking limit tables and the table pointers used by the deblocker
+****************************************************************************/
+UINT32 DeblockLimitValuesVp4[Q_TABLE_SIZE] =   /* limits fall from 30 to 1 along the table */
+{  
+	30, 25, 20, 20, 15, 15, 14, 14,
+    13, 13, 12, 12, 11, 11, 10, 10, 
+    9,  9,  8,  8,  7,  7,  7,  7,
+    6,  6,  6,  6,  5,  5,  5,  5,
+    4,  4,  4,  4,  3,  3,  3,  3,  
+    2,  2,  2,  2,  2,  2,  2,  2,  
+    2,  2,  2,  2,  2,  2,  2,  2,  
+    1,  1,  1,  1,  1,  1,  1,  1 
+};
+
+UINT32 DeblockLimitValuesVp5[Q_TABLE_SIZE] =   /* limits fall from 15 to 0 along the table */
+{  
+	15, 15, 15, 15, 10, 10, 10, 10,
+	10, 10, 10, 10, 10, 9,  8,  8,
+	8,	8,  8,  8,  8,  8,  8,  8,
+	8,	7,  7,  7,  7,  7,  7,  7,	
+	6,  6,  6,  6,  5,  5,  5,  5,	
+	5,  4,  4,  4,  4,  4,  4,  3,	
+	3,  3,  3,  3,  3,  2,  2,  2,	
+    2,  2,  1,  1,  1,  0,  0,  0 
+};
+
+UINT32 DeblockLimitValuesVp6[Q_TABLE_SIZE] =   /* same values as DeblockLimitValuesVp5 */
+{  
+	15, 15, 15, 15, 10, 10, 10, 10,
+	10, 10, 10, 10, 10, 9,  8,  8,
+	8,	8,  8,  8,  8,  8,  8,  8,
+	8,	7,  7,  7,  7,  7,  7,  7,	
+	6,  6,  6,  6,  5,  5,  5,  5,	
+	5,  4,  4,  4,  4,  4,  4,  3,	
+	3,  3,  3,  3,  3,  2,  2,  2,	
+    2,  2,  1,  1,  1,  0,  0,  0 
+};
+
+UINT32 *DCQuantScaleV2;         /* NOTE(review): presumably a DC quantizer scale table selected elsewhere - confirm */
+UINT32 *DCQuantScaleUV;         /* NOTE(review): presumably the chroma counterpart - confirm */
+UINT32 *DCQuantScaleV1;         /* NOTE(review): presumably the older-version counterpart - confirm */
+UINT32 *DeblockLimitValuesV2;   /* limit table read by SetupDeblocker, indexed by pbi->FrameQIndex */
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetupDeblockValueArray_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Owner of the DeblockBoundingValue table.
+ *                  INT32 FLimit           : Deblocking limit value.
+ *
+ *  OUTPUTS       : pbi->DeblockBoundingValue is cleared and refilled.
+ *
+ *  RETURNS       : INT32 *: Pointer to entry 256 (the centre) of the table.
+ *
+ *  FUNCTION      : Builds a table that maps d to d for |d| <= FLimit, tapers
+ *                  back to 0 over the following entries, and is 0 beyond.
+ *
+ *  SPECIAL NOTES : The table is treated as 512 entries long.
+ ****************************************************************************/
+INT32 *SetupDeblockValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit )
+{
+    INT32  Offset;
+    INT32 *Centre = &pbi->DeblockBoundingValue[256];
+
+    // Start from an all-zero table
+    memset ( pbi->DeblockBoundingValue, 0, (512*sizeof(*pbi->DeblockBoundingValue)) );
+
+    // Ramp away from zero up to FLimit, then ramp back down on both sides
+    for ( Offset=0; Offset<FLimit; Offset++ )
+    {
+        Centre[ Offset]        =  Offset;
+        Centre[-Offset]        = -Offset;
+        Centre[ Offset+FLimit] =  FLimit-Offset;
+        Centre[-Offset-FLimit] =  Offset-FLimit;
+    }
+
+    return Centre;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetupDeblocker
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *
+ *  OUTPUTS       : pbi->DeblockValuePtr is set to the freshly built table.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Looks up the limit for pbi->FrameQIndex and builds the
+ *                  bounding-value table with the builder matching the version.
+ *
+ *  SPECIAL NOTES : DeblockLimitValuesV2 must already point at a limit table.
+ ****************************************************************************/
+void SetupDeblocker ( POSTPROC_INSTANCE *pbi )
+{
+    // The limit comes from the same table whichever builder is used
+    INT32 Limit = DeblockLimitValuesV2[pbi->FrameQIndex];
+
+    // Versions below 2 use SetupDeblockValueArray
+    if ( pbi->Vp3VersionNo < 2 )
+    {
+        pbi->DeblockValuePtr = SetupDeblockValueArray ( pbi, Limit );
+        return;
+    }
+
+    // Version 2 and later use the generic builder
+    pbi->DeblockValuePtr = SetupDeblockValueArray_Generic ( pbi, Limit );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockVerticalEdgesInLoopFilteredBand
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Table giving QStep for a pbi->FragQIndex[] value.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the vertical edges between adjacent blocks in a band.
+ *
+ *  SPECIAL NOTES : Variance values for each block are accumulated in
+ *                  pbi->FragmentVariances for later use. Rows that fail the
+ *                  filter test are not written to DesPtr.
+ ****************************************************************************/
+void DeblockVerticalEdgesInLoopFilteredBand
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *SrcPtr, 
+    UINT8 *DesPtr, 
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j, k;
+    INT32  QStep;
+    INT32  FLimit;
+    INT32  p1,p2;
+    INT32  psum;
+    INT32  v[10];
+    INT32  Sum1, Sum2;
+    INT32  Variance1, Variance2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+
+    while ( CurrentFrag < (StartFrag+FragsAcross-1) )
+    {
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+        // Walk down the 8 rows of the edge between CurrentFrag and CurrentFrag+1
+        for( j=0; j<8 ; j++)
+        {
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+            // v[1..4] lie left of the edge, v[5..8] right of it
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+            
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;
+            // Filter only where both sides are flat and the step across the edge is below QStep
+            if( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4];
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3];
+                // Results stay within 0..255, so they are stored through UINT8 rather than INT8
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8) ((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1; 
+                Des[-3] = (UINT8) ((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1; 
+                Des[-2] = (UINT8) ((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1; 
+                Des[-1] = (UINT8) ((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+                
+                psum += v[8] - v[1]; 
+                Des[0] = (UINT8) ((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2]; 
+                Des[+1] =(UINT8) ((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3]; 
+                Des[+2] = (UINT8) ((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4]; 
+                Des[+3] = (UINT8) ((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;                
+        }
+        
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockLoopFilteredBand_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Table giving QStep for a pbi->FragQIndex[] value.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the horizontal edge that runs along row SrcPtr into
+ *                  DesPtr, then filters the vertical edges of the 8 rows above
+ *                  it in place in DesPtr.
+ *  SPECIAL NOTES : Variance values for each block are accumulated in
+ *                  pbi->FragmentVariances for later use.
+ ****************************************************************************/
+void DeblockLoopFilteredBand_C
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *SrcPtr, 
+    UINT8 *DesPtr,
+    UINT32 PlaneLineStep, 
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    UINT32 CurrentFrag=StartFrag;
+    INT32 QStep;
+    INT32 FLimit;
+    UINT8 *Src, *Des;
+    INT32  psum;
+    INT32  v[10];
+    INT32  p1,p2;
+    INT32 w1, w2, w3, w4, w5;
+    INT32  Variance1, Variance2;
+    INT32  Sum1, Sum2;
+
+    w1 = PlaneLineStep;
+    w2 = PlaneLineStep * 2;
+    w3 = PlaneLineStep * 3;
+    w4 = PlaneLineStep * 4;
+    w5 = PlaneLineStep * 5;
+
+    while ( CurrentFrag < StartFrag+FragsAcross )
+    {
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag);
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+        // Walk across the 8 columns of the edge between CurrentFrag and the block below it
+        for ( j=0; j<8; j++ )
+        {
+            v[1] = Src[-w4];
+            v[2] = Src[-w3];
+            v[3] = Src[-w2];
+            v[4] = Src[-w1];
+            v[5] = Src[0];
+            v[6] = Src[+w1];
+            v[7] = Src[+w2];
+            v[8] = Src[+w3];
+            // v[1..4] lie above the edge, v[5..8] below it
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+            
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + FragsAcross] += Variance2;
+            // Filter only where both sides are flat and the step across the edge is below QStep
+            if( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = (abs(Src[-w4] - Src[-w5]) < QStep ) ?  Src[-w5] : Src[-w4];
+                p2 = (abs(Src[+w3] - Src[+w4]) < QStep ) ?  Src[+w4] : Src[+w3];
+                // Results stay within 0..255, so they are stored through UINT8 rather than INT8
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-w4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1; 
+                Des[-w3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1; 
+                Des[-w2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1; 
+                Des[-w1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+                
+                psum += v[8] - v[1]; 
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2]; 
+                Des[+w1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3]; 
+                Des[+w2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4]; 
+                Des[+w3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            else
+            {
+                Des[-w4] = Src[-w4];
+                Des[-w3] = Src[-w3];
+                Des[-w2] = Src[-w2];
+                Des[-w1] = Src[-w1];
+                Des[0]   = Src[0];
+                Des[+w1] = Src[+w1];
+                Des[+w2] = Src[+w2];
+                Des[+w3] = Src[+w3];
+            }
+            Src++;
+            Des++;             
+        }
+        CurrentFrag++;
+    }
+    // Second pass: vertical edges of the 8 rows above DesPtr, read from and written to the output
+    CurrentFrag = StartFrag;
+
+    while ( CurrentFrag < (StartFrag+FragsAcross-1) )
+    {
+        Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag+1);
+        Src = Des;
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
+        FLimit = (QStep * QStep * 3)>>5 ;
+        // Walk down the 8 rows of the edge between CurrentFrag and CurrentFrag+1
+        for ( j=0; j<8 ; j++ )
+        {
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+            // v[1..4] lie left of the edge, v[5..8] right of it
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+            
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;
+            // Same test as the first pass; rows that fail it are left as they are
+            if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4];
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3];
+                
+                /* low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1) */
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1; 
+                Des[-3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1; 
+                Des[-2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1; 
+                Des[-1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+                
+                psum += v[8] - v[1]; 
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2]; 
+                Des[+1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3]; 
+                Des[+2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4]; 
+                Des[+3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;               
+        }
+        
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockVerticalEdgesInNonFilteredBand
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Lookup table, indexed by a block's
+ *                                           pbi->FragQIndex entry, giving the
+ *                                           step size (QStep) from which the
+ *                                           filter thresholds are derived.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filter the vertical edges in a band.
+ *                  Visits the FragsAcross-1 edges between horizontally
+ *                  adjacent blocks and processes 8 rows per edge. For each
+ *                  row the 4 pixels on either side of the edge are measured;
+ *                  when both sides are flat enough and the step across the
+ *                  edge is below QStep, all 8 pixels are replaced by a 9-tap
+ *                  low-pass result. Otherwise only the two pixels touching
+ *                  the edge are adjusted with the old loop filter and the
+ *                  remaining output pixels are not written by this routine.
+ *
+ *  SPECIAL NOTES : Variance values for each block are stored in 
+ *                  pbi->FragmentVariances for later use.
+ *                  v[0] and v[9] are never used by this routine.
+ *                  NOTE(review): the low-pass results are cast to INT8
+ *                  before being stored into the UINT8 output buffer.
+ *
+ ****************************************************************************/
+void DeblockVerticalEdgesInNonFilteredBand
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *SrcPtr, 
+    UINT8 *DesPtr, 
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32 QStep;
+    INT32 FLimit;
+    INT32  psum;
+    INT32  v[10];
+    INT32  p1,p2;
+    INT32  Sum1, Sum2;
+    INT32  Variance1, Variance2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+    
+    while ( CurrentFrag < (StartFrag + FragsAcross-1) )    // one pass per edge between neighbouring blocks
+    {
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);    // first pixel to the right of the edge
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];    // step size of the block right of the edge
+        FLimit = (QStep * QStep * 3)>>5 ;    // flatness threshold derived from QStep
+    
+        for ( j=0; j<8 ; j++ )    // 8 rows per block edge
+        {
+            v[1] = Src[-4];    // v[1..4]: pixels left of the edge
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];    // v[5..8]: pixels right of the edge
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+            
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+            
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));    // sum of squares minus roughly Sum1*Sum1/4
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));    // sum of squares minus roughly Sum2*Sum2/4
+            pbi->FragmentVariances[CurrentFrag] += Variance1;    // accumulated for the left block
+            pbi->FragmentVariances[CurrentFrag + 1] += Variance2;    // accumulated for the right block
+            
+            if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4];    // left padding tap: Src[-5] only if within QStep of Src[-4]
+                p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3];    // right padding tap: Src[+4] only if within QStep of Src[+3]
+                
+                // Low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1); psum is a running window sum that already includes the rounding term
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (INT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1; 
+                Des[-3] = (INT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1; 
+                Des[-2] = (INT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1; 
+                Des[-1] = (INT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+                
+                psum += v[8] - v[1]; 
+                Des[0] = (INT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2]; 
+                Des[+1] =(INT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3]; 
+                Des[+2] = (INT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4]; 
+                Des[+3] = (INT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            else 
+            {
+                // Old loop filter: only the two pixels touching the edge are rewritten
+                INT32 FiltVal;
+                UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];    // table is addressed with signed offsets around VAL_RANGE
+                
+                FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;     
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];        
+                Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+            }
+
+            Src += PlaneLineStep;    // next row
+            Des += PlaneLineStep;                
+        }
+        
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockVerticalEdgesInNonFilteredBandNewFilter
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Lookup table, indexed by
+ *                                           pbi->FrameQIndex, giving the step
+ *                                           size (QStep) from which the
+ *                                           filter thresholds are derived.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filter the vertical edges in a band.
+ *                  Visits the FragsAcross-1 edges between horizontally
+ *                  adjacent blocks and processes 8 rows per edge. When the
+ *                  sum of absolute differences on both sides of the edge is
+ *                  below FLimit and the step across the edge is below QStep,
+ *                  the 8 pixels around the edge are replaced by a 7-tap
+ *                  low-pass result. Otherwise only the two pixels touching
+ *                  the edge are adjusted with the old loop filter.
+ *
+ *  SPECIAL NOTES : Only interior edges are filtered; the right-hand boundary
+ *                  of the plane is not an edge between two blocks and is
+ *                  left untouched. Nothing is done when FragsAcross < 2.
+ *
+ ****************************************************************************/
+void DeblockVerticalEdgesInNonFilteredBandNewFilter
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *SrcPtr, 
+    UINT8 *DesPtr, 
+    UINT32 PlaneLineStep,
+    UINT32 FragsAcross,
+    UINT32 StartFrag,
+    UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32  QStep;
+    INT32  FLimit;
+    INT32  psum;
+    INT32  v[10];
+    INT32  p1,p2;
+    INT32  Sum1, Sum2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag;
+
+    // Both thresholds depend only on the frame level Q index, so they are
+    // computed once for the whole band.
+    QStep = QuantScale[pbi->FrameQIndex];
+
+    // NOTE(review): the other SAD based routine in this file uses
+    // (QStep * 3) >> 2 as its limit; confirm which threshold is intended here.
+    FLimit = (QStep * QStep * 3)>>5;
+
+    // The edge handled in each pass lies between blocks CurrentFrag and
+    // CurrentFrag+1, so the last block of the band must not start a pass.
+    // Written as "CurrentFrag + 1 < end" so that FragsAcross == 0 cannot wrap.
+    for ( CurrentFrag = StartFrag; (CurrentFrag + 1) < (StartFrag + FragsAcross); CurrentFrag++ )
+    {
+        // First pixel to the right of the edge
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag+1);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag+1);
+
+        for ( j=0; j<8; j++ )
+        {
+            // v[0..4] lie left of the edge, v[5..9] right of it
+            v[0] = Src[-5];
+            v[1] = Src[-4];
+            v[2] = Src[-3];
+            v[3] = Src[-2];
+            v[4] = Src[-1];
+            v[5] = Src[0];
+            v[6] = Src[+1];
+            v[7] = Src[+2];
+            v[8] = Src[+3];
+            v[9] = Src[+4];
+
+            // Sum of absolute neighbour differences on each side of the edge
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+                Sum1 += abs ( v[k]-v[k-1] );
+
+            for ( k=5; k<=8; k++ )
+                Sum2 += abs ( v[k]-v[k+1] );
+
+            if ( (Sum1 < FLimit) && (Sum2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = v[0];
+                p2 = v[9];
+
+                // low pass filtering (LPF7: 1 1 1 2 1 1 1)
+                // psum is a running window sum that already holds the
+                // rounding term; every result fits in 0..255, so the
+                // store uses the buffer's own unsigned type.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-4] = (UINT8)((psum + v[1]) >> 3);
+                psum += v[5] - p1;
+                Des[-3] = (UINT8)((psum + v[2]) >> 3);
+                psum += v[6] - p1;
+                Des[-2] = (UINT8)((psum + v[3]) >> 3);
+                psum += v[7] - p1;
+                Des[-1] = (UINT8)((psum + v[4]) >> 3);
+
+                psum += v[8] - v[1];
+                Des[0] =  (UINT8)((psum + v[5]) >> 3);
+                psum += p2 - v[2];
+                Des[+1] = (UINT8)((psum + v[6]) >> 3);
+                psum += p2 - v[3];
+                Des[+2] = (UINT8)((psum + v[7]) >> 3);
+                psum += p2 - v[4];
+                Des[+3] = (UINT8)((psum + v[8]) >> 3);
+            }
+            else
+            {
+                // Old loopfilter: only the two pixels touching the edge change
+                INT32 FiltVal;
+                UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+                FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+                Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+            }
+
+            // next row
+            Src += PlaneLineStep;
+            Des += PlaneLineStep;
+        }
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockNonFilteredBand_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Lookup table, indexed by a block's
+ *                                           pbi->FragQIndex entry, giving the
+ *                                           step size (QStep) from which the
+ *                                           filter thresholds are derived.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filter both horizontal and vertical edge in a band.
+ *                  For every block the horizontal edge running through
+ *                  SrcPtr (4 rows above, 4 rows below) is filtered from
+ *                  SrcPtr into DesPtr. Then, for every block except the
+ *                  first, the vertical edge on the block's left side is
+ *                  filtered in place in the output, in the 8 rows directly
+ *                  above DesPtr.
+ *
+ *  SPECIAL NOTES : Variance values for each block are stored in 
+ *                  pbi->FragmentVariances for later use.
+ *                  Every block in the band is visited exactly once.
+ *
+ ****************************************************************************/
+void DeblockNonFilteredBand_C
+(
+     POSTPROC_INSTANCE *pbi, 
+     UINT8 *SrcPtr, 
+     UINT8 *DesPtr,
+     UINT32 PlaneLineStep, 
+     UINT32 FragsAcross,
+     UINT32 StartFrag,
+     UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32  QStep;
+    INT32  FLimit;
+    INT32  psum;
+    INT32  v[10];
+    INT32  p1,p2;
+    INT32  w1, w2, w3, w4, w5;
+    INT32  Variance1, Variance2;
+    INT32  Sum1, Sum2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+
+    // Byte offsets of 1..5 rows
+    w1 = PlaneLineStep;
+    w2 = PlaneLineStep * 2;
+    w3 = PlaneLineStep * 3;
+    w4 = PlaneLineStep * 4;
+    w5 = PlaneLineStep * 5;
+
+    while ( CurrentFrag < StartFrag+FragsAcross )
+    {
+        //
+        // Horizontal edge: the taps run vertically through the edge, one
+        // column at a time.
+        //
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag);
+        Des = DesPtr + 8*(CurrentFrag-StartFrag);
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
+        FLimit = (QStep * QStep * 3)>>5;
+
+        for ( j=0; j<8; j++ )
+        {
+            // v[1..4] lie above the edge, v[5..8] below it
+            v[1] = Src[-w4];
+            v[2] = Src[-w3];
+            v[3] = Src[-w2];
+            v[4] = Src[-w1];
+            v[5] = Src[  0];
+            v[6] = Src[+w1];
+            v[7] = Src[+w2];
+            v[8] = Src[+w3];
+
+            Variance1 = Variance2 = 0;
+            Sum1 = Sum2 = 0;
+
+            for ( k=1; k<=4; k++ )
+            {
+                Sum1 += v[k];
+                Variance1 += v[k]*v[k];
+            }
+            for ( k=5; k<=8; k++ )
+            {
+                Sum2 += v[k];
+                Variance2 += v[k]*v[k];
+            }
+
+            // Sum of squares minus roughly Sum*Sum/4
+            Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+            Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+            pbi->FragmentVariances[CurrentFrag] += Variance1;
+            pbi->FragmentVariances[CurrentFrag + FragsAcross] += Variance2;
+
+            if ( (Variance1 < FLimit) && (Variance2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                // Padding taps: reach one sample further out only when that
+                // sample is within QStep of its neighbour.
+                p1 = (abs(Src[-w4] - Src[-w5]) < QStep ) ?  Src[-w5] : Src[-w4];
+                p2 = (abs(Src[+w3] - Src[+w4]) < QStep ) ?  Src[+w4] : Src[+w3];
+
+                // low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1)
+                // psum is a running window sum that already holds the
+                // rounding term; the store uses the buffer's unsigned type.
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-w4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                psum += v[5] - p1;
+                Des[-w3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                psum += v[6] - p1;
+                Des[-w2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                psum += v[7] - p1;
+                Des[-w1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                psum += v[8] - v[1];
+                Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                psum += p2 - v[2];
+                Des[+w1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                psum += p2 - v[3];
+                Des[+w2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                psum += p2 - v[4];
+                Des[+w3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+            }
+            else
+            {
+                // Old loopfilter on the two pixels touching the edge; the
+                // other six pixels of the column are copied through.
+                INT32 FiltVal;
+                UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+                FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+                Des[-w1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+                Des[-w4]=Src[-w4];
+                Des[-w3]=Src[-w3];
+                Des[-w2]=Src[-w2];
+                Des[+w1]=Src[+w1];
+                Des[+w2]=Src[+w2];
+                Des[+w3]=Src[+w3];
+            }
+
+            // next column
+            Src++;
+            Des++;
+        }
+
+        //
+        // Finished filtering horizontal edge, vertical edge next...
+        //
+        // The first block of the band has no block to its left, so it has
+        // no vertical edge. It must still advance by exactly one block:
+        // incrementing here as well as at the end of the loop would skip
+        // the second block of the band completely.
+        //
+        if ( CurrentFrag != StartFrag )
+        {
+            // Works in place on the output, in the 8 rows above DesPtr
+            Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag);
+            Src = Des;
+
+            QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
+            FLimit = (QStep * QStep * 3)>>5 ;
+
+            for ( j=0; j<8; j++ )
+            {
+                // v[1..4] lie left of the edge, v[5..8] right of it
+                v[1] = Src[-4];
+                v[2] = Src[-3];
+                v[3] = Src[-2];
+                v[4] = Src[-1];
+                v[5] = Src[0];
+                v[6] = Src[+1];
+                v[7] = Src[+2];
+                v[8] = Src[+3];
+
+                Variance1 = Variance2 = 0;
+                Sum1 = Sum2 = 0;
+
+                for ( k=1; k<=4; k++ )
+                {
+                    Sum1 += v[k];
+                    Variance1 += v[k]*v[k];
+                }
+                for ( k=5; k<=8; k++ )
+                {
+                    Sum2 += v[k];
+                    Variance2 += v[k]*v[k];
+                }
+                Variance1 -= ((Sum1>>1)*((Sum1+1)>>1));
+                Variance2 -= ((Sum2>>1)*((Sum2+1)>>1));
+                pbi->FragmentVariances[CurrentFrag-1] += Variance1;
+                pbi->FragmentVariances[CurrentFrag] += Variance2;
+
+                if ( (Variance1 < FLimit) &&  (Variance2 < FLimit) &&
+                     ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+                {
+                    p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4];
+                    p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3];
+
+                    // low pass filtering (LPF9: 1 1 2 2 4 2 2 1 1)
+                    psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                    Des[-4] = (UINT8)((((psum + v[1]) << 1) - (v[4] - v[5])) >> 4);
+                    psum += v[5] - p1;
+                    Des[-3] = (UINT8)((((psum + v[2]) << 1) - (v[5] - v[6])) >> 4);
+                    psum += v[6] - p1;
+                    Des[-2] = (UINT8)((((psum + v[3]) << 1) - (v[6] - v[7])) >> 4);
+                    psum += v[7] - p1;
+                    Des[-1] = (UINT8)((((psum + v[4]) << 1) + p1 - v[1] - (v[7] - v[8])) >> 4);
+
+                    psum += v[8] - v[1];
+                    Des[0] = (UINT8)((((psum + v[5]) << 1) + (v[1] - v[2]) - v[8] + p2) >> 4);
+                    psum += p2 - v[2];
+                    Des[+1] = (UINT8)((((psum + v[6]) << 1) + (v[2] - v[3])) >> 4);
+                    psum += p2 - v[3];
+                    Des[+2] = (UINT8)((((psum + v[7]) << 1) + (v[3] - v[4])) >> 4);
+                    psum += p2 - v[4];
+                    Des[+3] = (UINT8)((((psum + v[8]) << 1) + (v[4] - v[5])) >> 4);
+                }
+                else
+                {
+                    // Old loop-filter: only the two pixels touching the edge change
+                    INT32 FiltVal;
+                    UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+                    FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;
+                    FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];
+                    Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                    Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+                }
+
+                // next row
+                Src += PlaneLineStep;
+                Des += PlaneLineStep;
+            }
+        }
+
+        CurrentFrag++;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockNonFilteredBandNewFilter_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 PlaneLineStep   : Stride of SrcPtr & DesPtr.
+ *                  UINT32 FragsAcross     : Number of blocks across.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 *QuantScale     : Lookup table, indexed by
+ *                                           pbi->FrameQIndex, giving the step
+ *                                           size (QStep) from which the
+ *                                           filter thresholds are derived.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filter both horizontal and vertical edge in a band.
+ *                  For every block the horizontal edge running through
+ *                  SrcPtr is filtered from SrcPtr into DesPtr. Then, for
+ *                  every block except the first, the vertical edge on the
+ *                  block's left side is filtered in place in the output, in
+ *                  the 8 rows directly above DesPtr.
+ *
+ *  SPECIAL NOTES : For each filtered line the sum of absolute differences
+ *                  (SAD) on each side of the edge, clamped to 255, is
+ *                  accumulated in pbi->FragmentVariances for later use.
+ *                  Uses SAD to determine where to apply the new 
+ *                  7 tap filter.
+ *                  NOTE(review): the low-pass results are cast to INT8
+ *                  before being stored into the UINT8 output buffer.
+ *
+ ****************************************************************************/
+void DeblockNonFilteredBandNewFilter_C
+(
+     POSTPROC_INSTANCE *pbi, 
+     UINT8 *SrcPtr, 
+     UINT8 *DesPtr,
+     UINT32 PlaneLineStep, 
+     UINT32 FragsAcross,
+     UINT32 StartFrag,
+     UINT32 *QuantScale
+)
+{
+    UINT32 j,k;
+    INT32  QStep;
+    INT32  FLimit;
+    INT32  psum;
+    INT32  v[10];
+    INT32  p1,p2;
+    INT32  w1, w2, w3, w4, w5;
+    INT32  Sum1, Sum2;
+    UINT8 *Src, *Des;
+    UINT32 CurrentFrag = StartFrag;
+
+    w1 = PlaneLineStep;    // w1..w5: byte offsets of 1..5 rows
+    w2 = PlaneLineStep * 2;
+    w3 = PlaneLineStep * 3;
+    w4 = PlaneLineStep * 4;
+    w5 = PlaneLineStep * 5;
+
+    QStep = QuantScale[pbi->FrameQIndex];    // one step size for the whole band
+
+    while ( CurrentFrag < (StartFrag + FragsAcross) )
+    {
+        Src = SrcPtr + 8*(CurrentFrag-StartFrag);    // left-most column of the current block
+        Des = DesPtr + 8*(CurrentFrag-StartFrag);
+
+        FLimit = ( QStep * 3 ) >> 2;    // SAD threshold: three quarters of QStep, truncated
+        
+        for ( j=0; j<8; j++ )    // horizontal edge: one column per pass
+        {
+            v[0] = Src[-w5];    // v[0..4]: pixels above the edge
+            v[1] = Src[-w4];
+            v[2] = Src[-w3];
+            v[3] = Src[-w2];
+            v[4] = Src[-w1];
+            v[5] = Src[  0];    // v[5..9]: pixels below the edge
+            v[6] = Src[+w1];
+            v[7] = Src[+w2];
+            v[8] = Src[+w3];
+            v[9] = Src[+w4];
+
+            Sum1 = Sum2 = 0;
+            
+            for ( k=1; k<=4; k++ )
+                Sum1 += abs ( v[k]-v[k-1] );    // neighbour differences across v[0..4]
+            
+            for ( k=5; k<=8; k++ )
+                Sum2 += abs ( v[k]-v[k+1] );    // neighbour differences across v[5..9]
+
+            pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1);    // clamped SAD, block above the edge
+            pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2);    // clamped SAD, block below the edge
+           
+            if ( (Sum1 < FLimit) && (Sum2 < FLimit) &&
+                 ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+            {
+                p1 = v[0];    // padding tap above
+                p2 = v[9];    // padding tap below
+                
+                // Low pass filtering (LPF7: 1 1 1 2 1 1 1); psum is a running window sum that already includes the rounding term
+                psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                Des[-w4] = (INT8)((psum + v[1]) >> 3);
+                psum += v[5] - p1; 
+                Des[-w3] = (INT8)((psum + v[2]) >> 3);
+                psum += v[6] - p1; 
+                Des[-w2] = (INT8)((psum + v[3]) >> 3);
+                psum += v[7] - p1; 
+                Des[-w1] = (INT8)((psum + v[4]) >> 3);
+                
+                psum += v[8] - v[1]; 
+                Des[0] =   (INT8)((psum + v[5]) >> 3);
+                psum += p2 - v[2]; 
+                Des[+w1] = (INT8)((psum + v[6]) >> 3);
+                psum += p2 - v[3]; 
+                Des[+w2] = (INT8)((psum + v[7]) >> 3);
+                psum += p2 - v[4]; 
+                Des[+w3] = (INT8)((psum + v[8]) >> 3);
+            }
+            else 
+            {
+                // Old loopfilter on the two pixels touching the edge; the other six pixels of the column are copied through
+                INT32 FiltVal;
+                UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];    // table is addressed with signed offsets around VAL_RANGE
+                
+                FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;     
+                FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];        
+                Des[-w1] = LimitTable[(INT32)v[4] + FiltVal];
+                Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+                Des[-w4]=Src[-w4];
+                Des[-w3]=Src[-w3];
+                Des[-w2]=Src[-w2];
+                Des[+w1]=Src[+w1];
+                Des[+w2]=Src[+w2];
+                Des[+w3]=Src[+w3];
+            }
+
+            Src++;    // next column
+            Des++;             
+        }
+
+        // Finished filtering horizontal edge, vertical edge next...
+
+        // Skip the first block: it has no block to its left, so only advance to the next one
+        if ( CurrentFrag==StartFrag )
+            CurrentFrag++;
+        else
+        {
+            Des = DesPtr - 8*PlaneLineStep + 8*(CurrentFrag-StartFrag);    // 8 rows above DesPtr, at the block's left edge
+            Src = Des;    // this pass reads and writes the output image in place
+            
+            FLimit = (QStep * 3) >> 2;
+            
+            for ( j=0; j<8; j++ )    // vertical edge: one row per pass
+            {
+                v[0] = Src[-5];    // v[0..4]: pixels left of the edge
+                v[1] = Src[-4];
+                v[2] = Src[-3];
+                v[3] = Src[-2];
+                v[4] = Src[-1];
+                v[5] = Src[0];    // v[5..9]: pixels right of the edge
+                v[6] = Src[+1];
+                v[7] = Src[+2];
+                v[8] = Src[+3];
+                v[9] = Src[+4];
+                
+                Sum1 = Sum2 = 0;
+                
+                for ( k=1; k<=4; k++ )
+                    Sum1 += abs ( v[k]-v[k-1] );
+                
+                for ( k=5; k<=8; k++ )
+                    Sum2 += abs ( v[k]-v[k+1] );
+                
+                pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1);    // clamped SAD, block left of the edge
+                pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2);    // clamped SAD, block right of the edge
+                
+                if ( (Sum1 < FLimit) && (Sum2 < FLimit) &&
+                     ((v[5] - v[4]) < QStep) && ((v[4] - v[5]) < QStep) )
+                {
+                    p1 = v[0];
+                    p2 = v[9];
+                    
+                    // Low pass filtering (LPF7: 1 1 1 2 1 1 1), same running-sum scheme as the horizontal pass
+                    psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4;
+                    Des[-4] = (INT8)((psum + v[1]) >> 3);
+                    psum += v[5] - p1; 
+                    Des[-3] = (INT8)((psum + v[2]) >> 3);
+                    psum += v[6] - p1; 
+                    Des[-2] = (INT8)((psum + v[3]) >> 3);
+                    psum += v[7] - p1; 
+                    Des[-1] = (INT8)((psum + v[4]) >> 3);
+                    
+                    psum += v[8] - v[1]; 
+                    Des[0] =  (INT8)((psum + v[5]) >> 3);
+                    psum += p2 - v[2]; 
+                    Des[+1] = (INT8)((psum + v[6]) >> 3);
+                    psum += p2 - v[3]; 
+                    Des[+2] = (INT8)((psum + v[7]) >> 3);
+                    psum += p2 - v[4]; 
+                    Des[+3] = (INT8)((psum + v[8]) >> 3);
+                }
+                else 
+                {
+                    // Old loopfilter: only the two pixels touching the edge are rewritten
+                    INT32 FiltVal;
+                    UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+                    
+                    FiltVal =  v[3] -  v[4] * 3 + v[5] * 3 - v[6] ;     
+                    FiltVal = pbi->DeblockValuePtr[(FiltVal + 4) >> 3];        
+                    Des[-1] = LimitTable[(INT32)v[4] + FiltVal];
+                    Des[ 0] = LimitTable[(INT32)v[5] - FiltVal];
+                }
+
+                Src += PlaneLineStep;    // next row
+                Des += PlaneLineStep;               
+            }
+            CurrentFrag++;    // the only increment on this path, so each block is visited once
+        }
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockPlane
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi   : Pointer to post-processor instance.
+ *                  UINT8 *SourceBuffer      : Pointer to input image.
+ *                  UINT8 *DestinationBuffer : Pointer to output image.
+ *                  UINT32 Channel           : Whether the Y, U or V plane:
+ *                                             0 selects Y, 1 selects U and
+ *                                             any other value selects V.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies de-blocking filters to an image plane Y, U or V.
+ *                  The first four rows of the plane are copied unfiltered.
+ *                  Each following band of 8 rows is handed to the band
+ *                  filter together with the block number of the band above
+ *                  it. Finally rows 4..7 of the last band are copied and
+ *                  the vertical edges of the last band are filtered.
+ *
+ *  SPECIAL NOTES : The filter routines are chosen from pbi->Vp3VersionNo.
+ *                  For version 2 and later the quantiser scale table is
+ *                  chosen per plane (luma for channel 0, chroma for every
+ *                  other channel); earlier versions use one table.
+ *
+ ****************************************************************************/
+void DeblockPlane
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *SourceBuffer, 
+    UINT8 *DestinationBuffer, 
+    UINT32 Channel 
+)
+{
+    
+    UINT32 i, j, k;
+    UINT32 PixelIndex;
+    
+    UINT32 FragsDown = 0;
+    UINT32 FragsAcross = 0;
+    UINT32 StartFrag = 0;
+    UINT32 PlaneLineStep = 0;
+    UINT8 *SrcPtr = 0, *DesPtr = 0;
+    UINT32 *QuantScale = 0;
+
+    typedef void (*ApplyFilterToBand) (xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+
+    ApplyFilterToBand DeblockBand;
+    ApplyFilterToBand DeblockVerticalEdgesInBand;
+    
+    // Pick the pair of band filters that matches the bitstream version
+    if ( pbi->Vp3VersionNo >= 2 ) 
+    { 
+        DeblockBand = DeblockNonFilteredBand;
+        DeblockVerticalEdgesInBand = DeblockVerticalEdgesInNonFilteredBand;
+    }
+    else
+    {
+        DeblockBand = DeblockLoopFilteredBand;
+        DeblockVerticalEdgesInBand = DeblockVerticalEdgesInLoopFilteredBand;
+    }
+
+    switch( Channel )
+    {
+    case 0:
+        // Get the parameters
+        PlaneLineStep = pbi->YStride; 
+        FragsAcross   = pbi->HFragments;
+        FragsDown     = pbi->VFragments;
+        StartFrag     = 0;
+        PixelIndex    = pbi->ReconYDataOffset;
+        SrcPtr        = &SourceBuffer[PixelIndex];
+        DesPtr        = &DestinationBuffer[PixelIndex];
+        break;
+    
+    case 1:
+        // Get the parameters
+        PlaneLineStep = pbi->UVStride;    
+        FragsAcross   = pbi->HFragments / 2;
+        FragsDown     = pbi->VFragments / 2;
+        StartFrag     = pbi->YPlaneFragments;
+        PixelIndex    = pbi->ReconUDataOffset;
+        SrcPtr        = &SourceBuffer[PixelIndex];
+        DesPtr        = &DestinationBuffer[PixelIndex];
+        break;
+
+    default:
+        // Get the parameters
+        PlaneLineStep = pbi->UVStride;    
+        FragsAcross   = pbi->HFragments / 2;
+        FragsDown     = pbi->VFragments / 2;
+        StartFrag     = pbi->YPlaneFragments + pbi->UVPlaneFragments;
+        PixelIndex    = pbi->ReconVDataOffset;
+        SrcPtr        = &SourceBuffer[PixelIndex];
+        DesPtr        = &DestinationBuffer[PixelIndex];
+        break;
+    }
+
+    if ( pbi->Vp3VersionNo >= 2 )
+    {
+        // The parameter switch above treats every channel other than 0 and
+        // 1 as the V plane, so the table selection must cover the same
+        // range; otherwise QuantScale would stay null for Channel > 2 and
+        // be dereferenced by the band filters.
+        switch ( Channel )
+        {
+        case 0:
+            QuantScale = DCQuantScaleV2;
+            break;
+        default:
+            QuantScale = DCQuantScaleUV;
+            break;
+        }
+    }
+    else
+    {
+        QuantScale = DCQuantScaleV1;
+    }
+
+    // Rows 0..3 of the plane are copied through unfiltered
+    for ( i=0; i<4; i++ )
+        for ( j=0; j<PlaneLineStep; j++ )
+            DesPtr[i*PlaneLineStep + j] = SrcPtr[i*PlaneLineStep + j];
+
+    // loop to last band
+    k = 1;
+    while ( k < FragsDown )
+    {
+        // Step to the top row of band k; StartFrag still names the first
+        // block of the band above it.
+        SrcPtr += 8*PlaneLineStep;
+        DesPtr += 8*PlaneLineStep;
+
+        // Filter both the horizontal and vertical block edges inside the band
+        DeblockBand ( pbi, 
+                      SrcPtr, 
+                      DesPtr, 
+                      PlaneLineStep, 
+                      FragsAcross, 
+                      StartFrag,
+                      QuantScale );
+        
+        // Move on...
+        StartFrag += FragsAcross;
+        k++;   
+    }
+    
+    // The Last band: rows 4..7 are copied through, then only its vertical
+    // edges are filtered because there is no band below it.
+    for ( i=0; i<4; i++ )
+        for ( j=0; j<PlaneLineStep; j++ )
+            DesPtr[(i+4)*PlaneLineStep + j] = SrcPtr[(i+4)*PlaneLineStep + j];
+
+    DeblockVerticalEdgesInBand ( pbi,
+                                 SrcPtr,
+                                 DesPtr, 
+                                 PlaneLineStep, 
+                                 FragsAcross, 
+                                 StartFrag,
+                                 QuantScale );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockPlaneNew
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT32 PlaneLineStep   : Stride for the plane.
+ *                  UINT32 StartFrag       : Number of first block. 
+ *                  UINT32 FragsAcross     : Number of blocks horizontally.
+ *                  UINT32 FragsDown       : Number of blocks vertically.
+ *                  UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DesPtr          : Pointer to output image.
+ *                  UINT32 *QuantScale     : Passed unchanged to the band
+ *                                           filters.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies new de-blocking filters to an image plane Y, U or V.
+ *                  Copies the first four rows, runs the band filter once for
+ *                  every band after the first, copies rows 4..7 of the last
+ *                  band and finally filters the vertical edges of that band.
+ *
+ *  SPECIAL NOTES : Uses the new de-blocking filter.
+ *
+ ****************************************************************************/
+void DeblockPlaneNew
+(
+    POSTPROC_INSTANCE *pbi,
+    UINT32  PlaneLineStep,
+    UINT32  StartFrag,
+    UINT32  FragsAcross,
+    UINT32  FragsDown,
+    UINT8  *SrcPtr,
+    UINT8  *DesPtr,
+    UINT32 *QuantScale
+)
+{
+    typedef void (*ApplyFilterToBand) (xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+
+    ApplyFilterToBand FilterBand          = DeblockNonFilteredBandNewFilter;
+    ApplyFilterToBand FilterLastBandEdges = DeblockVerticalEdgesInNonFilteredBandNewFilter;
+    UINT32 Row;
+    UINT32 Band;
+
+    // Rows 0..3 of the plane are passed through untouched
+    for ( Row = 0; Row < 4; Row++ )
+        memcpy ( DesPtr + Row*PlaneLineStep, SrcPtr + Row*PlaneLineStep, PlaneLineStep );
+
+    // Every band after the first: step down 8 rows, then filter the
+    // horizontal and vertical block edges inside the band
+    for ( Band = 1; Band < FragsDown; Band++ )
+    {
+        SrcPtr += 8*PlaneLineStep;
+        DesPtr += 8*PlaneLineStep;
+
+        FilterBand ( pbi, SrcPtr, DesPtr, PlaneLineStep, FragsAcross, StartFrag, QuantScale );
+
+        StartFrag += FragsAcross;
+    }
+
+    // Last band: rows 4..7 are passed through untouched ...
+    for ( Row = 4; Row < 8; Row++ )
+        memcpy ( DesPtr + Row*PlaneLineStep, SrcPtr + Row*PlaneLineStep, PlaneLineStep );
+
+    // ... and only its vertical edges are filtered
+    FilterLastBandEdges ( pbi, SrcPtr, DesPtr, PlaneLineStep, FragsAcross, StartFrag, QuantScale );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeblockFrame
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi   : Pointer to post-processor instance.
+ *                  UINT8 *SourceBuffer      : Pointer to input frame.
+ *                  UINT8 *DestinationBuffer : Pointer to output deblocked frame.
+ *
+ *  OUTPUTS       : pbi->FragmentVariances is zeroed here before the planes are processed.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-blocks the Y, U and V planes of a frame, one plane at a time.
+ *
+ *  SPECIAL NOTES : DeblockPlaneNew is used only in _WIN32 builds with Vp3VersionNo >= 5;
+ *                  every other case goes through DeblockPlane.
+ ****************************************************************************/
+void DeblockFrame ( POSTPROC_INSTANCE *pbi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer )
+{  
+    // Zero one INT32 variance accumulator per fragment (UnitFragments of them)
+    memset ( pbi->FragmentVariances, 0 , pbi->UnitFragments*sizeof(INT32) );
+
+    SetupDeblocker(pbi);
+    
+#if defined(_WIN32) 
+    if ( pbi->Vp3VersionNo >= 5 ) 
+	{
+		// Y plane: full fragment grid, StartFrag 0
+		DeblockPlaneNew ( pbi, 
+  			              pbi->YStride,
+			              0,
+			              pbi->HFragments,
+			              pbi->VFragments,
+			              &SourceBuffer[pbi->ReconYDataOffset],
+			              &DestinationBuffer[pbi->ReconYDataOffset],
+			              DCQuantScaleV2 );
+        // U plane: half-size grid. NOTE(review): StartFrag is 0 here, DeblockFrameInterlaced passes pbi->YPlaneFragments - confirm intended
+		DeblockPlaneNew ( pbi, 
+			              pbi->UVStride,
+			              0,
+			              pbi->HFragments / 2,
+			              pbi->VFragments / 2,
+			              &SourceBuffer[pbi->ReconUDataOffset],
+			              &DestinationBuffer[pbi->ReconUDataOffset],
+			              DCQuantScaleUV );
+        // V plane: half-size grid. NOTE(review): StartFrag is 0 here as well - confirm intended
+		DeblockPlaneNew ( pbi, 
+			              pbi->UVStride,
+			              0,
+			              pbi->HFragments / 2,
+			              pbi->VFragments / 2,
+			              &SourceBuffer[pbi->ReconVDataOffset],
+			              &DestinationBuffer[pbi->ReconVDataOffset],
+			              DCQuantScaleUV );
+	}
+	else
+#endif
+	{
+		DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 0 ); // Y
+		DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 1 ); // U
+		DeblockPlane ( pbi, SourceBuffer, DestinationBuffer, 2 ); // V
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeblockFrameInterlaced
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi   : Pointer to post-processor instance.
+ *                  UINT8 *SourceBuffer      : Pointer to input frame.
+ *                  UINT8 *DestinationBuffer : Pointer to output deblocked frame.
+ *
+ *  OUTPUTS       : pbi->FragmentVariances contents are zeroed before processing.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-blocks a frame with the Y plane handled as two passes of
+ *                  alternate lines (stride doubled), followed by the U and V planes.
+ *  SPECIAL NOTES : pbi->FragmentVariances is moved forward for the second Y pass
+ *                  and put back afterwards; always uses DeblockPlaneNew.
+ ****************************************************************************/
+void DeblockFrameInterlaced ( POSTPROC_INSTANCE *pbi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer )
+{
+	INT32 *FragVarPtr;  
+	
+	SetupDeblocker ( pbi );
+
+	// Y Plane: remember the variance array base, then zero all UnitFragments entries
+	FragVarPtr = pbi->FragmentVariances;
+	memset ( FragVarPtr, 0, pbi->UnitFragments*sizeof(INT32) );
+	// First pass: starts at line 0 and steps two lines at a time, VFragments/2 bands
+    DeblockPlaneNew ( pbi, 
+		              pbi->YStride*2,
+		              0,
+		              pbi->HFragments,
+		              pbi->VFragments/2,
+		              &SourceBuffer[pbi->ReconYDataOffset],
+		              &DestinationBuffer[pbi->ReconYDataOffset],
+		              DCQuantScaleV2 );
+
+	pbi->FragmentVariances = pbi->FragmentVariances + pbi->HFragments*pbi->VFragments/2;
+	// Second pass: starts at line 1; StartFrag 0 is relative to the pointer advanced just above
+    DeblockPlaneNew ( pbi, 
+		              pbi->YStride*2,
+		              0,
+		              pbi->HFragments,
+		              pbi->VFragments/2,
+		              &SourceBuffer[pbi->ReconYDataOffset+pbi->YStride],
+		              &DestinationBuffer[pbi->ReconYDataOffset+pbi->YStride],
+		              DCQuantScaleV2 );
+
+	// Restore the FragmentVariances pointer in PBI to its original base
+	pbi->FragmentVariances = FragVarPtr;
+
+	// U Plane: single pass, fragments numbered from YPlaneFragments
+	DeblockPlaneNew ( pbi, 
+		              pbi->UVStride,
+		              pbi->YPlaneFragments,
+		              pbi->HFragments / 2,
+		              pbi->VFragments / 2,
+		              &SourceBuffer[pbi->ReconUDataOffset],
+		              &DestinationBuffer[pbi->ReconUDataOffset],
+		              DCQuantScaleUV );
+	// V Plane: single pass, fragments numbered from YPlaneFragments + UVPlaneFragments
+	DeblockPlaneNew ( pbi, 
+		              pbi->UVStride,
+		              pbi->YPlaneFragments + pbi->UVPlaneFragments,
+		              pbi->HFragments / 2,
+		              pbi->VFragments / 2,
+		              &SourceBuffer[pbi->ReconVDataOffset],
+		              &DestinationBuffer[pbi->ReconVDataOffset],
+		              DCQuantScaleUV );
+    return;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c
new file mode 100644
index 00000000..7c970d1a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/dering.c
@@ -0,0 +1,1166 @@
+/****************************************************************************
+ *
+ *   Module Title :     Dering.c
+ *
+ *   Description  :     Post-processing de-ringing filter routines.
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+#include "stdlib.h"    /* to get abs() */
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )    /* argument parenthesised; x is evaluated more than once */
+#endif
+/* Clamp: saturates val to the range 0..255 (val is evaluated more than once) */
+#define Clamp(val)  ( (val)<0 ? 0 : ((val)>255 ? 255 : (val)) )
+
+/****************************************************************************
+*  Exported Global Variables
+****************************************************************************/
+UINT32 DeringModifierV1[Q_TABLE_SIZE];     /* no initialiser given in this file */
+/* DeringModifierV2 / V3: DeringFrame selects one of these tables as QuantScale from Vp3VersionNo */
+/*const*/ UINT32 DeringModifierV2[Q_TABLE_SIZE] =
+{
+    9,  9,  8,  8,  7,  7,  7,  7,
+    6,  6,  6,  6,  6,  6,  6,  6, 
+    6,  6,  6,  6,  6,  6,  6,  6,
+    5,  5,  5,  5,  5,  5,  5,  5,
+    5,  5,  5,  5,  5,  5,  5,  5,  
+    4,  4,  4,  4,  4,  4,  4,  4,  
+    4,  4,  4,  4,  4,  4,  4,  4,  
+    3,  3,  3,  3,  2,  2,  2,  2 
+};
+
+/*const*/ UINT32 DeringModifierV3[Q_TABLE_SIZE] =
+{
+    9,  9,  9,  9,  8,  8,  8,  8,
+    7,  7,  7,  7,  7,  7,  7,  7, 
+    6,  6,  6,  6,  6,  6,  6,  6,
+    6,  6,  6,  6,  6,  6,  6,  6,
+    6,  6,  6,  6,  6,  6,  6,  6,
+    6,  6,  5,  5,  5,  5,  5,  5,  
+    4,  4,  4,  4,  3,  3,  3,  3,  
+    2,  2,  2,  0,  0,  0,  0,  0 
+};
+/* SharpenModifier: weight the de-ring filters substitute when a computed tap weight is below -64 */
+/*const*/ INT32 SharpenModifier[Q_TABLE_SIZE] =
+{  
+    -12, -11, -10, -10,  -9,  -9,  -9,  -9,
+     -6,  -6,  -6,  -6,  -6,  -6,  -6,  -6, 
+     -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,
+     -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
+     -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
+      0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlockStrong_C
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (NOT USED).
+ *                  const UINT8 *SrcPtr          : Pointer to top-left pixel of the 8x8 input block.
+ *                  UINT8 *DstPtr                : Pointer to top-left pixel of the 8x8 output block.
+ *                  const INT32 Pitch            : Stride of SrcPtr & DstPtr.
+ *                  UINT32 FragQIndex            : Index into QuantScale and SharpenModifier.
+ *                  UINT32 *QuantScale           : Table that supplies QValue for FragQIndex.
+ *
+ *  OUTPUTS       : 8x8 filtered pixels written at DstPtr.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a strong de-ringing filter to a block.
+ *
+ *  SPECIAL NOTES : Reads one pixel beyond the block on every side. Loop and row indices
+ *                  are signed so that negative offsets remain valid with 64-bit pointers.
+ ****************************************************************************/
+void DeringBlockStrong_C
+(
+   const POSTPROC_INSTANCE *pbi,
+   const UINT8 *SrcPtr,
+   UINT8 *DstPtr,
+   const INT32 Pitch,
+   UINT32 FragQIndex,
+   UINT32 *QuantScale
+)
+{
+    int B;
+    int al;
+    int ar;
+    int au;
+    int ad;
+    int atot;
+    int High;
+    int Low;
+    int TmpMod;
+    int newVal;
+    short UDMod[72];    /* 9 rows x 8 columns: weights between vertically adjacent pixels   */
+    short LRMod[72];    /* 8 rows x 9 columns: weights between horizontally adjacent pixels */
+    int j,k;            /* signed: j-Pitch and j-1 are negative offsets                      */
+
+    unsigned char p;
+    unsigned char pl;
+    unsigned char pr;
+    unsigned char pu;
+    unsigned char pd;
+
+    int rowOffset = 0;  /* signed: rowOffset+0-1 is -1 on the first row */
+    unsigned int round = (1<<6);
+    unsigned int QValue = QuantScale[FragQIndex];
+    int Sharpen = SharpenModifier[FragQIndex];
+
+    const unsigned char *Src     = SrcPtr;
+    const unsigned char *curRow  = SrcPtr;
+    const unsigned char *lastRow = SrcPtr-Pitch;
+    const unsigned char *nextRow = SrcPtr+Pitch;
+    unsigned char *dstRow        = DstPtr;
+
+    (void) pbi;
+    Low  = 0;
+    High = 3 * QValue;
+
+    if ( High>32 )
+        High = 32;
+
+    /* Up/down weights: row k compares each pixel with the one directly above it */
+    for ( k=0; k<9; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            TmpMod = 32 + QValue - (abs(Src[j]-Src[j-Pitch]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < Low )
+                TmpMod = Low;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            UDMod[k*8+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    Src = SrcPtr;
+    /* Left/right weights: column j compares each pixel with the one to its left */
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<9; j++ )
+        {
+            TmpMod = 32 + QValue - (abs(Src[j]-Src[j-1]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < Low )
+                TmpMod = Low;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            LRMod[k*9+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    for ( k=0; k<8; k++ )
+    {
+        // This function may be called with the same buffer
+        // for source and destination. To keep the C and
+        // the MMX versions producing consistent results,
+        // the eight pixels of a row are first stored in
+        // an intermediate buffer and only written to the
+        // destination (i.e. possibly over the source)
+        // once the whole row has been computed.
+
+        int newPixel[8];
+
+        // column 0
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+0];
+
+        pl = curRow[rowOffset+0-1];
+        al = LRMod[k*9+0];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+0];
+        au = UDMod[k*8+0];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+0];
+        ad = UDMod[(k+1)*8+0];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+0+1];
+        ar = LRMod[k*9+0+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[0] = Clamp( newVal );
+
+        // column 1
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+1];
+
+        pl = curRow[rowOffset+1-1];
+        al = LRMod[k*9+1];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+1];
+        au = UDMod[k*8+1];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+1];
+        ad = UDMod[(k+1)*8+1];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+1+1];
+        ar = LRMod[k*9+1+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[1] = Clamp( newVal );
+
+        // column 2
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+2];
+
+        pl = curRow[rowOffset+2-1];
+        al = LRMod[k*9+2];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+2];
+        au = UDMod[k*8+2];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+2];
+        ad = UDMod[(k+1)*8+2];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+2+1];
+        ar = LRMod[k*9+2+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[2] = Clamp( newVal );
+
+        // column 3
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+3];
+
+        pl = curRow[rowOffset+3-1];
+        al = LRMod[k*9+3];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+3];
+        au = UDMod[k*8+3];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+3];
+        ad = UDMod[(k+1)*8+3];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+3+1];
+        ar = LRMod[k*9+3+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[3] = Clamp( newVal );
+
+        // column 4
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+4];
+
+        pl = curRow[rowOffset+4-1];
+        al = LRMod[k*9+4];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+4];
+        au = UDMod[k*8+4];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+4];
+        ad = UDMod[(k+1)*8+4];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+4+1];
+        ar = LRMod[k*9+4+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[4] = Clamp( newVal );
+
+        // column 5
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+5];
+
+        pl = curRow[rowOffset+5-1];
+        al = LRMod[k*9+5];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+5];
+        au = UDMod[k*8+5];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+5];
+        ad = UDMod[(k+1)*8+5];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+5+1];
+        ar = LRMod[k*9+5+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[5] = Clamp( newVal );
+
+        // column 6
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+6];
+
+        pl = curRow[rowOffset+6-1];
+        al = LRMod[k*9+6];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+6];
+        au = UDMod[k*8+6];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+6];
+        ad = UDMod[(k+1)*8+6];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+6+1];
+        ar = LRMod[k*9+6+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[6] = Clamp( newVal );
+
+        // column 7
+        atot = 128;
+        B = round;
+        p = curRow[rowOffset+7];
+
+        pl = curRow[rowOffset+7-1];
+        al = LRMod[k*9+7];
+        atot -= al;
+        B += al * pl;
+
+        pu = lastRow[rowOffset+7];
+        au = UDMod[k*8+7];
+        atot -= au;
+        B += au * pu;
+
+        pd = nextRow[rowOffset+7];
+        ad = UDMod[(k+1)*8+7];
+        atot -= ad;
+        B += ad * pd;
+
+        pr = curRow[rowOffset+7+1];
+        ar = LRMod[k*9+7+1];
+        atot -= ar;
+        B += ar * pr;
+
+        newVal = ( atot * p + B) >> 7;
+
+        newPixel[7] = Clamp( newVal );
+
+        dstRow[rowOffset+0] = (UINT8)newPixel[0];
+        dstRow[rowOffset+1] = (UINT8)newPixel[1];
+        dstRow[rowOffset+2] = (UINT8)newPixel[2];
+        dstRow[rowOffset+3] = (UINT8)newPixel[3];
+        dstRow[rowOffset+4] = (UINT8)newPixel[4];
+        dstRow[rowOffset+5] = (UINT8)newPixel[5];
+        dstRow[rowOffset+6] = (UINT8)newPixel[6];
+        dstRow[rowOffset+7] = (UINT8)newPixel[7];
+
+        rowOffset += Pitch;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlockWeak_C
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (NOT USED).
+ *                  const UINT8 *SrcPtr          : Pointer to top-left pixel of the 8x8 input block.
+ *                  UINT8 *DstPtr                : Pointer to top-left pixel of the 8x8 output block.
+ *                  const INT32 Pitch            : Stride of SrcPtr & DstPtr.
+ *                  UINT32 FragQIndex            : Index into QuantScale and SharpenModifier.
+ *                  UINT32 *QuantScale           : Table that supplies QValue for FragQIndex.
+ *
+ *  OUTPUTS       : 8x8 filtered pixels written at DstPtr.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a weak de-ringing filter to a block.
+ *
+ *  SPECIAL NOTES : Reads one pixel beyond the block on every side. Each pixel is written
+ *                  as soon as it is computed (no row buffer). Indices are signed.
+ ****************************************************************************/
+void DeringBlockWeak_C
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+    int B;
+    int al;
+    int ar;
+    int au;
+    int ad;
+    int atot;
+    int High;
+    int Low;
+    int newVal;
+    int TmpMod;
+    short UDMod[72];    /* 9 rows x 8 columns: weights between vertically adjacent pixels   */
+    short LRMod[72];    /* 8 rows x 9 columns: weights between horizontally adjacent pixels */
+    int j, k;           /* signed: j-Pitch and j-1 are negative offsets                      */
+    unsigned char p;
+    unsigned char pl;
+    unsigned char pr;
+    unsigned char pu;
+    unsigned char pd;
+
+    int rowOffset = 0;  /* signed: rowOffset+j-1 is -1 for the first pixel */
+    unsigned int round = (1<<6);
+    unsigned int QValue = QuantScale[FragQIndex];
+    int Sharpen = SharpenModifier[FragQIndex];
+    const unsigned char *Src     = SrcPtr;
+    const unsigned char *curRow  = SrcPtr;
+    const unsigned char *lastRow = SrcPtr-Pitch;
+    const unsigned char *nextRow = SrcPtr+Pitch;
+    unsigned char *dstRow        = DstPtr;
+
+    (void) pbi;
+
+    Low  = 0;
+    High = 3 * QValue;
+
+    if ( High>24 )
+        High = 24;
+
+    /* Up/down weights: the pixel difference counts double compared with the strong filter */
+    for ( k=0; k<9; k++ )
+    {
+        for ( j=0; j<8; j++ )
+        {
+            TmpMod = 32 + QValue - 2*(abs(Src[j]-Src[j-Pitch]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < Low )
+                TmpMod = Low;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            UDMod[k*8+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    Src = SrcPtr;
+    /* Left/right weights: column j compares each pixel with the one to its left */
+    for ( k=0; k<8; k++ )
+    {
+        for ( j=0; j<9; j++ )
+        {
+            TmpMod = 32 + QValue - 2*(abs(Src[j]-Src[j-1]));
+
+            if ( TmpMod < -64 )
+                TmpMod = Sharpen;
+            else if ( TmpMod < Low )
+                TmpMod = Low;
+            else if ( TmpMod > High )
+                TmpMod = High;
+
+            LRMod[k*9+j] = (INT16)TmpMod;
+        }
+        Src += Pitch;
+    }
+
+    for ( k=0; k<8; k++ )
+    {
+        // one output row per iteration
+        for ( j=0; j<8; j++ )
+        {
+            // column j: centre weight is 128 minus the four neighbour weights
+            atot = 128;
+            B = round;
+            p = curRow[rowOffset+j];
+
+            pl = curRow[rowOffset+j-1];
+            al = LRMod[k*9+j];
+            atot -= al;
+            B += al * pl;
+
+            pu = lastRow[rowOffset+j];
+            au = UDMod[k*8+j];
+            atot -= au;
+            B += au * pu;
+
+            pd = nextRow[rowOffset+j];
+            ad = UDMod[(k+1)*8+j];
+            atot -= ad;
+            B += ad * pd;
+
+            pr = curRow[rowOffset+j+1];
+            ar = LRMod[k*9+j+1];
+            atot -= ar;
+            B += ar * pr;
+
+            newVal = ( atot * p + B) >> 7;
+
+            dstRow[ rowOffset+j] = (UINT8) Clamp( newVal );
+        }
+
+        rowOffset += Pitch;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlock
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  const UINT8 *SrcPtr          : Pointer to top-left pixel of the 8x8 input block.
+ *                  UINT8 *DstPtr                : Pointer to top-left pixel of the 8x8 output block.
+ *                  const INT32 Pitch            : Stride of SrcPtr & DstPtr.
+ *                  UINT32 FragQIndex            : Index into QuantScale and SharpenModifier.
+ *                  const UINT32 *QuantScale     : Table that supplies QValue for FragQIndex.
+ *                  UINT32 Variance              : Block variance; selects Slope (see notes).
+ *  OUTPUTS       : 8x8 filtered pixels written at DstPtr.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a de-ringing filter to a block using all 8 neighbours of each pixel.
+ *
+ *  SPECIAL NOTES : Slope is 8 for 2048 < Variance <= 32768 and 4 otherwise. QValue becomes
+ *                  PostProcessingLevel-100 when PostProcessingLevel > 100. Indices are signed.
+ ****************************************************************************/
+void DeringBlock
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    const UINT32 *QuantScale,
+    UINT32 Variance
+)
+{
+    int B;
+    int atot;
+    int newVal;
+    int High;
+    int Low;
+    int TmpMod;
+    int N[8];   // neighbors
+    int j, k, l;    // signed: j-Pitch-1 and j-1 are negative offsets
+    unsigned int QValue = QuantScale[FragQIndex];
+
+    int Slope = 4;
+    unsigned int round = (1<<7);
+    const unsigned char *srcRow = SrcPtr;
+    unsigned char *dstRow = DstPtr;
+    int Sharpen = SharpenModifier[FragQIndex];
+
+    if ( pbi->PostProcessingLevel > 100 )
+        QValue = pbi->PostProcessingLevel - 100;
+
+    if ( Variance > 32768)
+        Slope = 4;
+    else if (Variance > 2048)
+        Slope = 8;
+
+    Low  = 0;
+    High = 3 * QValue;
+
+    if ( High > 32 )
+        High = 32;
+
+    for ( k=0; k<8; k++ )
+    {
+        // one output row per iteration
+        for ( j=0; j<8; j++ )
+        {
+            // set up 8 neighbors of pixel srcRow[j]
+            N[0] = srcRow[j-Pitch-1];
+            N[1] = srcRow[j-Pitch  ];
+            N[2] = srcRow[j-Pitch+1];
+            N[3] = srcRow[j      -1];
+            N[4] = srcRow[j      +1];
+            N[5] = srcRow[j+Pitch-1];
+            N[6] = srcRow[j+Pitch  ];
+            N[7] = srcRow[j+Pitch+1];
+
+            // centre weight is 256 minus the eight neighbour weights
+            atot = 256;
+            B = round;
+
+            for ( l=0; l<8; l++ )
+            {
+                TmpMod = 32 + QValue - (Slope *(abs(srcRow[j]-N[l])) >> 2);
+
+                if ( TmpMod < -64 )
+                    TmpMod = Sharpen;
+                else if ( TmpMod < Low )
+                    TmpMod = Low;
+                else if ( TmpMod > High )
+                    TmpMod = High;
+
+                atot -= TmpMod;
+                B += TmpMod * N[l];
+            }
+
+            newVal = ( atot * srcRow[j] + B) >> 8;
+
+            dstRow[j] = (UINT8) Clamp( newVal );
+        }
+
+        dstRow += Pitch;
+        srcRow += Pitch;
+    }
+}
+
+/***************************************************************************
+ * 
+ *  ROUTINE       : DiagonalBlur
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (NOT USED).
+ *                  const UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DstPtr                : Pointer to output image.
+ *                  const INT32 Pitch            : Stride of SrcPtr & DstPtr.
+ *
+ *  OUTPUTS       : 8 columns by 16 rows of pixels written at DstPtr.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Diagonal blur over rows 0-7, then a sharpening pass over rows 8-15.
+ *
+ *  SPECIAL NOTES : The row pointers are not rewound between the two passes.
+ *                  NOTE(review): confirm the second pass is meant to cover rows 8-15.
+ ***************************************************************************/
+void DiagonalBlur
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch
+)
+{
+    int j, k;       // signed: j-2*Pitch-2 and j-1 are negative offsets
+    unsigned char *dstRow = DstPtr;
+    const unsigned char *srcRow = SrcPtr;
+    (void) pbi;
+	for ( k=0; k<8; k++ )
+	{
+		// blur: weights 8 (centre), 4 (diagonals at distance 1), 2 (diagonals at distance 2)
+		for ( j=0; j<8; j++ )
+		{
+			int sum;
+
+			sum = 16;
+			sum += 8*srcRow[j];
+			sum += 2*srcRow[j-2*Pitch-2];
+			sum += 2*srcRow[j-2*Pitch+2];
+			sum += 4*srcRow[j-Pitch  -1];
+			sum += 4*srcRow[j-Pitch  +1];
+			sum += 4*srcRow[j+Pitch  -1];
+			sum += 4*srcRow[j+Pitch  +1];
+			sum += 2*srcRow[j+2*Pitch-2];
+			sum += 2*srcRow[j+2*Pitch+2];
+
+			sum >>= 5;
+
+			dstRow[j] = sum;
+		}
+
+		dstRow += Pitch;
+		srcRow += Pitch;
+	}
+	for ( k=0; k<8; k++ )
+	{
+		// sharpen: 6 x centre minus the four direct neighbours, halved and clamped to 0..255
+		for ( j=0; j<8; j++ )
+		{
+			int sum;
+
+			sum = 1;
+			sum += 6*srcRow[j];
+			sum += -1 * srcRow[j-Pitch];
+			sum += -1 * srcRow[j+Pitch];
+			sum += -1 * srcRow[j-1];
+			sum += -1 * srcRow[j+1];
+
+			sum >>= 1;
+
+			if ( sum<0 )
+				sum = 0;
+
+			if ( sum>255 )
+				sum = 255;
+
+			dstRow[j] = sum;
+		}
+
+		dstRow += Pitch;
+		srcRow += Pitch;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringFrame
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *Src             : Pointer to input image.
+ *                  UINT8 *Dst             : Pointer to output image.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Walks the Y, U and V planes in 8x8 blocks and, per block, applies strong
+ *                  de-ringing, weak de-ringing or a plain copy based on pbi->FragmentVariances.
+ *  SPECIAL NOTES : Block keeps counting across the three planes, so FragmentVariances is
+ *                  read in the order Y entries, then U entries, then V entries.
+ ****************************************************************************/
+void DeringFrame ( POSTPROC_INSTANCE *pbi, UINT8 *Src, UINT8 *Dst )
+{
+	UINT32 Block;
+    UINT32 col, row;
+	UINT32 BlocksAcross, BlocksDown;
+	UINT32 *QuantScale;
+	UINT32 LineLength;
+	INT32  Thresh1,Thresh2,Thresh3,Thresh4;
+	UINT8  *SrcPtr;     // Pointer to current row of blocks in the source image
+	UINT8  *DestPtr;    // Pointer to current row of blocks in the destination image
+	INT32  Quality = pbi->FrameQIndex;
+
+    if ( pbi->Vp3VersionNo >= 5 )
+    {
+		Thresh1 = 384;                  
+		Thresh2 = 6 * Thresh1;          // 2304
+		Thresh3 = 5 * Thresh2/4;        // 2880
+		Thresh4 = 5 * Thresh2/2;        // 5760
+	}
+	else
+	{
+		Thresh1 = 2048;
+		Thresh2 = 15 * Thresh1;         // 30720
+		Thresh3 = 3 * Thresh2;          // 92160
+		Thresh4 = 4 * Thresh2;          // 122880
+	}
+
+    if ( pbi->Vp3VersionNo >= 5 )
+        QuantScale = DeringModifierV3;
+    else if ( pbi->Vp3VersionNo >= 2 )
+        QuantScale = DeringModifierV2;
+    else
+        QuantScale = DeringModifierV1;
+
+	BlocksAcross = pbi->HFragments;
+	BlocksDown   = pbi->VFragments;
+
+	SrcPtr     = Src + pbi->ReconYDataOffset;
+	DestPtr    = Dst + pbi->ReconYDataOffset;
+	LineLength = pbi->YStride;
+
+	Block = 0;
+	
+	// De-ring Y plane (Quality stays at pbi->FrameQIndex for every Y block)
+	for ( row=0 ; row<BlocksDown; row++ )
+	{
+		for ( col=0; col<BlocksAcross; col++ )
+		{
+			INT32 Variance = pbi->FragmentVariances[Block]; 
+			
+			if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh3) )
+			{
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				
+				if( (col > 0                && pbi->FragmentVariances[Block-1] > Thresh4 ) ||
+					(col + 1 < BlocksAcross && pbi->FragmentVariances[Block+1] > Thresh4 ) ||
+					(row + 1 < BlocksDown   && pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
+					(row > 0                && pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) )
+				{
+					DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+					DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				}		
+			}
+			else if ( Variance > Thresh2 )
+			{
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else if(Variance > Thresh1 )
+			{
+				DeringBlockWeak ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else
+			{
+				CopyBlock ( SrcPtr+8*col, DestPtr+8*col, LineLength );
+            }
+			
+			++Block;
+		}
+		SrcPtr  += 8 * LineLength;
+		DestPtr += 8 * LineLength;
+    }
+    
+	// De-ring U plane: block grid and line length are half those used for Y
+	BlocksAcross /= 2;
+	BlocksDown   /= 2;
+	LineLength   /= 2;
+
+	SrcPtr  = Src + pbi->ReconUDataOffset;
+	DestPtr = Dst + pbi->ReconUDataOffset;
+	for ( row=0; row<BlocksDown; row++ )
+	{
+		for ( col=0; col<BlocksAcross; col++ )
+		{
+			INT32 Variance = pbi->FragmentVariances[Block]; 
+			if ( pbi->Vp3VersionNo < 5)
+			   Quality = pbi->FragQIndex[Block]; 
+			
+			if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh4) )
+			{
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );		
+			}
+			else if ( Variance > Thresh2 )
+			{
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else if ( Variance > Thresh1 )
+			{
+				DeringBlockWeak ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else
+			{
+				CopyBlock ( SrcPtr+8*col, DestPtr+8*col, LineLength );
+			}
+			
+			++Block;
+		}
+		SrcPtr  += 8 * LineLength;
+		DestPtr += 8 * LineLength;
+    }
+
+	// De-ring V plane (same halved grid as U)
+	SrcPtr  = Src + pbi->ReconVDataOffset;
+	DestPtr = Dst + pbi->ReconVDataOffset;
+
+	for ( row=0; row<BlocksDown; row++ )
+	{
+		for ( col=0; col<BlocksAcross; col++ )
+		{
+			INT32 Variance = pbi->FragmentVariances[Block]; 
+
+            if ( pbi->Vp3VersionNo < 5 )
+			   Quality = pbi->FragQIndex[Block]; 
+			
+			if ( (pbi->PostProcessingLevel>5) && (Variance > Thresh4) )            
+			{
+				DeringBlockStrong (pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				DeringBlockStrong (pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				DeringBlockStrong (pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+				// NOTE(review): the three calls take identical arguments; presumably they only differ when Src and Dst share memory - confirm
+			}
+			else if ( Variance > Thresh2 )
+			{
+				DeringBlockStrong ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else if ( Variance > Thresh1 )
+			{
+				DeringBlockWeak ( pbi, SrcPtr+8*col, DestPtr+8*col, LineLength, Quality, QuantScale );
+			}
+			else
+			{
+				CopyBlock ( SrcPtr+8*col, DestPtr+8*col, LineLength );
+			}
+			
+			++Block;
+		}
+
+        SrcPtr  += 8 * LineLength;
+		DestPtr += 8 * LineLength;
+    }  
+}
+
+/****************************************************************************
+ *
+ *  Per-frame settings shared by DeringFrameInterlaced and its helpers.
+ *  The four thresholds are compared against pbi->FragmentVariances[].
+ *
+ ****************************************************************************/
+typedef struct
+{
+    INT32   Quality;            // pbi->FrameQIndex, forwarded to the block filters
+    UINT32 *QuantScale;         // version-specific modifier table, forwarded to the block filters
+    INT32   WeakThresh;         // variance above this: DeringBlockWeak
+    INT32   StrongThresh;       // variance above this: DeringBlockStrong
+    INT32   LumaStrongThresh;   // luma, PostProcessingLevel > 5: strong filter + neighbour test
+    INT32   RepeatThresh;       // luma neighbour / chroma block variance that triggers repeated strong calls
+} DERING_INTERLACED_CTX;
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringInterlacedBlockBasic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi           : Pointer to post-processor instance.
+ *                  const DERING_INTERLACED_CTX *Ctx : Per-frame settings.
+ *                  UINT8 *BlockSrc                  : Top-left pixel of the source block.
+ *                  UINT8 *BlockDst                  : Top-left pixel of the destination block.
+ *                  UINT32 LineLength                : Pitch used to step between block rows.
+ *                  INT32 Variance                   : Variance recorded for this block.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Strong filter, weak filter or plain copy of one 8x8 block,
+ *                  chosen by comparing Variance against the strong and weak
+ *                  thresholds (in that order).
+ *
+ *  SPECIAL NOTES : This is the three-way tail shared by the luma and chroma
+ *                  classification chains.
+ *
+ ****************************************************************************/
+static void DeringInterlacedBlockBasic
+(
+    POSTPROC_INSTANCE *pbi,
+    const DERING_INTERLACED_CTX *Ctx,
+    UINT8 *BlockSrc,
+    UINT8 *BlockDst,
+    UINT32 LineLength,
+    INT32 Variance
+)
+{
+    if ( Variance > Ctx->StrongThresh )
+    {
+        DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+    }
+    else if ( Variance > Ctx->WeakThresh )
+    {
+        DeringBlockWeak ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+    }
+    else
+    {
+        CopyBlock ( BlockSrc, BlockDst, LineLength );
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringInterlacedLumaField
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi           : Pointer to post-processor instance.
+ *                  const DERING_INTERLACED_CTX *Ctx : Per-frame settings.
+ *                  UINT8 *SrcPtr                    : First source line of the field.
+ *                  UINT8 *DestPtr                   : First destination line of the field.
+ *                  UINT32 LineLength                : Pitch between lines of the same field.
+ *                  UINT32 BlocksAcross              : Blocks per row.
+ *                  UINT32 BlocksDown                : Block rows in this field.
+ *                  UINT32 Block                     : Index into pbi->FragmentVariances of the
+ *                                                     field's first block.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Index of the block following the last one processed.
+ *
+ *  FUNCTION      : De-rings one field of the Y plane block by block.
+ *
+ *  SPECIAL NOTES : When PostProcessingLevel > 5 and the block variance exceeds
+ *                  the luma-strong threshold, the strong filter is called once,
+ *                  and twice more if any left/right/below/above neighbour within
+ *                  the same field exceeds the repeat threshold.
+ *                  NOTE(review): the repeated calls pass identical source and
+ *                  destination pointers; whether they change the result depends
+ *                  on DeringBlockStrong, which is not visible here.
+ *
+ ****************************************************************************/
+static UINT32 DeringInterlacedLumaField
+(
+    POSTPROC_INSTANCE *pbi,
+    const DERING_INTERLACED_CTX *Ctx,
+    UINT8 *SrcPtr,
+    UINT8 *DestPtr,
+    UINT32 LineLength,
+    UINT32 BlocksAcross,
+    UINT32 BlocksDown,
+    UINT32 Block
+)
+{
+    UINT32 row, col;
+
+    for ( row=0; row<BlocksDown; row++ )
+    {
+        for ( col=0; col<BlocksAcross; col++ )
+        {
+            UINT8 *BlockSrc = SrcPtr  + 8*col;
+            UINT8 *BlockDst = DestPtr + 8*col;
+            INT32 Variance  = pbi->FragmentVariances[Block];
+
+            if ( (pbi->PostProcessingLevel>5) && (Variance > Ctx->LumaStrongThresh) )
+            {
+                DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+
+                // Neighbour tests are guarded so they never index outside this field
+                if ( (col > 0                && pbi->FragmentVariances[Block-1] > Ctx->RepeatThresh ) ||
+                     (col + 1 < BlocksAcross && pbi->FragmentVariances[Block+1] > Ctx->RepeatThresh ) ||
+                     (row + 1 < BlocksDown   && pbi->FragmentVariances[Block+BlocksAcross] > Ctx->RepeatThresh ) ||
+                     (row > 0                && pbi->FragmentVariances[Block-BlocksAcross] > Ctx->RepeatThresh ) )
+                {
+                    DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+                    DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+                }
+            }
+            else
+            {
+                DeringInterlacedBlockBasic ( pbi, Ctx, BlockSrc, BlockDst, LineLength, Variance );
+            }
+
+            ++Block;
+        }
+
+        SrcPtr  += 8 * LineLength;
+        DestPtr += 8 * LineLength;
+    }
+
+    return Block;
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringInterlacedChromaPlane
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi           : Pointer to post-processor instance.
+ *                  const DERING_INTERLACED_CTX *Ctx : Per-frame settings.
+ *                  UINT8 *SrcPtr                    : First source line of the plane.
+ *                  UINT8 *DestPtr                   : First destination line of the plane.
+ *                  UINT32 LineLength                : Pitch between lines of the plane.
+ *                  UINT32 BlocksAcross              : Blocks per row.
+ *                  UINT32 BlocksDown                : Block rows in the plane.
+ *                  UINT32 Block                     : Index into pbi->FragmentVariances of the
+ *                                                     plane's first block.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : UINT32: Index of the block following the last one processed.
+ *
+ *  FUNCTION      : De-rings one chroma plane block by block.
+ *
+ *  SPECIAL NOTES : When PostProcessingLevel > 5 and the block variance exceeds
+ *                  the repeat threshold, the strong filter is called three times;
+ *                  no neighbour test is made for chroma.
+ *
+ ****************************************************************************/
+static UINT32 DeringInterlacedChromaPlane
+(
+    POSTPROC_INSTANCE *pbi,
+    const DERING_INTERLACED_CTX *Ctx,
+    UINT8 *SrcPtr,
+    UINT8 *DestPtr,
+    UINT32 LineLength,
+    UINT32 BlocksAcross,
+    UINT32 BlocksDown,
+    UINT32 Block
+)
+{
+    UINT32 row, col;
+
+    for ( row=0; row<BlocksDown; row++ )
+    {
+        for ( col=0; col<BlocksAcross; col++ )
+        {
+            UINT8 *BlockSrc = SrcPtr  + 8*col;
+            UINT8 *BlockDst = DestPtr + 8*col;
+            INT32 Variance  = pbi->FragmentVariances[Block];
+
+            if ( (pbi->PostProcessingLevel>5) && (Variance > Ctx->RepeatThresh) )
+            {
+                DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+                DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+                DeringBlockStrong ( pbi, BlockSrc, BlockDst, LineLength, Ctx->Quality, Ctx->QuantScale );
+            }
+            else
+            {
+                DeringInterlacedBlockBasic ( pbi, Ctx, BlockSrc, BlockDst, LineLength, Variance );
+            }
+
+            ++Block;
+        }
+
+        SrcPtr  += 8 * LineLength;
+        DestPtr += 8 * LineLength;
+    }
+
+    return Block;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringFrameInterlaced
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  UINT8 *Src             : Pointer to input image.
+ *                  UINT8 *Dst             : Pointer to output image.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a de-ringing filter to an INTERLACED frame.
+ *
+ *  SPECIAL NOTES : The Y plane is processed as two fields (top, then bottom),
+ *                  each with VFragments/2 block rows and a doubled pitch.
+ *                  U and V are then processed as whole planes with the same
+ *                  number of block rows and half the block columns.
+ *                  pbi->FragmentVariances is consumed sequentially in that
+ *                  order: top field, bottom field, U, V.
+ *
+ ****************************************************************************/
+void DeringFrameInterlaced ( POSTPROC_INSTANCE *pbi, UINT8 *Src, UINT8 *Dst )
+{
+    DERING_INTERLACED_CTX Ctx;
+    INT32  Thresh1, Thresh2;
+    UINT32 BlocksAcross = pbi->HFragments;
+    UINT32 BlocksDown   = pbi->VFragments/2;    // Y plane will be done in two passes
+    UINT32 LineLength   = pbi->YStride * 2;     // pitch is doubled for interlacing
+    UINT32 Block        = 0;
+
+    Ctx.Quality = pbi->FrameQIndex;
+
+    // Thresholds and modifier table depend on the codec version
+    if ( pbi->Vp3VersionNo >= 5 )
+    {
+        Thresh1 = 384;
+        Thresh2 = 6 * Thresh1;
+        Ctx.LumaStrongThresh = 5 * Thresh2/4;
+        Ctx.RepeatThresh     = 5 * Thresh2/2;
+        Ctx.QuantScale       = DeringModifierV3;
+    }
+    else
+    {
+        Thresh1 = 2048;
+        Thresh2 = 15 * Thresh1;
+        Ctx.LumaStrongThresh = 3 * Thresh2;
+        Ctx.RepeatThresh     = 4 * Thresh2;
+
+        if ( pbi->Vp3VersionNo >= 2 )
+            Ctx.QuantScale = DeringModifierV2;
+        else
+            Ctx.QuantScale = DeringModifierV1;
+    }
+    Ctx.WeakThresh   = Thresh1;
+    Ctx.StrongThresh = Thresh2;
+
+    // De-ring Y Plane: Top Field
+    Block = DeringInterlacedLumaField ( pbi, &Ctx,
+                                        Src + pbi->ReconYDataOffset,
+                                        Dst + pbi->ReconYDataOffset,
+                                        LineLength, BlocksAcross, BlocksDown, Block );
+
+    // De-ring Y Plane: Bottom Field (starts one frame line further down)
+    Block = DeringInterlacedLumaField ( pbi, &Ctx,
+                                        Src + pbi->ReconYDataOffset + pbi->YStride,
+                                        Dst + pbi->ReconYDataOffset + pbi->YStride,
+                                        LineLength, BlocksAcross, BlocksDown, Block );
+
+    // NOTE: BlocksDown for UV Planes is same as in Y for interlaced frame.
+    BlocksAcross /= 2;
+    LineLength   /= 4;      // (2 * YStride) / 4, i.e. half the Y stride
+
+    // De-ring U Plane
+    Block = DeringInterlacedChromaPlane ( pbi, &Ctx,
+                                          Src + pbi->ReconUDataOffset,
+                                          Dst + pbi->ReconUDataOffset,
+                                          LineLength, BlocksAcross, BlocksDown, Block );
+
+    // De-ring V Plane
+    (void)DeringInterlacedChromaPlane ( pbi, &Ctx,
+                                        Src + pbi->ReconVDataOffset,
+                                        Dst + pbi->ReconVDataOffset,
+                                        LineLength, BlocksAcross, BlocksDown, Block );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c
new file mode 100644
index 00000000..260354fb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/doptsystemdependant.c
@@ -0,0 +1,92 @@
+/****************************************************************************
+*
+*   Module Title :     doptsystemdependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/ 
+#include "postp.h"
+ 
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+extern void FilteringVert_12_C ( UINT32 QValue,UINT8 *Src, INT32 Pitch);
+extern void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeblockLoopFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PostProcMachineSpecificConfig
+ *
+ *  INPUTS        : UINT32 Version : Codec version number (UNUSED)
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Points every post-processing function pointer at its
+ *                  plain C implementation.
+ *
+ *  SPECIAL NOTES : The assignments are independent of one another; they are
+ *                  grouped by subsystem purely for readability.
+ *
+ ****************************************************************************/
+void PostProcMachineSpecificConfig ( UINT32 Version )
+{
+    (void)Version;
+
+    // Loop-filter kernels and look-up table setup
+    SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
+    FilterHoriz                     = FilterHoriz_Generic;
+    FilterVert                      = FilterVert_Generic;
+    FilterHoriz_Simple              = FilterHoriz_Simple_C;
+    FilterVert_Simple               = FilterVert_Simple_C;
+    FilteringHoriz_8                = FilteringHoriz_8_C;
+    FilteringVert_8                 = FilteringVert_8_C;
+    FilteringHoriz_12               = FilteringHoriz_12_C;
+    FilteringVert_12                = FilteringVert_12_C;
+
+    // De-blocking
+    SetupDeblockValueArray          = SetupDeblockValueArray_Generic;
+    DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
+    DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
+    DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;
+
+    // De-ringing
+    DeringBlockWeak                 = DeringBlockWeak_C;
+    DeringBlockStrong               = DeringBlockStrong_C;
+
+    // Scaling: 1:2
+    HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
+    VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
+    LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;
+
+    // Scaling: 3:5
+    HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
+    VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
+    LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;
+
+    // Scaling: 4:5
+    HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
+    VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
+    LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;
+
+    // De-interlacing, level clamping and noise
+    FastDeInterlace                 = CFastDeInterlace;
+    ClampLevels                     = ClampLevels_C;
+    PlaneAddNoise                   = PlaneAddNoise_C;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c
new file mode 100644
index 00000000..9980533d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/loopfilter.c
@@ -0,0 +1,976 @@
+/****************************************************************************
+*
+*   Module Title :     loopfilter.c
+*
+*   Description  :     Loop filter functions.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define Mod8(x) ( (x) & 7 )
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+// Loop-filter limit per quantizer index. ApplyReconLoopFilter indexes this
+// table with ppi->FrameQIndex; a 0 entry (the last 16 here) makes that
+// routine return without filtering. Each initializer below lists 64 values
+// (NOTE(review): presumably Q_TABLE_SIZE == 64 -- confirm against postp.h).
+UINT32 LoopFilterLimitValuesV1[Q_TABLE_SIZE] = 
+{
+    30, 25, 20, 20, 15, 15, 14, 14,
+    13, 13, 12, 12, 11, 11, 10, 10, 
+     9,  9,  8,  8,  7,  7,  7,  7,
+     6,  6,  6,  6,  5,  5,  5,  5,
+     4,  4,  4,  4,  3,  3,  3,  3,  
+     2,  2,  2,  2,  2,  2,  2,  2,  
+     0,  0,  0,  0,  0,  0,  0,  0,  
+     0,  0,  0,  0,  0,  0,  0,  0 
+};
+
+// Limit table actually read by SetupLoopFilter (indexed by FrameQIndex) and
+// by the Filtering*_C kernels (indexed by QValue). It is only a pointer and
+// is not assigned in this part of the file.
+// NOTE(review): presumably pointed at one of the tables below during
+// initialisation -- confirm it can never be NULL when the filters run.
+UINT32 *LoopFilterLimitValuesV2;
+
+// Same values as LoopFilterLimitValuesV1 except for the last 16 entries,
+// which are 2 and 1 instead of 0.
+UINT32 LoopFilterLimitValuesVp4[Q_TABLE_SIZE] =
+{
+    30, 25, 20, 20, 15, 15, 14, 14,
+    13, 13, 12, 12, 11, 11, 10, 10, 
+     9,  9,  8,  8,  7,  7,  7,  7,
+     6,  6,  6,  6,  5,  5,  5,  5,
+     4,  4,  4,  4,  3,  3,  3,  3,  
+     2,  2,  2,  2,  2,  2,  2,  2,  
+     2,  2,  2,  2,  2,  2,  2,  2,  
+     1,  1,  1,  1,  1,  1,  1,  1 
+};
+
+// The Vp5 and Vp6 tables below currently hold identical values.
+UINT32 LoopFilterLimitValuesVp5[Q_TABLE_SIZE] = 
+{
+    14, 14, 13, 13, 12, 12, 10, 10, 
+	10, 10,  8,  8,  8,  8,  8,  8,
+	 8,  8,  8,  8,  8,  8,  8,  8,
+	 8,  8,  8,  8,  8,  8,  8,  8,  
+	 8,  8,  8,  8,  7,  7,  7,  7,	
+	 7,  7,  6,  6,  6,  6,  6,  6,	
+	 5,  5,  5,  5,  4,  4,  4,  4,  
+     4,  4,  4,  3,  3,  3,  3,  2 
+};
+
+UINT32 LoopFilterLimitValuesVp6[Q_TABLE_SIZE] = 
+{ 
+    14, 14, 13, 13, 12, 12, 10, 10, 
+	10, 10,  8,  8,  8,  8,  8,  8,
+	 8,  8,  8,  8,  8,  8,  8,  8,
+	 8,  8,  8,  8,  8,  8,  8,  8,  
+	 8,  8,  8,  8,  7,  7,  7,  7,	
+	 7,  7,  6,  6,  6,  6,  6,  6,	
+	 5,  5,  5,  5,  4,  4,  4,  4,  
+     4,  4,  4,  3,  3,  3,  3,  2 
+};
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetupBoundingValueArray_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                  INT32 FLimit           : Value to use as limit.
+ *                               
+ *  OUTPUTS       : ppi->FiltBoundingValue is rewritten.
+ *
+ *  RETURNS       : INT32 *: Pointer to the centre (index 256) of
+ *                  ppi->FiltBoundingValue, so callers may index it with
+ *                  negative as well as positive filter values.
+ *
+ *  FUNCTION      : Builds the bounding-value look-up table. Relative to the
+ *                  returned pointer, entry x holds x for |x| < FLimit, then
+ *                  falls back linearly from +/-FLimit towards 0 over the next
+ *                  FLimit entries; every other entry is 0.
+ *
+ *  SPECIAL NOTES : 512 entries are cleared and indices up to
+ *                  +/-(2*FLimit - 1) are written, with no range check on
+ *                  FLimit. NOTE(review): presumably FiltBoundingValue has
+ *                  512 entries, which would require FLimit <= 128 -- confirm.
+ *
+ ****************************************************************************/
+INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *ppi, INT32 FLimit )
+{
+    INT32 *Centre = ppi->FiltBoundingValue + 256;
+    INT32 Step;
+
+    // Start from an all-zero table; only the ramps below are non-zero
+    memset ( ppi->FiltBoundingValue, 0, 512 * sizeof(ppi->FiltBoundingValue[0]) );
+
+    for ( Step = 0; Step < FLimit; Step++ )
+    {
+        // Inner ramp: identity for magnitudes below FLimit
+        Centre[ Step] =  Step;
+        Centre[-Step] = -Step;
+
+        // Outer ramp: falls from FLimit back towards zero
+        Centre[  FLimit + Step ] = FLimit - Step;
+        Centre[-(FLimit + Step)] = Step - FLimit;
+    }
+
+    return Centre;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetupLoopFilter
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                           
+ *  OUTPUTS       : ppi->BoundingValuePtr is set.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Looks up the filter limit for ppi->FrameQIndex in
+ *                  LoopFilterLimitValuesV2 and builds the bounding-value
+ *                  look-up table for it.
+ *
+ *  SPECIAL NOTES : Versions >= 2 always use the generic table builder;
+ *                  earlier versions go through the SetupBoundingValueArray
+ *                  function pointer.
+ *
+ ****************************************************************************/
+void SetupLoopFilter ( POSTPROC_INSTANCE *ppi )
+{
+    const INT32 Limit = LoopFilterLimitValuesV2[ppi->FrameQIndex];
+
+    if ( ppi->Vp3VersionNo < 2 )
+    {
+        ppi->BoundingValuePtr = SetupBoundingValueArray ( ppi, Limit );
+        return;
+    }
+
+    ppi->BoundingValuePtr = SetupBoundingValueArray_Generic ( ppi, Limit );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterHoriz_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processor instance (UNUSED).
+ *                  UINT8 *PixelPtr         : Pointer to the first of the four pixels
+ *                                            spanning the edge on the first line.
+ *                  INT32 LineLength        : Stride of input data.
+ *                  INT32 *BoundingValuePtr : Pointer to centre of the bounding value LUT.
+ *                               
+ *  OUTPUTS       : Pixels 1 and 2 of each of the 8 lines are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies the 4-tap loop-filter across vertical edge,
+ *                  i.e. filter is applied horizontally, on 8 consecutive lines.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The rounded filter
+ *                  value is bounded through the LUT and the results are
+ *                  clamped through LimitVal_VP31.
+ *
+ ****************************************************************************/
+void FilterHoriz_Generic
+(
+    POSTPROC_INSTANCE *ppi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+    const UINT8 *Clip = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Row = PixelPtr;
+    INT32 Line;
+    (void)ppi;
+
+    for ( Line = 0; Line < 8; Line++, Row += LineLength )
+    {
+        // (1, -3, 3, -1) taps over the four pixels spanning the edge
+        INT32 Delta = 3 * (Row[2] - Row[1]) + Row[0] - Row[3];
+
+        // Round, scale down by 8 and bound via the look-up table
+        Delta = BoundingValuePtr[(Delta + 4) >> 3];
+
+        Row[1] = Clip[(INT32)Row[1] + Delta];
+        Row[2] = Clip[(INT32)Row[2] - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterVert_Generic
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processor instance (UNUSED).
+ *                  UINT8 *PixelPtr         : Pointer to the first pixel of the line on
+ *                                            the far side of the edge (offset 0); the
+ *                                            lines at -2*LineLength, -LineLength and
+ *                                            +LineLength are also read.
+ *                  INT32 LineLength        : Stride of input data.
+ *                  INT32 *BoundingValuePtr : Pointer to centre of the bounding value LUT.
+ *                               
+ *  OUTPUTS       : 8 pixels on the lines at -LineLength and 0 are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies the 4-tap loop-filter across horizontal edge,
+ *                  i.e. filter is applied vertically, on 8 consecutive columns.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The rounded filter
+ *                  value is bounded through the LUT and the results are
+ *                  clamped through LimitVal_VP31.
+ *
+ ****************************************************************************/
+void FilterVert_Generic 
+( 
+    POSTPROC_INSTANCE *ppi,
+    UINT8 *PixelPtr,
+    INT32 LineLength,
+    INT32 *BoundingValuePtr
+)
+{
+    const UINT8 *Clip = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Near = PixelPtr - LineLength;    // line at offset -LineLength
+    UINT8 *Far  = PixelPtr;                 // line at offset 0
+    INT32 x;
+    (void)ppi;
+
+    for ( x = 0; x < 8; x++ )
+    {
+        // (1, -3, 3, -1) taps over the four lines spanning the edge
+        INT32 Delta =   (INT32)Near[x - LineLength]
+                      - (INT32)Far[x + LineLength]
+                      + 3 * ((INT32)Far[x] - (INT32)Near[x]);
+
+        // Round, scale down by 8 and bound via the look-up table
+        Delta = BoundingValuePtr[(Delta + 4) >> 3];
+
+        Near[x] = Clip[(INT32)Near[x] + Delta];
+        Far[x]  = Clip[(INT32)Far[x]  - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : Bound
+ *
+ *  INPUTS        : UINT32 FLimit  : Limit to use in computing bounding value.
+ *                  INT32  FiltVal : Value to have bounds applied to.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32: Bounded filter value, carrying the sign of FiltVal.
+ *
+ *  FUNCTION      : Computes a bounded FiltVal based on specified FLimit.
+ *                  With a = abs(FiltVal): the magnitude of the result is
+ *                  FLimit - abs(a - FLimit) when a < 2*FLimit, and 0 otherwise.
+ *                  It therefore rises from 0 to FLimit and falls back to 0.
+ *
+ *  SPECIAL NOTES : FLimit is expected to be small; the limit tables in this
+ *                  file hold values no larger than 30.
+ *
+ ****************************************************************************/
+ 
+#if defined (_WIN32_WCE)
+INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
+#else
+INLINE INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
+#endif
+{
+    const INT32 IsNegative = ( FiltVal < 0 );
+    const INT32 Ceiling    = 2 * FLimit;
+    INT32 Magnitude;
+    INT32 Distance;
+
+    // Work on the absolute value; the sign is restored at the end
+    Magnitude = IsNegative ? -FiltVal : FiltVal;
+
+    // Anything at or beyond twice the limit is treated as zero
+    if ( !(Magnitude < Ceiling) )
+        Magnitude = 0;
+
+    // Distance from the limit, as an absolute value
+    Distance = Magnitude - FLimit;
+    if ( Distance < 0 )
+        Distance = -Distance;
+
+    // flimit - abs (filtVal - flimit)
+    Magnitude = FLimit - Distance;
+
+    return IsNegative ? -Magnitude : Magnitude;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringHoriz_8_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel right of the edge
+ *                                  on the first line; Src[-2]..Src[1] are read.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src[-1] and Src[0] of each of the 8 lines are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies horizontal filter across vertical edge inside
+ *                  block with Q-dependent limits, on 8 consecutive lines.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The limit is read
+ *                  from LoopFilterLimitValuesV2[QValue].
+ *
+ ****************************************************************************/                       
+void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{    
+    const UINT32 Limit = LoopFilterLimitValuesV2[QValue];
+    const UINT8 *Clip  = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Row = Src;
+    INT32 Line;
+
+    for ( Line = 0; Line < 8; Line++, Row += Pitch )
+    {
+        // 4-tap filter with rounding, then bound by the Q-dependent limit
+        INT32 Delta = ( 3 * (Row[0] - Row[-1]) + Row[-2] - Row[1] + 4 ) >> 3;
+
+        Delta = Bound ( Limit, Delta );
+
+        Row[-1] = Clip[(INT32)Row[-1] + Delta];
+        Row[ 0] = Clip[(INT32)Row[ 0] - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringVert_8_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel of the line at
+ *                                  offset 0; the lines at -2*Pitch, -Pitch and
+ *                                  +Pitch are also read.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : 8 pixels on the lines at -Pitch and 0 are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies vertical filter across horizontal edge inside
+ *                  block with Q-dependent limits, on 8 consecutive columns.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The limit is read
+ *                  from LoopFilterLimitValuesV2[QValue].
+ *
+ ****************************************************************************/                       
+void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{    
+    const UINT32 Limit = LoopFilterLimitValuesV2[QValue];
+    const UINT8 *Clip  = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Near = Src - Pitch;      // line at offset -Pitch
+    UINT8 *Far  = Src;              // line at offset 0
+    INT32 x;
+
+    for ( x = 0; x < 8; x++ )
+    {
+        // 4-tap filter with rounding, then bound by the Q-dependent limit
+        INT32 Delta = (   (INT32)Near[x - Pitch]
+                        - (INT32)Far[x + Pitch]
+                        + 3 * ((INT32)Far[x] - (INT32)Near[x])
+                        + 4 ) >> 3;
+
+        Delta = Bound ( Limit, Delta );
+
+        Near[x] = Clip[(INT32)Near[x] + Delta];
+        Far[x]  = Clip[(INT32)Far[x]  - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringHoriz_12_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel right of the edge
+ *                                  on the first line; Src[-2]..Src[1] are read.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : Src[-1] and Src[0] of each of the 12 lines are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies horizontal filter across vertical edge inside
+ *                  block with Q-dependent limits, on 12 consecutive lines.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The limit is read
+ *                  from LoopFilterLimitValuesV2[QValue].
+ *
+ ****************************************************************************/                       
+void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{    
+    const UINT32 Limit = LoopFilterLimitValuesV2[QValue];
+    const UINT8 *Clip  = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Row = Src;
+    INT32 Line;
+
+    for ( Line = 0; Line < 12; Line++, Row += Pitch )
+    {
+        // 4-tap filter with rounding, then bound by the Q-dependent limit
+        INT32 Delta = ( 3 * (Row[0] - Row[-1]) + Row[-2] - Row[1] + 4 ) >> 3;
+
+        Delta = Bound ( Limit, Delta );
+
+        Row[-1] = Clip[(INT32)Row[-1] + Delta];
+        Row[ 0] = Clip[(INT32)Row[ 0] - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringVert_12_C
+ *
+ *  INPUTS        : UINT32 QValue : Current quantizer level.
+ *                  UINT8 *Src    : Pointer to the first pixel of the line at
+ *                                  offset 0; the lines at -2*Pitch, -Pitch and
+ *                                  +Pitch are also read.
+ *                  INT32 Pitch   : Pitch of input data.
+ *
+ *  OUTPUTS       : 12 pixels on the lines at -Pitch and 0 are modified in place.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies vertical filter across horizontal edge inside
+ *                  block with Q-dependent limits, on 12 consecutive columns.
+ *
+ *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1). The limit is read
+ *                  from LoopFilterLimitValuesV2[QValue].
+ *
+ ****************************************************************************/                       
+void FilteringVert_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
+{    
+    const UINT32 Limit = LoopFilterLimitValuesV2[QValue];
+    const UINT8 *Clip  = LimitVal_VP31 + VAL_RANGE;
+    UINT8 *Near = Src - Pitch;      // line at offset -Pitch
+    UINT8 *Far  = Src;              // line at offset 0
+    INT32 x;
+
+    for ( x = 0; x < 12; x++ )
+    {
+        // 4-tap filter with rounding, then bound by the Q-dependent limit
+        INT32 Delta = (   (INT32)Near[x - Pitch]
+                        - (INT32)Far[x + Pitch]
+                        + 3 * ((INT32)Far[x] - (INT32)Near[x])
+                        + 4 ) >> 3;
+
+        Delta = Bound ( Limit, Delta );
+
+        Near[x] = Clip[(INT32)Near[x] + Delta];
+        Far[x]  = Clip[(INT32)Far[x]  - Delta];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ApplyReconLoopFilter
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi     : Pointer to post-processor instance.
+ *                  INT32  FrameQIndex         : Q index for the frame.
+ *                  UINT8  *LastFrameRecon     : Pointer to last frame reconstruction buffer.
+ *                  UINT8  *PostProcessBuffer  : Pointer to last post-processing buffer.
+ *                  UINT8  *FragInfo           : Pointer to list of coded blocks.
+ *                  UINT32 FragInfoElementSize : Size of each element.
+ *                  UINT32 FragInfoCodedMask   : Mask to get at whether fragment is coded.
+ *             
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the edge pixels of coded blocks.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void ApplyReconLoopFilter
+(
+    POSTPROC_INSTANCE *ppi,
+    INT32		       FrameQIndex,
+    UINT8		      *LastFrameRecon,
+    UINT8		      *PostProcessBuffer,
+    UINT8		      *FragInfo,
+    UINT32             FragInfoElementSize,
+    UINT32		       FragInfoCodedMask
+)
+{
+	int j, m, n;
+	UINT32 nextRow;
+	UINT8 *rowStart;
+    INT32 *BoundingValuePtr;
+
+    INT32 i = 0;
+    INT32 FLimit = 0; 
+	int FromFragment = 0;
+    INT32 LineLength = 0;
+    INT32 LineFragments = 0;
+	int FragsAcross = ppi->HFragments;	
+	int FragsDown   = ppi->VFragments;
+
+	// variables passed in per frame
+	ppi->FrameQIndex 		 = FrameQIndex;
+	ppi->LastFrameRecon      = LastFrameRecon;
+	ppi->PostProcessBuffer 	 = PostProcessBuffer;
+	ppi->FragInfo 			 = FragInfo;
+	ppi->FragInfoElementSize = FragInfoElementSize;
+	ppi->FragInfoCodedMask	 = FragInfoCodedMask;
+
+    FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
+    if ( FLimit == 0 )
+        return;
+        
+    BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );
+ 
+	for ( j=0; j<3; j++ )
+	{
+		switch ( j )
+		{
+		case 0: // Y
+			FromFragment  = 0;
+			FragsAcross   = ppi->HFragments;
+			FragsDown     = ppi->VFragments;
+			LineLength    = ppi->YStride;
+			LineFragments = ppi->HFragments;
+			rowStart      = ppi->LastFrameRecon + ppi->ReconYDataOffset;
+			break;
+		case 1: // U
+			FromFragment  = ppi->YPlaneFragments;
+			FragsAcross   = ppi->HFragments >> 1;
+			FragsDown     = ppi->VFragments >> 1;
+			LineLength    = ppi->UVStride;
+			LineFragments = ppi->HFragments / 2;
+			rowStart      = ppi->LastFrameRecon + ppi->ReconUDataOffset;
+			break;
+		case 2:	// V
+			FromFragment  = ppi->YPlaneFragments + ppi->UVPlaneFragments;
+			FragsAcross   = ppi->HFragments >> 1;
+			FragsDown     = ppi->VFragments >> 1;
+			LineLength    = ppi->UVStride;
+			LineFragments = ppi->HFragments / 2;
+			rowStart      = ppi->LastFrameRecon + ppi->ReconVDataOffset;
+			break;
+		}
+		
+        nextRow = 8*LineLength;
+		i = FromFragment;
+		n = 0;
+
+		/*************/
+		/* First Row */
+		/*************/
+
+		/* First column */
+		
+		// only do 2 prediction if fragment coded and on non intra or if all fragments are intra 
+		if ( blockCoded ( i ) )
+		{
+			// Filter right hand border only if the block to the right is not coded
+			if ( !blockCoded ( i + 1 ) )
+				FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+			
+			// Bottom done if next row set
+			if ( !blockCoded (i + LineFragments) )
+				FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+		}
+		
+		i++;
+		
+		/* Middle columns */
+		for ( n=1; n<FragsAcross-1; n++, i++ )
+		{
+			if ( blockCoded( i ))
+			{
+				// Filter Left edge always
+				FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+				
+				// Filter right hand border only if the block to the right is not coded
+				if ( !blockCoded( i + 1 ) )
+					FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+				
+				// Bottom done if next row set
+				if( !blockCoded( i + LineFragments) )
+					FilterVert(ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr);
+			}
+		}
+		
+		// Last Column
+		if ( blockCoded( i ) )
+		{
+			// Filter Left edge always
+			FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+			
+			// Bottom done if next row set
+			if( !blockCoded (i + LineFragments) )
+				FilterVert(ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr);
+		}
+		
+		i++;
+		rowStart += nextRow;
+		n = 0;
+
+		/***************/
+		/* Middle Rows */
+		/***************/
+		for ( m=1; m<FragsDown-1; m++ )
+		{
+			/* First column */
+			n=0;
+			
+			// only do 2 prediction if fragment coded and on non intra or if all fragments are intra 
+			if( blockCoded( i ) )
+			{
+				// TopRow is always done
+				FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+				// Filter right hand border only if the block to the right is not coded
+				if ( !blockCoded ( i + 1 ) )
+					FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+				
+				// Bottom done if next row set
+				if ( !blockCoded (i + LineFragments) )
+					FilterVert ( ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr );
+			}
+
+			i++;
+
+			/* Middle columns */
+			for ( n=1; n<FragsAcross-1; n++, i++ )
+			{
+				if ( blockCoded ( i ) )
+				{
+					// Filter Left edge always
+					FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+					
+					// TopRow is always done
+					FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+					
+					// Filter right hand border only if the block to the right is not coded
+					if ( !blockCoded ( i + 1 ) )
+						FilterHoriz ( ppi, rowStart + 8*n + 6 , LineLength, BoundingValuePtr );
+					
+					// Bottom done if next row set
+					if ( !blockCoded (i + LineFragments) )
+						FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+				}
+			}
+
+			/* Last Column */
+			if ( blockCoded ( i ) )
+			{
+				// Filter Left edge always
+				FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+				
+				// TopRow is always done
+				FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+				
+				// Bottom done if next row set
+				if ( !blockCoded (i + LineFragments) )
+					FilterVert ( ppi, rowStart + 8*n + nextRow, LineLength, BoundingValuePtr );
+            }
+
+			i++;
+			rowStart += nextRow;
+		}
+	}
+		
+	//***********/
+	// Last Row */
+	//***********/
+	
+	/* First Column */
+	n = 0;
+	
+    // only do 2 prediction if fragment coded and on non intra or if all fragments are intra 
+	if ( blockCoded ( i ) )
+	{
+		// TopRow is always done
+		FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+		
+		// Filter right hand border only if the block to the right is not coded
+		if ( !blockCoded ( i + 1 ) )
+			FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+	}
+	
+	i++;
+	
+	/* middle columns */
+	for ( n=1; n<FragsAcross-1; n++, i++ )
+	{
+		if ( blockCoded ( i ) )
+		{
+			// Filter Left edge always
+			FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+			
+			// TopRow is always done
+			FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+			
+			// Filter right hand border only if the block to the right is not coded
+			if ( !blockCoded( i + 1 ) )
+				FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+		}
+	}
+	
+	/* Last Column */
+	if ( blockCoded ( i ) )
+	{
+		// Filter Left edge always
+		FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+		
+		// TopRow is always done
+		FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+	}
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterCodedPlaneEdges
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processor instance.
+ *                  UINT8 *rowStart         : First pixel of the plane's top block row.
+ *                  INT32 FirstFragment     : Fragment index of the plane's first block.
+ *                  int   FragsAcross       : Plane width in 8x8 fragments.
+ *                  int   FragsDown         : Plane height in 8x8 fragments.
+ *                  INT32 LineLength        : Stride of the plane in bytes.
+ *                  INT32 *BoundingValuePtr : Table returned by SetupBoundingValueArray.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Visits the plane's fragments in raster order and, for every
+ *                  coded one, filters its edges in the order left, top, right,
+ *                  bottom.
+ *
+ *  SPECIAL NOTES : Left/top edges are filtered whenever a neighbour exists
+ *                  inside the plane. Right/bottom edges are filtered only when
+ *                  the neighbour exists and is NOT coded (a coded neighbour
+ *                  filters the shared edge itself as its own left/top edge).
+ *                  Edges on the plane boundary are never touched, so planes
+ *                  that are a single fragment wide or high are handled safely.
+ *
+ ****************************************************************************/
+static void FilterCodedPlaneEdges
+(
+    POSTPROC_INSTANCE *ppi,
+    UINT8             *rowStart,
+    INT32              FirstFragment,
+    int                FragsAcross,
+    int                FragsDown,
+    INT32              LineLength,
+    INT32             *BoundingValuePtr
+)
+{
+    int    m, n;
+    INT32  i       = FirstFragment;
+    UINT32 nextRow = 8*LineLength;      // byte distance between block rows
+
+    for ( m=0; m<FragsDown; m++ )
+    {
+        for ( n=0; n<FragsAcross; n++, i++ )
+        {
+            if ( !blockCoded ( i ) )
+                continue;
+
+            // Left edge: always, except in the first column
+            if ( n > 0 )
+                FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );
+
+            // Top edge: always, except in the first row
+            if ( m > 0 )
+                FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );
+
+            // Right edge: only if there is a block to the right and it is not coded
+            if ( n < FragsAcross-1 && !blockCoded ( i + 1 ) )
+                FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );
+
+            // Bottom edge: only if there is a block below and it is not coded
+            if ( m < FragsDown-1 && !blockCoded ( i + FragsAcross ) )
+                FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
+        }
+
+        rowStart += nextRow;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : LoopFilter
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi     : Pointer to post-processor instance.
+ *                  INT32  FrameQIndex         : Q index for the frame.
+ *                  UINT8  *LastFrameRecon     : Pointer to last frame reconstruction buffer.
+ *                  UINT8  *PostProcessBuffer  : Pointer to last post-processing buffer.
+ *                  UINT8  *FragInfo           : Pointer to list of coded blocks.
+ *                  UINT32 FragInfoElementSize : Size of each element.
+ *                  UINT32 FragInfoCodedMask   : Mask to get at whether fragment is coded.
+ *             
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the edge pixels of coded blocks
+ *                  in the Y, U and V planes of LastFrameRecon (in that order).
+ *
+ *  SPECIAL NOTES : All per-frame arguments are stored in *ppi first. Nothing
+ *                  is filtered when LoopFilterLimitValuesV1[FrameQIndex] is 0.
+ *                  Chroma planes are treated as half the luma plane's size in
+ *                  fragments in both directions.
+ *
+ ****************************************************************************/
+void LoopFilter
+(
+    POSTPROC_INSTANCE *ppi,
+    INT32		       FrameQIndex,
+    UINT8		      *LastFrameRecon,
+    UINT8		      *PostProcessBuffer,
+    UINT8		      *FragInfo,
+    UINT32             FragInfoElementSize,
+    UINT32		       FragInfoCodedMask
+)
+{
+    INT32  FLimit;
+    INT32 *BoundingValuePtr;
+
+    // variables passed in per frame
+    ppi->FrameQIndex         = FrameQIndex;
+    ppi->LastFrameRecon      = LastFrameRecon;
+    ppi->PostProcessBuffer   = PostProcessBuffer;
+    ppi->FragInfo            = FragInfo;
+    ppi->FragInfoElementSize = FragInfoElementSize;
+    ppi->FragInfoCodedMask   = FragInfoCodedMask;
+
+    // A zero limit means the filter is switched off at this Q
+    FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
+    if ( FLimit == 0 )
+        return;
+
+    BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );
+
+    // Y
+    FilterCodedPlaneEdges ( ppi,
+                            ppi->LastFrameRecon + ppi->ReconYDataOffset,
+                            0,
+                            ppi->HFragments,
+                            ppi->VFragments,
+                            ppi->YStride,
+                            BoundingValuePtr );
+
+    // U
+    FilterCodedPlaneEdges ( ppi,
+                            ppi->LastFrameRecon + ppi->ReconUDataOffset,
+                            ppi->YPlaneFragments,
+                            ppi->HFragments >> 1,
+                            ppi->VFragments >> 1,
+                            ppi->UVStride,
+                            BoundingValuePtr );
+
+    // V
+    FilterCodedPlaneEdges ( ppi,
+                            ppi->LastFrameRecon + ppi->ReconVDataOffset,
+                            ppi->YPlaneFragments + ppi->UVPlaneFragments,
+                            ppi->HFragments >> 1,
+                            ppi->VFragments >> 1,
+                            ppi->UVStride,
+                            BoundingValuePtr );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c
new file mode 100644
index 00000000..d39af5ef
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/postproc.c
@@ -0,0 +1,796 @@
+/*************************************************************************** 
+ *
+ *   Module Title :     PostProc.c
+ *
+ *   Description  :     Post Processing
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+#include "duck_mem.h"
+#include "stdlib.h"
+#include <math.h>
+#include <stddef.h>
+/****************************************************************************
+*  Macros
+****************************************************************************/              
+// Saturate an integer expression to 0..255 and narrow it to unsigned char.
+// NOTE: (x) is evaluated more than once; do not pass expressions with side effects.
+#define Clamp255(x)	((unsigned char) ( (x) < 0 ? 0 : ( (x) <= 255 ? (x) : 255 ) ))
+// TODO: benski> need better checks for other compilers
+
+// Round an address (or integer) up to the next multiple of 32.
+// The argument is parenthesized so that the whole expression is converted
+// before rounding (e.g. ROUNDUP32(ptr + 1) does pointer arithmetic first).
+#if defined(_M_AMD64) || defined(__LP64__)
+#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 )&( 0xFFFFFFFFFFFFFFE0 ) )
+#else //#elif //defined(_M_IX86) 
+#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 )&( 0xFFFFFFE0 ) )
+#endif
+
+/****************************************************************************
+*  Imports
+****************************************************************************/              
+extern void SimpleDeblockFrame(POSTPROC_INSTANCE *ppi, UINT8* SrcBuffer, UINT8* DestBuffer);
+extern void UpdateUMVBorder( POSTPROC_INSTANCE *ppi, UINT8 * DestReconPtr);
+extern void PostProcMachineSpecificConfig(UINT32 );
+
+extern void DeringFrame(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
+extern void DeringFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
+extern void DeblockFrame(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+extern void DeblockFrameUsing7TapFilter(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+extern void DeblockFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
+
+extern UINT32 DeringModifierV1[ Q_TABLE_SIZE ];
+extern UINT32 DeringModifierV2[ Q_TABLE_SIZE ];
+
+extern UINT32 *DCQuantScaleV2;
+extern UINT32 *DCQuantScaleUV;
+extern UINT32 *DCQuantScaleV1;
+
+extern UINT32  LoopFilterLimitValuesVp4[Q_TABLE_SIZE];
+extern UINT32  LoopFilterLimitValuesVp5[Q_TABLE_SIZE];
+extern UINT32  LoopFilterLimitValuesVp6[Q_TABLE_SIZE];
+
+extern UINT32 DeblockLimitValuesVp4[Q_TABLE_SIZE];
+extern UINT32 DeblockLimitValuesVp5[Q_TABLE_SIZE];
+extern UINT32 DeblockLimitValuesVp6[Q_TABLE_SIZE];
+
+extern UINT32 *LoopFilterLimitValuesV2;
+
+extern UINT32  *DeblockLimitValuesV2;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+// Saturation look-up table: InitPostProcessing() stores Clamp255(i - VAL_RANGE)
+// at index i, for i in [0, VAL_RANGE*3).
+UINT8 LimitVal_VP31[VAL_RANGE * 3];
+// Function-pointer hooks for the post-processing primitives. None of them is
+// assigned in the part of the file reviewed here; presumably
+// PostProcMachineSpecificConfig() installs the implementations -- TODO confirm.
+// Edge filters taking a Q value, a pixel pointer and a pitch.
+void  (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+void  (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+void  (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+void  (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+// Scaling hooks (band / line based), named after their scaling ratio.
+void  (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void  (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void  (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void  (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void  (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void  (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void  (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+void  (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+void  (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+// "Simple" variants of the loop-filter edge hooks declared further down.
+void  (*FilterHoriz_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void  (*FilterVert_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+// De-ringing and de-blocking hooks.
+void  (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+void  (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+void  (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+void  (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+void  (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+// Build the per-limit tables that are handed to the edge filters as their
+// last (INT32 *) argument.
+INT32*(*SetupBoundingValueArray)(xPB_INST ppi, INT32 FLimit);
+INT32*(*SetupDeblockValueArray)(xPB_INST ppi, INT32 FLimit);
+// Loop-filter edge hooks: instance, pixel pointer, stride, bounding-value table.
+void  (*FilterHoriz)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void  (*FilterVert)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+void  (*ClampLevels)( POSTPROC_INSTANCE *ppi,INT32 BlackClamp,	INT32 WhiteClamp, UINT8	*Src, UINT8	*Dst);
+void  (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);  
+// Same signature as PlaneAddNoise_C() defined later in this file.
+void  (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : InitPostProcessing
+ *
+ *  INPUTS        : UINT32 *DCQuantScaleV2p : Table stored in DCQuantScaleV2.
+ *	                UINT32 *DCQuantScaleUVp : Table stored in DCQuantScaleUV.
+ *	                UINT32 *DCQuantScaleV1p : Table stored in DCQuantScaleV1 and
+ *	                                          copied into DeringModifierV1
+ *	                                          (Q_TABLE_SIZE entries are read).
+ *	                UINT32 Version          : Codec version number.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Builds the LimitVal_VP31 saturation table, records the
+ *                  caller's quantizer tables, selects the loop-filter and
+ *                  de-block limit tables for the codec version and finally
+ *                  calls PostProcMachineSpecificConfig.
+ *
+ *  SPECIAL NOTES : Version >= 6 selects the Vp6 tables, Version 5 the Vp5
+ *                  tables, anything lower the Vp4 tables.
+ *
+ ****************************************************************************/
+void InitPostProcessing
+( 
+	UINT32 *DCQuantScaleV2p,
+	UINT32 *DCQuantScaleUVp,
+	UINT32 *DCQuantScaleV1p,
+	UINT32 Version
+)
+{
+    int     Slot;
+    UINT32 *LoopLimits;
+    UINT32 *DeblockLimits;
+
+    // Entry Slot holds (Slot - VAL_RANGE) saturated to the 0..255 pixel range
+    for ( Slot=0; Slot<VAL_RANGE*3; Slot++ )
+    {
+        int Value = Slot - VAL_RANGE;
+
+        if ( Value < 0 )
+            LimitVal_VP31[Slot] = 0;
+        else if ( Value <= 255 )
+            LimitVal_VP31[Slot] = (unsigned char)Value;
+        else
+            LimitVal_VP31[Slot] = 255;
+    }
+
+    // Remember the quantizer tables supplied by the caller
+    DCQuantScaleV2 = DCQuantScaleV2p;
+    DCQuantScaleUV = DCQuantScaleUVp;
+    DCQuantScaleV1 = DCQuantScaleV1p;
+
+    // The V1 de-ring modifier is a straight copy of the V1 DC quant scale
+    for ( Slot=0; Slot<Q_TABLE_SIZE; Slot++ )
+        DeringModifierV1[Slot] = DCQuantScaleV1p[Slot];
+
+    // Pick the limit tables that belong to this codec version
+    if ( Version < 5 )
+    {
+        LoopLimits    = LoopFilterLimitValuesVp4;
+        DeblockLimits = DeblockLimitValuesVp4;
+    }
+    else if ( Version < 6 )
+    {
+        LoopLimits    = LoopFilterLimitValuesVp5;
+        DeblockLimits = DeblockLimitValuesVp5;
+    }
+    else
+    {
+        LoopLimits    = LoopFilterLimitValuesVp6;
+        DeblockLimits = DeblockLimitValuesVp6;
+    }
+    LoopFilterLimitValuesV2 = LoopLimits;
+    DeblockLimitValuesV2    = DeblockLimits;
+
+    PostProcMachineSpecificConfig ( Version );
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeInitPostProcessing
+ *
+ *  INPUTS        : None.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Counterpart of InitPostProcessing.
+ *
+ *  SPECIAL NOTES : Intentionally a no-op: there is nothing to release here.
+ *
+ ****************************************************************************/
+void DeInitPostProcessing ( void )
+{
+    // Nothing to tear down.
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeletePostProcBuffers
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance
+ *                                           (a NULL pointer is ignored).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-allocates buffers used by the post-processing module.
+ *
+ *  SPECIAL NOTES : Every "...Alloc" pointer is released with duck_free and
+ *                  then zeroed together with its aligned alias, so the
+ *                  function may safely be called more than once. 
+ *
+ ****************************************************************************/
+void DeletePostProcBuffers ( POSTPROC_INSTANCE *ppi )
+{
+    if ( !ppi )
+        return;
+
+    if ( ppi->IntermediateBufferAlloc )
+        duck_free ( ppi->IntermediateBufferAlloc );
+    ppi->IntermediateBufferAlloc = 0;
+    ppi->IntermediateBuffer      = 0;
+
+    if ( ppi->FiltBoundingValueAlloc )
+        duck_free ( ppi->FiltBoundingValueAlloc );
+    ppi->FiltBoundingValueAlloc = 0;
+    ppi->FiltBoundingValue      = 0;
+
+    if ( ppi->DeblockBoundingValueAlloc )
+        duck_free ( ppi->DeblockBoundingValueAlloc );
+    ppi->DeblockBoundingValueAlloc = 0;
+    ppi->DeblockBoundingValue      = 0;
+
+    if ( ppi->FragQIndexAlloc )
+        duck_free ( ppi->FragQIndexAlloc );
+    ppi->FragQIndexAlloc = 0;
+    ppi->FragQIndex      = 0;
+
+    if ( ppi->FragmentVariancesAlloc )
+        duck_free ( ppi->FragmentVariancesAlloc );
+    ppi->FragmentVariancesAlloc = 0;
+    ppi->FragmentVariances      = 0;
+
+    if ( ppi->FragDeblockingFlagAlloc )
+        duck_free ( ppi->FragDeblockingFlagAlloc );
+    ppi->FragDeblockingFlagAlloc = 0;
+    ppi->FragDeblockingFlag      = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : AllocatePostProcBuffers
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32: TRUE: Success, FALSE Failure (Change to BOOL!!)
+ *
+ *  FUNCTION      : (Re)allocates the working buffers of the post-processor.
+ *                  Existing buffers are released first; if any allocation
+ *                  fails, everything allocated so far is released again.
+ *
+ *  SPECIAL NOTES : Each buffer is over-allocated by 32 bytes and the usable
+ *                  pointer is the raw pointer rounded up with ROUNDUP32.
+ *                  The raw pointer is kept in the matching "...Alloc" member
+ *                  so it can be freed later.
+ *
+ ****************************************************************************/
+INT32 AllocatePostProcBuffers ( POSTPROC_INSTANCE *ppi )
+{
+    // Drop whatever a previous configuration left behind
+    DeletePostProcBuffers ( ppi );
+
+    // Intermediate frame: luma plane including borders, times 3/2 for chroma
+    ppi->IntermediateBufferAlloc = (UINT8*)duck_malloc ( 32 + ppi->YStride * 
+            (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2) * 3/2 * sizeof(UINT8), DMEM_GENERAL);
+    if ( !ppi->IntermediateBufferAlloc )
+        goto AllocationFailed;
+    ppi->IntermediateBuffer = (UINT8 *)ROUNDUP32 ( ppi->IntermediateBufferAlloc );
+
+    // Loop-filter bounding value table
+    ppi->FiltBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FiltBoundingValueAlloc )
+        goto AllocationFailed;
+    ppi->FiltBoundingValue = (INT32 *)ROUNDUP32 ( ppi->FiltBoundingValueAlloc );
+
+    // De-block bounding value table
+    ppi->DeblockBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->DeblockBoundingValueAlloc )
+        goto AllocationFailed;
+    ppi->DeblockBoundingValue = (INT32 *)ROUNDUP32 ( ppi->DeblockBoundingValueAlloc );
+
+    // One Q index per fragment
+    ppi->FragQIndexAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FragQIndexAlloc )
+        goto AllocationFailed;
+    ppi->FragQIndex = (INT32 *)ROUNDUP32 ( ppi->FragQIndexAlloc );
+
+    // One variance per fragment
+    ppi->FragmentVariancesAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
+    if ( !ppi->FragmentVariancesAlloc )
+        goto AllocationFailed;
+    ppi->FragmentVariances = (INT32 *)ROUNDUP32 ( ppi->FragmentVariancesAlloc );
+
+    // One de-blocking flag per fragment
+    ppi->FragDeblockingFlagAlloc = (UINT8 *)duck_malloc(32+ppi->UnitFragments*sizeof(UINT8), DMEM_GENERAL);
+    if ( !ppi->FragDeblockingFlagAlloc )
+        goto AllocationFailed;
+    ppi->FragDeblockingFlag = (UINT8 *)ROUNDUP32 ( ppi->FragDeblockingFlagAlloc );
+
+    return TRUE;
+
+AllocationFailed:
+    // Release the buffers obtained before the failing request
+    DeletePostProcBuffers ( ppi );
+    return FALSE;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ChangePostProcConfiguration
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi         : Pointer to post-processor instance.
+ *                  CONFIG_TYPE *ConfigurationInit : Pointer to the configuration
+ *                                                   to copy into the instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ * 
+ *  FUNCTION      : Copies the configuration into the instance, derives the
+ *                  fragment counts, strides, border size and plane offsets
+ *                  from it and re-allocates the working buffers.
+ *
+ *  SPECIAL NOTES : The result of AllocatePostProcBuffers cannot be reported
+ *                  through this void interface. On failure that function
+ *                  leaves every buffer pointer in *ppi zeroed, which is how
+ *                  a caller can detect the condition.
+ *
+ ****************************************************************************/
+void ChangePostProcConfiguration ( POSTPROC_INSTANCE *ppi, CONFIG_TYPE *ConfigurationInit )
+{
+	memcpy ( &ppi->Configuration, ConfigurationInit, sizeof(CONFIG_TYPE) );
+
+	// Frame size in 8x8 fragments
+	ppi->HFragments       = (ppi->Configuration.VideoFrameWidth >> 3);
+	ppi->VFragments       = (ppi->Configuration.VideoFrameHeight>> 3);
+	ppi->YStride          = ppi->Configuration.YStride;
+	ppi->UVStride         = ppi->Configuration.UVStride;
+
+	// Each chroma plane holds a quarter of the luma fragments
+	ppi->YPlaneFragments  = ppi->HFragments * ppi->VFragments;
+	ppi->UVPlaneFragments = ppi->YPlaneFragments / 4;
+	ppi->UnitFragments    = ppi->YPlaneFragments + 2 * ppi->UVPlaneFragments;
+
+	// Border width: half of whatever the stride leaves over beside the picture
+	ppi->MVBorder         = (ppi->YStride - 8*ppi->HFragments)/2;
+
+	// Offsets of the first visible pixel of each plane (skipping the borders)
+	ppi->ReconYDataOffset = ppi->MVBorder * ppi->YStride + ppi->MVBorder;
+
+	ppi->ReconUDataOffset = 
+		(ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2)) 
+		+ ppi->MVBorder / 2 * ppi->UVStride + ppi->MVBorder/2;
+
+	ppi->ReconVDataOffset = 
+		(ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2)) 
+		+ (ppi->UVStride * (ppi->Configuration.VideoFrameHeight/2 + ppi->MVBorder)) 
+		+ ppi->MVBorder/2 * ppi->UVStride +ppi->MVBorder/2;
+
+	// Buffer sizes depend on the values derived above
+	AllocatePostProcBuffers ( ppi );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : CreatePostProcInstance
+ *
+ *  INPUTS        : CONFIG_TYPE *ConfigurationInit : Pointer to configuration.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : POSTPROC_INSTANCE *: Pointer to allocated & configured
+ *                                       post-processor instance, or 0 if
+ *                                       the instance or any of its working
+ *                                       buffers could not be allocated.
+ * 
+ *  FUNCTION      : Allocates space for and initializes a post-processor
+ *                  instance.
+ *
+ *  SPECIAL NOTES : The instance is zero-filled before configuration and
+ *                  AddNoiseMode is set to 1. Release the result with
+ *                  DeletePostProcInstance. 
+ *
+ ****************************************************************************/
+POSTPROC_INSTANCE *CreatePostProcInstance ( CONFIG_TYPE *ConfigurationInit )
+{
+	POSTPROC_INSTANCE *ppi;
+	int postproc_size = sizeof ( POSTPROC_INSTANCE );
+
+	ppi = (POSTPROC_INSTANCE *) duck_malloc ( postproc_size, DMEM_GENERAL );
+    if ( !ppi )
+        return 0;
+
+	// initialize whole structure to 0
+	memset ( (unsigned char *)ppi, 0, postproc_size );
+	
+	ChangePostProcConfiguration ( ppi, ConfigurationInit );
+
+    // ChangePostProcConfiguration cannot report an allocation failure, but
+    // AllocatePostProcBuffers zeroes every buffer pointer when one occurs and
+    // FragDeblockingFlagAlloc is the last buffer it sets up. A null pointer
+    // here therefore means the instance has no usable working buffers.
+    if ( !ppi->FragDeblockingFlagAlloc )
+    {
+        duck_free ( ppi );
+        return 0;
+    }
+
+    ppi->AddNoiseMode = 1;
+
+	return ppi;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeletePostProcInstance
+ *
+ *  INPUTS        : POSTPROC_INSTANCE **ppi : Address of the caller's instance pointer.
+ *
+ *  OUTPUTS       : POSTPROC_INSTANCE **ppi : The caller's pointer, reset to 0.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : Releases the instance's working buffers and then the
+ *                  instance itself.
+ *
+ *  SPECIAL NOTES : Does nothing when *ppi is already 0. ppi itself must
+ *                  not be NULL.
+ *
+ ****************************************************************************/
+void DeletePostProcInstance ( POSTPROC_INSTANCE **ppi )
+{
+    POSTPROC_INSTANCE *Instance = *ppi;
+
+    // Nothing to do for an instance that was never created / already deleted
+    if ( !Instance )
+        return;
+
+    // Free the dynamically allocated buffers before the structure that owns them
+    DeletePostProcBuffers ( Instance );
+    duck_free ( Instance );
+
+    *ppi = 0;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetPPInterlacedMode
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                  int Interlaced         : 0=Non-interlaced, 1=Interlaced.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : Stores the flag in the instance's Configuration.Interlaced
+ *                  member.
+ *
+ *  SPECIAL NOTES : The value is stored as given; it is not validated.
+ *
+ ****************************************************************************/
+void SetPPInterlacedMode ( POSTPROC_INSTANCE *ppi, int Interlaced )
+{
+    ppi->Configuration.Interlaced = Interlaced;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetDeInterlaceMode
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                  int DeInterlaceMode    : Mode to use for de-interlacing.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : Stores the mode in the instance's DeInterlaceMode member.
+ *
+ *  SPECIAL NOTES : The value is stored as given; it is not validated.
+ *
+ ****************************************************************************/
+void SetDeInterlaceMode ( POSTPROC_INSTANCE *ppi, int DeInterlaceMode )
+{
+    ppi->DeInterlaceMode = DeInterlaceMode;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetAddNoiseMode
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *                  int AddNoiseMode       : New value for the noise mode.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : Stores the value in the instance's AddNoiseMode member.
+ *
+ *  SPECIAL NOTES : CreatePostProcInstance initialises AddNoiseMode to 1.
+ *                  The value is stored as given; it is not validated.
+ *
+ ****************************************************************************/
+void SetAddNoiseMode(POSTPROC_INSTANCE *ppi, int AddNoiseMode)
+{
+	ppi->AddNoiseMode = AddNoiseMode;
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UpdateFragQIndex
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : Writes the current frame's Q index into FragQIndex[] for
+ *                  every fragment that blockCoded() reports as coded.
+ *
+ *  SPECIAL NOTES : Entries of fragments that are not coded keep their
+ *                  previous value.
+ *
+ ****************************************************************************/
+void UpdateFragQIndex ( POSTPROC_INSTANCE *ppi )
+{
+    UINT32 Frag = 0;
+    UINT32 QIndex = ppi->FrameQIndex;   // read once, constant for the frame
+
+    while ( Frag < ppi->UnitFragments )
+    {
+        if ( blockCoded ( Frag ) )
+        {
+            ppi->FragQIndex[Frag] = QIndex;
+        }
+        Frag++;
+    }
+}
+
+
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : gaussian
+ *
+ *  INPUTS        : double sigma : Standard deviation of the distribution.
+ *                  double mu    : Mean of the distribution.
+ *                  double x     : Position at which to evaluate the curve.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : double: height of the curve at x.   
+ * 
+ *  FUNCTION      : Evaluates the gaussian (normal) density
+ *                  1/(sigma*sqrt(2*pi)) * exp(-(x-mu)^2 / (2*sigma^2)).
+ *
+ *  SPECIAL NOTES : sigma is not checked; a value of 0 divides by zero.
+ *
+ ****************************************************************************/
+double gaussian(double sigma, double mu, double x)
+{
+    double Distance = x - mu;
+    double Scale    = 1 / ( sigma * sqrt(2.0*3.14159265));
+    double Exponent = -Distance*Distance/(2*sigma*sigma);
+
+    return Scale * exp(Exponent);
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PlaneAddNoise_C
+ *
+ *  INPUTS        : UINT8 *Start    starting address of buffer to add gaussian
+ *                                  noise to
+ *                  UINT32 Width    width of plane
+ *                  UINT32 Height   height of plane
+ *                  INT32  Pitch    distance between subsequent lines of frame
+ *                  INT32  q        quantizer used to determine amount of noise 
+ *                                  to add (sigma = 1 + 0.8*(63-q)/63)
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : adds gaussian noise to a plane of pixels
+ *
+ *  SPECIAL NOTES : A 256 entry table of noise values distributed according
+ *                  to gaussian() is built, a 2048 byte line of noise is drawn
+ *                  from it with rand(), and every row adds that line to its
+ *                  pixels starting at a random offset (0..255), wrapping
+ *                  around at the end of the line.
+ *                  Pixels are first clamped to [-min, 255+min], where min is
+ *                  the most negative noise value, so that adding the noise
+ *                  cannot leave the 0..255 range.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+    enum
+    {
+        DIST_CAPACITY = 300,    // room for the 256 entries plus rounding excess
+        DIST_ENTRIES  = 256,    // entries addressed by (rand() & 0xff)
+        NOISE_LEN     = 2048    // length of the noise line; must be a power of two
+    };
+
+    // signed char: the table holds negative values and plain char may be unsigned
+    signed char CharDist[DIST_CAPACITY];
+    signed char Rand[NOISE_LEN];
+
+    UINT32 x, y;
+    int    v, k;
+    int    next = 0;
+    int    LowClamp, HighClamp;
+    double sigma = 1 + .8*(63-q) / 63.0;
+
+    // set up a lookup table of 256 entries that matches 
+    // a gaussian distribution with sigma determined by q:
+    // value v is repeated round(256 * gaussian(v)) times, in ascending order.
+    for ( v=-32; v<32; v++ )
+    {
+        int count = (int) (.5+256*gaussian(sigma,0,v));
+
+        // never write past the table, whatever q the caller hands in
+        if ( count > DIST_CAPACITY - next )
+            count = DIST_CAPACITY - next;
+
+        if ( count > 0 )
+        {
+            for ( k=0; k<count; k++ )
+                CharDist[next+k] = (signed char) v;
+            next += count;
+        }
+    }
+    for ( ; next<DIST_ENTRIES; next++ )
+        CharDist[next] = 0;
+
+    // generate a line of 2048 characters following our gaussian distribution
+    for ( k=0; k<NOISE_LEN; k++ )
+        Rand[k] = CharDist[rand() & 0xff];
+
+    // CharDist is sorted, so entry 0 is the most negative noise value
+    LowClamp  = -CharDist[0];
+    HighClamp = 255 + CharDist[0];
+
+    for ( y=0; y<Height; y++ )
+    {
+        // signed multiply so that a negative Pitch steps backwards correctly
+        UINT8  *Row    = Start + (INT32)y * Pitch;
+        UINT32  Offset = (UINT32)(rand() & 0xff);
+
+        for ( x=0; x<Width; x++ )
+        {
+            int Pixel = Row[x];
+
+            if ( Pixel < LowClamp )
+                Pixel = LowClamp;
+
+            if ( Pixel > HighClamp )
+                Pixel = HighClamp;
+
+            // wrap inside the noise line so wide planes never read past it
+            Row[x] = (UINT8)( Pixel + Rand[(Offset + x) & (NOISE_LEN-1)] );
+        }
+    }
+}
+ 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PostProcess
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi     : Pointer to post-processor instance.
+ *                  INT32  Vp3VersionNo        : Encoder version used to code frame.
+ *                  INT32  FrameType           : Encoding method: Keyframe or non-Keyframe.
+ *                  INT32  PostProcessingLevel : Level of post-processing to perform.
+ *                  INT32  FrameQIndex         : Q-index used to code frame.
+ *                  UINT8  *LastFrameRecon     : Pointer to last frame reconstruction buffer.
+ *                  UINT8  *PostProcessBuffer  : Pointer to buffer receiving the post-processed output.
+ *                  UINT8  *FragInfo           : Pointer to list of coded blocks.
+ *                  UINT32 FragInfoElementSize : Size of each element.
+ *                  UINT32 FragInfoCodedMask   : Mask to get at whether fragment is coded.
+ *
+ *  OUTPUTS       : The per-frame arguments are stored in *ppi. Filtered output,
+ *                  when the selected level produces any, goes to PostProcessBuffer.
+ *  RETURNS       : void.
+ *  FUNCTION      : Dispatches on PostProcessingLevel to the de-blocking,
+ *                  de-ringing, de-interlacing and noise-addition routines.
+ *  SPECIAL NOTES : Levels 0 and 1 call no de-blocking routine. Levels without a
+ *                  case of their own (e.g. 2, 3) take the default deblock + dering path.
+ *
+ ****************************************************************************/
+void PostProcess
+(
+    POSTPROC_INSTANCE *ppi,
+    INT32        Vp3VersionNo,
+    INT32		 FrameType,
+    INT32		 PostProcessingLevel,
+    INT32		 FrameQIndex,
+    UINT8		*LastFrameRecon,
+    UINT8		*PostProcessBuffer,
+    UINT8		*FragInfo,
+    UINT32       FragInfoElementSize,
+    UINT32		 FragInfoCodedMask
+)
+{
+	int ReconUVPlaneSize;
+    
+    // Store the per-frame arguments in the instance; every routine below is handed ppi.
+	ppi->Vp3VersionNo			= Vp3VersionNo;
+	ppi->FrameType 				= FrameType;
+	ppi->PostProcessingLevel 	= PostProcessingLevel;
+	ppi->FrameQIndex 			= FrameQIndex;
+	ppi->LastFrameRecon 		= LastFrameRecon;
+	ppi->PostProcessBuffer 		= PostProcessBuffer;
+	ppi->FragInfo 				= FragInfo;
+	ppi->FragInfoElementSize 	= FragInfoElementSize;
+	ppi->FragInfoCodedMask		= FragInfoCodedMask;
+
+    switch ( ppi->PostProcessingLevel )
+    {
+    case 8:
+        // On a slow machine, use a simpler and faster deblocking filter
+		UpdateFragQIndex ( ppi );
+		if(ppi->Vp3VersionNo < 2)
+		{
+	        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+		}
+		else
+		{
+            if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
+            {
+                SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+                ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+                memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+                memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+                FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset, 
+                                  ppi->PostProcessBuffer+ppi->ReconYDataOffset, 
+                                  ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+            }
+            else
+			    SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+		}
+        break;
+    // Levels 5 and 6: deblock, UMV border update and dering. Interlaced streams with
+    // Vp3VersionNo >= 5 use the interlaced filters and leave before the noise step.
+    case 6:
+	case 5:
+	    if ( ppi->Vp3VersionNo < 5 ) 
+		{
+			UpdateFragQIndex ( ppi );
+		}
+		else
+		{
+			if ( ppi->Configuration.Interlaced )
+			{
+                if ( !ppi->DeInterlaceMode )
+                {
+                    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+                    UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+                    DeringFrameInterlaced ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+                }
+                else
+                {
+                    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+                    UpdateUMVBorder ( ppi, ppi->IntermediateBuffer );
+                    DeringFrameInterlaced ( ppi, ppi->IntermediateBuffer, ppi->IntermediateBuffer );
+                    
+                    ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+                    FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset, 
+                                      ppi->PostProcessBuffer+ppi->ReconYDataOffset, 
+                                      ppi->HFragments*8, ppi->VFragments*8, ppi->YStride);      
+                }
+				break;
+			}
+		}
+		DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+		UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+		DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+        // Optional noise: needs AddNoiseMode set and a non-null PlaneAddNoise.
+        if(ppi->AddNoiseMode&&PlaneAddNoise!=0) 
+            PlaneAddNoise(ppi->PostProcessBuffer + ppi->ReconYDataOffset,ppi->HFragments*8, ppi->VFragments*8,ppi->YStride,FrameQIndex);
+
+        break;
+    case 7:
+	    if ( ppi->Vp3VersionNo >= 5 ) 
+		{
+			if ( ppi->Configuration.Interlaced )		
+			{
+                if ( !ppi->DeInterlaceMode )
+				    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+                else
+                {
+				    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+                    ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+                    FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset, 
+                                      ppi->PostProcessBuffer+ppi->ReconYDataOffset, 
+                                      ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+                }
+				break;
+			}
+		}
+		else
+        {
+			UpdateFragQIndex ( ppi );
+        }
+		DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+		UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+		DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+        // Unlike levels 5 and 6, level 7 has no noise step.
+
+        break;
+
+    // Level 4: de-blocking only -- no UMV border update and no de-ringing.
+    case 4:
+	    if ( ppi->Vp3VersionNo >= 5 ) 
+		{
+			if ( ppi->Configuration.Interlaced )		
+			{
+                if ( !ppi->DeInterlaceMode )
+				    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+                else
+                {
+				    DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
+                    ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->IntermediateBuffer+ppi->ReconUDataOffset, ReconUVPlaneSize );
+                    memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->IntermediateBuffer+ppi->ReconVDataOffset, ReconUVPlaneSize );
+                    FastDeInterlace ( ppi->IntermediateBuffer+ppi->ReconYDataOffset, 
+                                      ppi->PostProcessBuffer+ppi->ReconYDataOffset, 
+                                      ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+                }
+				break;
+			}
+		}
+		else
+        {
+			UpdateFragQIndex ( ppi );
+        }
+        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+        //PlaneAddNoise(ppi->PostProcessBuffer + ppi->ReconYDataOffset,ppi->HFragments*8, ppi->VFragments*8,ppi->YStride,FrameQIndex);
+        break;
+    // Level 1: only UpdateFragQIndex is called; no filter runs.
+    case 1:
+        UpdateFragQIndex ( ppi );
+        break;
+    // Level 0: no filtering. Only the optional de-interlace copies data into PostProcessBuffer.
+    case 0:
+        if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
+        {
+            ReconUVPlaneSize = ppi->VFragments*2*ppi->YStride;
+            memcpy ( ppi->PostProcessBuffer+ppi->ReconUDataOffset, ppi->LastFrameRecon+ppi->ReconUDataOffset, ReconUVPlaneSize );
+            memcpy ( ppi->PostProcessBuffer+ppi->ReconVDataOffset, ppi->LastFrameRecon+ppi->ReconVDataOffset, ReconUVPlaneSize );
+            FastDeInterlace ( ppi->LastFrameRecon+ppi->ReconYDataOffset, 
+                              ppi->PostProcessBuffer+ppi->ReconYDataOffset, 
+                              ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
+        }
+        break;
+    // Every other level: plain deblock, UMV border update and dering.
+    default:
+        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
+        UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
+        DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
+        break;
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c
new file mode 100644
index 00000000..ee7a26fb
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/scale.c
@@ -0,0 +1,1496 @@
+/****************************************************************************
+*        
+*   Module Title :     scale.c
+*
+*   Description  :     Image scaling functions.
+*
+***************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Imports
+****************************************************************************/
+extern void UpdateUMVBorder ( POSTPROC_INSTANCE *ppi, UINT8 * DestReconPtr );
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_Copy
+*
+*  INPUTS        : const unsigned char *source : Start of the source line.
+*                  unsigned int sourceWidth    : Number of bytes to copy.
+*                  unsigned char *dest         : Start of the destination line.
+*                  unsigned int destWidth      : Ignored.
+*
+*  OUTPUTS       : dest[0 .. sourceWidth-1] holds a copy of the source line.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Unscaled (1:1) line copy with the same parameter list as
+*                  the HorizontalLine_x_y_Scale_C routines.
+*
+*  SPECIAL NOTES : Implemented with memcpy, so the lines must not overlap.
+*
+****************************************************************************/
+void HorizontalLine_Copy
+(
+	const unsigned char *source, unsigned int sourceWidth,
+	unsigned char *dest,         unsigned int destWidth
+)
+{
+	// Plain byte copy -- no interpolation is involved.
+	memcpy ( dest, source, sourceWidth );
+
+	(void) destWidth;	// present only to match the line-scaler parameter list
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : NullScale
+*
+*  INPUTS        : unsigned char *dest    : Ignored.
+*                  unsigned int destPitch : Ignored.
+*                  unsigned int destWidth : Ignored.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Placeholder that performs no scaling at all.
+*
+*  SPECIAL NOTES : Takes the same parameters as the VerticalBand_x_y_Scale_C routines.
+*
+****************************************************************************/
+void NullScale ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	// Intentionally empty: the casts below only mark the parameters as unused.
+	(void) dest;
+	(void) destPitch;
+	(void) destWidth;
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_4_5_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Source pixels to scale (expected multiple of 4).
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : dest receives 5 pixels for every 4 source pixels.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 4 to 5, blending neighbouring
+*                  pixels with 8-bit weights (x/256, rounded).
+*
+*  SPECIAL NOTES : Does nothing when sourceWidth is less than 4.
+*
+****************************************************************************/
+void HorizontalLine_4_5_Scale_C 
+(
+ const unsigned char *source,
+ unsigned int sourceWidth,
+ unsigned char *dest,
+ unsigned int destWidth
+ )
+{
+	unsigned int i, a, b, c;
+	unsigned char *des = dest;
+	const unsigned char *src = source;
+	(void) destWidth;
+
+	if ( sourceWidth < 4 )		// below 4 the unsigned sourceWidth-4 would wrap around
+		return;
+	// Every group but the last blends its fifth output with src[4] of the next group.
+	for ( i=0; i+4<sourceWidth; i+=4 )
+	{
+		a = src[0];
+		b = src[1];
+		des [0] = (unsigned char) a;
+		des [1] = (unsigned char) (( a * 51 + 205 * b + 128) >> 8);
+		c = src[2] * 154;
+		a = src[3];
+		des [2] = (unsigned char) (( b * 102 + c + 128) >> 8);
+		des [3] = (unsigned char) (( c + 102 * a + 128) >> 8);
+		b = src[4];
+		des [4] = (unsigned char) (( a * 205 + 51 * b + 128) >> 8);
+		src += 4;
+		des += 5;
+	}
+
+	// Last group: there is no src[4], so the fifth output repeats src[3].
+	a = src[0];
+	b = src[1];
+	des [0] = (unsigned char) (a);
+	des [1] = (unsigned char) (( a * 51 + 205 * b + 128) >> 8);
+	c = src[2] * 154;
+	a = src[3];
+	des [2] = (unsigned char) (( b * 102 + c + 128) >> 8);
+	des [3] = (unsigned char) (( c + 102 * a + 128) >> 8);
+	des [4] = (unsigned char) (a);
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_4_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Lines 1 to 4 of the band are overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 4 to 5 vertical interpolation. On entry lines 0-3
+*                  hold the four source lines; on exit lines 0-4 hold the result.
+*
+*  SPECIAL NOTES : Line 5 (the first line of the band below) is read as the
+*                  lower neighbour for line 4 and must already be filled in.
+*
+****************************************************************************/
+void VerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	const unsigned int row2 = destPitch*2;
+	const unsigned int row3 = destPitch*3;
+	const unsigned int row4 = destPitch*4;
+	const unsigned int next = destPitch*5;	// first line of the band below
+	unsigned int x;
+
+	for ( x=0; x<destWidth; x++ )
+	{
+		unsigned char *px = dest + x;
+		unsigned int upper, lower, mid;
+		// Reads and writes stay interleaved exactly as the in-place scheme requires.
+		upper = px[0];
+		lower = px[destPitch];
+		px[destPitch] = (UINT8) (( upper * 51 + 205 * lower + 128) >> 8);
+
+		mid   = px[row2] * 154;
+		upper = px[row3];
+		px[row2] = (UINT8) (( lower * 102 + mid + 128) >> 8);
+		px[row3] = (UINT8) (( mid + 102 * upper + 128) >> 8);
+
+		lower = px[next];
+		px[row4] = (UINT8) (( upper * 205 + 51 * lower + 128) >> 8);
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_4_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Lines 1 to 4 of the band are overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 4 to 5 vertical interpolation for the bottom band
+*                  of an image.
+*
+*  SPECIAL NOTES : There is no band below to borrow a line from, so line 4 is
+*                  a straight copy of the original line 3 instead of a blend.
+*                  Line 5 is never touched.
+*
+****************************************************************************/
+void LastVerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	const unsigned int row2 = destPitch*2;
+	const unsigned int row3 = destPitch*3;
+	const unsigned int row4 = destPitch*4;
+	unsigned int x;
+
+	for ( x=0; x<destWidth; ++x )
+	{
+		unsigned char *px = dest + x;
+		unsigned int upper, lower, mid;
+
+		upper = px[0];
+		lower = px[destPitch];
+		px[destPitch] = (UINT8) ((upper * 51 + 205 * lower + 128)>>8);
+
+		mid   = px[row2] * 154;
+		upper = px[row3];
+		px[row2] = (UINT8) (( lower * 102 + mid + 128) >> 8);
+		px[row3] = (UINT8) (( mid + 102 * upper + 128) >> 8);
+
+		// No line below to interpolate with, so repeat the last source line.
+		px[row4] = (UINT8) upper;
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale_4_5_2D
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi      : Pointer to post-processor instance (NOT USED).
+*                  const unsigned char *source : Pointer to source image.
+*                  unsigned int sourcePitch    : Stride of source image.
+*                  unsigned int sourceWidth    : Width of source image.
+*                  unsigned int sourceHeight   : Height of source image (NOT USED).
+*                  unsigned char *dest         : Pointer to destination image.
+*                  unsigned int destPitch      : Stride of destination image.
+*                  unsigned int destWidth      : Width of destination image.
+*                  unsigned int destHeight     : Height of destination image.
+*
+*  OUTPUTS       : dest is filled in bands of 5 lines, one per 4 source lines.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Two-dimensional 4 to 5 scaling up of an image.
+*
+*  SPECIAL NOTES : Nothing is written when destHeight is below 5 (no full band).
+*
+****************************************************************************/
+void Scale_4_5_2D
+(
+ POSTPROC_INSTANCE *ppi,
+ const unsigned char *source,
+ unsigned int sourcePitch,
+ unsigned int sourceWidth,
+ unsigned int sourceHeight,
+ unsigned char *dest,
+ unsigned int destPitch,
+ unsigned int destWidth,
+ unsigned int destHeight
+ )
+{
+	unsigned i, k;
+	const unsigned int srcBandHeight  = 4;
+	const unsigned int destBandHeight = 5;
+	const unsigned int bands = destHeight / destBandHeight;
+
+	(void) sourceHeight;
+	(void) ppi;
+
+	// Fewer than one full band: the unsigned bands-1 would wrap around, so scale nothing.
+	if ( bands == 0 )
+		return;
+
+	HorizontalLine_4_5_Scale ( source, sourceWidth, dest, destWidth );
+
+	// Except last band (runs bands-1 times without ever computing bands-1)
+	for ( k=1; k<bands; k++ )
+	{
+		// scale one band horizontally 
+		for ( i=1; i<srcBandHeight; i++ )
+		{
+			HorizontalLine_4_5_Scale ( source+i*sourcePitch, sourceWidth, dest+i*destPitch, destWidth );
+		}
+
+		// first line of next band
+		HorizontalLine_4_5_Scale ( source+srcBandHeight*sourcePitch,
+			sourceWidth,
+			dest+destBandHeight*destPitch,
+			destWidth );
+
+		// Vertical scaling is in place       
+		VerticalBand_4_5_Scale ( dest, destPitch, destWidth );
+
+		// move to the next band
+		source += srcBandHeight  * sourcePitch;
+		dest   += destBandHeight * destPitch;
+	}
+
+	// scale one band horizontally 
+	for ( i=1; i<srcBandHeight; i++ )
+	{
+		HorizontalLine_4_5_Scale ( source+i*sourcePitch, sourceWidth,
+			dest+i*destPitch, destWidth );
+	}
+
+	// Vertical scaling is in place       
+	LastVerticalBand_4_5_Scale ( dest, destPitch, destWidth );
+}
+
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_3_5_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Source pixels to scale (expected multiple of 3).
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : dest receives 5 pixels for every 3 source pixels.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 3 to 5, blending neighbouring
+*                  pixels with 8-bit weights (x/256, rounded).
+*
+*  SPECIAL NOTES : Does nothing when sourceWidth is less than 3.
+*
+*
+****************************************************************************/
+void HorizontalLine_3_5_Scale_C 
+(
+ const unsigned char *source,
+ unsigned int sourceWidth,
+ unsigned char *dest,
+ unsigned int destWidth
+ )
+{
+	unsigned int i, a, b, c;
+	unsigned char *des = dest;
+	const unsigned char *src = source;
+	(void) destWidth;
+
+	if ( sourceWidth < 3 )		// below 3 the unsigned sourceWidth-3 would wrap around
+		return;
+
+	// Every group but the last blends its fifth output with src[3] of the next group.
+	for ( i=0; i+3<sourceWidth; i+=3 )
+	{
+		a = src[0];
+		b = src[1];
+		des [0] = (unsigned char) (a);
+		des [1] = (unsigned char) (( a * 102 + 154 * b + 128 ) >> 8);
+
+		c = src[2] ;
+		des [2] = (unsigned char) (( b * 205 + c * 51 + 128 ) >> 8);
+		des [3] = (unsigned char) (( b * 51 + c * 205 + 128 ) >> 8);
+
+		a = src[3];
+		des [4] = (unsigned char) (( c * 154 + a * 102 + 128 ) >> 8);
+		src += 3;
+		des += 5;
+	}
+
+	// Last group: there is no src[3], so the fifth output repeats src[2].
+	a = src[0];
+	b = src[1];
+	des [0] = (unsigned char) (a);
+	des [1] = (unsigned char) (( a * 102 + 154 * b + 128 ) >> 8);
+	c = src[2] ;
+	des [2] = (unsigned char) (( b * 205 + c * 51 + 128 ) >> 8);
+	des [3] = (unsigned char) (( b * 51 + c * 205 + 128 ) >> 8);
+	des [4] = (unsigned char) (c);
+}        
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_3_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Lines 1 to 4 of the band are overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 3 to 5 vertical interpolation. On entry lines 0-2
+*                  hold the three source lines; on exit lines 0-4 hold the result.
+*
+*  SPECIAL NOTES : Line 5 (the first line of the band below) is read as the
+*                  lower neighbour for line 4 and must already be filled in.
+*
+****************************************************************************/
+void VerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	const unsigned int row2 = destPitch*2;
+	const unsigned int row3 = destPitch*3;
+	const unsigned int row4 = destPitch*4;
+	const unsigned int next = destPitch*5;	// first line of the band below
+	unsigned int x;
+
+	for ( x=0; x<destWidth; x++ )
+	{
+		unsigned char *px = dest + x;
+		unsigned int top, mid, low;
+		top = px[0];
+		mid = px[destPitch];
+		px[destPitch] = (UINT8) (( top * 102 + 154 * mid + 128 ) >> 8);
+
+		low = px[row2];
+		px[row2] = (UINT8) (( mid * 205 + low * 51 + 128 ) >> 8);
+		px[row3] = (UINT8) (( mid * 51 + low * 205 + 128 ) >> 8);
+		top = px[next];
+		px[row4] = (UINT8) (( low * 154 + top * 102 + 128 ) >> 8);
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_3_5_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Lines 1 to 4 of the band are overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 3 to 5 vertical interpolation for the bottom band
+*                  of an image.
+*
+*  SPECIAL NOTES : There is no band below to borrow a line from, so line 4 is
+*                  a straight copy of the original line 2 instead of a blend.
+*                  Line 5 is never touched.
+*
+****************************************************************************/
+void LastVerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	const unsigned int row2 = destPitch*2;
+	const unsigned int row3 = destPitch*3;
+	const unsigned int row4 = destPitch*4;
+	unsigned int x;
+
+	for ( x=0; x<destWidth; ++x )
+	{
+		unsigned char *px = dest + x;
+		unsigned int top, mid, low;
+
+		top = px[0];
+		mid = px[destPitch];
+		px[destPitch] = (UINT8) (( top * 102 + 154 * mid + 128 ) >> 8);
+
+		low = px[row2];
+		px[row2] = (UINT8) (( mid * 205 + low * 51 + 128 ) >> 8);
+		px[row3] = (UINT8) (( mid * 51 + low * 205 + 128 ) >> 8);
+		// No line below to interpolate with, so repeat the last source line.
+		px[row4] = (UINT8) (low) ;
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale_3_5_2D
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi      : Pointer to post-processor instance (NOT USED).
+*                  const unsigned char *source : Pointer to source image.
+*                  unsigned int sourcePitch    : Stride of source image.
+*                  unsigned int sourceWidth    : Width of source image.
+*                  unsigned int sourceHeight   : Height of source image (NOT USED).
+*                  unsigned char *dest         : Pointer to destination image.
+*                  unsigned int destPitch      : Stride of destination image.
+*                  unsigned int destWidth      : Width of destination image.
+*                  unsigned int destHeight     : Height of destination image.
+*
+*  OUTPUTS       : dest is filled in bands of 5 lines, one per 3 source lines.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Two-dimensional 3 to 5 scaling up of an image.
+*
+*  SPECIAL NOTES : Nothing is written when destHeight is below 5 (no full band).
+*
+****************************************************************************/
+void Scale_3_5_2D
+( 
+ POSTPROC_INSTANCE *ppi,
+ const unsigned char *source,
+ unsigned int sourcePitch,
+ unsigned int sourceWidth,
+ unsigned int sourceHeight,
+ unsigned char *dest,
+ unsigned int destPitch,
+ unsigned int destWidth,
+ unsigned int destHeight
+ )
+{
+	// define the constants for a 3->5 scale up
+	const unsigned int srcBandHeight = 3;
+	const unsigned int destBandHeight = 5;
+	const unsigned int bands = destHeight / destBandHeight;
+	unsigned int i, k;
+
+	(void) ppi;
+	(void) sourceHeight;
+
+	// Fewer than one full band: the unsigned bands-1 would wrap around, so scale nothing.
+	if ( bands == 0 )
+		return;
+
+	HorizontalLine_3_5_Scale ( source, sourceWidth, dest, destWidth );
+
+	// Except last band (runs bands-1 times without ever computing bands-1)
+	for ( k=1; k<bands; k++ )
+	{
+		// scale one band horizontally 
+		for ( i=1; i<srcBandHeight; i++ )
+		{
+			HorizontalLine_3_5_Scale ( source+i*sourcePitch, sourceWidth, dest+i*destPitch, destWidth );
+		}
+
+		// First line of next band
+		HorizontalLine_3_5_Scale ( source+srcBandHeight*sourcePitch,
+			sourceWidth,
+			dest+destBandHeight*destPitch,
+			destWidth );
+
+		// Vertical scaling is in place       
+		VerticalBand_3_5_Scale ( dest, destPitch, destWidth );
+
+		// move to the next band
+		source += srcBandHeight  * sourcePitch;
+		dest   += destBandHeight * destPitch;
+	}
+
+	// scale one band horizontally 
+	for ( i=1; i<srcBandHeight; i++ )
+	{
+		HorizontalLine_3_5_Scale ( source+i*sourcePitch, sourceWidth,
+			dest+i*destPitch, destWidth );
+	}
+
+	// Vertical scaling is in place       
+	LastVerticalBand_3_5_Scale ( dest, destPitch, destWidth );
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : HorizontalLine_1_2_Scale_C
+*
+*  INPUTS        : const unsigned char *source : Pointer to source line.
+*                  unsigned int sourceWidth    : Number of source pixels to scale.
+*                  unsigned char *dest         : Pointer to destination line.
+*                  unsigned int destWidth      : Width of destination (NOT USED).
+*
+*  OUTPUTS       : dest receives 2*sourceWidth pixels.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Scales one horizontal line up by 1 to 2: each source pixel is
+*                  followed by its rounded average with the pixel to its right.
+*
+*  SPECIAL NOTES : Does nothing when sourceWidth is 0.
+*
+****************************************************************************/
+void HorizontalLine_1_2_Scale_C 
+(
+ const unsigned char *source,
+ unsigned int sourceWidth,
+ unsigned char *dest,
+ unsigned int destWidth
+ )
+{
+	unsigned int i, a, b;
+	unsigned char *des = dest;
+	const unsigned char *src = source;
+	(void) destWidth;
+
+	if ( sourceWidth == 0 )		// empty line: the unsigned sourceWidth-1 would wrap around
+		return;
+	for ( i=0; i+1<sourceWidth; i+=1 )
+	{
+		a = src[0];
+		b = src[1];
+		des [0] = (unsigned char) (a);
+		des [1] = (unsigned char) (( a + b + 1 ) >> 1);
+		src += 1;
+		des += 2;
+	}
+	// The last pixel has no right-hand neighbour, so it is simply written twice.
+	a = src[0];
+	des [0] = (unsigned char) (a);
+	des [1] = (unsigned char) (a);
+}        
+
+/****************************************************************************
+*
+*  ROUTINE       : VerticalBand_1_2_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Line 1 of the band is overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 1 to 2 vertical interpolation: line 1 becomes the
+*                  rounded average of line 0 and line 2.
+*
+*  SPECIAL NOTES : Line 2 (the first line of the band below) is read and must
+*                  already be filled in.
+*
+****************************************************************************/
+void VerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	const unsigned int below = destPitch * 2;	// first line of the band below
+	unsigned int x;
+
+	for ( x=0; x<destWidth; x++ )
+	{
+		unsigned char *px = dest + x;
+		unsigned int sum = px[0];
+
+		// Rounded mean of the lines directly above and below the new line.
+		sum += px[below];
+		px[destPitch] = (UINT8) (( sum + 1 )>>1);
+	}
+	// line 0 and line 2 themselves are left as they were
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : LastVerticalBand_1_2_Scale_C
+*
+*  INPUTS        : unsigned char *dest    : Top line of the band to scale (in place).
+*                  unsigned int destPitch : Distance in bytes between successive lines.
+*                  unsigned int destWidth : Number of columns to process.
+*
+*  OUTPUTS       : Line 1 of the band is overwritten; line 0 is unchanged.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Column-wise 1 to 2 vertical scaling for the bottom band of
+*                  an image.
+*
+*  SPECIAL NOTES : There is no band below to average with, so line 1 is a
+*                  straight copy of line 0. The copy runs one byte at a time,
+*                  left to right.
+*
+****************************************************************************/
+void LastVerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth )
+{
+	unsigned int x = 0;
+	// Bottom edge: duplicate the final line into the row beneath it.
+	while ( x < destWidth )
+	{
+		unsigned char *px = dest + x;
+		px[destPitch] = px[0];
+		x++;
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale1D_c
+*
+*  INPUTS        : const unsigned char *source : Pointer to data to be scaled.
+*                  int sourceStep              : Number of pixels to step on in source.
+*                  unsigned int sourceScale    : Scale for source.
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : Pointer to output data array.
+*                  int destStep                : Number of pixels to step on in destination.
+*                  unsigned int destScale      : Scale for destination (divisor; must be non-zero).
+*                  unsigned int destLength     : Length of destination.
+*
+*  OUTPUTS       : destLength pixels are written to dest, destStep apart.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Linear interpolation in one dimension; each output pixel moves
+*                  the source position on by sourceScale/destScale pixels.
+*
+*  SPECIAL NOTES : source[sourceStep] is always read, even for a single output pixel.
+****************************************************************************/
+void Scale1D_c
+( 
+ const unsigned char *source,
+ int sourceStep,
+ unsigned int sourceScale,
+ unsigned int sourceLength,
+ unsigned char *dest,
+ int destStep,
+ unsigned int destScale,
+ unsigned int destLength
+ )
+{
+	unsigned int i;
+	unsigned int roundValue = destScale / 2;
+	unsigned int leftModifier = destScale;
+	unsigned int rightModifier = 0;
+	unsigned char leftPixel = *source;
+	unsigned char rightPixel = *( source + sourceStep );
+
+	(void) sourceLength;
+
+	// These asserts are needed if there are boundary issues...
+	//assert ( destScale > sourceScale );
+	//assert ( (sourceLength-1) * destScale >= (destLength-1) * sourceScale );
+
+	for ( i=0; i<destLength*destStep; i+=destStep )
+	{
+		// Advance the source window lazily so nothing past the last used pair is read.
+		while ( rightModifier > destScale )
+		{
+			rightModifier -= destScale;
+			source += sourceStep;
+			leftPixel = *source;
+			rightPixel = *( source + sourceStep );
+		}
+		leftModifier = destScale - rightModifier;
+
+		// Weighted blend of the two neighbours, rounded; the result is an unsigned pixel.
+		dest[i] = (unsigned char)((leftModifier*leftPixel + rightModifier*rightPixel + roundValue) / destScale);
+		rightModifier += sourceScale;
+	}
+}
+
+/****************************************************************************
+*
+*  ROUTINE       : Scale1D_2t1_i
+*
+*  INPUTS        : const unsigned char *source : Pointer to data to be scaled.
+*                  int sourceStep              : Number of pixels to step on in source.
+*                  unsigned int sourceScale    : Scale for source (UNUSED).
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : Pointer to output data array.
+*                  int destStep                : Number of pixels to step on in destination.
+*                  unsigned int destScale      : Scale for destination (UNUSED).
+*                  unsigned int destLength     : Length of destination.
+*
+*  OUTPUTS       : dest[0] is always written; further outputs up to destLength.
+*
+*  RETURNS       : void
+*
+*  FUNCTION      : Performs 2-to-1 interpolated scaling with a 3-10-3 (/16) filter.
+*
+*  SPECIAL NOTES : The last output reads the source sample two steps beyond its centre.
+*
+****************************************************************************/
+void Scale1D_2t1_i
+( 
+ const unsigned char *source,
+ int sourceStep,
+ unsigned int sourceScale,
+ unsigned int sourceLength,
+ unsigned char *dest,
+ int destStep,
+ unsigned int destScale,
+ unsigned int destLength
+ )
+{
+	unsigned int i, j, temp;
+
+	(void) sourceLength;
+	(void) sourceScale;
+	(void) destScale;
+
+	sourceStep *= 2;
+	// The first output has no left neighbour, so it is copied unfiltered.
+	dest[0] = source[0];
+	for ( i=destStep, j=sourceStep; i<destLength*destStep; i+=destStep, j+=sourceStep )
+	{
+		// 3-10-3 kernel centred on source[j]; the initial 8 rounds the divide by 16.
+		temp  = 8;
+		temp += 3 * source[j-sourceStep];
+		temp += 10 * source[j];
+		temp += 3 * source[j+sourceStep];
+		dest[i] = (unsigned char) (temp >> 4);
+	}
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : Scale1D_2t1_ps
+*
+*  INPUTS        : const unsigned char *source : First input sample.
+*                  int sourceStep              : Distance between consecutive input samples.
+*                  unsigned int sourceScale    : Scale for source (UNUSED).
+*                  unsigned int sourceLength   : Length of source (UNUSED).
+*                  unsigned char *dest         : First output sample.
+*                  int destStep                : Distance between consecutive output samples.
+*                  unsigned int destScale      : Scale for destination (UNUSED).
+*                  unsigned int destLength     : Number of output samples to produce.
+*
+*  OUTPUTS       : Samples written through dest.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : 2-to-1 point subsampling: every second input sample is
+*                  copied to the output unfiltered.
+*
+*  SPECIAL NOTES : None. 
+*
+****************************************************************************/
+void Scale1D_2t1_ps
+( 
+ const unsigned char *source,
+ int sourceStep,
+ unsigned int sourceScale,
+ unsigned int sourceLength,
+ unsigned char *dest,
+ int destStep,
+ unsigned int destScale,
+ unsigned int destLength
+ )
+{
+	unsigned int outPos = 0;
+	unsigned int inPos  = 0;
+
+	(void) sourceScale;
+	(void) sourceLength;
+	(void) destScale;
+
+	// take every second input sample
+	sourceStep *= 2;
+
+	while ( outPos < destLength*destStep )
+	{
+		dest[outPos] = source[inPos];
+		outPos += destStep;
+		inPos  += sourceStep;
+	}
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : Scale2D
+*
+*  INPUTS        : const unsigned char *source  : Pointer to data to be scaled.
+*                  int sourcePitch              : Stride of source image.
+*                  unsigned int sourceWidth     : Width of input image.
+*                  unsigned int sourceHeight    : Height of input image.
+*                  unsigned char *dest          : Pointer to output data array.
+*                  int destPitch                : Stride of destination image; also used
+*                                                 as the row stride inside tempArea.
+*                  unsigned int destWidth       : Width of destination image.
+*                  unsigned int destHeight      : Height of destination image.
+*                  unsigned char *tempArea      : Pointer to temp work area that receives
+*                                                 the horizontally scaled rows of a band.
+*                  unsigned char tempAreaHeight : Height of temp work area; the band
+*                                                 height is derived from tempAreaHeight-1.
+*                  unsigned int hscale          : Horizontal scale factor numerator
+*                                                 (passed to the 1-D scaler as sourceScale).
+*                  unsigned int hratio          : Horizontal scale factor denominator
+*                                                 (passed to the 1-D scaler as destScale).
+*                  unsigned int vscale          : Vertical scale factor numerator
+*                                                 (passed to the 1-D scaler as sourceScale).
+*                  unsigned int vratio          : Vertical scale factor denominator
+*                                                 (passed to the 1-D scaler as destScale).
+*                  unsigned int interlaced      : Interlace flag; only consulted for the
+*                                                 vertical 2-to-1 case, where it selects
+*                                                 point subsampling over the filtered scaler.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Scales an image plane in two dimensions by running a 1-D
+*                  scaler along rows and then along columns.
+*
+*  SPECIAL NOTES : Expansion is performed one band at a time to help with 
+*                  caching.
+*                  When sourceHeight == destHeight only the horizontal pass is
+*                  run, straight into dest; tempArea is not touched.
+*                  Otherwise rows 0..sourceBandHeight of tempArea are written,
+*                  each destPitch bytes apart, and the last row of one band is
+*                  carried over as row 0 of the next.
+*                  NOTE(review): destBandHeight is 0 when tempAreaHeight is 1,
+*                  or when sourceBandHeight*vratio < vscale on the shrink path;
+*                  the band count would then divide by zero - confirm callers
+*                  never pass such values.
+*                  NOTE(review): every band, including the last, writes
+*                  destBandHeight rows, so up to destBandHeight-1 rows past
+*                  destHeight may be written - confirm dest has room.
+*
+****************************************************************************/
+void Scale2D
+( 
+ const unsigned char *source,
+ int sourcePitch,
+ unsigned int sourceWidth,
+ unsigned int sourceHeight,
+ unsigned char *dest,
+ int destPitch,
+ unsigned int destWidth,
+ unsigned int destHeight,
+ unsigned char *tempArea,
+ unsigned char tempAreaHeight,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+ )
+{
+	unsigned int i, j, k;
+	unsigned int bands;
+	unsigned int destBandHeight;
+	unsigned int sourceBandHeight;
+
+	typedef void (*Scale1D)( const unsigned char *source,int sourceStep,unsigned int sourceScale,unsigned int sourceLength,
+		unsigned char *dest,int destStep,unsigned int destScale,unsigned int destLength);
+
+	Scale1D Scale1Dv = Scale1D_c;		// general scaler unless a 2-to-1 special case is chosen below
+	Scale1D Scale1Dh = Scale1D_c;
+
+	if ( hscale==2 && hratio==1 )
+		Scale1Dh = Scale1D_2t1_ps;
+
+	if ( vscale==2 && vratio==1 )
+	{
+		if ( interlaced )
+			Scale1Dv = Scale1D_2t1_ps;
+		else
+			Scale1Dv = Scale1D_2t1_i;
+	}
+
+	if ( sourceHeight == destHeight )
+	{
+		// heights match: scale each row horizontally, directly into dest
+		for ( k=0; k<destHeight; k++ )
+		{ 
+			Scale1Dh ( source, 1, hscale, sourceWidth+1, dest, 1, hratio, destWidth );
+			source += sourcePitch;
+			dest   += destPitch;
+		}
+		return;
+	}
+
+	if ( destHeight > sourceHeight )
+	{
+		destBandHeight   = tempAreaHeight - 1;		// growing: band size is fixed on the destination side
+		sourceBandHeight = destBandHeight * sourceHeight / destHeight;
+	}
+	else
+	{
+		sourceBandHeight = tempAreaHeight - 1;		// shrinking: band size is fixed on the source side
+		destBandHeight   = sourceBandHeight * vratio / vscale;
+	}
+
+	// first row needs to be done so that we can stay one row ahead for vertical zoom
+	Scale1Dh ( source, 1, hscale, sourceWidth+1, tempArea, 1, hratio, destWidth );
+
+	// for each band of the image
+	bands = (destHeight + destBandHeight - 1)/ destBandHeight;	// rounded up
+	for ( k=0; k<bands; k++ )
+	{
+		// scale one band horizontally into tempArea rows 1..sourceBandHeight
+		for ( i=1; i<sourceBandHeight+1; i++ )
+		{
+			if ( k*sourceBandHeight+i < sourceHeight )
+			{
+				Scale1Dh ( source+i*sourcePitch, 1, hscale, sourceWidth+1,
+					tempArea+i*destPitch, 1, hratio, destWidth );
+			}
+			else  //  Duplicate the last row 
+			{
+				// past the end of the source: repeat the previous tempArea row
+				memcpy ( tempArea+i*destPitch, tempArea+(i-1)*destPitch, destPitch );
+			}
+		}
+
+		// scale one band vertically, one column at a time, from tempArea into dest
+		for ( j=0; j<destWidth; j++ )
+		{
+			Scale1Dv ( &tempArea[j], destPitch, vscale, sourceBandHeight+1,
+				&dest[j], destPitch, vratio, destBandHeight );
+		}
+
+		// the last row of this band becomes tempArea row 0 of the next band
+		memcpy ( tempArea, tempArea+sourceBandHeight*destPitch, destPitch );
+
+		// move to the next band
+		source += sourceBandHeight * sourcePitch;
+		dest   += destBandHeight * destPitch;
+	}
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : ScaleFrame
+*
+*  INPUTS        : YUV_BUFFER_CONFIG *src       : Pointer to frame to be scaled.
+*                  YUV_BUFFER_CONFIG *dst       : Pointer to buffer to hold scaled frame.
+*                  unsigned char *tempArea      : Pointer to temp work area.
+*                  unsigned char tempHeight     : Height of temp work area.
+*                  unsigned int hscale          : Horizontal scale factor numerator.
+*                  unsigned int hratio          : Horizontal scale factor denominator.
+*                  unsigned int vscale          : Vertical scale factor numerator.
+*                  unsigned int vratio          : Vertical scale factor denominator.
+*                  unsigned int interlaced      : Interlace flag.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Scales the Y, U and V planes of src into dst with Scale2D,
+*                  then pads each plane out to the full size of dst.
+*
+*  SPECIAL NOTES : The scaled luma size is dw x dh with dw = YWidth*hratio/hscale
+*                  and dh = YHeight*vratio/vscale, both rounded up; the chroma
+*                  planes are scaled to dw/2 x dh/2 with the same factors.
+*                  Padding repeats the pixel at column (width-2) from column
+*                  (width-1) rightwards and repeats row (height-2) from row
+*                  (height-1) downwards, so the last scaled column and row
+*                  are overwritten as well.
+*                  NOTE(review): the luma row copy is YWidth+1 bytes while the
+*                  chroma copies are UVWidth bytes - confirm the extra byte is
+*                  intended and fits in the stride.
+*                  NOTE(review): the chroma row loops stop at YHeight/2 rather
+*                  than UVHeight, and the chroma column fill runs over UVHeight
+*                  rows where luma runs over dh rows - confirm.
+*
+****************************************************************************/
+void ScaleFrame
+(  
+ YUV_BUFFER_CONFIG *src,
+ YUV_BUFFER_CONFIG *dst,
+ unsigned char *tempArea,
+ unsigned char tempHeight,
+ unsigned int hscale,
+ unsigned int hratio,
+ unsigned int vscale,
+ unsigned int vratio,
+ unsigned int interlaced
+ )
+{
+	int i;
+	int dw = (hscale - 1 + src->YWidth * hratio) / hscale;		// scaled luma width, rounded up
+	int dh = (vscale - 1 + src->YHeight * vratio) / vscale;	// scaled luma height, rounded up
+
+	// Y plane
+	Scale2D ( (unsigned char *) src->YBuffer, src->YStride, src->YWidth, src->YHeight,
+		(unsigned char *) dst->YBuffer, dst->YStride, dw, dh,
+		tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+	if ( dw < (int)dst->YWidth )		// pad the right-hand side of every scaled row
+		for ( i=0; i<dh; i++ )
+			memset ( dst->YBuffer+i*dst->YStride+dw-1, dst->YBuffer[i*dst->YStride+dw-2], dst->YWidth-dw+1 );
+
+	if ( dh < (int)dst->YHeight )		// pad the bottom by repeating row dh-2
+		for ( i=dh-1; i<(int)dst->YHeight; i++ )
+			memcpy(dst->YBuffer + i*dst->YStride, dst->YBuffer + (dh-2) * dst->YStride, dst->YWidth+1);
+
+	// U plane
+	Scale2D ( (unsigned char *) src->UBuffer,src->UVStride, src->UVWidth, src->UVHeight,
+		(unsigned char *) dst->UBuffer,dst->UVStride, dw/2, dh/2,
+		tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+	if ( dw/2 < (int)dst->UVWidth )
+		for(i=0;i<dst->UVHeight;i++)
+			memset(dst->UBuffer + i * dst->UVStride + dw/2 - 1, dst->UBuffer[i*dst->UVStride+dw/2-2],dst->UVWidth-dw/2 + 1);
+
+	if ( dh/2 < (int)dst->UVHeight )
+		for ( i=dh/2-1; i<(int)dst->YHeight/2; i++ )
+			memcpy ( dst->UBuffer+i*dst->UVStride, dst->UBuffer+(dh/2-2)*dst->UVStride, dst->UVWidth );
+
+	// V plane
+	Scale2D ( (unsigned char *) src->VBuffer,src->UVStride, src->UVWidth, src->UVHeight,
+		(unsigned char *) dst->VBuffer,dst->UVStride, dw/2, dh/2,
+		tempArea, tempHeight, hscale, hratio, vscale, vratio, interlaced );
+
+	if ( dw/2 < (int)dst->UVWidth )
+		for ( i=0; i<dst->UVHeight; i++ )
+			memset ( dst->VBuffer+i*dst->UVStride+dw/2-1, dst->VBuffer[i*dst->UVStride+dw/2-2], dst->UVWidth-dw/2+1 );
+
+	if ( dh/2 < (int) dst->UVHeight )
+		for ( i=dh/2-1; i<(int)dst->YHeight/2; i++ )
+			memcpy ( dst->VBuffer+i*dst->UVStride, dst->VBuffer+(dh/2-2)*dst->UVStride, dst->UVWidth );
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : Fast_4_5_Scale
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi       : Pointer to post-processor instance; supplies the
+*                                                 source frame size and the Y/U/V plane offsets.
+*                  UINT8 *FrameBuffer           : Pointer to source image.
+*                  YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Scales up image by factor of 5/4, creating 5 output
+*                  samples for every 4 input samples horizontally & 
+*                  vertically.
+*
+*  SPECIAL NOTES : Each plane is handed to Scale_4_5_2D. The source pitch is
+*                  taken as width+32 for luma and width+16 for chroma; the
+*                  destination pitch is taken to equal the destination width.
+*                  Frame dimensions are not validated here.
+*
+****************************************************************************/
+void Fast_4_5_Scale ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+	// luma plane geometry
+	int SrcLumaH = ppi->Configuration.VideoFrameHeight;
+	int SrcLumaW = ppi->Configuration.VideoFrameWidth;
+	int DstLumaH = YuvConfig->YHeight;
+	int DstLumaW = YuvConfig->YWidth;
+
+	// each chroma plane is half the luma size in both directions
+	int SrcChromaH = SrcLumaH >> 1;
+	int SrcChromaW = SrcLumaW >> 1;
+	int DstChromaH = DstLumaH >> 1;
+	int DstChromaW = DstLumaW >> 1;
+
+	// Y
+	Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconYDataOffset, SrcLumaW+32, SrcLumaW, SrcLumaH,
+		(UINT8 *)YuvConfig->YBuffer, DstLumaW, DstLumaW, DstLumaH );
+
+	// U
+	Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconUDataOffset, SrcChromaW+16, SrcChromaW, SrcChromaH,
+		(UINT8 *)YuvConfig->UBuffer, DstChromaW, DstChromaW, DstChromaH );
+
+	// V
+	Scale_4_5_2D ( ppi, FrameBuffer + ppi->ReconVDataOffset, SrcChromaW+16, SrcChromaW, SrcChromaH,
+		(UINT8 *)YuvConfig->VBuffer, DstChromaW, DstChromaW, DstChromaH );
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : Fast_3_5_Scale
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi       : Pointer to post-processor instance; supplies the
+*                                                 source frame size and the Y/U/V plane offsets.
+*                  UINT8 *FrameBuffer           : Pointer to source image.
+*                  YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Scales up image by factor of 5/3, creating 5 output
+*                  samples for every 3 input samples horizontally & 
+*                  vertically.
+*
+*  SPECIAL NOTES : Each plane is handed to Scale_3_5_2D. The source pitch is
+*                  taken as width+32 for luma and width+16 for chroma; the
+*                  destination pitch is taken to equal the destination width.
+*                  Frame dimensions are not validated here.
+*
+****************************************************************************/
+void Fast_3_5_Scale ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+	// luma plane geometry
+	int SrcLumaH = ppi->Configuration.VideoFrameHeight;
+	int SrcLumaW = ppi->Configuration.VideoFrameWidth;
+	int DstLumaH = YuvConfig->YHeight;
+	int DstLumaW = YuvConfig->YWidth;
+
+	// each chroma plane is half the luma size in both directions
+	int SrcChromaH = SrcLumaH >> 1;
+	int SrcChromaW = SrcLumaW >> 1;
+	int DstChromaH = DstLumaH >> 1;
+	int DstChromaW = DstLumaW >> 1;
+
+	// Y
+	Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconYDataOffset, SrcLumaW+32, SrcLumaW, SrcLumaH,
+		(UINT8 *)YuvConfig->YBuffer, DstLumaW, DstLumaW, DstLumaH );
+
+	// U
+	Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconUDataOffset, SrcChromaW+16, SrcChromaW, SrcChromaH,
+		(UINT8 *)YuvConfig->UBuffer, DstChromaW, DstChromaW, DstChromaH );
+
+	// V
+	Scale_3_5_2D ( ppi, FrameBuffer + ppi->ReconVDataOffset, SrcChromaW+16, SrcChromaW, SrcChromaH,
+		(UINT8 *)YuvConfig->VBuffer, DstChromaW, DstChromaW, DstChromaH );
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : AnyRatio_2D_Scale
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi      : Pointer to post-processor instance; its
+*                                                Configuration.HScale/HRatio/VScale/VRatio
+*                                                select the line and band scalers.
+*                  const unsigned char *source : Pointer to source image.
+*                  unsigned int sourcePitch    : Stride of source image.
+*                  unsigned int sourceWidth    : Width of source image.
+*                  unsigned int sourceHeight   : Height of source image (NOT USED).
+*                  unsigned char *dest         : Pointer to destination image.
+*                  unsigned int destPitch      : Stride of destination image.
+*                  unsigned int destWidth      : Width of destination image.
+*                  unsigned int destHeight     : Height of destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : int: 1 if image scaled, 0 if image could not be scaled.
+*	
+*  FUNCTION      : Scale the image with changing aspect ratio.
+*
+*  SPECIAL NOTES : This scaling is a bi-linear scaling. Need to re-work the 
+*                  whole function for new scaling algorithm.
+*                  The scaler for each direction is picked from the integer
+*                  value of ratio*10/scale: 8 -> 4-5, 6 -> 3-5, 5 -> 1-2,
+*                  10 -> no scaling. Any other value in either direction
+*                  returns 0 before dest is written.
+*                  Rows are scaled horizontally into dest and each band is
+*                  then scaled vertically in place in dest.
+*                  NOTE(review): an HScale or VScale of 0 would divide by zero
+*                  in the ratio tests - confirm callers guarantee non-zero.
+*
+****************************************************************************/
+int AnyRatio_2D_Scale
+(
+ POSTPROC_INSTANCE *ppi, 
+ const unsigned char *source,
+ unsigned int sourcePitch,
+ unsigned int sourceWidth,
+ unsigned int sourceHeight,
+ unsigned char *dest,
+ unsigned int destPitch,
+ unsigned int destWidth,
+ unsigned int destHeight
+ )
+{
+	unsigned int i, k, max_k;
+	unsigned int srcBandHeight  = 0;
+	unsigned int destBandHeight = 0;
+
+	// suggested scale factors
+	int hs = ppi->Configuration.HScale;
+	int hr = ppi->Configuration.HRatio;
+	int vs = ppi->Configuration.VScale;
+	int vr = ppi->Configuration.VRatio;
+
+	// assume the ratios are scalable instead of should be centered
+	int RatioScalable = 1;
+
+	void (*HorizLineScale) ( const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL;
+	void (*VertBandScale) ( unsigned char *, unsigned int, unsigned int) = NULL;
+	void (*LastVertBandScale) ( unsigned char *, unsigned int, unsigned int) = NULL;
+
+	(void) ppi;		// redundant: ppi is already read above
+
+	// find out the ratio for each direction
+	switch ( hr*10/hs )
+	{
+	case 8:
+		// 4-5 Scale in Width direction
+		HorizLineScale = HorizontalLine_4_5_Scale;   
+		break;
+	case 6:
+		// 3-5 Scale in Width direction
+		HorizLineScale = HorizontalLine_3_5_Scale;
+		break;
+	case 5: 
+		// 1-2 Scale in Width direction
+		HorizLineScale = HorizontalLine_1_2_Scale;
+		break;
+	case 10:
+		// no scale in Width direction
+		HorizLineScale = HorizontalLine_Copy;
+		break;
+	default:
+		// The ratio is not acceptable now
+		// throw("The ratio is not acceptable for now!");
+		RatioScalable = 0;
+		break;
+	}
+
+	switch ( vr*10/vs )
+	{
+	case 8:
+		// 4-5 Scale in vertical direction
+		VertBandScale     = VerticalBand_4_5_Scale;
+		LastVertBandScale = LastVerticalBand_4_5_Scale;
+		srcBandHeight     = 4;
+		destBandHeight    = 5;
+		break;
+	case 6:
+		// 3-5 Scale in vertical direction
+		VertBandScale     = VerticalBand_3_5_Scale;
+		LastVertBandScale = LastVerticalBand_3_5_Scale;
+		srcBandHeight     = 3;
+		destBandHeight    = 5;
+		break;
+	case 5:
+		// 1-2 Scale in vertical direction
+		VertBandScale     = VerticalBand_1_2_Scale;
+		LastVertBandScale = LastVerticalBand_1_2_Scale;
+		srcBandHeight     = 1;
+		destBandHeight    = 2;
+		break;
+	case 10:
+		// no scale in vertical direction; rows are still processed in bands of 4
+		VertBandScale     = NullScale;
+		LastVertBandScale = NullScale;
+		srcBandHeight     = 4;
+		destBandHeight    = 4;
+		break;
+	default:
+		// The ratio is not acceptable now
+		// throw("The ratio is not acceptable for now!");
+		RatioScalable = 0;
+		break;
+	}
+
+	if ( RatioScalable == 0 )
+		return RatioScalable;
+
+	// first row of the first band
+	HorizLineScale ( source, sourceWidth, dest, destWidth );
+
+	// except last band
+	max_k = (destHeight+destBandHeight-1)/destBandHeight;	// number of bands, rounded up
+	if (max_k)
+	{
+		for ( k=0; k<max_k-1; k++ )
+		{
+			// scale one band horizontally 
+			for ( i=1; i<srcBandHeight; i++ )
+			{
+				HorizLineScale ( source+i*sourcePitch,
+					sourceWidth,
+					dest+i*destPitch,
+					destWidth );
+			}
+
+			// first line of next band
+			HorizLineScale ( source+srcBandHeight*sourcePitch,
+				sourceWidth,
+				dest+destBandHeight*destPitch,
+				destWidth );
+
+			// Vertical scaling is in place       
+			VertBandScale ( dest, destPitch, destWidth );
+
+			// Next band...
+			source += srcBandHeight  * sourcePitch;
+			dest   += destBandHeight * destPitch;
+		}
+
+		// last band: scale its remaining rows horizontally; there is no following band to read
+		for ( i=1; i<srcBandHeight; i++ )
+		{
+			HorizLineScale ( source+i*sourcePitch,
+				sourceWidth,
+				dest+i*destPitch,
+				destWidth );
+		}
+
+		// Vertical scaling is in place       
+		LastVertBandScale ( dest, destPitch, destWidth );
+	}
+	return RatioScalable;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : AnyRatioFrameScale
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi       : Pointer to post-processor instance; supplies the
+*                                                 scale factors, frame sizes and plane offsets.
+*                  UINT8 *FrameBuffer           : Pointer to source image.
+*                  YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*                  INT32 YOffset                : Offset from start of buffer to Y samples.
+*                  INT32 UVOffset               : Offset from start of buffer to UV samples.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : int: 1 if image scaled, 0 if image could not be scaled.
+*	
+*  FUNCTION      : Scale the image with changing aspect ratio.
+*
+*  SPECIAL NOTES : The Y plane is scaled first and its result decides the
+*                  return value. The area between the expanded frame size
+*                  and the padded size (source size rounded up to a multiple
+*                  of 3 when the ratio is 3, otherwise 8, then scaled) is
+*                  zeroed in the Y plane whether or not scaling succeeded.
+*                  U and V are only scaled when Y succeeded, and their
+*                  results are not checked.
+*
+****************************************************************************/
+int AnyRatioFrameScale
+( 
+ POSTPROC_INSTANCE *ppi, 
+ UINT8 *FrameBuffer, 
+ YUV_BUFFER_CONFIG *YuvConfig,
+ INT32 YOffset,
+ INT32 UVOffset
+ )
+{
+	int Row;
+	int PadW;
+	int PadH;
+	int Scaled;
+
+	// suggested scale factors
+	int hs = ppi->Configuration.HScale;
+	int hr = ppi->Configuration.HRatio;
+	int vs = ppi->Configuration.VScale;
+	int vr = ppi->Configuration.VRatio;
+
+	// source size (rounded up) that maps onto the expanded frame, and the expanded size itself
+	int SrcW = (ppi->Configuration.ExpandedFrameWidth * hr + hs - 1)/hs;
+	int SrcH = (ppi->Configuration.ExpandedFrameHeight * vr + vs - 1)/vs;
+	int DstW = ppi->Configuration.ExpandedFrameWidth;
+	int DstH = ppi->Configuration.ExpandedFrameHeight;
+
+	// padded output size: source rounded up to a whole number of units, then scaled
+	PadW = ( hr == 3 ) ? (SrcW+2)/3 * 3 * hs / hr : (SrcW+7)/8 * 8 * hs / hr;
+	PadH = ( vr == 3 ) ? (SrcH+2)/3 * 3 * vs / vr : (SrcH+7)/8 * 8 * vs / vr;
+
+	// Y
+	Scaled = AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconYDataOffset], 
+		ppi->Configuration.VideoFrameWidth +ppi->MVBorder*2, SrcW, SrcH,
+		(UINT8 *) YuvConfig->YBuffer + YOffset, YuvConfig->YStride, DstW, DstH);
+
+	// clear the strip to the right of the scaled picture
+	Row = 0;
+	while ( Row < PadH )
+	{
+		memset ( YuvConfig->YBuffer+YOffset+Row*YuvConfig->YStride+DstW, 0, PadW-DstW );
+		Row++;
+	}
+
+	// clear the rows below the scaled picture
+	Row = DstH;
+	while ( Row < PadH )
+	{
+		memset ( YuvConfig->YBuffer+YOffset+Row*YuvConfig->YStride, 0, PadW );
+		Row++;
+	}
+
+	if ( !Scaled )
+		return Scaled;
+
+	// chroma planes: half size, rounded up
+	SrcW = (SrcW+1)>>1;
+	SrcH = (SrcH+1)>>1;
+	DstW = (DstW+1)>>1;
+	DstH = (DstH+1)>>1;
+
+	// U
+	AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconUDataOffset], ppi->Configuration.VideoFrameWidth/2+ppi->MVBorder, SrcW, SrcH,
+		(UINT8 *)YuvConfig->UBuffer+UVOffset, YuvConfig->UVStride, DstW, DstH );
+
+	// V
+	AnyRatio_2D_Scale ( ppi, &FrameBuffer[ppi->ReconVDataOffset], ppi->Configuration.VideoFrameWidth/2+ppi->MVBorder, SrcW, SrcH,
+		(UINT8 *)YuvConfig->VBuffer+UVOffset, YuvConfig->UVStride, DstW, DstH );
+
+	return Scaled;
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : CenterImage
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi       : Pointer to post-processor instance.
+*                  UINT8 *FrameBuffer           : Pointer to source image.
+*                  YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Centers the image without scaling in the output buffer.
+*
+*  SPECIAL NOTES : Only the picture area is copied; the surrounding border of
+*                  the destination is left as it was. Destination rows are
+*                  stepped by YWidth/UVWidth, source rows by ppi->YStride/
+*                  ppi->UVStride.
+*
+****************************************************************************/
+void CCONV CenterImage ( POSTPROC_INSTANCE *ppi, UINT8 *FrameBuffer, YUV_BUFFER_CONFIG *YuvConfig )
+{
+	UINT32 Row;
+	INT32 TopMargin, LeftMargin;
+	UINT8 *From;
+	UINT8 *To;
+
+	// half of the spare space goes above and to the left of the picture
+	TopMargin  = (YuvConfig->YHeight - ppi->Configuration.VideoFrameHeight)/2;
+	LeftMargin = (YuvConfig->YWidth - ppi->Configuration.VideoFrameWidth)/2;
+
+	// luma plane
+	From = FrameBuffer + ppi->ReconYDataOffset;
+	To   = (UINT8 *)YuvConfig->YBuffer+TopMargin*YuvConfig->YWidth+LeftMargin;
+	for ( Row=0; Row<ppi->Configuration.VideoFrameHeight; Row++, To += YuvConfig->YWidth, From += ppi->YStride )
+		memcpy ( To, From, ppi->Configuration.VideoFrameWidth );
+
+	// first chroma plane (U): half the margins, half the size
+	From = FrameBuffer + ppi->ReconUDataOffset;
+	To   = (UINT8 *)YuvConfig->UBuffer+TopMargin/2*YuvConfig->UVWidth+LeftMargin/2;
+	for ( Row=0; Row<ppi->Configuration.VideoFrameHeight/2; Row++, To += YuvConfig->UVWidth, From += ppi->UVStride )
+		memcpy ( To, From, ppi->Configuration.VideoFrameWidth/2 );
+
+	// second chroma plane (V)
+	From = FrameBuffer + ppi->ReconVDataOffset;
+	To   = (UINT8 *)YuvConfig->VBuffer+TopMargin/2*YuvConfig->UVWidth+LeftMargin/2;
+	for ( Row=0; Row<ppi->Configuration.VideoFrameHeight/2; Row++, To += YuvConfig->UVWidth, From += ppi->UVStride )
+		memcpy ( To, From, ppi->Configuration.VideoFrameWidth/2 );
+}
+
+/****************************************************************************
+* 
+*  ROUTINE       : ScaleOrCenter 
+*
+*  INPUTS        : POSTPROC_INSTANCE *ppi       : Pointer to post-processor instance.
+*                  UINT8 *FrameBuffer           : Pointer to source image.
+*                  YUV_BUFFER_CONFIG *YuvConfig : Pointer to destination image.
+*
+*  OUTPUTS       : None.
+*
+*  RETURNS       : void
+*	
+*  FUNCTION      : Decides to scale or center image in scale buffer for blit
+*
+*  SPECIAL NOTES : UpdateUMVBorder is called first whenever 
+*                  ppi->PostProcessingLevel is non-zero. SCALE_TO_FIT and
+*                  MAINTAIN_ASPECT_RATIO both scale into a centered window;
+*                  the result of that scaling is not checked. An earlier
+*                  SCALE_TO_FIT variant (scale at offset 0, fall back to
+*                  CenterImage on failure) is disabled. Unknown scaling
+*                  modes do nothing.
+*
+****************************************************************************/
+void CCONV ScaleOrCenter
+( 
+ POSTPROC_INSTANCE *ppi, 
+ UINT8 *FrameBuffer,
+ YUV_BUFFER_CONFIG *YuvConfig
+ )
+{
+	int TopMargin;
+	int LeftMargin;
+	int LumaOffset;
+	int ChromaOffset;
+
+	if ( ppi->PostProcessingLevel ) 
+		UpdateUMVBorder ( ppi, FrameBuffer );
+
+	switch ( ppi->Configuration.ScalingMode )
+	{
+	case CENTER:
+		CenterImage ( ppi, FrameBuffer, YuvConfig );
+		break;
+
+	case SCALE_TO_FIT:
+	case MAINTAIN_ASPECT_RATIO:
+		// position of the expanded frame when centered in the output buffer
+		TopMargin  = (YuvConfig->YHeight - (int)ppi->Configuration.ExpandedFrameHeight ) / 2;
+		LeftMargin = (YuvConfig->YWidth  - (int)ppi->Configuration.ExpandedFrameWidth  ) / 2;
+
+		LumaOffset   = TopMargin * YuvConfig->YWidth + LeftMargin;
+		ChromaOffset = (TopMargin>>1) * YuvConfig->UVWidth + (LeftMargin>>1);
+
+		// scale straight into the centered window
+		AnyRatioFrameScale ( ppi, FrameBuffer, YuvConfig, LumaOffset, ChromaOffset );
+		break;
+
+	default:
+		break;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c b/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c
new file mode 100644
index 00000000..66153da2
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/generic/simpledeblocker.c
@@ -0,0 +1,392 @@
+/****************************************************************************
+ *
+ *   Module Title :     simpledeblock.c
+ *
+ *   Description  :     Simple deblocking filter.
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#if ( defined(_MSC_VER) || defined(MAPCA) )
+/* Absolute value for the toolchains selected above. Every use of the
+*  argument is parenthesized so that any expression can be passed. The
+*  argument is expanded more than once, so it must have no side effects.
+*/
+#define abs(x) ( ((x)>0) ? (x) : (-(x)) )
+#endif
+
+/****************************************************************************
+*  Imports
+****************************************************************************/              
+extern UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+*  Module Statics
+*
+*  DeblockLimitValuesV1 : Table of Q_TABLE_SIZE limit values. The 64 entries
+*                         listed never increase, falling from 30 at index 0
+*                         to 0 for the last 16 entries.
+*                         NOTE(review): presumably indexed by quantizer index
+*                         to choose the deblocking limit, and Q_TABLE_SIZE is
+*                         presumably 64 - confirm at the definition and use site.
+****************************************************************************/
+static const UINT32 DeblockLimitValuesV1[Q_TABLE_SIZE] =
+{	
+	30, 25, 20, 20, 15, 15, 14, 14,
+    13, 13, 12, 12, 11, 11, 10, 10, 
+     9,  9,  8,  8,  7,  7,  7,  7,
+     6,  6,  6,  6,  5,  5,  5,  5,
+     4,  4,  4,  4,  3,  3,  3,  3,  
+     2,  2,  2,  2,  2,  2,  2,  2,  
+     0,  0,  0,  0,  0,  0,  0,  0,  
+     0,  0,  0,  0,  0,  0,  0,  0 
+};
+ 
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterHoriz_Simple2_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
+ *                  UINT8 *PixelPtr         : Pointer to four pixels that straddle the edge.
+ *                  INT32 LineLength        : Stride of the image being filtered.
+ *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the vertical edge by applying
+ *                  the filter horizontally to each of the 8-rows of the 
+ *                  block edge.
+ *
+ *  SPECIAL NOTES : With x, y, z the differences between neighbouring pixels
+ *                  (left to right), the raw filter value is 2*y + z - x,
+ *                  rounded, divided by 8 and mapped through BoundingValuePtr.
+ *                  It is added to pixel 1 and subtracted from pixel 2. Half
+ *                  of it is applied to pixels 0 and 3, but only when both
+ *                  sides are flat (x and z are zero). A row with y == 0 is
+ *                  left untouched and the filter moves on to the next row.
+ *                  BoundingValuePtr is indexed with negative values, so it
+ *                  must point into the interior of its table.
+ *
+ ****************************************************************************/
+void FilterHoriz_Simple2_C
+(
+    POSTPROC_INSTANCE *ppi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+	INT32 j;
+	INT32 x,y,z;
+	INT32 FiltVal;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+	
+    (void) ppi;
+
+	// The row advance is part of the for-statement so that the early
+	// "continue" below also steps to the next row. It used to sit at the
+	// end of the body, so one row with y == 0 stalled the pointer and left
+	// every remaining row of the edge unfiltered.
+	for ( j=0; j<8; j++, PixelPtr += LineLength )
+	{            
+		y = PixelPtr[2]-PixelPtr[1];
+
+		// no step across the edge on this row: nothing to smooth
+		if ( !y ) continue;
+
+		x = PixelPtr[1]-PixelPtr[0];
+		z = PixelPtr[3]-PixelPtr[2];
+		
+		FiltVal = 2 * y + z - x;
+        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+		
+		PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
+		PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];
+
+		// outer pixels get half the correction, and only when both sides are flat
+		FiltVal >>= 1;
+		FiltVal *= ((x|z)==0);
+
+        PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
+		PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterVert_Simple2_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
+ *                  UINT8 *PixelPtr         : Pointer to the first pixel of the row just below the edge.
+ *                  INT32 LineLength        : Stride of the image being filtered.
+ *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the horizontal edge by applying
+ *                  the filter vertically to each of the 8-columns of the 
+ *                  block edge.
+ *
+ *  SPECIAL NOTES : Two rows above and two rows from PixelPtr downwards are
+ *                  read in every column. If either side of the edge varies
+ *                  by more than 1, the outer pixels feed the filter value and
+ *                  only the two pixels next to the edge are changed;
+ *                  otherwise the outer pixels also get half the correction.
+ *
+ ****************************************************************************/
+void FilterVert_Simple2_C
+(
+    POSTPROC_INSTANCE *ppi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+	INT32 Col;
+	INT32 Delta;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+	
+    (void) ppi;
+
+	for ( Col=0; Col<8; Col++, PixelPtr++ )
+	{            
+		// the four pixels of this column that straddle the edge, top to bottom
+		UINT8 *Above2 = PixelPtr - 2*LineLength;
+		UINT8 *Above1 = PixelPtr - LineLength;
+		UINT8 *Below0 = PixelPtr;
+		UINT8 *Below1 = PixelPtr + LineLength;
+		INT32 Busy;
+
+		Delta = 3 * ((INT32)*Below0 - (INT32)*Above1);
+
+		// "busy" when either side of the edge changes by more than one level
+		Busy = abs ( *Above2 - *Above1 ) > 1 ||
+		       abs ( *Below0 - *Below1 ) > 1;
+
+		if ( Busy )
+			Delta += (INT32)*Above2 - (INT32)*Below1;
+
+		Delta = BoundingValuePtr[(Delta + 4) >> 3];
+
+		*Above1 = LimitTable[(INT32)*Above1 + Delta];
+		*Below0 = LimitTable[(INT32)*Below0 - Delta];
+
+		// flat on both sides: spread half of the correction to the outer pixels
+		if ( !Busy )
+		{
+			Delta >>= 1;
+			*Above2 = LimitTable[(INT32)*Above2 + Delta];
+			*Below1 = LimitTable[(INT32)*Below1 - Delta];
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterHoriz_Simple_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
+ *                  UINT8 *PixelPtr         : Pointer to four pixels that straddle the edge.
+ *                  INT32 LineLength        : Stride of the image being filtered.
+ *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the vertical edge by applying
+ *                  the filter horizontally to each of the 8-rows of the 
+ *                  block edge.
+ *
+ *  SPECIAL NOTES : The edge lies between pixels 1 and 2 of each row. If
+ *                  either side of the edge varies by more than 1, the outer
+ *                  pixels feed the filter value and only pixels 1 and 2 are
+ *                  changed; otherwise pixels 0 and 3 also get half the
+ *                  correction.
+ *
+ ****************************************************************************/
+void FilterHoriz_Simple_C
+(
+    POSTPROC_INSTANCE *ppi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+	INT32 Row;
+	INT32 Delta;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+	
+    (void) ppi;
+
+	for ( Row=0; Row<8; Row++, PixelPtr += LineLength )
+	{            
+		// the four pixels of this row, left to right, as read before any update
+		INT32 p0 = PixelPtr[0];
+		INT32 p1 = PixelPtr[1];
+		INT32 p2 = PixelPtr[2];
+		INT32 p3 = PixelPtr[3];
+		INT32 Busy = abs(p0 - p1) > 1 ||
+		             abs(p2 - p3) > 1;
+
+		Delta = (p2*3) - (p1*3);
+		if ( Busy )
+			Delta += p0 - p3;
+
+		Delta = BoundingValuePtr[(Delta + 4) >> 3];
+
+		PixelPtr[1] = LimitTable[p1 + Delta];
+		PixelPtr[2] = LimitTable[p2 - Delta];
+
+		// flat on both sides: spread half of the correction to the outer pixels
+		if ( !Busy )
+		{
+			Delta >>= 1;
+			PixelPtr[0] = LimitTable[p0 + Delta];
+			PixelPtr[3] = LimitTable[p3 - Delta];
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterVert_Simple_C
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
+ *                  UINT8 *PixelPtr         : Pointer to the first pixel of the row just below the edge.
+ *                  INT32 LineLength        : Stride of the image being filtered.
+ *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the horizontal edge by applying
+ *                  the filter vertically to each of the 8-columns of the 
+ *                  block edge.
+ *
+ *  SPECIAL NOTES : The edge lies between the row above PixelPtr and the row
+ *                  at PixelPtr. When both sides of the edge vary by at most
+ *                  1 the outer rows also receive half of the correction;
+ *                  otherwise the outer rows feed the filter value and are
+ *                  left unchanged.
+ *
+ ****************************************************************************/
+void FilterVert_Simple_C
+(
+    POSTPROC_INSTANCE *ppi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+	INT32 j;
+	INT32 FiltVal;
+	// row offsets relative to PixelPtr
+	const INT32 Up2 = -(2*LineLength);
+	const INT32 Up1 = -LineLength;
+	const INT32 Dn1 = LineLength;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+	
+    (void) ppi;
+
+	for ( j=0; j<8; j++, PixelPtr++ )
+	{            
+		// flat when neither side of the edge changes by more than one level
+		INT32 Flat = abs(PixelPtr[Up2] - PixelPtr[Up1]) <= 1 &&
+		             abs(PixelPtr[0] - PixelPtr[Dn1]) <= 1;
+
+		FiltVal = ((INT32)PixelPtr[0] - (INT32)PixelPtr[Up1]) * 3;
+
+		if ( !Flat )
+			FiltVal += ((INT32)PixelPtr[Up2]) - ((INT32)PixelPtr[Dn1]);
+
+		FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+
+		PixelPtr[Up1] = LimitTable[(INT32)PixelPtr[Up1] + FiltVal];
+		PixelPtr[  0] = LimitTable[(INT32)PixelPtr[  0] - FiltVal];
+
+		if ( Flat )
+		{
+			FiltVal >>= 1;
+			PixelPtr[Up2] = LimitTable[(INT32)PixelPtr[Up2] + FiltVal];
+			PixelPtr[Dn1] = LimitTable[(INT32)PixelPtr[Dn1] - FiltVal];
+		}
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SimpleDeblockFrame
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *ppi  : Post-processing instance (frame geometry, Q index).
+ *                  UINT8 *SrcBuffer        : Image to be deblocked (only read here).
+ *                  UINT8 *DestBuffer       : Receives the deblocked image.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies the Y, U and V planes to DestBuffer one band of 8 rows
+ *                  at a time and filters the 8x8 block edges there.
+ *
+ *  SPECIAL NOTES : The left edge of column 0 and the top edge of row 0 are not filtered.
+ ****************************************************************************/
+void SimpleDeblockFrame ( POSTPROC_INSTANCE *ppi, UINT8 *SrcBuffer, UINT8 *DestBuffer )
+{
+    INT32 Plane;                // 0 = Y, 1 = U, 2 = V
+    INT32 FragRow;
+    INT32 FragCol;
+    INT32 RowStart;             // offset of the current 8-row band within both buffers
+    INT32 BandBytes;            // bytes in one 8-row band (8 * stride)
+    INT32 FragsAcross;
+    INT32 FragsDown;
+    INT32 LineLength;
+    INT32 FLimit;
+    INT32 *BoundingValuePtr;
+    INT32 QIndex = ppi->FrameQIndex;
+
+    // The limit table depends on the encoder version that produced the frame.
+    if ( ppi->Vp3VersionNo < 2 )
+        FLimit = DeblockLimitValuesV1[QIndex];
+    else
+        FLimit = DeblockLimitValuesV2[QIndex];
+
+    // The returned table is handed unchanged to both edge filters below.
+    BoundingValuePtr = SetupDeblockValueArray ( ppi, FLimit );
+
+    for ( Plane = 0; Plane < 3; Plane++ )
+    {
+        // Per-plane geometry; the chroma planes use half the fragment counts.
+        if ( Plane == 0 )           // Y
+        {
+            FragsAcross = ppi->HFragments;
+            FragsDown   = ppi->VFragments;
+            LineLength  = ppi->YStride;
+            RowStart    = ppi->ReconYDataOffset;
+        }
+        else if ( Plane == 1 )      // U
+        {
+            FragsAcross = ppi->HFragments >> 1;
+            FragsDown   = ppi->VFragments >> 1;
+            LineLength  = ppi->UVStride;
+            RowStart    = ppi->ReconUDataOffset;
+        }
+        else                        // V
+        {
+            FragsAcross = ppi->HFragments >> 1;
+            FragsDown   = ppi->VFragments >> 1;
+            LineLength  = ppi->UVStride;
+            RowStart    = ppi->ReconVDataOffset;
+        }
+
+        BandBytes = LineLength * 8;
+
+        //------------------------------------------------------------------
+        // First band: there is no band above it, so only the left edge of
+        // each block is filtered. The first column is skipped.
+        //------------------------------------------------------------------
+        memcpy ( &DestBuffer[RowStart], &SrcBuffer[RowStart], BandBytes );
+
+        for ( FragCol = 1; FragCol < FragsAcross; FragCol++ )
+        {
+            FilterHoriz_Simple ( ppi, &DestBuffer[RowStart + FragCol*8 - 2], LineLength, BoundingValuePtr );
+        }
+
+        RowStart += BandBytes;
+
+        //------------------------------------------------------------------
+        // Remaining bands: copy the band, then filter each block's left and
+        // top edges. The vertical filter (see FilterVert_Simple_C) also
+        // touches rows above its pointer, i.e. the band copied previously.
+        //------------------------------------------------------------------
+        for ( FragRow = 1; FragRow < FragsDown; FragRow++ )
+        {
+            memcpy ( &DestBuffer[RowStart], &SrcBuffer[RowStart], BandBytes );
+
+            // Column 0 has no block to its left: top edge only.
+            FilterVert_Simple ( ppi, &DestBuffer[RowStart], LineLength, BoundingValuePtr );
+
+            // Every other column: left edge first, then top edge.
+            for ( FragCol = 1; FragCol < FragsAcross; FragCol++ )
+            {
+                UINT8 *Block = &DestBuffer[RowStart + FragCol*8];
+
+                FilterHoriz_Simple ( ppi, Block - 2, LineLength, BoundingValuePtr );
+                FilterVert_Simple  ( ppi, Block,     LineLength, BoundingValuePtr );
+            }
+
+            RowStart += BandBytes;
+        }
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h b/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h
new file mode 100644
index 00000000..08f0ef7e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/include/postp.h
@@ -0,0 +1,136 @@
+/****************************************************************************
+*
+*   Module Title :     postp.h
+*
+*   Description  :     Post processor interface
+*
+****************************************************************************/
+#ifndef POSTP_H
+#define POSTP_H
+
+#include "codec_common.h"
+
+// YUV buffer configuration structure: per-plane geometry plus one base pointer per plane.
+typedef struct
+{
+    int     YWidth;     // Y plane geometry
+    int     YHeight;    //   (units are not stated here; presumably pixels for width/height,
+    int     YStride;    //    bytes for stride - TODO confirm against the users of this struct)
+
+    int     UVWidth;    // a single set of geometry fields is shared by the U and V planes
+    int     UVHeight;
+    int     UVStride;
+
+    char *  YBuffer;    // base pointer of each plane
+    char *  UBuffer;
+    char *  VBuffer;
+
+} YUV_BUFFER_CONFIG;
+
+typedef enum    // scaling mode selector; NOTE(review): not referenced elsewhere in this header, consumers are not visible here
+{
+    MAINTAIN_ASPECT_RATIO   = 0x0,
+    SCALE_TO_FIT            = 0x1,
+    CENTER                  = 0x2,
+    OTHER                   = 0x3   // meaning is not evident from this header - TODO confirm
+} SCALE_MODE;
+
+// Non-zero when fragment i is flagged as coded: masks the first byte of element i of FragInfo with FragInfoCodedMask.
+// NOTE: expands to use a variable named ppi, which must be in scope at the call site. (The FragInfo layout was expected to change.)
+#define blockCoded(i) (ppi->FragInfo[(i)*ppi->FragInfoElementSize]&ppi->FragInfoCodedMask)
+
+
+typedef struct // post-processor state: partly passed in by the caller, partly allocated and maintained by the post-processor
+{
+
+	// per frame information passed in
+	INT32        Vp3VersionNo;			// version of frame; SimpleDeblockFrame uses >= 2 to select DeblockLimitValuesV2
+	INT32		 FrameType;				// key or non key
+	INT32		 PostProcessingLevel;	// level of post processing to perform 
+	INT32		 FrameQIndex;			// q index value used on passed in frame; indexes the deblock limit tables
+	UINT8		*LastFrameRecon;		// reconstruction buffer : passed in
+	UINT8		*PostProcessBuffer;		// postprocessing buffer : passed in
+
+	// per block information passed in 
+	UINT8		*FragInfo;				// blocks coded : passed in (read through the blockCoded macro)
+	UINT32       FragInfoElementSize;	// size of each element (used as the index multiplier by blockCoded)
+	UINT32		 FragInfoCodedMask;		// mask to get at whether fragment is coded
+
+	// per block info maintained by postprocessor
+	INT32		*FragQIndex;			// block's q index : allocated and filled
+	INT32		*FragmentVariances;		// block's pseudo variance : allocated and filled
+	UINT8		*FragDeblockingFlag;	// whether to deblock block : allocated and filled
+
+	// filter specific vars
+    INT32		*BoundingValuePtr;		// pointer to a filter     
+	INT32		*FiltBoundingValue;		// allocated (512 big)
+
+	// deblocker specific vars
+    INT32		*DeblockValuePtr;		// pointer to a filter     
+	INT32		*DeblockBoundingValue;	// allocated (512 big)
+
+	
+	// frame configuration 
+	CONFIG_TYPE  Configuration;			
+	UINT32		 ReconYDataOffset;		// position within buffer of first y fragment
+	UINT32		 ReconUDataOffset;		// position within buffer of first u fragment
+	UINT32		 ReconVDataOffset;		// position within buffer of first v fragment
+	UINT32		 YPlaneFragments;		// number of y fragments
+	UINT32		 UVPlaneFragments;		// number of u and v fragments
+	UINT32		 UnitFragments;			// number of total fragments y+u+v 
+	UINT32		 HFragments;			// number of horizontal fragments in y (SimpleDeblockFrame halves it for u and v)
+	UINT32		 VFragments;			// number of vertical fragments in y (SimpleDeblockFrame halves it for u and v)
+	INT32        YStride;				// pitch of y in bytes
+	INT32        UVStride;				// pitch of uv in bytes
+
+	// allocs so we can align our ptrs
+	INT32		*FiltBoundingValueAlloc;
+	INT32		*DeblockBoundingValueAlloc;
+	INT32		*FragQIndexAlloc;		
+	INT32		*FragmentVariancesAlloc;
+	UINT8		*FragDeblockingFlagAlloc;
+	UINT32      MVBorder;                   // NOTE(review): undocumented; meaning must be confirmed from its users
+    UINT8       *IntermediateBufferAlloc;  // presumably the raw allocation behind IntermediateBuffer (same Alloc naming as above) - confirm
+    UINT8       *IntermediateBuffer; 
+    UINT32      DeInterlaceMode;            // NOTE(review): mode values are not defined in this header
+    UINT32      AddNoiseMode;               // NOTE(review): mode values are not defined in this header
+
+} POSTPROC_INSTANCE;
+
+#define VAL_RANGE   256
+extern UINT8 LimitVal_VP31[VAL_RANGE * 3];  // the simple edge filters index this from entry [VAL_RANGE], so negative offsets are addressable
+typedef POSTPROC_INSTANCE * xPB_INST ;
+// Function pointers for the post-processing routines; they are only declared here and are defined/assigned elsewhere.
+extern void  (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+extern void  (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+extern void  (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+extern void  (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch); 
+
+extern void  (*CopyBlock) (unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void  (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void  (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void  (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
+extern void  (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
+extern void  (*FilterHoriz_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void  (*FilterVert_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void  (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+extern void  (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
+extern void  (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern void  (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern void  (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
+extern INT32*(*SetupBoundingValueArray)(xPB_INST pbi, INT32 FLimit);
+extern INT32*(*SetupDeblockValueArray)(xPB_INST pbi, INT32 FLimit);
+extern void  (*FilterHoriz)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void  (*FilterVert)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
+extern void  (*ClampLevels)( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,	INT32 WhiteClamp, UINT8	*Src, UINT8	*Dst);
+extern void  (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);  
+extern void  (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+// NOTE(review): presumably selects the implementations behind the pointers above from the CPU feature flags - confirm.
+extern void  DMachineSpecificConfig(INT32 MmxEnabled, INT32 XmmEnabled, INT32 WmtEnabled);
+
+#endif
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj
new file mode 100644
index 00000000..b78564bc
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj
@@ -0,0 +1,441 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{8F2BF92C-C4E1-45AE-BA45-2617B03B32AC}</ProjectGuid>
+    <RootNamespace>vppp</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\borders.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\clamp.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\deblock.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\DeInterlace.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\dering.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\doptsystemdependant.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\loopfilter.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\postproc.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\scale.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\simpledeblocker.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\clamp_asm.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\deblockopt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\deblockwmtopt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\DeInterlaceMmx.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\DeInterlaceWmt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\deringopt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\deringwmtopt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\doptsystemdependant.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\loopf_asm.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\newlooptest_asm.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\scaleopt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\simpledeblock_asm.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters
new file mode 100644
index 00000000..d5946f3d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.vcxproj.filters
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="generic">
+      <UniqueIdentifier>{adcd4975-46d4-4f20-8422-a898d3456999}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="win32">
+      <UniqueIdentifier>{4fbef4da-8fe3-440e-858e-2fbabea42066}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\borders.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\clamp.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\deblock.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\DeInterlace.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\dering.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\doptsystemdependant.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\loopfilter.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\postproc.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\scale.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\simpledeblocker.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\clamp_asm.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\deblockopt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\deblockwmtopt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\DeInterlaceMmx.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\DeInterlaceWmt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\deringopt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\deringwmtopt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\doptsystemdependant.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\loopf_asm.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\newlooptest_asm.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\scaleopt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\simpledeblock_asm.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..c7b0b41c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/vppp.xcodeproj/project.pbxproj
@@ -0,0 +1,233 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238B0BB8155500FDDAB7 /* postproc.c */; };
+		0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238D0BB8155F00FDDAB7 /* loopfilter.c */; };
+		0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A2C0BB78F6700DD0AFD /* scale.c */; };
+		0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */; };
+		0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */; };
+		0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A350BB78F8600DD0AFD /* deblock.c */; };
+		0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A360BB78F8600DD0AFD /* DeInterlace.c */; };
+		0CF73A400BB78F8600DD0AFD /* dering.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A370BB78F8600DD0AFD /* dering.c */; };
+		0CF73A410BB78F8600DD0AFD /* borders.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A380BB78F8600DD0AFD /* borders.c */; };
+		0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A390BB78F8600DD0AFD /* clamp.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0C14238B0BB8155500FDDAB7 /* postproc.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = postproc.c; path = generic/postproc.c; sourceTree = "<group>"; };
+		0C14238D0BB8155F00FDDAB7 /* loopfilter.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = loopfilter.c; path = generic/loopfilter.c; sourceTree = "<group>"; };
+		0CF73A2C0BB78F6700DD0AFD /* scale.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = scale.c; path = generic/scale.c; sourceTree = "<group>"; };
+		0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = simpledeblocker.c; path = generic/simpledeblocker.c; sourceTree = "<group>"; };
+		0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = doptsystemdependant.c; path = generic/doptsystemdependant.c; sourceTree = "<group>"; };
+		0CF73A350BB78F8600DD0AFD /* deblock.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = deblock.c; path = generic/deblock.c; sourceTree = "<group>"; };
+		0CF73A360BB78F8600DD0AFD /* DeInterlace.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DeInterlace.c; path = generic/DeInterlace.c; sourceTree = "<group>"; };
+		0CF73A370BB78F8600DD0AFD /* dering.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = dering.c; path = generic/dering.c; sourceTree = "<group>"; };
+		0CF73A380BB78F8600DD0AFD /* borders.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = borders.c; path = generic/borders.c; sourceTree = "<group>"; };
+		0CF73A390BB78F8600DD0AFD /* clamp.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = clamp.c; path = generic/clamp.c; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libvppp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvppp.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* vppp */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = vppp;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				0C14238B0BB8155500FDDAB7 /* postproc.c */,
+				0CF73A2C0BB78F6700DD0AFD /* scale.c */,
+				0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */,
+				0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */,
+				0CF73A350BB78F8600DD0AFD /* deblock.c */,
+				0CF73A360BB78F8600DD0AFD /* DeInterlace.c */,
+				0CF73A370BB78F8600DD0AFD /* dering.c */,
+				0C14238D0BB8155F00FDDAB7 /* loopfilter.c */,
+				0CF73A380BB78F8600DD0AFD /* borders.c */,
+				0CF73A390BB78F8600DD0AFD /* clamp.c */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libvppp.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		D2AAC045055464E500DB518D /* vppp */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = vppp;
+			productName = vppp;
+			productReference = D2AAC046055464E500DB518D /* libvppp.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* vppp */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* vppp */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */,
+				0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */,
+				0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */,
+				0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */,
+				0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */,
+				0CF73A400BB78F8600DD0AFD /* dering.c in Sources */,
+				0CF73A410BB78F8600DD0AFD /* borders.c in Sources */,
+				0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */,
+				0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */,
+				0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = YES;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vppp;
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = (
+					ppc,
+					i386,
+				);
+				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+				GCC_MODEL_TUNING = G5;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vppp;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c
new file mode 100644
index 00000000..cf62c887
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceMmx.c
@@ -0,0 +1,143 @@
+/**************************************************************************** 
+ *
+ *   Module Title :     DeInterlaceMmx.c
+ *
+ *   Description  :     DeInterlace Routines
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Module constants: four2s is the +2 rounding term added before the >>2.
+****************************************************************************/        
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short four2s[] = { 2, 2, 2, 2 };  // NOTE(review): #pragma pack controls member packing; it may not align this array - confirm
+#pragma pack()
+#else
+__declspec(align(16)) short four2s[] = { 2, 2, 2, 2 };  // one value per word lane of the paddw operand
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : MmxFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr : Pointer to input frame.
+ *                  UINT8 *DstPtr : Pointer to output frame.
+ *                  INT32 Width   : Width of frame in pixels.
+ *                  INT32 Height  : Height of frame in pixels.
+ *                  INT32 Stride  : Byte step between rows, used for both buffers.
+ *
+ *  OUTPUTS       : The rows at DstPtr are overwritten.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3 tap filter [1, 2, 1] vertically (MMX) to remove
+ *                  interlacing artifacts; first and last rows are copied as is.
+ *
+ *  SPECIAL NOTES : Output is (above + 2*current + below + 2) >> 2 per pixel.
+ *                  Assumes: 1) SrcPtr and DstPtr buffers have the same geometry,
+ *                  2) SrcPtr and DstPtr are _not_ the same buffer, 3) Height >= 2.
+ *                  Rows are filtered 8 pixels per pass, so Width is effectively
+ *                  rounded up to a multiple of 8. No emms is issued here.
+ ****************************************************************************/
+void MmxFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32 i;  
+    UINT8 *CurrentSrcPtr = SrcPtr;              // row above the one being filtered
+    UINT8 *CurrentDstPtr = DstPtr;              // row being written
+    
+    // The first row is copied unfiltered
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    for ( i=1; i<Height-1; i++ )                // interior rows only
+    {
+        CurrentDstPtr += Stride;                // dst now leads src by one row
+
+        __asm
+        {
+            mov         esi,        [CurrentSrcPtr]                     // esi = line -1
+            mov         edi,        [CurrentDstPtr]                     // edi = output row
+            
+            xor         ecx,        ecx                                 // ecx = byte offset within the row
+            mov         edx,        [Stride]
+
+            lea         eax,        [esi + edx]                         // eax = line 0
+            lea         edx,        [eax + edx]                         // edx = line 1
+
+            mov         ebx,        [Width]                             // ebx = loop bound
+            pxor        mm7,        mm7                                 // mm7 = 0, used to widen bytes to words
+
+MmxDeInterlaceLoop:            
+            movq        mm0,       QWORD ptr [esi + ecx]                // line -1
+            movq        mm1,       QWORD ptr [eax + ecx]                // line  0
+
+            movq        mm3,        mm0                                 // line -1
+            punpcklbw   mm0,        mm7                                 // line -1 low
+            
+            movq        mm2,        QWORD ptr [edx + ecx]               // line 1
+            punpckhbw   mm3,        mm7                                 // line -1 high
+
+
+            movq        mm4,        mm1                                 // line 0 
+            punpcklbw   mm1,        mm7                                 // line 0 low
+
+            paddw       mm0,        four2s                              // line -1 low + 2s
+            paddw       mm3,        four2s                              // line -1 high + 2s
+
+            punpckhbw   mm4,        mm7                                 // line 0 high
+            psllw       mm1,        1                                   // line 0 low * 2
+
+            psllw       mm4,        1                                   // line 0 high * 2
+            movq        mm5,        mm2                                 // line 1
+
+            punpcklbw   mm2,        mm7                                 // line 1 low
+            paddw       mm0,        mm1                                 // low:  line -1 + 2 + line 0 * 2
+
+            paddw       mm3,        mm4                                 // high: line -1 + 2 + line 0 * 2
+            punpckhbw   mm5,        mm7                                 // line 1 high
+            
+            paddw       mm0,        mm2                                 // low:  -1 + 0 * 2 + 1 (+ 2)
+            paddw       mm3,        mm5                                 // high: -1 + 0 * 2 + 1 (+ 2)
+
+            psraw       mm0,        2                                   // >> 2
+            psraw       mm3,        2                                   // >> 2
+            
+            packuswb    mm0,        mm3                                 // words back to 8 unsigned bytes
+
+            movq        QWORD ptr [edi+ecx],   mm0                      // store 8 output pixels
+            add         ecx,        8                                   // advance to the next 8 pixels
+
+            cmp         ecx,        ebx
+            jl          MmxDeInterlaceLoop                              // repeat while offset < Width
+            
+        }   // NOTE(review): MMX state is left live (no emms in this function) - confirm the caller clears it
+        CurrentSrcPtr += Stride;
+        /* Scalar form of the loop above (the asm also adds 2 before the shift):
+        for(j=0;j<Width;j++)
+        {
+            x0 = PrevSrcPtr[j];
+            x1 = (CurrentSrcPtr[j]<<1);
+            x2 = NextSrcPtr[j];
+            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2)>>2 );
+        }
+        */
+    }
+    
+    // Copy the last row unfiltered. NOTE(review): with Height < 2 this still copies the row at +Stride
+    CurrentSrcPtr += Stride;
+    CurrentDstPtr += Stride;
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c
new file mode 100644
index 00000000..d324d9e1
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/DeInterlaceWmt.c
@@ -0,0 +1,129 @@
+/**************************************************************************** 
+ *
+ *   Module Title :     DeInterlaceWmt.c
+ *
+ *   Description  :     DeInterlace
+ *
+ ***************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Module constants: Eight2s is the +2 rounding term added before the >>2.
+****************************************************************************/        
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };  // not read on WCE: WmtFastDeInterlace returns before its asm
+#pragma pack()
+#else
+__declspec(align(16)) short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };  // 16-byte aligned: used as a paddw xmm memory operand
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : WmtFastDeInterlace
+ *
+ *  INPUTS        : UINT8 *SrcPtr : Pointer to input frame.
+ *                  UINT8 *DstPtr : Pointer to output frame.
+ *                  INT32 Width   : Width of frame in pixels.
+ *                  INT32 Height  : Height of frame in pixels.
+ *                  INT32 Stride  : Byte step between rows, used for both buffers.
+ *
+ *  OUTPUTS       : The rows at DstPtr are overwritten (nothing on _WIN32_WCE).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 3 tap filter [1, 2, 1] vertically (xmm registers) to
+ *                  remove interlacing artifacts; first and last rows are copied.
+ *
+ *  SPECIAL NOTES : Output is (above + 2*current + below + 2) >> 2 per pixel.
+ *                  Assumes: 1) SrcPtr and DstPtr buffers have the same geometry,
+ *                  2) SrcPtr and DstPtr are _not_ the same buffer, 3) Height >= 2.
+ *                  Rows are filtered 8 pixels per pass, so Width is effectively
+ *                  rounded up to a multiple of 8. Returns at once on _WIN32_WCE.
+ ****************************************************************************/
+void WmtFastDeInterlace
+(
+    UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    INT32 Width,
+    INT32 Height,
+    INT32 Stride
+)
+{
+    INT32 i;  
+    UINT8 *CurrentSrcPtr = SrcPtr;              // row above the one being filtered
+    UINT8 *CurrentDstPtr = DstPtr;              // row being written
+#if defined(_WIN32_WCE)
+	return;                                     // Windows CE build: no-op, DstPtr is left unwritten
+#else
+    
+    // The first row is copied unfiltered
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+
+    for ( i=1; i<Height-1; i++ )                // interior rows only
+    {
+        CurrentDstPtr += Stride;                // dst now leads src by one row
+
+        __asm
+        {
+            mov         esi,        [CurrentSrcPtr]                     // esi = line -1
+            mov         edi,        [CurrentDstPtr]                     // edi = output row
+            
+            xor         ecx,        ecx                                 // ecx = byte offset within the row
+            mov         edx,        [Stride]
+
+            lea         eax,        [esi + edx]                         // eax = line 0
+            lea         edx,        [eax + edx]                         // edx = line 1
+
+            mov         ebx,        [Width]                             // ebx = loop bound
+            pxor        xmm7,       xmm7                                // xmm7 = 0, used to widen bytes to words
+
+WmtDeInterlaceLoop:            
+            movq        xmm0,       QWORD ptr [esi + ecx]               // 8 pixels of line -1
+            movq        xmm1,       QWORD ptr [eax + ecx]               // 8 pixels of line 0
+
+            punpcklbw   xmm0,       xmm7                                // line -1 as 8 words
+            movq        xmm2,       QWORD ptr [edx + ecx]               // 8 pixels of line 1
+
+            punpcklbw   xmm1,       xmm7                                // line 0 as 8 words
+            paddw       xmm0,       Eight2s                             // line -1 + 2s
+
+            psllw       xmm1,       1                                   // line 0 * 2
+            punpcklbw   xmm2,       xmm7                                // line 1 as 8 words
+
+            paddw       xmm0,       xmm1                                // line -1 + 2 + line 0 * 2
+            paddw       xmm0,       xmm2                                // ... + line 1
+
+            psraw       xmm0,       2                                   // >> 2
+            packuswb    xmm0,       xmm7                                // low 8 bytes = result, high 8 bytes = 0
+
+            movq        QWORD ptr [edi+ecx],   xmm0                     // store 8 output pixels
+            add         ecx,        8                                   // advance to the next 8 pixels
+
+            cmp         ecx,        ebx
+            jl          WmtDeInterlaceLoop                              // repeat while offset < Width
+            
+        }
+        CurrentSrcPtr += Stride;
+        /* Scalar form of the loop above (the asm also adds 2 before the shift):
+        for(j=0;j<Width;j++)
+        {
+            x0 = PrevSrcPtr[j];
+            x1 = (CurrentSrcPtr[j]<<1);
+            x2 = NextSrcPtr[j];
+            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2)>>2 );
+        }
+        */
+    }
+    
+    // Copy the last row unfiltered. NOTE(review): with Height < 2 this still copies the row at +Stride
+    CurrentSrcPtr += Stride;
+    CurrentDstPtr += Stride;
+    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
+#endif
+  
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c
new file mode 100644
index 00000000..27e43065
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/clamp_asm.c
@@ -0,0 +1,170 @@
+/****************************************************************************
+ *        
+ *   Module Title :     clamp_asm.c
+ *
+ *   Description  :     MMX and SSE2 (_wmt) versions of the post-processor level clamp
+ *
+ *   AUTHOR       :     Jim Bankoski
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *   1.09 YWX 26-Sep-01 Changed the default bandHeight from 5 to 4
+ *   1.08 YWX 23-Jul-00 Changed horizontal scaling function names
+ *   1.07 JBB 04 Dec 00 Added new Center vs Scale Bits
+ *   1.06 YWX 01-Dec-00 Removed bi-cubic scale functions
+ *   1.05 YWX 18-Oct-00 Added 1-2 scale functions
+ *   1.04 YWX 11-Oct-00 Added ratio check to determine scaling or centering
+ *   1.03 YWX 09-Oct-00 Added functions that do different scaling in horizontal
+ *                      and vertical directions
+ *   1.02 YWX 04-Oct-00 Added 3-5 scaling functions
+ *   1.01 YWX 03-Oct-00 Added a set of 4-5 scaling functions
+ *   1.00 JBB 15 Sep 00 New Configuration baseline.
+ *
+ *****************************************************************************
+ */
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#include "postp.h"
+#include <stdio.h>
+
+/****************************************************************************
+ *  Imported
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+ 
+       
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+
+/****************************************************************************
+*  Module Static Variables
+*****************************************************************************
+*/
+/* SSE2 luma clamp: copies the Y plane from Src to Dst, forcing each byte into
+ * [BlackClamp, 255 - WhiteClamp] (when that sum fits in a byte) by saturating
+ * subtract black, add black+white, subtract white. No-op under _WIN32_WCE. */
+void ClampLevels_wmt( 
+	POSTPROC_INSTANCE *pbi,
+	INT32        BlackClamp,			// number of values to clamp from 0 
+	INT32        WhiteClamp,			// number of values to clamp from 255
+	UINT8		*Src,					// reconstruction buffer : passed in
+	UINT8		*Dst					// postprocessing buffer : passed in
+	)
+{
+#if defined(_WIN32_WCE)
+	return;
+#else
+	__declspec(align(16)) unsigned char blackVec[16];
+	__declspec(align(16)) unsigned char whiteVec[16];
+	__declspec(align(16)) unsigned char sumVec[16];
+	int    rowBytes = pbi->HFragments * 8;
+	int    rowsLeft = pbi->VFragments * 8;
+	UINT8 *srcRow   = Src + pbi->ReconYDataOffset;
+	UINT8 *dstRow   = Dst + pbi->ReconYDataOffset;
+	UINT32 pitch    = pbi->YStride;
+	int    lane;
+
+	for ( lane = 15; lane >= 0; lane-- )
+	{
+		blackVec[lane] = (unsigned char)BlackClamp;
+		whiteVec[lane] = (unsigned char)WhiteClamp;
+		sumVec[lane]   = (unsigned char)(BlackClamp + WhiteClamp);
+	}
+
+	for ( ; rowsLeft > 0; rowsLeft-- )
+	{
+		// one pass always runs; more follow while the byte offset in eax < rowBytes
+		__asm
+		{
+			mov         esi, srcRow
+			mov         edi, dstRow
+			mov         ecx, rowBytes
+			xor         eax, eax
+		clamp16:
+			movdqa      xmm1, [esi+eax]
+			psubusb     xmm1, blackVec
+			paddusb     xmm1, sumVec
+			psubusb     xmm1, whiteVec
+			movdqa      [edi+eax], xmm1        ; store 16 clamped bytes
+			add         eax, 16
+			cmp         eax, ecx
+			jl          clamp16
+		}
+		srcRow += pitch;
+		dstRow += pitch;
+	}
+#endif
+}
+
+
+
+/* MMX luma clamp: copies the Y plane from Src to Dst eight bytes at a time,
+ * forcing each byte into [BlackClamp, 255 - WhiteClamp] (when that sum fits in
+ * a byte). No emms is executed here, so MMX state is still live on return. */
+void ClampLevels_mmx( 
+	POSTPROC_INSTANCE *pbi,
+	INT32        BlackClamp,			// number of values to clamp from 0 
+	INT32        WhiteClamp,			// number of values to clamp from 255
+	UINT8		*Src,					// reconstruction buffer : passed in
+	UINT8		*Dst					// postprocessing buffer : passed in
+	)
+{
+#if defined(_WIN32_WCE)
+	#pragma pack(8)
+	unsigned char blackVec[16];
+	unsigned char whiteVec[16];
+	unsigned char sumVec[16];
+	#pragma pack()
+#else
+	__declspec(align(8)) unsigned char blackVec[16];
+	__declspec(align(8)) unsigned char whiteVec[16];
+	__declspec(align(8)) unsigned char sumVec[16];
+#endif
+	int    rowBytes = pbi->HFragments * 8;
+	int    rowsLeft = pbi->VFragments * 8;
+	UINT8 *srcRow   = Src + pbi->ReconYDataOffset;
+	UINT8 *dstRow   = Dst + pbi->ReconYDataOffset;
+	UINT32 pitch    = pbi->YStride;
+	int    lane;
+
+	// the movq-based loop reads 8 bytes per table, so only bytes 0..7 are filled
+	for ( lane = 7; lane >= 0; lane-- )
+	{
+		blackVec[lane] = (unsigned char)BlackClamp;
+		whiteVec[lane] = (unsigned char)WhiteClamp;
+		sumVec[lane]   = (unsigned char)(BlackClamp + WhiteClamp);
+	}
+	for ( ; rowsLeft > 0; rowsLeft-- )
+	{
+		__asm
+		{
+			mov         esi, srcRow
+			mov         edi, dstRow
+			mov         ecx, rowBytes
+			xor         eax, eax
+		clamp8:
+			movq        mm1, [esi+eax]
+			psubusb     mm1, blackVec
+			paddusb     mm1, sumVec
+			psubusb     mm1, whiteVec
+			movq        [edi+eax], mm1         ; store 8 clamped bytes
+			add         eax, 8
+			cmp         eax, ecx
+			jl          clamp8
+		}
+		srcRow += pitch;
+		dstRow += pitch;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c
new file mode 100644
index 00000000..67285fab
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockopt.c
@@ -0,0 +1,6692 @@
+/****************************************************************************
+ *
+ *   Module Title :     DeblockOpt.c
+ *
+ *   Description  :     Optimized functions for deblocking 
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ *****************************************************************************
+ *  Revision History
+ * 
+ *      1.04 YWX 21-Mar-02 bug fixed in functions using abs diff criteria
+ *      1.03 YWX 15-Jun-01 Added new 7 tap filter in deblocking 
+ *      1.02 YWX 02-May-01 Changed to use sum of abs diff to replace variance
+ *	1.01 YWX 17-Nov-00 Re-arranged loop inside deblockNonFilteredBand()
+ *	1.00 YWX 02-Nov-00 Configuration baseline from old PPoptfunctions.c
+ *
+ *****************************************************************************
+ */
+
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+#ifdef _MSC_VER 
+#pragma warning(disable:4799)
+#pragma warning(disable:4731)
+#endif
+
+
+#define STRICT              /* Strict type checking. */
+
+#include "postp.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+#if defined(_WIN32_WCE)	/* WCE build: no __declspec(align); NOTE(review): confirm #pragma pack(16) really aligns these tables */
+#pragma pack(16)
+static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 };	/* eight shorts of 128 */
+static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64  };	/* eight shorts of 64 */
+static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3};	/* eight shorts of 3 */
+static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4};	/* eight shorts of 4 */
+static short Four128s[] = {128, 128, 128, 128};	/* four shorts of 128 */
+static short Four64s[] = {64, 64, 64, 64 };	/* four shorts of 64 */
+static short FourThrees[]= {3, 3, 3, 3};	/* four shorts of 3 */
+static short FourFours[]= {4, 4, 4, 4};	/* four shorts of 4 */
+static short FourOnes[]= { 1, 1, 1, 1};	/* four shorts of 1 */
+static unsigned char  Eight128c[] = {128, 128, 128, 128,128, 128, 128, 128 };	/* eight bytes of 128 */
+#pragma pack()
+#else	/* all other builds: each table is declared with 16-byte alignment */
+__declspec(align(16)) static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 };	/* eight shorts of 128 */
+__declspec(align(16)) static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64  };	/* eight shorts of 64 */
+__declspec(align(16)) static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3};	/* eight shorts of 3 */
+__declspec(align(16)) static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4};	/* eight shorts of 4 */
+__declspec(align(16)) static short Four128s[] = {128, 128, 128, 128};	/* four shorts of 128 */
+__declspec(align(16)) static short Four64s[] = {64, 64, 64, 64 };	/* four shorts of 64 */
+__declspec(align(16)) static short FourThrees[]= {3, 3, 3, 3};	/* four shorts of 3, e.g. the x3 factor when FLimit is derived from QStep */
+__declspec(align(16)) static short FourFours[]= {4, 4, 4, 4};	/* four shorts of 4, e.g. the +4 term added into the filter sum */
+__declspec(align(16)) static short FourOnes[]= { 1, 1, 1, 1};	/* four shorts of 1 */
+__declspec(align(16)) static unsigned char  Eight128c[] = {128, 128, 128, 128,128, 128, 128, 128 };	/* eight bytes of 128 */
+#endif	/* _WIN32_WCE */
+
+/****************************************************************************
+ *  Explicit Imports
+ *****************************************************************************
+ */              
+
+extern UINT32 *DeblockLimitValuesV2;
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions
+ *****************************************************************************
+ */              
+extern double gaussian(double sigma, double mu, double x);
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */
+/****************************************************************************
+ * 
+ *  ROUTINE       :     SetupDeblockValueArray_ForMMX
+ *
+ *  INPUTS        :     pbi    : post-processor instance owning DeblockBoundingValue
+ *                      FLimit : filter limit to expand into packed 16-bit words
+ *  OUTPUTS       :     Six INT32s written inside pbi->DeblockBoundingValue
+ *
+ *  RETURNS       :     Pointer to the 32-byte aligned table that was written
+ *
+ *  FUNCTION      :     Builds the packed constants (FLimit, 1, 4) used by the mmx code.
+ *
+ *  SPECIAL NOTES :     The table starts at &DeblockBoundingValue[256] rounded DOWN to
+ *                      a 32-byte boundary, so it may overlay entries just below 256.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+INT32 *SetupDeblockValueArray_ForMMX(POSTPROC_INSTANCE *pbi, INT32 FLimit)
+{
+    INT32 * BoundingValuePtr;
+
+    /*
+        Scratch table carved out of pbi->DeblockBoundingValue: the address of entry 256
+        rounded down to a 32-byte boundary. The rounding goes through size_t, not a
+        32-bit integer, so the pointer survives where pointers are wider than 32 bits.
+    */
+    BoundingValuePtr = (INT32 *)((size_t)(&pbi->DeblockBoundingValue[256]) & ~(size_t)0x1f);
+
+    /* Each INT32 pair forms one 64-bit operand of four 16-bit words. FLimit * 0x10001
+       puts FLimit in both halves (when it fits in 16 bits); the multiply is unsigned
+       so values >= 0x8000 cannot overflow a signed int. */
+    BoundingValuePtr[0] = BoundingValuePtr[1] = (INT32)((UINT32)FLimit * 0x00010001u);
+    BoundingValuePtr[2] = BoundingValuePtr[3] = 0x00010001;
+    BoundingValuePtr[4] = BoundingValuePtr[5] = 0x00040004;
+    return BoundingValuePtr;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeblockLoopFilteredBand_MMX
+ *
+ *  INPUTS        :     None
+ *                               
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Filter both horizontal and vertical edge in a band
+ *
+ *  SPECIAL NOTES :     
+ *
+ *	REFERENCE	  :		
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void DeblockLoopFilteredBand_MMX(
+                                 POSTPROC_INSTANCE *pbi, 
+                                 UINT8 *SrcPtr, 
+                                 UINT8 *DesPtr,
+                                 UINT32 PlaneLineStep, 
+                                 UINT32 FragAcross,
+                                 UINT32 StartFrag,
+                                 UINT32 *QuantScale
+							    )
+{
+	UINT32 j;
+	UINT32 CurrentFrag=StartFrag;
+	UINT32 QStep;
+	UINT8 *Src, *Des;
+	UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short QStepMmx[4];
+short FLimitMmx[4];
+short Rows[80];
+short NewRows[64];
+
+unsigned short Variance11[4];
+unsigned short Variance12[4];
+unsigned short Variance21[4];
+unsigned short Variance22[4];
+#pragma pack()
+#else
+__declspec(align(16)) short QStepMmx[4];
+__declspec(align(16)) short FLimitMmx[4];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short NewRows[64];
+
+__declspec(align(16)) unsigned short Variance11[4];
+__declspec(align(16)) unsigned short Variance12[4];
+__declspec(align(16)) unsigned short Variance21[4];
+__declspec(align(16)) unsigned short Variance22[4];
+#endif
+
+	Src=SrcPtr;
+	Des=DesPtr;
+
+	while(CurrentFrag < StartFrag + FragAcross )
+    {
+        
+        QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+        if( QStep > 3 )
+        {
+            QStepMmx[0] = (INT16)QStep;
+            QStepMmx[1] = (INT16)QStep;
+            QStepMmx[2] = (INT16)QStep;
+            QStepMmx[3] = (INT16)QStep;
+			__asm 
+			{
+				
+				/* Save the registers */
+				push		eax
+				push		ebp
+				push		ecx			
+				push		edx
+				push		esi
+				push		edi
+				
+				
+				/* Calculate the FLimit and store FLimit and QStep */					
+				
+				movq		mm0,	QStepMmx			/* mm0 = QStep */				
+				movq		mm1,	FourThrees			/* mm1 = 03030303 */			
+
+                pmullw		mm1,	mm0					/* mm1 = QStep * 3 */			
+				pmullw		mm1,	mm0					/* mm1 = QStep * QStep * 3 */	
+				
+				psrlw		mm1,	5					/* mm1 = FLimit */				
+				movq		[FLimitMmx], mm1			/* Save FLimit */				
+				
+				/* Copy the data to the intermediate buffer */							
+				
+				mov			eax,	Src					/* eax = Src */					
+				xor			edx,	edx					/* clear edx */					
+				
+				lea			esi,	NewRows				/* esi = NewRows */
+
+				lea			edi,	Rows				/* edi = Rows */				
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				
+				pxor		mm7,	mm7					/* Clear mm7 */					
+				sub			edx,	ecx					/* edx = -Pitch */				
+				
+				lea			eax,	[eax + edx * 4 ]	/* eax = Src - 4*Pitch */		
+				movq		mm0,	[eax + edx]			/* mm0 = Src[-5*Pitch] */		
+				
+				movq		mm1,	mm0					/* mm1 = mm0 */					
+				punpcklbw	mm0,	mm7					/* Lower Four -5 */				
+				
+				movq		mm2,	[eax]				/* mm2 = Src[-4*Pitch] */		
+				movq		mm3,	mm2					/* mm3 = mm2 */					
+				
+				punpckhbw	mm1,	mm7					/* Higher Four -5 */			
+				movq		[edi],	mm0					/* Write Lower Four of -5 */	
+				
+				punpcklbw	mm2,	mm7					/* Lower Four -4 */				
+				punpckhbw	mm3,	mm7					/* higher Four -4 */			
+				
+				movq		[edi+8], mm1				/* Write Higher Four of -5 */	
+				movq		mm4,	[eax + ecx]			/* mm4 = Src[-3*Pitch] */		
+				
+				movq		[edi+16], mm2				/* Write Lower -4 */			
+				movq		[edi+24], mm3				/* write hight -4 */			
+				
+				movq		mm5,	mm4					/* mm5 = mm4 */					
+				punpcklbw	mm4,	mm7					/* lower four -3 */				
+				
+				movq		mm0,	[eax + ecx *2]		/* mm0 = Src[-2*Pitch] */		
+				punpckhbw	mm5,	mm7					/* higher four -3 */			
+				
+				movq		mm1,	mm0					/* mm1 = mm0 */					
+				movq		[edi+32], mm4				/* write Lower -3 */			
+				
+				punpcklbw	mm0,	mm7					/* lower four -2 */				
+				lea			eax,	[eax + ecx *4]		/* eax = Src */					
+				
+				movq		[edi+40], mm5				/* write Higher -3 */			
+				punpckhbw	mm1,	mm7					/* higher four -2 */			
+				
+				movq		mm2,	[eax + edx]			/* mm2 = Src[-Pitch] */			
+				movq		[edi+48], mm0				/* lower -2	*/					
+				
+				movq		mm3,	mm2					/* mm3 = mm2 */					
+				punpcklbw	mm2,	mm7					/* lower -1 */					
+				
+				movq		[edi+56], mm1				/* higher -2 */					
+				punpckhbw	mm3,	mm7					/* Higher -1 */					
+				
+				movq		mm4,	[eax]				/* mm4 = Src[0] */				
+				movq		[edi+64], mm2				/* Lower -1 */					
+				
+				movq		mm5,	mm4					/* mm5 = mm4 */					
+				movq		[edi+72], mm3				/* Higher -1 */					
+				
+				punpcklbw	mm4,	mm7					/* lower 0 */					
+				punpckhbw	mm5,	mm7					/* higher 0 */					
+				
+				movq		mm0,	[eax + ecx]			/* mm0 = Src[Pitch] */			
+				movq		[edi+80], mm4				/* write lower 0 */				
+				
+				movq		mm1,	mm0					/* mm1 = mm0 */					
+				movq		[edi+88], mm5				/* write higher 0 */			
+				
+				punpcklbw	mm0,	mm7					/* lower 1 */					
+				punpckhbw	mm1,	mm7					/* higher 1 */					
+				
+				movq		mm2,	[eax + ecx *2 ]     /* mm2 = Src[2*Pitch] */		
+				lea			eax,	[eax + ecx *4]		/* eax = Src + 4 * Pitch  */	
+				
+				movq		mm3,	mm2					/* mm3 = mm2 */					
+				movq		[edi+96], mm0				/* write lower 1 */				
+				
+				punpcklbw	mm2,	mm7					/* lower 2 */					
+				punpckhbw	mm3,	mm7					/* higher 2 */					
+				
+				movq		mm4,	[eax + edx ]		/* mm4 = Src[3*pitch] */		
+				movq		[edi+104], mm1				/* wirte higher 1 */			
+				
+				movq		mm5,	mm4					/* mm5 = mm4 */					
+				punpcklbw	mm4,	mm7					/* Low 3	*/					
+				
+				movq		[edi+112], mm2				/* write lower 2 */				
+				movq		[edi+120], mm3				/* write higher 2 */			
+				
+				movq		mm0,	[eax]				/* mm0 = Src[4*pitch] */		
+				punpckhbw	mm5,	mm7					/* high 3 */					
+				
+				movq		mm1,	mm0					/* mm1=mm0 */					
+				movq		[edi+128], mm4				/* low 3 */						
+				
+				punpcklbw	mm0,	mm7					/* low 4 */						
+				punpckhbw	mm1,	mm7					/* high 4 */					
+				
+				movq		[edi+136], mm5				/* high 3 */					
+				movq		[edi+144], mm0				/* low 4 */						
+				
+				movq		[edi+152], mm1				/* high 4 */					
+				
+				/* done with copying everything to intermediate buffer */				
+				/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+		
+				/* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */				
+				/* mm7 = 0, mm3 = {128, 128, 128, 128} */								
+				
+				pcmpeqw		mm3,	mm3					/* mm3 = FFFFFFFFFFFFFFFF */	
+				psllw		mm3,	15					/* mm3 = 8000800080008000 */	
+				psrlw		mm3,	8					/* mm3 = 0080008000800080 */
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+				
+				/* mm0 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm1 = x1 + x2 + x3 + x4 */											
+				/* mm4 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				/* mm5 = x5 + x6 + x7 + x8 */											
+				
+				movq		mm7,	mm3					/* mm7 = mm3 */					
+				psrlw		mm7,	7					/* mm7 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm7					/* (sum1 + 1) */				
+				paddw		mm4,	mm7					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+				
+				movq		mm7,	FLimitMmx			/* mm7 = FLimit */				
+				movq		mm2,	mm1					/* copy of Varinace 1*/
+
+				movq		mm6,	mm5					/* Variance 2 */
+				movq		[Variance11], mm1			/* Save Variance1 */
+
+				movq		[Variance21], mm5			/* Save Variance2 */
+				psubw		mm1,	mm7					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm7					/* Variance 2 < Flimit? */		
+				psraw		mm2,	15					/* Variance 1 > 32768? */
+
+				psraw		mm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				movq		mm7,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance1<32678 && 
+														   Variance1<Limit			*/
+				pandn		mm6,	mm5					/* Variance2<32678 && 
+														   Variance1<Limit			*/
+				
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+
+				movq		mm2,	mm7					/* make copy of Pixel4		*/	
+
+				psubusw		mm7,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm7,	mm4					/* abs(4 - 5) */				
+				psubw		mm7,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm7,	15					/* FFFF/0000 for True/Flase */
+				pand		mm7,	mm6													
+				
+				/* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* now lets look at the right four colomn */							
+				
+				add			edi,	8					/* offset 8 to right 4 cols */	
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+				
+				/* mm0 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm1 = x1 + x2 + x3 + x4 */											
+				/* mm4 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				/* mm5 = x5 + x6 + x7 + x8 */											
+				
+				psrlw		mm3,	7					/* mm3 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm3					/* (sum1 + 1) */				
+				paddw		mm4,	mm3					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+
+				movq		[Variance12], mm1			/* Save Variance1 */
+				movq		[Variance22], mm5			/* Save Variance2 */
+				
+				movq		mm3,	FLimitMmx			/* mm3 = FLimit */				
+				movq		mm2,	mm1					/* copy of Varinace 1*/
+
+				movq		mm6,	mm5					/* Variance 2 */
+				psubw		mm1,	mm3					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm3					/* Variance 2 < Flimit? */		
+				psraw		mm2,	15					/* Variance 1 > 32768? */
+
+				psraw		mm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				movq		mm0,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance1<32678 && 
+														   Variance1<Limit			*/
+				pandn		mm6,	mm5					/* Variance2<32678 && 
+														   Variance1<Limit			*/
+
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+				movq		mm2,	mm0					/* make copy of Pixel4		*/	
+														
+				psubusw		mm0,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm0,	mm4					/* abs(4 - 5) */				
+				psubw		mm0,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm0,	15					/* FFFF/0000 for True/False */
+				pand		mm0,	mm6				
+				
+				sub			edi,	8					/* offset edi back */			
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 and mm7 now are in use  */										
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4,	[edi+128]			/* mm4 = [3] */					
+				movq		mm5,	[edi+144]			/* mm5 = [4] */					
+				
+				movq		mm3,	mm4					/* copy of 3 */					
+				movq		mm6,	mm5					/* copy of 4 */					
+				
+				psubusw		mm4,	mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5,	mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4,	mm5					/* abs([3]-[4] ) */				
+				psubw		mm4,	QStepMmx			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm2,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm2,	mm3					/*							*/	
+				
+				por			mm2,	mm4					/* mm2 = p2					*/	
+				
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm7					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy sum */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */						
+				paddw		mm4,	[edi+128]			/* += x8 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x4 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x4 */						
+				
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sub += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x5 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x5 */						
+				
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with left four columns */										
+				/* now do the right four columns */										
+				
+				add			edi,	8					/* shift to right four column */
+				add			esi,	8					/* shift to right four column */
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 now are in use  */										
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4, [edi+128]				/* mm4 = [3] */					
+				movq		mm5, [edi+144]				/* mm5 = [4] */					
+				
+				movq		mm3, mm4					/* copy of 3 */					
+				movq		mm6, mm5					/* copy of 4 */					
+				
+				psubusw		mm4, mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5, mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4, mm5					/* abs([3]-[4] ) */				
+				psubw		mm4, QStepMmx				/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4, 15						/* FFFF/0000 for True/False */	
+				movq		mm2, mm4					/* copy of the mm4 */			
+				
+				pand		mm4, mm6					/*							*/	
+				pandn		mm2, mm3					/*							*/	
+				
+				por			mm2, mm4					/* mm2 = p2					*/	
+				
+				/* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */				
+				/* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */			
+				/* Des[-w4]=Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm0					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy x2 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */						
+				paddw		mm4,	[edi+128]			/* += x8 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x4 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x4 */						
+				
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sum += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x5 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x5 */						
+				
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with right four column */										
+				add			edi,	8					/* shift edi to point x1 */
+				sub			esi,	8					/* shift esi back to x1 */
+
+				mov			ebp, Des					/* the destination */							
+				lea			ebp, [ebp + edx *4]			/* point to des[-w4] */			
+				
+				movq		mm0, [esi]													
+				packuswb	mm0, [esi + 8]												
+				
+				movq		[ebp], mm0					/* write des[-w4] */			
+				
+				movq		mm1, [esi + 16]												
+				packuswb	mm1, [esi + 24]												
+				
+				movq		[ebp+ecx ], mm1				/* write des[-w3] */			
+				
+				movq		mm2, [esi + 32]												
+				packuswb	mm2, [esi + 40]												
+				
+				movq		[ebp+ecx*2 ], mm2			/* write des[-w2] */			
+				
+				movq		mm3, [esi + 48]												
+				packuswb	mm3, [esi + 56]												
+				
+				lea			ebp, [ebp+ecx*4]			/* point to des[0] */			
+				movq		[ebp+edx], mm3				/* write des[-w1] */			
+				
+				movq		mm0, [esi + 64]												
+				packuswb	mm0, [esi + 72]												
+				
+				movq		[ebp ], mm0					/* write des[0] */				
+				
+				movq		mm1, [esi + 80]												
+				packuswb	mm1, [esi + 88]												
+				
+				movq		[ebp+ecx], mm1				/* write des[w1] */				
+				
+				movq		mm2, [esi + 96]												
+				packuswb	mm2, [esi + 104]											
+				
+				movq		[ebp+ecx*2], mm2			/* write des[w2] */				
+				
+				movq		mm3, [esi + 112]											
+				packuswb	mm3, [esi + 120]											
+				
+				lea			ebp, [ebp+ecx*2]			/* point to des[w2] */			
+				movq		[ebp+ecx], mm3				/* write des[w3] */				
+				
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			ebp
+				pop			eax
+				
+				
+		    } /* end of the macro */
+		
+		    Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+		    Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+		    pbi->FragmentVariances[CurrentFrag] += Var1;
+
+		    Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+		    Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+		    pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+        }
+        else
+        {
+
+			/* Unfiltered path: copy the 8 rows Src-4*Pitch .. Src+3*Pitch (8 bytes each) to the same offsets from Des */
+			__asm	
+			{
+				push		esi
+				push		edi
+				push		ecx
+				
+				mov			esi,	Src					/* esi = Src */					
+				mov			edi,	Des					/* edi = Des */				
+
+				push		edx
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				xor			edx,	edx					/* clear edx */					
+				
+				sub			edx,	ecx					/* edx = -Pitch */				
+				lea			esi,	[esi+edx*4]			/* esi = Src-4*Pitch */
+				
+				movq		mm0,	[esi]				/* load first row (Src-4*Pitch) */
+				movq		[edi+edx*4],	mm0			/* write first row to Des-4*Pitch */
+				
+				lea			edi,	[edi+edx*4]			/* edi = Des-4*Pitch */
+				movq		mm1,	[esi+ecx]			/* load Src-3*Pitch */
+
+				movq		[edi+ecx],	mm1				/* write second row */
+				movq		mm2,	[esi+ecx*2]			/* load Src-2*Pitch */
+
+				lea			esi,	[esi+ecx*4]			/* esi = Src */
+				movq		[edi+ecx*2], mm2			/* write third row */
+
+				lea			edi,	[edi+ecx*4]			/* edi = Des */
+				movq		mm3,	[esi+edx]			/* load Src-Pitch */
+				
+				movq		[edi+edx],	mm3				/* write fourth row */				
+				movq		mm4,	[esi]				/* load Src */
+
+				movq		mm5,	[esi+ecx]			/* load Src+Pitch */
+				movq		[edi],	mm4					/* write fifth row */
+
+				movq		mm6,	[esi+ecx*2]			/* load Src+2*Pitch */
+				lea			esi,	[esi+ecx*4]			/* esi = Src+4*Pitch */
+
+				movq		[edi+ecx], mm5				/* write the sixth row */
+				movq		[edi+ecx*2], mm6			/* write the seventh row */
+
+				movq		mm7,	[esi+edx]			/* load Src+3*Pitch */
+				lea			edi,	[edi+ecx*4]			/* edi = Des+4*Pitch */
+
+				movq		[edi+edx], mm7				/* write the last row (Des+3*Pitch) */
+
+				pop			edx							/* restore in reverse order of the pushes above */
+				pop			ecx
+				pop			edi
+				pop			esi				
+			}
+
+        }
+		
+		Src += 8;
+		Des += 8;
+		CurrentFrag ++;
+	}
+
+	Des -= ((PlaneLineStep + FragAcross)<<3);
+	Des += 8;
+	Src = Des;
+
+	CurrentFrag = StartFrag ;
+
+	while(CurrentFrag < StartFrag + FragAcross - 1)
+	{
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];		
+
+        if( QStep > 3 )
+        {
+            QStepMmx[0] = (INT16)QStep;
+            QStepMmx[1] = (INT16)QStep;
+            QStepMmx[2] = (INT16)QStep;
+            QStepMmx[3] = (INT16)QStep;
+
+			for( j=0; j<8;j++)
+		    {
+    			Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+	    		Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);		
+    		}
+
+	    	__asm
+    		{
+				/* Save the registers */
+				push		eax
+				push		ebp
+				push		ecx			
+				push		edx
+				push		esi
+				push		edi
+				
+				/* Calculate the FLimit and store FLimit and QStep */					
+				
+				movq		mm0,	QStepMmx			/* mm0 = QStep */				
+				movq		mm1,	FourThrees			/* mm1 = 03030303 */			
+
+                pmullw		mm1,	mm0					/* mm1 = QStep * 3 */							
+				pmullw		mm1,	mm0					/* mm1 = QStep * QStep * 3 */	
+				
+				psrlw		mm1,	5					/* mm1 = FLimit */				
+				movq		[FLimitMmx], mm1			/* Save FLimit */				
+
+				/* setup the pointers to data */
+
+				mov			eax,	Src					/* eax = Src */
+				xor			edx,	edx					/* clear edx */
+				
+				sub			eax,	4					/* eax = Src-4 */
+				lea			esi,	NewRows				/* esi = NewRows */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				sub			edx,	ecx					/* edx = -Pitch */				
+
+				/* Get the data to the intermediate buffer */
+
+				movq		mm0,	[eax]				/* mm0 = 07 06 05 04 03 02 01 00 */
+				movq		mm1,	[eax+ecx]			/* mm1 = 17 16 15 14 13 12 11 10 */
+
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 27 26 25 24 23 22 21 20 */
+				lea			eax,	[eax+ecx*4]			/* Go down four Rows */	
+
+				movq		mm3,	[eax+edx]			/* mm3 = 37 36 35 34 33 32 31 30 */
+				movq		mm4,	mm0					/* mm4 = 07 06 05 04 03 02 01 00 */
+			
+				punpcklbw	mm0,	mm1					/* mm0 = 13 03 12 02 11 01 10 00 */
+				punpckhbw	mm4,	mm1					/* mm4 = 17 07 16 06 15 05 14 04 */
+
+				movq		mm5,	mm2					/* mm5 = 27 26 25 24 23 22 21 20 */
+				punpcklbw	mm2,	mm3					/* mm2 = 33 23 32 22 31 21 30 20 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 37 27 36 26 35 25 34 24 */
+				movq		mm1,	mm0					/* mm1 = 13 03 12 02 11 01 10 00 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 31 21 11 01 30 20 10 00 */
+				punpckhwd	mm1,	mm2					/* mm1 = 33 23 13 03 32 22 12 02 */
+				
+				movq		mm2,	mm4					/* mm2 = 17 07 16 06 15 05 14 04 */
+				punpckhwd	mm4,	mm5					/* mm4 = 37 27 17 07 36 26 16 06 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 35 25 15 05 34 24 14 04 */
+				pxor		mm7,	mm7					/* clear mm7 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 30 20 10 00 */
+
+				movq		[edi+16], mm0				/* write 00 10 20 30 */
+				punpckhbw	mm5,	mm7					/* mm5 = 31 21 11 01 */
+
+				movq		mm0,	mm1					/* mm0 =33 23 13 03 32 22 12 02 */
+				movq		[edi+32], mm5				/* write 01 11 21 31 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 32 22 12 02 */
+				punpckhbw	mm0,	mm7					/* mm0 = 33 23 13 03 */
+
+				movq		[edi+48], mm1				/* write 02 12 22 32 */
+				movq		mm3,	mm2					/* mm3 = 35 25 15 05 34 24 14 04 */
+				
+				movq		mm5,	mm4					/* mm5 = 37 27 17 07 36 26 16 06 */
+				movq		[edi+64], mm0				/* write 03 13 23 33 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 34 24 14 04 */
+				punpckhbw	mm3,	mm7					/* mm3 = 35 25 15 05 */
+
+				movq		[edi+80], mm2				/* write 04 14 24 34 */
+				punpcklbw	mm4,	mm7					/* mm4 = 36 26 16 06 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 37 27 17 07 */
+				movq		[edi+96], mm3				/* write 05 15 25 35 */
+			
+				movq		mm0,	[eax]				/* mm0 = 47 46 45 44 43 42 41 40 */
+				movq		mm1,	[eax + ecx ]		/* mm1 = 57 56 55 54 53 52 51 50 */
+
+				movq		[edi+112], mm4				/* write 06 16 26 36 */
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 67 66 65 64 63 62 61 60 */
+
+				lea			eax,	[eax+ ecx*4]		/* Go down four rows */
+				movq		[edi+128], mm5				/* write 07 17 27 37 */
+
+				movq		mm4,	mm0					/* mm4 = 47 46 45 44 43 42 41 40 */
+				movq		mm3,	[eax+edx]			/* mm3 = 77 76 75 74 73 72 71 70 */
+
+				punpcklbw	mm0,	mm1					/* mm0 = 53 43 52 42 51 41 50 40 */
+				punpckhbw	mm4,	mm1					/* mm4 = 57 47 56 46 55 45 54 44 */
+
+				movq		mm5,	mm2					/* mm5 = 67 66 65 64 63 62 61 60 */
+				punpcklbw	mm2,	mm3					/* mm2 = 73 63 72 62 71 61 70 60 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 77 67 76 66 75 65 74 64 */
+				movq		mm1,	mm0					/* mm1 = 53 43 52 42 51 41 50 40 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 71 61 51 41 70 60 50 40 */
+				punpckhwd	mm1,	mm2					/* mm1 = 73 63 53 43 72 62 52 42 */
+				
+				movq		mm2,	mm4					/* mm2 = 57 47 56 46 55 45 54 44 */
+				punpckhwd	mm4,	mm5					/* mm4 = 77 67 57 47 76 66 56 46 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 75 65 55 45 74 64 54 44 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 70 60 50 40 */
+
+				movq		[edi+24], mm0				/* write 40 50 60 70 */
+				punpckhbw	mm5,	mm7					/* mm5 = 71 61 51 41 */
+
+				movq		mm0,	mm1					/* mm0 = 73 63 53 43 72 62 52 42 */
+				movq		[edi+40], mm5				/* write 41 51 61 71 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 72 62 52 42 */
+				punpckhbw	mm0,	mm7					/* mm0 = 73 63 53 43 */
+
+				movq		[edi+56], mm1				/* write 42 52 62 72 */
+				movq		mm3,	mm2					/* mm3 = 75 65 55 45 74 64 54 44 */
+				
+				movq		mm5,	mm4					/* mm5 = 77 67 57 47 76 66 56 46 */
+				movq		[edi+72], mm0				/* write 43 53 63 73 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 74 64 54 44 */
+				punpckhbw	mm3,	mm7					/* mm3 = 75 65 55 45 */
+
+				movq		[edi+88], mm2				/* write 44 54 64 74 */
+				punpcklbw	mm4,	mm7					/* mm4 = 76 66 56 46 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 77 67 57 47 */
+				movq		[edi+104], mm3				/* write 45 55 65 75 */
+			
+				movq		[edi+120], mm4				/* write 46 56 66 76 */
+				movq		[edi+136], mm5				/* write 47 57 67 77 */
+
+
+				/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+				
+				/* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */				
+				/* mm7 = 0, mm3 = {128, 128, 128, 128} */								
+				
+				pcmpeqw		mm3,	mm3					/* mm3 = FFFFFFFFFFFFFFFF */	
+				psllw		mm3,	15					/* mm3 = 8000800080008000 */	
+				psrlw		mm3,	8					/* mm3 = 0080008000800080 */
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+				
+				/* mm0 = x1 + x2 + x3 + x4   (each xi is pixel - 128) */				
+				/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm4 = x5 + x6 + x7 + x8 */											
+				/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				
+				movq		mm7,	mm3					/* mm7 = mm3 */					
+				psrlw		mm7,	7					/* mm7 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm7					/* (sum1 + 1) */				
+				paddw		mm4,	mm7					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+				
+				movq		[Variance11], mm1				/* Save Variance1 */
+				movq		[Variance21], mm5				/* Save Variance2 */
+
+				movq		mm7,	FLimitMmx			/* mm7 = FLimit */
+				movq		mm2,	mm1					/* copy of Variance 1*/
+
+				movq		mm6,	mm5					/* copy of Variance 2*/
+				psubw		mm1,	mm7					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm7					/* Variance 2 < Flimit? */		
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				psraw		mm2,	15					/* Variance 1 > 32768 ? */
+
+				psraw		mm6,	15					/* Variance 2 > 32768 ? */
+				movq		mm7,	[edi+64]			/* mm7 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance 1 < Flimit &&
+														   Variance 1 < 32768		*/
+				pandn		mm6,	mm5					/* Variance 2 < Flimit &&
+														   Variance 2 < 32768		*/
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/				
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+				movq		mm2,	mm7					/* make copy of Pixel4		*/	
+
+				psubusw		mm7,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm7,	mm4					/* abs(4 - 5) */				
+				psubw		mm7,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm7,	15					/* FFFF/0000 for True/Flase */
+				pand		mm7,	mm6													
+				
+				/* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* now let's look at the right four columns */							
+				
+				add			edi,	8					/* offset 8 to right 4 cols */	
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+				
+				/* mm0 = x1 + x2 + x3 + x4   (each xi is pixel - 128) */				
+				/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm4 = x5 + x6 + x7 + x8 */											
+				/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				
+				psrlw		mm3,	7					/* mm3 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm3					/* (sum1 + 1) */				
+				paddw		mm4,	mm3					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+				
+				movq		[Variance12], mm1				/* Save Variance1 */
+				movq		[Variance22], mm5				/* Save Variance2 */
+	
+				movq		mm3,	FLimitMmx			/* mm3 = FLimit */				
+				movq		mm2,	mm1					/* copy of Varinace 1*/
+
+				movq		mm6,	mm5					/* Variance 2 */
+				psubw		mm1,	mm3					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm3					/* Variance 2 < Flimit? */		
+				psraw		mm6,	15					/* Variance 2 >= 32768 ? */
+				
+				psraw		mm2,	15					/* Variance 1 >= 32768 ? */
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				movq		mm0,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance1<32768 && 
+														   Variance1<Limit			*/
+				pandn		mm6,	mm5					/* Variance2<32768 && 
+														   Variance2<Limit			*/
+				
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+				movq		mm2,	mm0					/* make copy of Pixel4		*/	
+														
+				psubusw		mm0,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm0,	mm4					/* abs(4 - 5) */				
+				psubw		mm0,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm0,	15					/* FFFF/0000 for True/False */
+				pand		mm0,	mm6				
+				
+				sub			edi,	8					/* offset edi back */			
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 and mm7 now are in use  */										
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4,	[edi+128]			/* mm4 = [3] */					
+				movq		mm5,	[edi+144]			/* mm5 = [4] */					
+				
+				movq		mm3,	mm4					/* copy of 3 */					
+				movq		mm6,	mm5					/* copy of 4 */					
+				
+				psubusw		mm4,	mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5,	mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4,	mm5					/* abs([3]-[4] ) */				
+				psubw		mm4,	QStepMmx			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm2,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm2,	mm3					/*							*/	
+				
+				por			mm2,	mm4					/* mm2 = p2					*/	
+				
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm7					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy sum */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */						
+				paddw		mm4,	[edi+128]			/* += x8 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x4 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x4 */						
+				
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sub += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x5 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x5 */						
+				
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with left four columns */
+				/* now do the right four columns */
+				
+				add			edi,	8					/* shift to right four column */
+				add			esi,	8					/* shift to right four column */
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+				/* mm0 is now in use as the filter flag for these columns */
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4, [edi+128]				/* mm4 = [3] */					
+				movq		mm5, [edi+144]				/* mm5 = [4] */					
+				
+				movq		mm3, mm4					/* copy of 3 */					
+				movq		mm6, mm5					/* copy of 4 */					
+				
+				psubusw		mm4, mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5, mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4, mm5					/* abs([3]-[4] ) */				
+				psubw		mm4, QStepMmx				/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4, 15						/* FFFF/0000 for True/False */	
+				movq		mm2, mm4					/* copy of the mm4 */			
+				
+				pand		mm4, mm6					/*							*/	
+				pandn		mm2, mm3					/*							*/	
+				
+				por			mm2, mm4					/* mm2 = p2					*/	
+				
+				/* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */				
+				/* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */			
+				/* Des[-w4]=Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm0					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy sum */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */						
+				paddw		mm4,	[edi+128]			/* += x8 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x4 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x4 */						
+				
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sub += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x5 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x5 */						
+				
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with right four column */	
+				/* transpose */
+				mov			eax,	Des					/* the destination */			
+				add			edi,	8					/* shift edi to point x1 */
+
+				sub			esi,	8					/* shift esi back to left x1 */
+				sub			eax,	4
+
+				movq		mm0,	[esi]				/* mm0 = 30 20 10 00 */
+				movq		mm1,	[esi+16]			/* mm1 = 31 21 11 01 */
+
+				movq		mm4,	mm0					/* mm4 = 30 20 10 00 */
+				punpcklwd	mm0,	mm1					/* mm0 = 11 10 01 00 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 31 30 21 20 */
+				movq		mm2,	[esi+32]			/* mm2 = 32 22 12 02 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 33 23 13 03 */
+				movq		mm5,	mm2					/* mm5 = 32 22 12 02 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 13 12 03 02 */
+				punpckhwd	mm5,	mm3					/* mm5 = 33 32 23 22 */
+
+				movq		mm1,	mm0					/* mm1 = 11 10 01 00 */
+				punpckldq	mm0,	mm2					/* mm0 = 03 02 01 00 */
+
+				movq		[edi],	mm0					/* write 00 01 02 03 */
+				punpckhdq	mm1,	mm2					/* mm1 = 13 12 11 10 */
+				
+				movq		mm0,	mm4					/* mm0 = 31 30 21 20 */
+				movq		[edi+16], mm1				/* write 10 11 12 13 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 23 22 21 20 */
+				punpckhdq	mm4,	mm5					/* mm4 = 33 32 31 30 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 34 24 14 04 */
+				movq		mm2,	[esi+80]			/* mm2 = 35 25 15 05 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 36 26 16 06 */
+				movq		mm6,	[esi+112]			/* mm6 = 37 27 17 07 */
+								
+				movq		mm3,	mm1					/* mm3 = 34 24 14 04 */
+				movq		mm7,	mm5					/* mm7 = 36 26 16 06 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 15 14 05 04 */
+				punpckhwd	mm3,	mm2					/* mm3 = 35 34 25 24 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 17 16 07 06 */
+				punpckhwd	mm7,	mm6					/* mm7 = 37 36 27 26 */
+
+				movq		mm2,	mm1					/* mm2 = 15 14 05 04 */
+				movq		mm6,	mm3					/* mm6 = 35 34 25 24 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 07 06 05 04 */
+				punpckhdq	mm2,	mm5					/* mm2 = 17 16 15 14 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 27 26 25 24 */
+				punpckhdq	mm6,	mm7					/* mm6 = 37 36 35 34 */
+			
+				movq		mm5,	[edi]				/* mm5 = 03 02 01 00 */
+				packuswb	mm5,	mm1					/* mm5 = 07 06 05 04 03 02 01 00 */
+				
+				movq		[eax],	mm5					/* write 00 01 02 03 04 05 06 07 */
+				movq		mm7,	[edi+16]			/* mm7 = 13 12 11 10 */
+
+				packuswb	mm7,	mm2					/* mm7 = 17 16 15 14 13 12 11 10 */
+				movq		[eax+ecx], mm7				/* write 10 11 12 13 14 15 16 17 */
+
+				packuswb	mm0,	mm3					/* mm0 = 27 26 25 24 23 22 21 20 */
+				packuswb	mm4,	mm6					/* mm4 = 37 36 35 34 33 32 31 30 */
+				
+				movq		[eax+ecx*2], mm0			/* write 20 21 22 23 24 25 26 27 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 30 31 32 33 34 35 36 37 */
+				add			edi, 8						/* move to right four column */
+				add			esi, 8						/* move to right x1 */
+
+				movq		mm0,	[esi]				/* mm0 = 70 60 50 40 */
+				movq		mm1,	[esi+16]			/* mm1 = 71 61 51 41 */
+
+				movq		mm4,	mm0					/* mm4 = 70 60 50 40 */
+				punpcklwd	mm0,	mm1					/* mm0 = 51 50 41 40 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 71 70 61 60 */
+				movq		mm2,	[esi+32]			/* mm2 = 72 62 52 42 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 73 63 53 43 */
+				movq		mm5,	mm2					/* mm5 = 72 62 52 42 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 53 52 43 42 */
+				punpckhwd	mm5,	mm3					/* mm5 = 73 72 63 62 */
+
+				movq		mm1,	mm0					/* mm1 = 51 50 41 40 */
+				punpckldq	mm0,	mm2					/* mm0 = 43 42 41 40 */
+
+				movq		[edi],	mm0					/* write 40 41 42 43 */
+				punpckhdq	mm1,	mm2					/* mm1 = 53 52 51 50 */
+				
+				movq		mm0,	mm4					/* mm0 = 71 70 61 60 */
+				movq		[edi+16], mm1				/* write 50 51 52 53 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 63 62 61 60 */
+				punpckhdq	mm4,	mm5					/* mm4 = 73 72 71 70 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 74 64 54 44 */
+				movq		mm2,	[esi+80]			/* mm2 = 75 65 55 45 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 76 66 56 46 */
+				movq		mm6,	[esi+112]			/* mm6 = 77 67 57 47 */
+								
+				movq		mm3,	mm1					/* mm3 = 74 64 54 44 */
+				movq		mm7,	mm5					/* mm7 = 76 66 56 46 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 55 54 45 44 */
+				punpckhwd	mm3,	mm2					/* mm3 = 75 74 65 64 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 57 56 47 46 */
+				punpckhwd	mm7,	mm6					/* mm7 = 77 76 67 66 */
+
+				movq		mm2,	mm1					/* mm2 = 55 54 45 44 */
+				movq		mm6,	mm3					/* mm6 = 75 74 65 64 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 47 46 45 44 */
+				punpckhdq	mm2,	mm5					/* mm2 = 57 56 55 54 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 67 66 65 64 */
+				punpckhdq	mm6,	mm7					/* mm6 = 77 76 75 74 */
+			
+				movq		mm5,	[edi]				/* mm5 = 43 42 41 40 */
+				packuswb	mm5,	mm1					/* mm5 = 47 46 45 44 43 42 41 40 */
+				
+				movq		[eax],	mm5					/* write 40 41 42 43 44 45 46 47 */
+				movq		mm7,	[edi+16]			/* mm7 = 53 52 51 50 */
+
+				packuswb	mm7,	mm2					/* mm7 = 57 56 55 54 53 52 51 50 */
+				movq		[eax+ecx], mm7				/* write 50 51 52 53 54 55 56 57 */
+
+				packuswb	mm0,	mm3					/* mm0 = 67 66 65 64 63 62 61 60 */
+				packuswb	mm4,	mm6					/* mm4 = 77 76 75 74 73 72 71 70 */
+				
+				movq		[eax+ecx*2], mm0			/* write 60 61 62 63 64 65 66 67 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 70 71 72 73 74 75 76 77 */
+				
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			ebp
+				pop			eax
+	    	}	
+
+    		Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+		    Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+		    pbi->FragmentVariances[CurrentFrag] += Var1;
+
+    		Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+    		Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+		    pbi->FragmentVariances[CurrentFrag + 1] += Var2;
+        }
+		CurrentFrag ++;
+		Src += 8;
+		Des += 8;		
+	}
+
+}
+
+
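+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeblockNonFilteredBand_MMX
+ *
+ *  INPUTS        :     POSTPROC_INSTANCE *pbi : Post-processor instance; FrameQIndex
+ *                                               selects the entry of DeblockLimitValuesV2
+ *                                               and FragQIndex[] selects the QuantScale
+ *                                               entry used as QStep.
+ *                      UINT8 *SrcPtr          : Source pixels of fragment StartFrag; each
+ *                                               following fragment is 8 bytes further on.
+ *                      UINT8 *DesPtr          : Destination pixels, stepped the same way
+ *                                               as SrcPtr.
+ *                      UINT32 PlaneLineStep   : Pitch (bytes per line) of the plane.
+ *                      UINT32 FragAcross      : Number of fragments processed, from
+ *                                               StartFrag to StartFrag+FragAcross-1;
+ *                                               also added to the fragment index when
+ *                                               reading FragQIndex[].
+ *                      UINT32 StartFrag       : Index of the first fragment in the band.
+ *                      UINT32 *QuantScale     : Table mapping a fragment's Q index to QStep.
+ *                               
+ *  OUTPUTS       :     None listed in the original header (presumably the filtered
+ *                      pixels are written through DesPtr -- confirm against the body).
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Filters both the horizontal and the vertical edges in a band
+ *
+ *  SPECIAL NOTES :     
+ *
+ *	REFERENCE	  :		
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/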
+
+void DeblockNonFilteredBand_MMX(
+                                 POSTPROC_INSTANCE *pbi, 
+                                 UINT8 *SrcPtr, 
+                                 UINT8 *DesPtr,
+                                 UINT32 PlaneLineStep, 
+                                 UINT32 FragAcross,
+                                 UINT32 StartFrag,
+                                 UINT32 *QuantScale
+							    )
+{
+	UINT32 j;
+	UINT32 CurrentFrag=StartFrag;
+	UINT32 QStep;
+    UINT32 LoopFLimit;
+	UINT8 *Src, *Des;
+	UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+short QStepMmx[4];
+short FLimitMmx[4];
+short LoopFLimitMmx[4];
+short Rows[80];
+short NewRows[64];
+short LoopFilteredValuesUp[4];
+short LoopFilteredValuesDown[4];
+
+unsigned short Variance11[4];
+unsigned short Variance12[4];
+unsigned short Variance21[4];
+unsigned short Variance22[4];
+#pragma pack()
+#else
+__declspec(align(16)) short QStepMmx[4];
+__declspec(align(16)) short FLimitMmx[4];
+__declspec(align(16)) short LoopFLimitMmx[4];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short NewRows[64];
+__declspec(align(16)) short LoopFilteredValuesUp[4];
+__declspec(align(16)) short LoopFilteredValuesDown[4];
+
+__declspec(align(16)) unsigned short Variance11[4];
+__declspec(align(16)) unsigned short Variance12[4];
+__declspec(align(16)) unsigned short Variance21[4];
+__declspec(align(16)) unsigned short Variance22[4];
+#endif
+
+    LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+    LoopFLimitMmx[0] = (INT16)LoopFLimit;
+    LoopFLimitMmx[1] = (INT16)LoopFLimit;
+    LoopFLimitMmx[2] = (INT16)LoopFLimit;
+    LoopFLimitMmx[3] = (INT16)LoopFLimit;
+
+	while(CurrentFrag < StartFrag + FragAcross )
+	{
+
+		Src=SrcPtr+8*(CurrentFrag-StartFrag);
+		Des=DesPtr+8*(CurrentFrag-StartFrag);
+
+		QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+
+
+		__asm 
+		{
+			
+		push		eax
+
+		push		ebp
+			
+		push		ecx			
+
+		push		edx
+
+		push		esi
+
+		push		edi
+
+			/* Calculate the FLimit and store FLimit and QStep */					
+			/* Copy the data to the intermediate buffer */							
+			mov			eax,	QStep
+			xor			edx,	edx					/* clear edx */					
+
+			mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+			pcmpeqw		mm6,	mm6					
+			
+			
+			movd		mm5,	eax
+			mov			eax,	Src					/* eax = Src */					
+							
+			psrlw		mm6,	14					/* mm6 = 3, 3, 3, 3*/
+			punpcklwd	mm5,	mm5					
+
+			lea			esi,	NewRows				/* esi = NewRows */
+			punpckldq	mm5,	mm5
+
+			sub			edx,	ecx					/* edx = - Pitch */
+			pmullw		mm6,	mm5					/* Qstep * 3 */
+
+			movq		QStepMmx,	mm5
+			lea			edi,	Rows				/* edi = Rows */				
+
+			pxor		mm7,	mm7					/* Clear mm7 */					
+			pmullw		mm6,	mm5
+			
+			lea			eax,	[eax + edx * 4 ]	/* eax = Src - 4*Pitch */		
+			movq		mm0,	[eax + edx]			/* mm0 = Src[-5*Pitch] */		
+			
+			movq		mm1,	mm0					/* mm1 = mm0 */					
+			punpcklbw	mm0,	mm7					/* Lower Four -5 */				
+
+			psrlw		mm6,	5
+			movq		[FLimitMmx], mm6		
+			
+			movq		mm2,	[eax]				/* mm2 = Src[-4*Pitch] */		
+			punpckhbw	mm1,	mm7					/* Higher Four -5 */			
+
+			movq		mm3,	mm2					/* mm3 = mm2 */					
+			punpcklbw	mm2,	mm7					/* Lower Four -4 */				
+
+			movq		[edi],	mm0					/* Write Lower Four of -5 */					
+			punpckhbw	mm3,	mm7					/* higher Four -4 */			
+			
+			movq		[edi+8], mm1				/* Write Higher Four of -5 */	
+			movq		mm4,	[eax + ecx]			/* mm4 = Src[-3*Pitch] */		
+			
+			movq		[edi+16], mm2				/* Write Lower -4 */			
+			movq		[edi+24], mm3				/* write hight -4 */			
+			
+			movq		mm5,	mm4					/* mm5 = mm4 */					
+			punpcklbw	mm4,	mm7					/* lower four -3 */				
+			
+			movq		mm0,	[eax + ecx *2]		/* mm0 = Src[-2*Pitch] */		
+			punpckhbw	mm5,	mm7					/* higher four -3 */			
+			
+			movq		mm1,	mm0					/* mm1 = mm0 */					
+			movq		[edi+32], mm4				/* write Lower -3 */			
+			
+			punpcklbw	mm0,	mm7					/* lower four -2 */				
+			lea			eax,	[eax + ecx *4]		/* eax = Src */					
+			
+			movq		[edi+40], mm5				/* write Higher -3 */			
+			punpckhbw	mm1,	mm7					/* higher four -2 */			
+			
+			movq		mm2,	[eax + edx]			/* mm2 = Src[-Pitch] */			
+			movq		[edi+48], mm0				/* lower -2	*/					
+			
+			movq		mm3,	mm2					/* mm3 = mm2 */					
+			punpcklbw	mm2,	mm7					/* lower -1 */					
+			
+			movq		[edi+56], mm1				/* higher -2 */					
+			punpckhbw	mm3,	mm7					/* Higher -1 */					
+			
+			movq		mm4,	[eax]				/* mm4 = Src[0] */				
+			movq		[edi+64], mm2				/* Lower -1 */					
+			
+			movq		mm5,	mm4					/* mm5 = mm4 */					
+			movq		[edi+72], mm3				/* Higher -1 */					
+			
+			punpcklbw	mm4,	mm7					/* lower 0 */					
+			punpckhbw	mm5,	mm7					/* higher 0 */					
+			
+			movq		mm0,	[eax + ecx]			/* mm0 = Src[Pitch] */			
+			movq		[edi+80], mm4				/* write lower 0 */				
+			
+			movq		mm1,	mm0					/* mm1 = mm0 */					
+			movq		[edi+88], mm5				/* write higher 0 */			
+			
+			punpcklbw	mm0,	mm7					/* lower 1 */					
+			punpckhbw	mm1,	mm7					/* higher 1 */					
+			
+			movq		mm2,	[eax + ecx *2 ]     /* mm2 = Src[2*Pitch] */		
+			lea			eax,	[eax + ecx *4]		/* eax = Src + 4 * Pitch  */	
+			
+			movq		mm3,	mm2					/* mm3 = mm2 */					
+			movq		[edi+96], mm0				/* write lower 1 */				
+			
+			punpcklbw	mm2,	mm7					/* lower 2 */					
+			punpckhbw	mm3,	mm7					/* higher 2 */					
+			
+			movq		mm4,	[eax + edx ]		/* mm4 = Src[3*pitch] */		
+			movq		[edi+104], mm1				/* wirte higher 1 */			
+			
+			movq		mm5,	mm4					/* mm5 = mm4 */					
+			punpcklbw	mm4,	mm7					/* Low 3	*/					
+			
+			movq		[edi+112], mm2				/* write lower 2 */				
+			movq		[edi+120], mm3				/* write higher 2 */			
+			
+			movq		mm0,	[eax]				/* mm0 = Src[4*pitch] */		
+			punpckhbw	mm5,	mm7					/* high 3 */					
+			
+			movq		mm1,	mm0					/* mm1=mm0 */					
+			movq		[edi+128], mm4				/* low 3 */						
+			
+			punpcklbw	mm0,	mm7					/* low 4 */						
+			punpckhbw	mm1,	mm7					/* high 4 */					
+			
+			movq		[edi+136], mm5				/* high 3 */					
+			movq		[edi+144], mm0				/* low 4 */						
+			
+			movq		[edi+152], mm1				/* high 4 */					
+			
+/*
+			mov			eax,	Des					
+			lea			eax,	[eax+edx*4]
+			movq		mm2,	[eax]
+			movq		mm2,	[eax+ecx]
+			movq		mm2,	[eax+ecx*2]
+			lea			eax,	[eax+ecx*4]
+			movq		mm2,	[eax+edx]
+			movq		mm2,	[eax]
+			movq		mm2,	[eax+ecx]
+			movq		mm2,	[eax+ecx*2]
+			lea			eax,	[eax+ecx*4]
+			movq		mm2,	[eax+edx]
+			movq		mm2,	[eax]
+			
+*/
+	
+			/* done with copying everything to intermediate buffer */				
+			/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+	
+			/* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */				
+			/* mm7 = 0, mm3 = {128, 128, 128, 128} */								
+			
+
+			pcmpeqw		mm3,	mm3					/* mm3 = FFFFFFFFFFFFFFFF */	
+			psllw		mm3,	15					/* mm3 = 8000800080008000 */	
+			psrlw		mm3,	8					/* mm3 = 0080008000800080 */
+			
+			movq		mm2,	[edi+16]			/* Pixel 1 */					
+			movq		mm6,	[edi+80]			/* Pixel 5 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			movq		mm0,	mm2					/* mm0 = pixel 1 */				
+			movq		mm4,	mm6					/* mm4 = pixel 5 */				
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+			pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+			
+			movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+			movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+			
+			movq		mm2,	[edi+32]			/* Pixel 2 */					
+			movq		mm6,	[edi+96]			/* Pixel 6 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+			paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+			
+			movq		mm2,	[edi+48]			/* Pixel 3 */					
+			movq		mm6,	[edi+112]			/* Pixel 7 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+			paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+			
+			movq		mm2,	[edi+64]			/* Pixel 4 */					
+			movq		mm6,	[edi+128]			/* Pixel 8 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+			paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+			
+			
+			/* mm0 = x1 + x2 + x3 + x4             (each pixel already -128) */
+			/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+			/* mm4 = x5 + x6 + x7 + x8             (each pixel already -128) */
+			/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+			
+			movq		mm7,	mm3					/* mm7 = mm3 */					
+			psrlw		mm7,	7					/* mm7 = 0001000100010001 */	
+			
+			movq		mm2,	mm0					/* make copy of sum1 */			
+			movq		mm6,	mm4					/* make copy of sum2 */			
+			
+			paddw		mm0,	mm7					/* (sum1 + 1) */				
+			paddw		mm4,	mm7					/* (sum2 + 1) */				
+			
+			psraw		mm2,	1					/* sum1 /2 */					
+			psraw		mm6,	1					/* sum2 /2 */					
+			
+			psraw		mm0,	1					/* (sum1 + 1)/2 */				
+			psraw		mm4,	1					/* (sum2 + 1)/2 */				
+			
+			pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+			pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+			
+			psubw		mm1,	mm2					/* Variance 1 */				
+			psubw		mm5,	mm6					/* Variance 2 */				
+			
+			movq		mm7,	FLimitMmx			/* mm7 = FLimit */				
+			movq		mm2,	mm1					/* copy of Varinace 1*/
+
+			movq		mm6,	mm5					/* Variance 2 */
+			movq		[Variance11], mm1			/* Save Variance1 */
+
+			movq		[Variance21], mm5			/* Save Variance2 */
+			psubw		mm1,	mm7					/* Variance 1 < Flimit? */		
+			
+			psubw		mm5,	mm7					/* Variance 2 < Flimit? */		
+			psraw		mm2,	15					/* Variance 1 > 32768? */
+
+			psraw		mm6,	15					/* Vaiance  2 > 32768? */	
+			psraw		mm1,	15					/* FFFF/0000 for true/false */	
+			
+			psraw		mm5,	15					/* FFFF/0000 for true/false */	
+			movq		mm7,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+			pandn		mm2,	mm1					/* Variance1<32678 && 
+													   Variance1<Limit			*/
+			pandn		mm6,	mm5					/* Variance2<32678 && 
+													   Variance1<Limit			*/
+			
+			movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+			pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+													/*     &&Variance2 < Flimit */	
+
+			movq		mm2,	mm7					/* make copy of Pixel4		*/	
+
+			psubusw		mm7,	mm4					/* 4 - 5 */						
+			psubusw		mm4,	mm2					/* 5 - 4 */						
+			
+			por			mm7,	mm4					/* abs(4 - 5) */				
+			psubw		mm7,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+			
+			psraw		mm7,	15					/* FFFF/0000 for True/Flase */
+			pand		mm7,	mm6													
+			
+			/* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+			/* now let's look at the right four columns */
+			
+			add			edi,	8					/* offset 8 to right 4 cols */	
+			
+			movq		mm2,	[edi+16]			/* Pixel 1 */					
+			movq		mm6,	[edi+80]			/* Pixel 5 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			movq		mm0,	mm2					/* mm0 = pixel 1 */				
+			movq		mm4,	mm6					/* mm4 = pixel 5 */				
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+			pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+			
+			movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+			movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+			
+			movq		mm2,	[edi+32]			/* Pixel 2 */					
+			movq		mm6,	[edi+96]			/* Pixel 6 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+			paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+			
+			movq		mm2,	[edi+48]			/* Pixel 3 */					
+			movq		mm6,	[edi+112]			/* Pixel 7 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+			paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+			
+			movq		mm2,	[edi+64]			/* Pixel 4 */					
+			movq		mm6,	[edi+128]			/* Pixel 8 */					
+			
+			psubw		mm2,	mm3					/* mm2 -=128 */					
+			psubw		mm6,	mm3					/* mm6 -=128 */					
+			
+			paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+			paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+			
+			pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+			pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+			
+			paddw		mm1,	mm2					/* mm1 = pixel4^2 */			
+			paddw		mm5,	mm6					/* mm5 = pixel8^2 */			
+			
+			/* mm0 = x1 + x2 + x3 + x4 */
+			/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */
+			/* mm4 = x5 + x6 + x7 + x8 */
+			/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */
+			
+			psrlw		mm3,	7					/* mm3 = 0001000100010001 */	
+			
+			movq		mm2,	mm0					/* make copy of sum1 */			
+			movq		mm6,	mm4					/* make copy of sum2 */			
+			
+			paddw		mm0,	mm3					/* (sum1 + 1) */				
+			paddw		mm4,	mm3					/* (sum2 + 1) */				
+			
+			psraw		mm2,	1					/* sum1 /2 */					
+			psraw		mm6,	1					/* sum2 /2 */					
+			
+			psraw		mm0,	1					/* (sum1 + 1)/2 */				
+			psraw		mm4,	1					/* (sum2 + 1)/2 */				
+			
+			pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+			pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+			
+			psubw		mm1,	mm2					/* Variance 1 */				
+			psubw		mm5,	mm6					/* Variance 2 */				
+
+			movq		[Variance12], mm1			/* Save Variance1 */
+			movq		[Variance22], mm5			/* Save Variance2 */
+			
+			movq		mm3,	FLimitMmx			/* mm3 = FLimit */				
+			movq		mm2,	mm1					/* copy of Varinace 1*/
+
+			movq		mm6,	mm5					/* Variance 2 */
+			psubw		mm1,	mm3					/* Variance 1 < Flimit? */		
+			
+			psubw		mm5,	mm3					/* Variance 2 < Flimit? */		
+			psraw		mm2,	15					/* Variance 1 > 32768? */
+
+			psraw		mm6,	15					/* Vaiance  2 > 32768? */	
+			psraw		mm1,	15					/* FFFF/0000 for true/false */	
+			
+			psraw		mm5,	15					/* FFFF/0000 for true/false */	
+			movq		mm0,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+			pandn		mm2,	mm1					/* Variance1<32678 && 
+													   Variance1<Limit			*/
+			pandn		mm6,	mm5					/* Variance2<32678 && 
+													   Variance1<Limit			*/
+
+			movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+			pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+													/*     &&Variance2 < Flimit */	
+			movq		mm2,	mm0					/* make copy of Pixel4		*/	
+													
+			psubusw		mm0,	mm4					/* 4 - 5 */						
+			psubusw		mm4,	mm2					/* 5 - 4 */						
+			
+			por			mm0,	mm4					/* abs(4 - 5) */				
+			psubw		mm0,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+			
+			psraw		mm0,	15					/* FFFF/0000 for True/False */
+			pand		mm0,	mm6				
+			
+			sub			edi,	8					/* offset edi back */			
+			
+			/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+			/* mm0 and mm7 now are in use  */										
+            
+            /* find the loop filtered values for the pixels on block boundary */
+            movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+            movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+            movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+            movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+            movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+            psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+            psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+            movq        mm4,    mm5                 /* make a copy */
+
+            paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+            paddw       mm3,    FourFours           /* mm3 + 4 */
+
+            paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+            paddw       mm3,    mm5                 /* Filtval before shift */
+
+            psraw       mm3,    3                   /* FiltVal */
+            movq        mm2,    mm3                 /* make a copy */
+
+            psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+            pxor        mm2,    mm3
+
+            psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+            por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+            movq        mm4,    mm1                 /* make a copy of Flimit */
+            psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+            movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+            psraw       mm1,    15                  /* FFFF or 0000 */
+
+            pxor        mm5,    mm1                 
+            psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+            psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+            pmullw      mm4,    mm3                 /* get the sign back */
+
+            movq        mm1,    [edi+64]            /* p[-1] */
+            movq        mm2,    [edi+80]            /* p[0] */
+            
+            paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+            psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+            pxor        mm6,    mm6                 /* clear mm6 */
+            
+            packuswb    mm1,    mm1                 /* clamping */
+            packuswb    mm2,    mm2                 /* clamping */
+
+            punpcklbw   mm1,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+            punpcklbw   mm2,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesDown, mm2 /* save the values */
+            
+
+			/* Let's do the filtering now */										
+			/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+			/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+			
+			movq		mm5,	[edi]				/* mm5 = -5 */					
+			movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+			
+			movq		mm3,	mm4					/* copy of -4 */				
+			movq		mm6,	mm5					/* copy of -5 */				
+			
+			psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+			psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+			
+			por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+			psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+			
+			psraw		mm4,	15					/* FFFF/0000 for True/False */	
+			movq		mm1,	mm4					/* copy of the mm4 */			
+			
+			pand		mm4,	mm6					/*							*/	
+			pandn		mm1,	mm3					/*							*/	
+			
+			por			mm1,	mm4					/* mm1 = p1					*/	
+			
+			/* now find P2 */														
+			
+			movq		mm4,	[edi+128]			/* mm4 = [3] */					
+			movq		mm5,	[edi+144]			/* mm5 = [4] */					
+			
+			movq		mm3,	mm4					/* copy of 3 */					
+			movq		mm6,	mm5					/* copy of 4 */					
+			
+			psubusw		mm4,	mm6					/* mm4 = [3] - [4] */			
+			psubusw		mm5,	mm3					/* mm5 = [4] - [3] */			
+			
+			por			mm4,	mm5					/* abs([3]-[4] ) */				
+			psubw		mm4,	QStepMmx			/* abs([3]-[4] )<QStep? */		
+			
+			psraw		mm4,	15					/* FFFF/0000 for True/False */	
+			movq		mm2,	mm4					/* copy of the mm4 */			
+			
+			pand		mm4,	mm6					/*							*/	
+			pandn		mm2,	mm3					/*							*/	
+			
+			por			mm2,	mm4					/* mm2 = p2					*/	
+			
+			/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+			/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+			/* Des[-w4] = Src[-w4]; */												
+			/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+			
+			movq		mm3,	mm1					/* mm3 = p1 */					
+			paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+			
+			paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+			movq		mm4,	[edi+16]			/* mm4 = x1 */					
+			
+			paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+			paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+			
+			paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+			paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+			
+			paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+			movq		mm4,	mm3					/* mm4 = mm3 */					
+			
+			movq		mm5,	[edi+16]			/* mm5 = x1 */					
+			paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+			
+			psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+			psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+			
+			paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+			psraw		mm4,	4					/* mm4 >>=4 */					
+			
+			psubw		mm4,	mm5					/* New Value - old Value */		
+			pand		mm4,	mm7					/* And the flag */				
+			
+			paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi],	mm4					/* Write new x1 */				
+			
+			/* sum += x5 -p1 */														
+			/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+			
+			movq		mm5,	[edi+32]			/* mm5= x2 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+			movq		mm4,	mm5					/* copy sum */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x2 */				
+			paddw		mm4,	mm4					/* mm4 <<= 1 */					
+			
+			psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+			paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+			
+			psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm7					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+16], mm4				/* write new x2 */				
+			
+			/* sum += x6 - p1 */													
+			/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+			
+			movq		mm5,	[edi+48]			/* mm5= x3 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+			movq		mm4,	mm5					/* copy x3 */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x3 */				
+			paddw		mm4,	mm4					/* mm4 <<= 1 */					
+			
+			psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+			paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+			
+			psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm7					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+32], mm4				/* write new x3 */				
+			
+			/* sum += x7 - p1 */													
+			/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+			
+			movq		mm5,	[edi+64]			/* mm5 = x4 */					
+			psubw		mm3,	mm1					/* sum = sum-p1 */				
+			
+			paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+			movq		mm4,	mm5					/* mm4 = x4 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+			paddw		mm4,	mm4					/* mm4 *=2 */					
+			
+			paddw		mm4,	mm1					/* += p1 */						
+			psubw		mm4,	[edi+16]			/* -= x1 */						
+			
+			psubw		mm4,	[edi+112]			/* -= x7 */						
+			paddw		mm4,	[edi+128]			/* += x8 */						
+			
+			movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+            psraw		mm4,	4					/* >>=4 */						
+            
+            psubw		mm4,	mm5					/* -=x4 */						
+			pand		mm4,	mm7					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x4 */						
+			movq		[esi+48], mm4				/* write new x4 */				
+			
+			/* sum+= x8-x1 */														
+			/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+			
+			movq		mm5,	[edi+80]			/* mm5 = x5 */					
+			psubw		mm3,	[edi+16]			/* sum -= x1 */					
+			
+			paddw		mm3,	[edi+128]			/* sub += x8 */					
+			movq		mm4,	mm5					/* mm4 = x5 */					
+			
+			paddw		mm4,	mm3					/* mm4= sum+x5 */				
+			paddw		mm4,	mm4					/* mm4 *= 2 */					
+			
+			paddw		mm4,	[edi+16]			/* += x1 */						
+			psubw		mm4,	[edi+32]			/* -= x2 */						
+			
+			psubw		mm4,	[edi+128]			/* -= x8 */						
+			paddw		mm4,	mm2					/* += p2 */						
+			
+			movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+            psraw		mm4,	4					/* >>=4 */						
+
+            psubw		mm4,	mm5					/* -=x5 */						
+			pand		mm4,	mm7					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x5 */										
+			movq		[esi+64], mm4				/* write new x5 */				
+			
+			/* sum += p2 - x2 */													
+			/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+			
+			movq		mm5,	[edi+96]			/* mm5 = x6 */					
+			psubw		mm3,	[edi+32]			/* -= x2 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x6 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+32]			/* +=x2 */						
+			psubw		mm4,	[edi+48]			/* -=x3 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x6 */						
+			
+			pand		mm4,	mm7					/* and flag */					
+			paddw		mm4,	mm5					/* += x6 */						
+			
+			movq		[esi+80], mm4				/* write new x6 */				
+			
+			/* sum += p2 - x3 */													
+			/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+			
+			movq		mm5,	[edi+112]			/* mm5 = x7 */					
+			psubw		mm3,	[edi+48]			/* -= x3 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x7 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+48]			/* +=x3 */						
+			psubw		mm4,	[edi+64]			/* -=x4 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x7 */						
+			
+			pand		mm4,	mm7					/* and flag */					
+			paddw		mm4,	mm5					/* += x7 */						
+			
+			movq		[esi+96], mm4				/* write new x7 */				
+			
+			/* sum += p2 - x4 */													
+			/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+			
+			movq		mm5,	[edi+128]			/* mm5 = x8 */					
+			psubw		mm3,	[edi+64]			/* -= x4 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x8 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+64]			/* +=x4 */						
+			psubw		mm4,	[edi+80]			/* -=x5 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x8 */						
+			
+			pand		mm4,	mm7					/* and flag */					
+			paddw		mm4,	mm5					/* += x8 */						
+			
+			movq		[esi+112], mm4				/* write new x8 */				
+			
+			/* done with left four columns */										
+			/* now do the right four columns */
+			
+			add			edi,	8					/* shift to right four column */
+			add			esi,	8					/* shift to right four column */
+			
+			/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */
+			/* mm0 (the flag for the right four columns) is still in use */
+			
+            /* find the loop filtered values for the pixels on block boundary */
+
+            movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+            movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+            movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+            movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+            movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+            psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+            psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+            movq        mm4,    mm5                 /* make a copy */
+
+            paddw       mm3,    FourFours           /* mm3 + 4 */
+            paddw       mm4,    mm4                 /* 2 * ( p[0] - p[-1] ) */
+
+            paddw       mm3,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+            paddw       mm3,    mm5                 /* Filtval before shift */
+
+            psraw       mm3,    3                   /* FiltVal */
+            movq        mm2,    mm3                 /* make a copy */
+
+            psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+            pxor        mm2,    mm3
+
+            psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+            por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+            movq        mm4,    mm1                 /* make a copy of Flimit */
+            psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+            movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+            psraw       mm1,    15                  /* FFFF or 0000 */
+
+            pxor        mm5,    mm1                 
+            psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+            psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+            pmullw      mm4,    mm3                 /* get the sign back */
+
+            movq        mm1,    [edi+64]            /* p[-1] */
+            movq        mm2,    [edi+80]            /* p[0] */
+            
+            paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+            psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+            pxor        mm6,    mm6                 /* clear mm6 */
+            
+            packuswb    mm1,    mm1                 /* clamping */
+            packuswb    mm2,    mm2                 /* clamping */
+
+            punpcklbw   mm1,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+            punpcklbw   mm2,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesDown, mm2 /* save the values */
+            
+            
+            /* Let's do the filtering now */										
+			/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+			/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+			
+			movq		mm5,	[edi]				/* mm5 = -5 */					
+			movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+			
+			movq		mm3,	mm4					/* copy of -4 */				
+			movq		mm6,	mm5					/* copy of -5 */				
+			
+			psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+			psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+			
+			por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+			psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+			
+			psraw		mm4,	15					/* FFFF/0000 for True/False */	
+			movq		mm1,	mm4					/* copy of the mm4 */			
+			
+			pand		mm4,	mm6					/*							*/	
+			pandn		mm1,	mm3					/*							*/	
+			
+			por			mm1,	mm4					/* mm1 = p1					*/	
+			
+			/* now find P2 */														
+			
+			movq		mm4, [edi+128]				/* mm4 = [3] */					
+			movq		mm5, [edi+144]				/* mm5 = [4] */					
+			
+			movq		mm3, mm4					/* copy of 3 */					
+			movq		mm6, mm5					/* copy of 4 */					
+			
+			psubusw		mm4, mm6					/* mm4 = [3] - [4] */			
+			psubusw		mm5, mm3					/* mm5 = [4] - [3] */			
+			
+			por			mm4, mm5					/* abs([3]-[4] ) */				
+			psubw		mm4, QStepMmx				/* abs([3]-[4] )<QStep? */		
+			
+			psraw		mm4, 15						/* FFFF/0000 for True/False */	
+			movq		mm2, mm4					/* copy of the mm4 */			
+			
+			pand		mm4, mm6					/*							*/	
+			pandn		mm2, mm3					/*							*/	
+			
+			por			mm2, mm4					/* mm2 = p2					*/	
+			
+			/* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */				
+			/* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */			
+			/* Des[-w4]=Src[-w4]; */												
+			/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+			
+			movq		mm3,	mm1					/* mm3 = p1 */					
+			paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+			
+			paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+			movq		mm4,	[edi+16]			/* mm4 = x1 */					
+			
+			paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+			paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+			
+			paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+			paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+			
+			paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+			movq		mm4,	mm3					/* mm4 = mm3 */					
+			
+			movq		mm5,	[edi+16]			/* mm5 = x1 */					
+			paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+			
+			psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+			psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+			
+			paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+			psraw		mm4,	4					/* mm4 >>=4 */					
+			
+			psubw		mm4,	mm5					/* New Value - old Value */		
+			pand		mm4,	mm0					/* And the flag */				
+			
+			paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi],	mm4					/* Write new x1 */				
+			
+			/* sum += x5 -p1 */														
+			/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+			
+			movq		mm5,	[edi+32]			/* mm5= x2 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+			movq		mm4,	mm5					/* copy sum */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x2 */				
+			paddw		mm4,	mm4					/* mm4 <<= 1 */					
+			
+			psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+			paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+			
+			psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm0					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+16], mm4				/* write new x2 */				
+			
+			/* sum += x6 - p1 */													
+			/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+			
+			movq		mm5,	[edi+48]			/* mm5= x3 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+			movq		mm4,	mm5					/* copy x3 */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x3 */				
+			paddw		mm4,	mm4					/* mm4 <<= 1 */					
+			
+			psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+			paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+			
+			psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm0					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+32], mm4				/* write new x3 */				
+			
+			/* sum += x7 - p1 */													
+			/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+			
+			movq		mm5,	[edi+64]			/* mm5 = x4 */					
+			psubw		mm3,	mm1					/* sum = sum-p1 */				
+			
+			paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+			movq		mm4,	mm5					/* mm4 = x4 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+			paddw		mm4,	mm4					/* mm4 *=2 */					
+			
+			paddw		mm4,	mm1					/* += p1 */						
+			psubw		mm4,	[edi+16]			/* -= x1 */						
+			
+			psubw		mm4,	[edi+112]			/* -= x7 */						
+			paddw		mm4,	[edi+128]			/* += x8 */						
+			
+			movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+            psraw		mm4,	4					/* >>=4 */						
+
+			psubw		mm4,	mm5					/* -=x4 */						
+			pand		mm4,	mm0					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x4 */						
+			movq		[esi+48], mm4				/* write new x4 */				
+
+			/* sum+= x8-x1 */														
+			/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+			
+			movq		mm5,	[edi+80]			/* mm5 = x5 */					
+			psubw		mm3,	[edi+16]			/* sum -= x1 */					
+			
+			paddw		mm3,	[edi+128]			/* sub += x8 */					
+			movq		mm4,	mm5					/* mm4 = x5 */					
+			
+			paddw		mm4,	mm3					/* mm4= sum+x5 */				
+			paddw		mm4,	mm4					/* mm4 *= 2 */					
+			
+			paddw		mm4,	[edi+16]			/* += x1 */						
+			psubw		mm4,	[edi+32]			/* -= x2 */						
+			
+			psubw		mm4,	[edi+128]			/* -= x8 */						
+			paddw		mm4,	mm2					/* += p2 */						
+
+			movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+			psraw		mm4,	4					/* >>=4 */						
+
+            psubw		mm4,	mm5					/* -=x5 */						
+			pand		mm4,	mm0					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x5 */						
+			movq		[esi+64], mm4				/* write new x5 */				
+			
+			/* sum += p2 - x2 */													
+			/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+			
+			movq		mm5,	[edi+96]			/* mm5 = x6 */					
+			psubw		mm3,	[edi+32]			/* -= x2 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x6 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+32]			/* +=x2 */						
+			psubw		mm4,	[edi+48]			/* -=x3 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x6 */						
+			
+			pand		mm4,	mm0					/* and flag */					
+			paddw		mm4,	mm5					/* += x6 */						
+			
+			movq		[esi+80], mm4				/* write new x6 */				
+			
+			/* sum += p2 - x3 */													
+			/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+			
+			movq		mm5,	[edi+112]			/* mm5 = x7 */					
+			psubw		mm3,	[edi+48]			/* -= x3 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x7 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+48]			/* +=x3 */						
+			psubw		mm4,	[edi+64]			/* -=x4 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x7 */						
+			
+			pand		mm4,	mm0					/* and flag */					
+			paddw		mm4,	mm5					/* += x7 */						
+			
+			movq		[esi+96], mm4				/* write new x7 */				
+			
+			/* sum += p2 - x4 */													
+			/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+			
+			movq		mm5,	[edi+128]			/* mm5 = x8 */					
+			psubw		mm3,	[edi+64]			/* -= x4 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x8 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+			paddw		mm4,	mm4					/* mm4 *= 2*/					
+			
+			paddw		mm4,	[edi+64]			/* +=x4 */						
+			psubw		mm4,	[edi+80]			/* -=x5 */						
+			
+			psraw		mm4,	4					/* >>=4 */						
+			psubw		mm4,	mm5					/* -=x8 */						
+			
+			pand		mm4,	mm0					/* and flag */					
+			paddw		mm4,	mm5					/* += x8 */						
+			
+			movq		[esi+112], mm4				/* write new x8 */				
+			
+			/* done with right four column */										
+			add			edi,	8					/* shift edi to point x1 */
+			sub			esi,	8					/* shift esi back to x1 */
+
+			mov			ebp, Des					/* the destination */							
+			lea			ebp, [ebp + edx *4]			/* point to des[-w4] */			
+			
+			movq		mm0, [esi]													
+			packuswb	mm0, [esi + 8]												
+			
+			movq		[ebp], mm0					/* write des[-w4] */			
+			
+			movq		mm1, [esi + 16]												
+			packuswb	mm1, [esi + 24]												
+			
+			movq		[ebp+ecx ], mm1				/* write des[-w3] */			
+			
+			movq		mm2, [esi + 32]												
+			packuswb	mm2, [esi + 40]												
+			
+			movq		[ebp+ecx*2 ], mm2			/* write des[-w2] */			
+			
+			movq		mm3, [esi + 48]												
+			packuswb	mm3, [esi + 56]												
+			
+			lea			ebp, [ebp+ecx*4]			/* point to des[0] */			
+			movq		[ebp+edx], mm3				/* write des[-w1] */			
+			
+			movq		mm0, [esi + 64]												
+			packuswb	mm0, [esi + 72]												
+			
+			movq		[ebp ], mm0					/* write des[0] */				
+			
+			movq		mm1, [esi + 80]												
+			packuswb	mm1, [esi + 88]												
+			
+			movq		[ebp+ecx], mm1				/* write des[w1] */				
+			
+			movq		mm2, [esi + 96]												
+			packuswb	mm2, [esi + 104]											
+			
+			movq		[ebp+ecx*2], mm2			/* write des[w2] */				
+			
+			movq		mm3, [esi + 112]											
+			packuswb	mm3, [esi + 120]											
+			
+			lea			ebp, [ebp+ecx*2]			/* point to des[w4] */			
+			movq		[ebp+ecx], mm3				/* write des[w3] */				
+
+
+			pop			edi
+			pop			esi
+			pop			edx
+			pop			ecx
+			pop			ebp
+			pop			eax
+			
+		} /* end of the macro */
+		
+		Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+		Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+		pbi->FragmentVariances[CurrentFrag] += Var1;
+
+		Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+		Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+		pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+		
+
+        if(CurrentFrag==StartFrag)
+			CurrentFrag++;
+		else
+		{
+			
+			Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+			Src=Des;
+
+			QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];		
+			for( j=0; j<8;j++)
+			{
+				Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+				Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);		
+			}
+
+			__asm
+			{
+			/* Save the registers */
+			push		eax
+			push		ebp
+				/* Calculate the FLimit and store FLimit and QStep */					
+				mov			eax,	QStep				/* get QStep */
+				movd		mm0,	eax					/* mm0 = 0, 0, 0, Q */
+
+			push		ecx			
+				
+				punpcklwd	mm0,	mm0					/* mm0 = 0, 0, Q, Q */
+				movq		mm1,	FourThrees			/* mm1 = 03 03 03 03 */
+
+			push		edx
+				
+				punpckldq	mm0,	mm0					/* mm0 = Q, Q, Q, Q */
+				movq		QStepMmx,	mm0				/* write the Q step */
+
+			push		esi
+
+                pmullw		mm1,	mm0					/* mm1 = QStep * 3 */							
+				pmullw		mm1,	mm0					/* mm1 = QStep * QStep * 3 */	
+
+			push		edi
+				
+				
+				psrlw		mm1,	5					/* mm1 = FLimit */				
+				movq		[FLimitMmx], mm1			/* Save FLimit */				
+
+				/* setup the pointers to data */
+
+				mov			eax,	Src					/* eax = Src */
+				xor			edx,	edx					/* clear edx */
+				
+				sub			eax,	4					/* eax = Src-4 */
+				lea			esi,	NewRows				/* esi = NewRows */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				sub			edx,	ecx					/* edx = -Pitch */				
+
+				/* Get the data to the intermediate buffer */
+
+				movq		mm0,	[eax]				/* mm0 = 07 06 05 04 03 02 01 00 */
+				movq		mm1,	[eax+ecx]			/* mm1 = 17 16 15 14 13 12 11 10 */
+
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 27 26 25 24 23 22 21 20 */
+				lea			eax,	[eax+ecx*4]			/* Go down four Rows */	
+
+				movq		mm3,	[eax+edx]			/* mm3 = 37 36 35 34 33 32 31 30 */
+				movq		mm4,	mm0					/* mm4 = 07 06 05 04 03 02 01 00 */
+			
+				punpcklbw	mm0,	mm1					/* mm0 = 13 03 12 02 11 01 10 00 */
+				punpckhbw	mm4,	mm1					/* mm4 = 17 07 16 06 15 05 14 04 */
+
+				movq		mm5,	mm2					/* mm5 = 27 26 25 24 23 22 21 20 */
+				punpcklbw	mm2,	mm3					/* mm2 = 33 23 32 22 31 21 30 20 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 37 27 36 26 35 25 34 24 */
+				movq		mm1,	mm0					/* mm1 = 13 03 12 02 11 01 10 00 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 31 21 11 01 30 20 10 00 */
+				punpckhwd	mm1,	mm2					/* mm1 = 33 23 13 03 32 22 12 02 */
+				
+				movq		mm2,	mm4					/* mm2 = 17 07 16 06 15 05 14 04 */
+				punpckhwd	mm4,	mm5					/* mm4 = 37 27 17 07 36 26 16 06 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 35 25 15 05 34 24 14 04 */
+				pxor		mm7,	mm7					/* clear mm7 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 30 20 10 00 */
+
+				movq		[edi+16], mm0				/* write 00 10 20 30 */
+				punpckhbw	mm5,	mm7					/* mm5 = 31 21 11 01 */
+
+				movq		mm0,	mm1					/* mm0 =33 23 13 03 32 22 12 02 */
+				movq		[edi+32], mm5				/* write 01 11 21 31 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 32 22 12 02 */
+				punpckhbw	mm0,	mm7					/* mm0 = 33 23 13 03 */
+
+				movq		[edi+48], mm1				/* write 02 12 22 32 */
+				movq		mm3,	mm2					/* mm3 = 35 25 15 05 34 24 14 04 */
+				
+				movq		mm5,	mm4					/* mm5 = 37 27 17 07 36 26 16 06 */
+				movq		[edi+64], mm0				/* write 03 13 23 33 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 34 24 14 04 */
+				punpckhbw	mm3,	mm7					/* mm3 = 35 25 15 05 */
+
+				movq		[edi+80], mm2				/* write 04 14 24 34 */
+				punpcklbw	mm4,	mm7					/* mm4 = 36 26 16 06 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 37 27 17 07 */
+				movq		[edi+96], mm3				/* write 05 15 25 35 */
+			
+				movq		mm0,	[eax]				/* mm0 = 47 46 45 44 43 42 41 40 */
+				movq		mm1,	[eax + ecx ]		/* mm1 = 57 56 55 54 53 52 51 50 */
+
+				movq		[edi+112], mm4				/* write 06 16 26 36 */
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 67 66 65 64 63 62 61 60 */
+
+				lea			eax,	[eax+ ecx*4]		/* Go down four rows */
+				movq		[edi+128], mm5				/* write 07 17 27 37 */
+
+				movq		mm4,	mm0					/* mm4 = 47 46 45 44 43 42 41 40 */
+				movq		mm3,	[eax+edx]			/* mm3 = 77 76 75 74 73 72 71 70 */
+
+				punpcklbw	mm0,	mm1					/* mm0 = 53 43 52 42 51 41 50 40 */
+				punpckhbw	mm4,	mm1					/* mm4 = 57 47 56 46 55 45 54 44 */
+
+				movq		mm5,	mm2					/* mm5 = 67 66 65 64 63 62 61 60 */
+				punpcklbw	mm2,	mm3					/* mm2 = 73 63 72 62 71 61 70 60 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 77 67 76 66 75 65 74 64 */
+				movq		mm1,	mm0					/* mm1 = 53 43 52 42 51 41 50 40 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 71 61 51 41 70 60 50 40 */
+				punpckhwd	mm1,	mm2					/* mm1 = 73 63 53 43 72 62 52 42 */
+				
+				movq		mm2,	mm4					/* mm2 = 57 47 56 46 55 45 54 44 */
+				punpckhwd	mm4,	mm5					/* mm4 = 77 67 57 47 76 66 56 46 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 75 65 55 45 74 64 54 44 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 70 60 50 40 */
+
+				movq		[edi+24], mm0				/* write 40 50 60 70 */
+				punpckhbw	mm5,	mm7					/* mm5 = 71 61 51 41 */
+
+				movq		mm0,	mm1					/* mm0 = 73 63 53 43 72 62 52 42 */
+				movq		[edi+40], mm5				/* write 41 51 61 71 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 72 62 52 42 */
+				punpckhbw	mm0,	mm7					/* mm0 = 73 63 53 43 */
+
+				movq		[edi+56], mm1				/* write 42 52 62 72 */
+				movq		mm3,	mm2					/* mm3 = 75 65 55 45 74 64 54 44 */
+				
+				movq		mm5,	mm4					/* mm5 = 77 67 57 47 76 66 56 46 */
+				movq		[edi+72], mm0				/* write 43 53 63 73 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 74 64 54 44 */
+				punpckhbw	mm3,	mm7					/* mm3 = 75 65 55 45 */
+
+				movq		[edi+88], mm2				/* write 44 54 64 74 */
+				punpcklbw	mm4,	mm7					/* mm4 = 76 66 56 46 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 77 67 57 47 */
+				movq		[edi+104], mm3				/* write 45 55 65 75 */
+			
+				movq		[edi+120], mm4				/* write 46 56 66 76 */
+				movq		[edi+136], mm5				/* write 47 57 67 77 */
+
+
+				/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+				
+				/* we use mm0,mm1,mm2 for 1234 and mm4, mm5, mm6 for 5-8 */				
+				/* mm7 = 0, mm3 = {128, 128, 128, 128} */								
+				
+				pcmpeqw		mm3,	mm3					/* mm3 = FFFFFFFFFFFFFFFF */	
+				psllw		mm3,	15					/* mm3 = 8000800080008000 */	
+				psrlw		mm3,	8					/* mm3 = 0080008000800080 */
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel8^2 */			
+				
+				/* mm0 = x1 + x2 + x3 + x4 (each pixel biased by -128) */				
+				/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm4 = x5 + x6 + x7 + x8 (each pixel biased by -128) */				
+				/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				
+				movq		mm7,	mm3					/* mm7 = mm3 */					
+				psrlw		mm7,	7					/* mm7 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm7					/* (sum1 + 1) */				
+				paddw		mm4,	mm7					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+				
+				movq		[Variance11], mm1				/* Save Variance1 */
+				movq		[Variance21], mm5				/* Save Variance2 */
+
+				movq		mm7,	FLimitMmx			/* mm7 = FLimit */
+				movq		mm2,	mm1					/* copy of Variance 1*/
+
+				movq		mm6,	mm5					/* copy of Variance 2*/
+				psubw		mm1,	mm7					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm7					/* Variance 2 < Flimit? */		
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				psraw		mm2,	15					/* Variance 1 > 32768 ? */
+
+				psraw		mm6,	15					/* Variance 2 > 32768 ? */
+				movq		mm7,	[edi+64]			/* mm7 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance 1 < Flimit &&
+														   Variance 1 < 32768		*/
+				pandn		mm6,	mm5					/* Variance 2 < Flimit &&
+														   Variance 2 < 32768		*/
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/				
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+				movq		mm2,	mm7					/* make copy of Pixel4		*/	
+
+				psubusw		mm7,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm7,	mm4					/* abs(4 - 5) */				
+				psubw		mm7,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm7,	15					/* FFFF/0000 for True/False */
+				pand		mm7,	mm6													
+				
+				/* mm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* now let's look at the right four columns */							
+				
+				add			edi,	8					/* offset 8 to right 4 cols */	
+				
+				movq		mm2,	[edi+16]			/* Pixel 1 */					
+				movq		mm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				movq		mm0,	mm2					/* mm0 = pixel 1 */				
+				movq		mm4,	mm6					/* mm4 = pixel 5 */				
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel1 * pixel1 */		
+				pmullw		mm6,	mm6					/* mm6 = pixel5 * pixel5 */		
+				
+				movq		mm1,	mm2					/* mm1 = pixel1^2 */			
+				movq		mm5,	mm6					/* mm5 = pixel5^2 */			
+				
+				movq		mm2,	[edi+32]			/* Pixel 2 */					
+				movq		mm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 2 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 6 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel2^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel6^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel2^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel6^2 */			
+				
+				movq		mm2,	[edi+48]			/* Pixel 3 */					
+				movq		mm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 3 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 7 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel3^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel7^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel3^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel7^2 */			
+				
+				movq		mm2,	[edi+64]			/* Pixel 4 */					
+				movq		mm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		mm2,	mm3					/* mm2 -=128 */					
+				psubw		mm6,	mm3					/* mm6 -=128 */					
+				
+				paddw		mm0,	mm2					/* mm0 += pixel 4 */			
+				paddw		mm4,	mm6					/* mm4 += pixel 8 */			
+				
+				pmullw		mm2,	mm2					/* mm2 = pixel4^2 */			
+				pmullw		mm6,	mm6					/* mm6 = pixel8^2 */			
+				
+				paddw		mm1,	mm2					/* mm1 += pixel4^2 */			
+				paddw		mm5,	mm6					/* mm5 += pixel8^2 */			
+				
+				/* mm0 = x1 + x2 + x3 + x4 (each pixel biased by -128) */				
+				/* mm1 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* mm4 = x5 + x6 + x7 + x8 (each pixel biased by -128) */				
+				/* mm5 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				
+				psrlw		mm3,	7					/* mm3 = 0001000100010001 */	
+				
+				movq		mm2,	mm0					/* make copy of sum1 */			
+				movq		mm6,	mm4					/* make copy of sum2 */			
+				
+				paddw		mm0,	mm3					/* (sum1 + 1) */				
+				paddw		mm4,	mm3					/* (sum2 + 1) */				
+				
+				psraw		mm2,	1					/* sum1 /2 */					
+				psraw		mm6,	1					/* sum2 /2 */					
+				
+				psraw		mm0,	1					/* (sum1 + 1)/2 */				
+				psraw		mm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		mm2,	mm0					/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		mm6,	mm4					/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		mm1,	mm2					/* Variance 1 */				
+				psubw		mm5,	mm6					/* Variance 2 */				
+				
+				movq		[Variance12], mm1				/* Save Variance1 */
+				movq		[Variance22], mm5				/* Save Variance2 */
+	
+				movq		mm3,	FLimitMmx			/* mm3 = FLimit */				
+				movq		mm2,	mm1					/* copy of Variance 1 */
+
+				movq		mm6,	mm5					/* Variance 2 */
+				psubw		mm1,	mm3					/* Variance 1 < Flimit? */		
+				
+				psubw		mm5,	mm3					/* Variance 2 < Flimit? */		
+				psraw		mm6,	15					/* Variance 2 > 32768 */
+				
+				psraw		mm2,	15					/* Variance 1 > 32768 */
+				psraw		mm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		mm5,	15					/* FFFF/0000 for true/false */	
+				movq		mm0,	[edi+64]			/* mm0 = Pixel 4			*/	
+
+				pandn		mm2,	mm1					/* Variance1<32768 && 
+														   Variance1<Limit			*/
+				pandn		mm6,	mm5					/* Variance2<32768 && 
+														   Variance2<Limit			*/
+				
+				movq		mm4,	[edi+80]			/* mm4 = Pixel 5			*/	
+				pand		mm6,	mm2					/* mm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+				movq		mm2,	mm0					/* make copy of Pixel4		*/	
+														
+				psubusw		mm0,	mm4					/* 4 - 5 */						
+				psubusw		mm4,	mm2					/* 5 - 4 */						
+				
+				por			mm0,	mm4					/* abs(4 - 5) */				
+				psubw		mm0,	QStepMmx			/* abs(4-5)<QStepMmx ? */		
+				
+				psraw		mm0,	15					/* FFFF/0000 for True/False */
+				pand		mm0,	mm6				
+				
+				sub			edi,	8					/* offset edi back */			
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 and mm7 now are in use  */										
+                /* find the loop filtered values for the pixels on block boundary */
+                movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+                movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+                movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+                movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+                movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+                psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+                psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+                movq        mm4,    mm5                 /* make a copy */
+
+                paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+                paddw       mm3,    FourFours           /* mm3 + 4 */
+
+                paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+                paddw       mm3,    mm5                 /* Filtval before shift */
+
+                psraw       mm3,    3                   /* FiltVal */
+                movq        mm2,    mm3                 /* make a copy */
+
+                psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+                pxor        mm2,    mm3
+
+                psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+                por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+                movq        mm4,    mm1                 /* make a copy of Flimit */
+                psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+                movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+                psraw       mm1,    15                  /* FFFF or 0000 */
+
+                pxor        mm5,    mm1                 
+                psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      mm4,    mm3                 /* get the sign back */
+
+                movq        mm1,    [edi+64]            /* p[-1] */
+                movq        mm2,    [edi+80]            /* p[0] */
+                
+                paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+                psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+                pxor        mm6,    mm6                 /* clear mm6 */
+                
+                packuswb    mm1,    mm1                 /* clamping */
+                packuswb    mm2,    mm2                 /* clamping */
+
+                punpcklbw   mm1,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+                punpcklbw   mm2,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesDown, mm2 /* save the values */
+
+                /* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4,	[edi+128]			/* mm4 = [3] */					
+				movq		mm5,	[edi+144]			/* mm5 = [4] */					
+				
+				movq		mm3,	mm4					/* copy of 3 */					
+				movq		mm6,	mm5					/* copy of 4 */					
+				
+				psubusw		mm4,	mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5,	mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4,	mm5					/* abs([3]-[4] ) */				
+				psubw		mm4,	QStepMmx			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm2,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm2,	mm3					/*							*/	
+				
+				por			mm2,	mm4					/* mm2 = p2					*/	
+				
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm7					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy x2 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm7					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */						
+				paddw		mm4,	[edi+128]			/* += x8 */						
+
+                movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+				psraw		mm4,	4					/* >>=4 */						
+
+				psubw		mm4,	mm5					/* -=x4 */						
+				pand		mm4,	mm7					/* and flag */					
+
+				paddw		mm4,	mm5					/* += x4 */						
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sum += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+
+                movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x5 */
+				psraw		mm4,	4					/* >>=4 */						
+
+                psubw		mm4,	mm5					/* -=x5 */						
+				pand		mm4,	mm7					/* and flag */					
+
+                paddw		mm4,	mm5					/* += x5 */						
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm7					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with left four columns */										
+				/* now do the right four columns */										
+				
+				add			edi,	8					/* shift to right four column */
+				add			esi,	8					/* shift to right four column */
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 now are in use  */										
+                /* find the loop filtered values for the pixels on block boundary */
+                movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+                movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+                movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+                movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+                movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+                psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+                psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+                movq        mm4,    mm5                 /* make a copy */
+
+                paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+                paddw       mm3,    FourFours           /* mm3 + 4 */
+
+                paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+                paddw       mm3,    mm5                 /* Filtval before shift */
+
+                psraw       mm3,    3                   /* FiltVal */
+                movq        mm2,    mm3                 /* make a copy */
+
+                psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+                pxor        mm2,    mm3
+
+                psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+                por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+                movq        mm4,    mm1                 /* make a copy of Flimit */
+                psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+                movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+                psraw       mm1,    15                  /* FFFF or 0000 */
+
+                pxor        mm5,    mm1                 
+                psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      mm4,    mm3                 /* get the sign back */
+
+                movq        mm1,    [edi+64]            /* p[-1] */
+                movq        mm2,    [edi+80]            /* p[0] */
+                
+                paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+                psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+                pxor        mm6,    mm6                 /* clear mm6 */
+                
+                packuswb    mm1,    mm1                 /* clamping */
+                packuswb    mm2,    mm2                 /* clamping */
+
+                punpcklbw   mm1,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+                punpcklbw   mm2,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesDown, mm2 /* save the values */
+
+                /* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movq		mm5,	[edi]				/* mm5 = -5 */					
+				movq		mm4,	[edi + 16]			/* mm4 = -4 */					
+				
+				movq		mm3,	mm4					/* copy of -4 */				
+				movq		mm6,	mm5					/* copy of -5 */				
+				
+				psubusw		mm4,	mm6					/* mm4 = [-4] - [-5] */			
+				psubusw		mm5,	mm3					/* mm5 = [-5] - [-4] */			
+				
+				por			mm4,	mm5					/* abs([-4]-[-5] ) */			
+				psubw		mm4,	QStepMmx			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		mm4,	15					/* FFFF/0000 for True/False */	
+				movq		mm1,	mm4					/* copy of the mm4 */			
+				
+				pand		mm4,	mm6					/*							*/	
+				pandn		mm1,	mm3					/*							*/	
+				
+				por			mm1,	mm4					/* mm1 = p1					*/	
+				
+				/* now find P2 */														
+				
+				movq		mm4, [edi+128]				/* mm4 = [3] */					
+				movq		mm5, [edi+144]				/* mm5 = [4] */					
+				
+				movq		mm3, mm4					/* copy of 3 */					
+				movq		mm6, mm5					/* copy of 4 */					
+				
+				psubusw		mm4, mm6					/* mm4 = [3] - [4] */			
+				psubusw		mm5, mm3					/* mm5 = [4] - [3] */			
+				
+				por			mm4, mm5					/* abs([3]-[4] ) */				
+				psubw		mm4, QStepMmx				/* abs([3]-[4] )<QStep? */		
+				
+				psraw		mm4, 15						/* FFFF/0000 for True/False */	
+				movq		mm2, mm4					/* copy of the mm4 */			
+				
+				pand		mm4, mm6					/*							*/	
+				pandn		mm2, mm3					/*							*/	
+				
+				por			mm2, mm4					/* mm2 = p2					*/	
+				
+				/* psum = p1 + p1 + p1 + v[1] + v[2] + v[3] + v[4] + 4; */				
+				/* Des[-w4] = (((psum + v[1]) << 1) - (v[4] - v[5])) >> 4; */			
+				/* Des[-w4]=Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */	
+				
+				movq		mm3,	mm1					/* mm3 = p1 */					
+				paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+				
+				paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+				movq		mm4,	[edi+16]			/* mm4 = x1 */					
+				
+				paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+				paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+				
+				paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+				paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+				
+				paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movq		mm4,	mm3					/* mm4 = mm3 */					
+				
+				movq		mm5,	[edi+16]			/* mm5 = x1 */					
+				paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+				
+				psllw		mm4,	1					/* mm4 = (sum+x1)<<1 */			
+				psubw		mm4,	[edi+64]			/* mm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		mm4,	[edi+80]			/* mm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		mm4,	4					/* mm4 >>=4 */					
+				
+				psubw		mm4,	mm5					/* New Value - old Value */		
+				pand		mm4,	mm0					/* And the flag */				
+				
+				paddw		mm4,	mm5					/* add the old value back */	
+				movq		[esi],	mm4					/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movq		mm5,	[edi+32]			/* mm5= x2 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+				movq		mm4,	mm5					/* copy x2 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x2 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+80]			/* mm4 =(sum+x2)<<1-x5 */		
+				paddw		mm4,	[edi+96]			/* mm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+16], mm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movq		mm5,	[edi+48]			/* mm5= x3 */					
+				psubw		mm3,	mm1					/* sum=sum-p1 */				
+				
+				paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+				movq		mm4,	mm5					/* copy x3 */					
+				
+				paddw		mm4,	mm3					/* mm4=sum+x3 */				
+				paddw		mm4,	mm4					/* mm4 <<= 1 */					
+				
+				psubw		mm4,	[edi+96]			/* mm4 =(sum+x3)<<1-x6 */		
+				paddw		mm4,	[edi+112]			/* mm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		mm4,	4					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		mm4,	mm5					/* new value - old value	*/	
+				
+				pand		mm4,	mm0					/* And the flag */				
+				paddw		mm4,	mm5					/* add the old value back */	
+				
+				movq		[esi+32], mm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movq		mm5,	[edi+64]			/* mm5 = x4 */					
+				psubw		mm3,	mm1					/* sum = sum-p1 */				
+				
+				paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+				movq		mm4,	mm5					/* mm4 = x4 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+				paddw		mm4,	mm4					/* mm4 *=2 */					
+				
+				paddw		mm4,	mm1					/* += p1 */						
+				psubw		mm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		mm4,	[edi+112]			/* -= x7 */				
+				paddw		mm4,	[edi+128]			/* += x8 */						
+
+                movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+				psraw		mm4,	4					/* >>=4 */						
+
+                psubw		mm4,	mm5					/* -=x4 */						
+				pand		mm4,	mm0					/* and flag */					
+
+                paddw		mm4,	mm5					/* += x4 */						
+				movq		[esi+48], mm4				/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movq		mm5,	[edi+80]			/* mm5 = x5 */					
+				psubw		mm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		mm3,	[edi+128]			/* sub += x8 */					
+				movq		mm4,	mm5					/* mm4 = x5 */					
+				
+				paddw		mm4,	mm3					/* mm4= sum+x5 */				
+				paddw		mm4,	mm4					/* mm4 *= 2 */					
+				
+				paddw		mm4,	[edi+16]			/* += x1 */						
+				psubw		mm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		mm4,	[edi+128]			/* -= x8 */						
+				paddw		mm4,	mm2					/* += p2 */						
+
+                movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x5 */
+				psraw		mm4,	4					/* >>=4 */						
+
+                psubw		mm4,	mm5					/* -=x5 */						
+				pand		mm4,	mm0					/* and flag */					
+
+                paddw		mm4,	mm5					/* += x5 */						
+				movq		[esi+64], mm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movq		mm5,	[edi+96]			/* mm5 = x6 */					
+				psubw		mm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x6 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+32]			/* +=x2 */						
+				psubw		mm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x6 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x6 */						
+				
+				movq		[esi+80], mm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movq		mm5,	[edi+112]			/* mm5 = x7 */					
+				psubw		mm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x7 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+48]			/* +=x3 */						
+				psubw		mm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x7 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x7 */						
+				
+				movq		[esi+96], mm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movq		mm5,	[edi+128]			/* mm5 = x8 */					
+				psubw		mm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		mm3,	mm2					/* += p2 */						
+				movq		mm4,	mm5					/* mm4 = x8 */					
+				
+				paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+				paddw		mm4,	mm4					/* mm4 *= 2*/					
+				
+				paddw		mm4,	[edi+64]			/* +=x4 */						
+				psubw		mm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		mm4,	4					/* >>=4 */						
+				psubw		mm4,	mm5					/* -=x8 */						
+				
+				pand		mm4,	mm0					/* and flag */					
+				paddw		mm4,	mm5					/* += x8 */						
+				
+				movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with right four column */	
+				/* transpose */
+				mov			eax,	Des					/* the destination */			
+				add			edi,	8					/* shift edi to point x1 */
+
+				sub			esi,	8					/* shift esi back to left x1 */
+				sub			eax,	4
+
+				movq		mm0,	[esi]				/* mm0 = 30 20 10 00 */
+				movq		mm1,	[esi+16]			/* mm1 = 31 21 11 01 */
+
+				movq		mm4,	mm0					/* mm4 = 30 20 10 00 */
+				punpcklwd	mm0,	mm1					/* mm0 = 11 10 01 00 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 31 30 21 20 */
+				movq		mm2,	[esi+32]			/* mm2 = 32 22 12 02 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 33 23 13 03 */
+				movq		mm5,	mm2					/* mm5 = 32 22 12 02 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 13 12 03 02 */
+				punpckhwd	mm5,	mm3					/* mm5 = 33 32 23 22 */
+
+				movq		mm1,	mm0					/* mm1 = 11 10 01 00 */
+				punpckldq	mm0,	mm2					/* mm0 = 03 02 01 00 */
+
+				movq		[edi],	mm0					/* write 00 01 02 03 */
+				punpckhdq	mm1,	mm2					/* mm1 = 13 12 11 10 */
+				
+				movq		mm0,	mm4					/* mm0 = 31 30 21 20 */
+				movq		[edi+16], mm1				/* write 10 11 12 13 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 23 22 21 20 */
+				punpckhdq	mm4,	mm5					/* mm4 = 33 32 31 30 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 34 24 14 04 */
+				movq		mm2,	[esi+80]			/* mm2 = 35 25 15 05 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 36 26 16 06 */
+				movq		mm6,	[esi+112]			/* mm6 = 37 27 17 07 */
+								
+				movq		mm3,	mm1					/* mm3 = 34 24 14 04 */
+				movq		mm7,	mm5					/* mm7 = 36 26 16 06 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 15 14 05 04 */
+				punpckhwd	mm3,	mm2					/* mm3 = 35 34 25 24 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 17 16 07 06 */
+				punpckhwd	mm7,	mm6					/* mm7 = 37 36 27 26 */
+
+				movq		mm2,	mm1					/* mm2 = 15 14 05 04 */
+				movq		mm6,	mm3					/* mm6 = 35 34 25 24 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 07 06 05 04 */
+				punpckhdq	mm2,	mm5					/* mm2 = 17 16 15 14 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 27 26 25 24 */
+				punpckhdq	mm6,	mm7					/* mm6 = 37 36 35 34 */
+			
+				movq		mm5,	[edi]				/* mm5 = 03 02 01 00 */
+				packuswb	mm5,	mm1					/* mm5 = 07 06 05 04 03 02 01 00 */
+				
+				movq		[eax],	mm5					/* write 00 01 02 03 04 05 06 07 */
+				movq		mm7,	[edi+16]			/* mm7 = 13 12 11 10 */
+
+				packuswb	mm7,	mm2					/* mm7 = 17 16 15 14 13 12 11 10 */
+				movq		[eax+ecx], mm7				/* write 10 11 12 13 14 15 16 17 */
+
+				packuswb	mm0,	mm3					/* mm0 = 27 26 25 24 23 22 21 20 */
+				packuswb	mm4,	mm6					/* mm4 = 37 36 35 34 33 32 31 30 */
+				
+				movq		[eax+ecx*2], mm0			/* write 20 21 22 23 24 25 26 27 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 30 31 32 33 34 35 36 37 */
+				add			edi, 8						/* move to right four column */
+				add			esi, 8						/* move to right x1 */
+
+				movq		mm0,	[esi]				/* mm0 = 70 60 50 40 */
+				movq		mm1,	[esi+16]			/* mm1 = 71 61 51 41 */
+
+				movq		mm4,	mm0					/* mm4 = 70 60 50 40 */
+				punpcklwd	mm0,	mm1					/* mm0 = 51 50 41 40 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 71 70 61 60 */
+				movq		mm2,	[esi+32]			/* mm2 = 72 62 52 42 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 73 63 53 43 */
+				movq		mm5,	mm2					/* mm5 = 72 62 52 42 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 53 52 43 42 */
+				punpckhwd	mm5,	mm3					/* mm5 = 73 72 63 62 */
+
+				movq		mm1,	mm0					/* mm1 = 51 50 41 40 */
+				punpckldq	mm0,	mm2					/* mm0 = 43 42 41 40 */
+
+				movq		[edi],	mm0					/* write 40 41 42 43 */
+				punpckhdq	mm1,	mm2					/* mm1 = 53 52 51 50 */
+				
+				movq		mm0,	mm4					/* mm0 = 71 70 61 60 */
+				movq		[edi+16], mm1				/* write 50 51 52 53 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 63 62 61 60 */
+				punpckhdq	mm4,	mm5					/* mm4 = 73 72 71 70 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 74 64 54 44 */
+				movq		mm2,	[esi+80]			/* mm2 = 75 65 55 45 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 76 66 56 46 */
+				movq		mm6,	[esi+112]			/* mm6 = 77 67 57 47 */
+								
+				movq		mm3,	mm1					/* mm3 = 74 64 54 44 */
+				movq		mm7,	mm5					/* mm7 = 76 66 56 46 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 55 54 45 44 */
+				punpckhwd	mm3,	mm2					/* mm3 = 75 74 65 64 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 57 56 47 46 */
+				punpckhwd	mm7,	mm6					/* mm7 = 77 76 67 66 */
+
+				movq		mm2,	mm1					/* mm2 = 55 54 45 44 */
+				movq		mm6,	mm3					/* mm6 = 75 74 65 64 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 47 46 45 44 */
+				punpckhdq	mm2,	mm5					/* mm2 = 57 56 55 54 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 67 66 65 64 */
+				punpckhdq	mm6,	mm7					/* mm6 = 77 76 75 74 */
+			
+				movq		mm5,	[edi]				/* mm5 = 43 42 41 40 */
+				packuswb	mm5,	mm1					/* mm5 = 47 46 45 44 43 42 41 40 */
+				
+				movq		[eax],	mm5					/* write 40 41 42 43 44 45 46 47 */
+				movq		mm7,	[edi+16]			/* mm7 = 53 52 51 50 */
+
+				packuswb	mm7,	mm2					/* mm7 = 57 56 55 54 53 52 51 50 */
+				movq		[eax+ecx], mm7				/* write 50 51 52 53 54 55 56 57 */
+
+				packuswb	mm0,	mm3					/* mm0 = 67 66 65 64 63 62 61 60 */
+				packuswb	mm4,	mm6					/* mm4 = 77 76 75 74 73 72 71 70 */
+				
+				movq		[eax+ecx*2], mm0			/* write 60 61 62 63 64 65 66 67 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 70 71 72 73 74 75 76 77 */
+				
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			ebp
+				pop			eax
+			}//__asm	
+
+			Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+			Var1 += Variance12[0]+ Variance12[1]+Variance12[2]+Variance12[3];
+			pbi->FragmentVariances[CurrentFrag-1] += Var1;
+
+			Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+			Var2 += Variance22[0]+ Variance22[1]+Variance22[2]+Variance22[3];
+			pbi->FragmentVariances[CurrentFrag] += Var2;
+		
+			CurrentFrag ++;
+
+		}//else
+			
+	}//while
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeblockNonFilteredBandNewFilter_MMX
+ *
+ *  INPUTS        :     pbi, SrcPtr, DesPtr, PlaneLineStep, FragAcross,
+ *                      StartFrag, QuantScale (see the parameter list below)
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Filter both horizontal and vertical edge in a band
+ *
+ *  SPECIAL NOTES :     Uses a sum of absolute differences to determine where
+ *                      to apply the new 7 tap filter.
+ *
+ *	REFERENCE	  :		
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+ void DeblockNonFilteredBandNewFilter_MMX(
+                                          POSTPROC_INSTANCE *pbi, 
+                                          UINT8 *SrcPtr, 
+                                          UINT8 *DesPtr,
+                                          UINT32 PlaneLineStep, 
+                                          UINT32 FragAcross,
+                                          UINT32 StartFrag,
+                                          UINT32 *QuantScale
+                                          )
+{
+	UINT32 j;
+	UINT32 CurrentFrag=StartFrag;
+	UINT32 QStep;
+    UINT32 LoopFLimit;
+	UINT8 *Src, *Des;
+
+#if defined(_WIN32_WCE)
+	#pragma pack(16)
+	short QStepMmx[4];
+	short FLimitMmx[4];
+	short LoopFLimitMmx[4];
+	short Rows[80];
+	short NewRows[64];
+	short LoopFilteredValuesUp[4];
+	short LoopFilteredValuesDown[4];
+	unsigned char Variance11[8];
+	unsigned char Variance21[8];
+    UINT32 Var1, Var2;
+	#pragma pack()
+#else
+	__declspec(align(16)) short QStepMmx[4];
+	__declspec(align(16)) short FLimitMmx[4];
+	__declspec(align(16)) short LoopFLimitMmx[4];
+	__declspec(align(16)) short Rows[80];
+	__declspec(align(16)) short NewRows[64];
+	__declspec(align(16)) short LoopFilteredValuesUp[4];
+	__declspec(align(16)) short LoopFilteredValuesDown[4];
+	__declspec(align(16)) unsigned char Variance11[8];
+	__declspec(align(16)) unsigned char Variance21[8];
+    UINT32 Var1, Var2;
+#endif
+
+
+	QStep = QuantScale[pbi->FrameQIndex];
+	QStepMmx[0] = (INT16)QStep;
+    QStepMmx[1] = (INT16)QStep;
+    QStepMmx[2] = (INT16)QStep;
+    QStepMmx[3] = (INT16)QStep;
+    LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+    LoopFLimitMmx[0] = (INT16)LoopFLimit;
+    LoopFLimitMmx[1] = (INT16)LoopFLimit;
+    LoopFLimitMmx[2] = (INT16)LoopFLimit;
+    LoopFLimitMmx[3] = (INT16)LoopFLimit;
+
+	while(CurrentFrag < StartFrag + FragAcross )
+	{
+
+		Src=SrcPtr+8*(CurrentFrag-StartFrag);
+		Des=DesPtr+8*(CurrentFrag-StartFrag);
+    	__asm 
+		{
+			
+			push		eax
+
+			push		ebp
+			
+			push		ecx			
+
+			push		edx
+
+			push		esi
+
+			push		edi
+
+			/* Calculate the FLimit and store FLimit and QStep */					
+			/* Copy the data to the intermediate buffer */							
+			mov			eax,	QStep
+			xor			edx,	edx					/* clear edx */					
+
+			mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+			movd		mm5,	eax
+
+            mov			eax,	Src					/* eax = Src */					
+			punpcklwd	mm5,	mm5					
+
+			lea			esi,	NewRows				/* esi = NewRows */
+			punpckldq	mm5,	mm5
+			
+            sub			edx,	ecx					/* edx = - Pitch */
+            movq        mm6,    mm5                 /*  Q Q Q Q */
+
+            paddw       mm6,    mm5                 
+            paddw       mm6,    mm5                 /* 3Q3Q3Q3Q */
+
+            packuswb    mm5,    mm5                 /* QQQQQQQQ */            
+			movq		QStepMmx,	mm5
+
+            psraw       mm6,    2                   /*  F F F F */           
+            packuswb    mm6,    mm6                 /* FFFFFFFF */
+
+			lea			edi,	Rows				/* edi = Rows */				
+            pxor		mm7,	mm7					/* Clear mm7 */					
+
+            psubb       mm6,    Eight128c           /* Eight (F-128)s */        
+    
+			lea			eax,	[eax + edx * 4 ]	/* eax = Src - 4*Pitch */		
+			movq		mm0,	[eax + edx]			/* mm0 = Src[-5*Pitch] */		
+
+			movq		mm1,	mm0					/* mm1 = mm0 */					
+			punpcklbw	mm0,	mm7					/* Lower Four -5 */				
+
+            movq        mm4,    mm1                 /* mm4 = Src[-5*Pitch] */
+            movq		[FLimitMmx], mm6            /* FFFF FFFF */		
+			
+			movq		mm2,	[eax]				/* mm2 = Src[-4*Pitch] */		
+			punpckhbw	mm1,	mm7					/* Higher Four -5 */	
+            
+			movq		[edi],	mm0					/* Write Lower Four of -5 */					
+            movq        mm5,    mm2                 /* mm5 = S_4 */
+            
+            movq		mm3,	mm2					/* mm3 = S_4 */					
+			movq		[edi+8], mm1				/* Write Higher Four of -5 */	
+
+            movq		mm0,	[eax + ecx]			/* mm0 = Src[-3*Pitch] */		
+            psubusb     mm5,    mm4                 /* S_4 - S_5 */
+            
+            psubusb     mm4,    mm2                 /* S_5 - S_4 */
+            punpcklbw	mm2,	mm7					/* Lower Four -4 */				
+
+            por         mm4,    mm5                 /* abs(S_4-S_5) */
+            movq		[edi+16], mm2				/* Write Lower -4 */			
+
+            movq        mm6,    mm3                 /* mm6 = S_4 */
+			punpckhbw	mm3,	mm7					/* higher Four -4 */			
+
+            movq		[edi+24], mm3				/* write hight -4 */						
+            movq		mm1,	mm0					/* mm1 = S_3 */					
+
+			punpcklbw	mm0,	mm7					/* lower four -3 */				
+			movq		[edi+32], mm0				/* write Lower -3 */			
+
+			movq		mm2,	[eax + ecx *2]		/* mm2 = Src[-2*Pitch] */		
+            movq        mm5,    mm1                 /* mm5 = S_3 */
+
+            psubusb     mm5,    mm6                 /* S_3 - S_4 */            
+            psubusb     mm6,    mm1                 /* S_4 - S_3 */
+
+            por         mm5,    mm6                 /* abs(S_4-S_3) */
+            movq        mm6,    mm1                 /* mm6 = S_3 */
+			
+			punpckhbw	mm1,	mm7					/* higher four -3 */						
+			movq		mm3,	mm2					/* mm3 = S_2 */					
+			
+			movq		[edi+40], mm1				/* write Higher -3 */			
+            paddusb      mm4,    mm5                 /* abs(S_5-S_4)+abs(S_4-S_3) */
+    
+            movq        mm5,    mm2                 /* mm5 = S_2 */
+            psubusb     mm5,    mm6                 /* S_2 - S_3 */
+
+            psubusb     mm6,    mm2                 /* S_3 - S_2 */
+            por         mm5,    mm6                 /* abs(S_3 - S_2) */
+
+            movq        mm6,    mm2                 /* mm6 = S_2 */
+
+			punpcklbw	mm2,	mm7					/* lower four -2 */				
+			lea			eax,	[eax + ecx *4]		/* eax = Src */					
+			
+			punpckhbw	mm3,	mm7					/* higher four -2 */			
+
+			movq		mm0,	[eax + edx]			/* mm2 = Src[-Pitch] */			
+			movq		[edi+48], mm2				/* lower -2	*/					
+			
+            paddusb     mm4,    mm5                 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2) */
+            movq        mm5,    mm0                 /* mm5 = S_1 */
+
+			movq		[edi+56], mm3				/* higher -2 */					
+            movq		mm1,	mm0					/* mm1 = S_1 */					
+
+            psubusb     mm5,    mm6                 /* S_1 - S_2 */
+            psubusb     mm6,    mm1                 /* S_2 - S_1 */
+			
+            punpcklbw	mm0,	mm7					/* lower -1 */					
+            por         mm5,    mm6                 /* abs(S_2 - S_1) */
+
+            movq		[edi+64], mm0				/* Lower -1 */					
+            movq        mm6,    mm1                 /* mm6 = S_1 */
+
+            punpckhbw	mm1,	mm7					/* Higher -1 */					
+			movq		[edi+72], mm1				/* Higher -1 */					
+
+			movq		mm0,	[eax]				/* mm0 = Src[0] */				
+            paddusb       mm4,    mm5               /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) */
+
+            movq        [Variance11], mm4;          /* save the variance */
+
+            movq        mm5,    FLimitMmx           /* mm5 = FFFF FFFF */
+            psubb       mm4,    Eight128c           /* abs(..) - 128 */
+
+            pcmpgtb     mm5,    mm4                 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) < FLimit ? */
+			            
+            movq		mm1,	mm0					/* mm1 = S0 */					
+			punpcklbw	mm0,	mm7					/* lower 0 */					
+    
+            movq        mm4,    mm1                 /* mm4 = S0 */
+			movq		[edi+80], mm0				/* write lower 0 */				
+
+            psubusb     mm4,    mm6                 /* S0 - S_1 */
+            psubusb     mm6,    mm1                 /* S_1 - S0 */
+
+			movq		mm0,	[eax + ecx]			/* mm0 = Src[Pitch] */			
+            movq        mm3,    QStepMmx            /* mm3 = QQQQQQQQQ */
+
+            por         mm4,    mm6                 /* abs(S0 - S_1) */            
+            movq        mm6,    mm1                 /* mm6 = S0 */
+            
+            psubb       mm3,    Eight128c           /* -128 for using signed compare*/
+            psubb       mm4,    Eight128c           /* -128 for using signed compare*/
+
+            pcmpgtb     mm3,    mm4                 /* abs(S0-S_1) < QStep */
+			punpckhbw	mm1,	mm7					/* higher 0 */			
+            
+            movq        mm4,    mm0                 /* mm4 = S1 */
+            pand        mm5,    mm3                 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2 - S_1) < FLimit &&
+                                                       abs(S0-S_1) < QStep */
+
+            movq		[edi+88], mm1				/* write higher 0 */			
+			
+			movq		mm1,	mm0					/* mm1 = S1 */					
+            psubusb     mm4,    mm6                 /* S1 - S0 */
+
+			punpcklbw	mm0,	mm7					/* lower 1 */					
+            psubusb     mm6,    mm1                 /* S0 - S1 */
+
+            movq		[edi+96], mm0				/* write lower 1 */		
+            por         mm4,    mm6                 /* mm4 = abs(S1-S0) */
+
+			movq		mm2,	[eax + ecx *2 ]     /* mm2 = Src[2*Pitch] */		
+            movq        mm6,    mm1                 /* mm6 = S1 */
+
+            lea			eax,	[eax + ecx *4]		/* eax = Src + 4 * Pitch  */	
+            punpckhbw	mm1,	mm7					/* higher 1 */					
+			
+			
+			movq		mm0,	mm2					/* mm0 = S2 */					
+			movq		[edi+104], mm1				/* wirte higher 1 */			
+
+
+            movq        mm3,    mm0                 /* mm3 = S2 */
+			movq		mm1,	[eax + edx ]		/* mm4 = Src[3*pitch] */		
+            
+            punpcklbw	mm2,	mm7					/* lower 2 */					
+            psubusb     mm3,    mm6                 /* S2 - S1 */
+            
+            psubusb     mm6,    mm0                 /* S1 - S2 */
+            por         mm3,    mm6                 /* abs(S1-S2) */
+
+            movq		[edi+112], mm2				/* write lower 2 */				
+            movq        mm6,    mm0                 /* mm6 = S2 */
+
+			punpckhbw	mm0,	mm7					/* higher 2 */					
+            paddusb       mm4,    mm3                 /* abs(S0-S1)+abs(S1-S2) */
+			
+            movq        mm2,    mm1                 /* mm2 = S3 */            
+            movq        mm3,    mm1                 /* mm3 = S3 */
+			
+			movq		[edi+120], mm0				/* write higher 2 */			
+			punpcklbw	mm1,	mm7					/* Low 3	*/					
+
+			movq		mm0,	[eax]				/* mm0 = Src[4*pitch] */		
+            psubusb     mm3,    mm6                 /* S3 - S2 */
+
+            psubusb     mm6,    mm2                 /* S2 - S3 */
+            por         mm3,    mm6                 /* abs(S2-S3) */
+            
+            movq		[edi+128], mm1				/* low 3 */						
+            movq        mm6,    mm2                 /* mm6 = S3 */
+            
+			punpckhbw	mm2,	mm7					/* high 3 */					
+			paddusb       mm4,    mm3                 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3) */
+
+
+			movq		mm1,	mm0					/* mm1 = S4 */					
+            movq        mm3,    mm0                 /* mm3 = S4 */
+			
+            movq		[edi+136], mm2				/* high 3 */					
+            punpcklbw	mm0,	mm7					/* low 4 */						
+            
+            psubusb     mm3,    mm6                 /* S4 - S3 */
+   			movq		[edi+144], mm0				/* low 4 */						
+   
+            psubusb     mm6,    mm1                 /* S3 - S4 */
+            por         mm3,    mm6                 /* abs(S3-S4) */
+
+            punpckhbw	mm1,	mm7					/* high 4 */							
+			paddusb     mm4,    mm3                 /* abs((S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4) */
+        
+            movq        [Variance21], mm4;          /* save the variance */
+
+            movq        mm6,    FLimitMmx           /* mm6 = FFFFFFFFF */
+			psubb        mm4,    Eight128c           /* abs(..) - 128 */
+
+            movq		[edi+152], mm1				/* high 4 */					
+			
+	        pcmpgtb     mm6,    mm4                 /* abs((S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4)<FLimit? */
+            pand        mm6,    mm5                 /* Flag */
+
+			/* done with copying everything to intermediate buffer */				
+            /* mm7 = 0, mm6 = Flag */								
+            movq        mm0,    mm6
+            movq        mm7,    mm6 
+            
+            punpckhbw   mm0,    mm6
+            punpcklbw   mm7,    mm6
+            
+			/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+			/* mm0 and mm7 now are in use  */										
+            
+            /* find the loop filtered values for the pixels on block boundary */
+            movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+            movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+            movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+            movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+            movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+            psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+            psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+            movq        mm4,    mm5                 /* make a copy */
+
+            paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+            paddw       mm3,    FourFours           /* mm3 + 4 */
+
+            paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+            paddw       mm3,    mm5                 /* Filtval before shift */
+
+            psraw       mm3,    3                   /* FiltVal */
+            movq        mm2,    mm3                 /* make a copy */
+
+            psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+            pxor        mm2,    mm3
+
+            psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+            por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+            movq        mm4,    mm1                 /* make a copy of Flimit */
+            psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+            movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+            psraw       mm1,    15                  /* FFFF or 0000 */
+
+            pxor        mm5,    mm1                 
+            psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+            psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+            pmullw      mm4,    mm3                 /* get the sign back */
+
+            movq        mm1,    [edi+64]            /* p[-1] */
+            movq        mm2,    [edi+80]            /* p[0] */
+            
+            paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+            psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+            pxor        mm6,    mm6                 /* clear mm6 */
+            
+            packuswb    mm1,    mm1                 /* clamping */
+            packuswb    mm2,    mm2                 /* clamping */
+
+            punpcklbw   mm1,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+            punpcklbw   mm2,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesDown, mm2 /* save the values */
+
+            /* Let's do the filtering now */										
+            /* p1 = Src[-5] */		
+            /* p2 = Src[+4] */		
+            /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+            
+            movq		mm3,	[edi]			    /* mm3 = [-5] */
+            movq		mm2,	[edi+144]			/* mm2 = [4] */					
+
+            movq		mm1,	mm3					/* p1 = [-4] */					
+			paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+
+			movq		mm4,	[edi+16]			/* mm4 = x1 */					
+			paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+            
+			paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+			paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+			
+			paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+			paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+			
+			paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+
+            /* Des[-w4] = (((sum + x1) >> 3; */			
+			/* Des[-w4] = Src[-w4]; */												
+			/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+			
+			movq		mm4,	mm3					/* mm4 = mm3 */					
+			movq		mm5,	[edi+16]			/* mm5 = x1 */					
+
+            paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+			psraw		mm4,	3					/* mm4 >>=4 */					
+
+            psubw		mm4,	mm5					/* New Value - old Value */		
+			pand		mm4,	mm7					/* And the flag */				
+			
+			paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi],	mm4					/* Write new x1 */				
+			
+			/* sum += x5 -p1 */														
+			/* Des[-w3]=((sum+x2)>>3 */									
+			
+			movq		mm5,	[edi+32]			/* mm5= x2 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+			movq		mm4,	mm5					/* copy sum */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x2 */				
+			psraw		mm4,	3					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm7					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+16], mm4				/* write new x2 */				
+			
+			/* sum += x6 - p1 */													
+			/* Des[-w2]=((sum+x[3])>>3 */								
+			
+			movq		mm5,	[edi+48]			/* mm5= x3 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+			movq		mm4,	mm5					/* copy x3 */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x3 */				
+			psraw		mm4,	3					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+            psubw		mm4,	mm5					/* new value - old value	*/	
+			pand		mm4,	mm7					/* And the flag */				
+
+            paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi+32], mm4				/* write new x3 */				
+			
+			/* sum += x7 - p1 */													
+			/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+			
+			movq		mm5,	[edi+64]			/* mm5 = x4 */					
+			psubw		mm3,	mm1					/* sum = sum-p1 */				
+			
+			paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+			movq		mm4,	mm5					/* mm4 = x4 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+			movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+            psraw		mm4,	3					/* >>=4 */						
+            psubw		mm4,	mm5					/* -=x4 */						
+
+            pand		mm4,	mm7					/* and flag */					
+            paddw		mm4,	mm5					/* += x4 */						
+
+            movq		[esi+48], mm4				/* write new x4 */				
+			
+			/* sum+= x8-x1 */														
+			/* Des[0]=((sum+x5)>>3 */								
+			
+			movq		mm5,	[edi+80]			/* mm5 = x5 */					
+			psubw		mm3,	[edi+16]			/* sum -= x1 */					
+			
+			paddw		mm3,	[edi+128]			/* sub += x8 */					
+			movq		mm4,	mm5					/* mm4 = x5 */					
+			
+			paddw		mm4,	mm3					/* mm4= sum+x5 */				
+			movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+            psraw		mm4,	3					/* >>=4 */						
+            psubw		mm4,	mm5					/* -=x5 */						
+
+            pand		mm4,	mm7					/* and flag */					
+            paddw		mm4,	mm5					/* += x5 */										
+
+            movq		[esi+64], mm4				/* write new x5 */				
+			
+			/* sum += p2 - x2 */													
+			/* Des[w1] = ((sum+x6)>>3 */								
+			
+			movq		mm5,	[edi+96]			/* mm5 = x6 */					
+			psubw		mm3,	[edi+32]			/* -= x2 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x6 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+			psraw		mm4,	3					/* >>=3 */						
+
+			psubw		mm4,	mm5					/* -=x6 */						
+			pand		mm4,	mm7					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x6 */						
+			movq		[esi+80], mm4				/* write new x6 */				
+			
+			/* sum += p2 - x3 */													
+			/* Des[w2] = (sum+x7)>>3 */								
+			
+			movq		mm5,	[edi+112]			/* mm5 = x7 */					
+			psubw		mm3,	[edi+48]			/* -= x3 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x7 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+			psraw		mm4,	3					/* >>=3 */						
+
+			psubw		mm4,	mm5					/* -=x7 */						
+			pand		mm4,	mm7					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x7 */						
+			movq		[esi+96], mm4				/* write new x7 */				
+			
+			/* sum += p2 - x4 */													
+			/* Des[w3] = ((sum+x8)>>3 */								
+			
+			movq		mm5,	[edi+128]			/* mm5 = x8 */					
+			psubw		mm3,	[edi+64]			/* -= x4 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x8 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+			psraw		mm4,	3					/* >>=3 */						
+			
+            psubw		mm4,	mm5					/* -=x8 */						
+			pand		mm4,	mm7					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x8 */						
+			movq		[esi+112], mm4				/* write new x8 */				
+						
+			/* done with left four columns */										
+			/* now do the righ four columns */										
+			
+			add			edi,	8					/* shift to right four column */
+			add			esi,	8					/* shift to right four column */
+			
+			/* mm0 = Variance 1 < Flimit && Variance 2 < Flimit && abs(x4-x5) < QStep */
+			/* mm0 is now in use */
+			
+
+            /* find the loop filtered values for the pixels on block boundary */
+            movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+            movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+            movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+            movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+            movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+            psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+            psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+            movq        mm4,    mm5                 /* make a copy */
+
+            paddw       mm3,    FourFours           /* mm3 + 4 */
+            paddw       mm4,    mm4                 /* 2 * ( p[0] - p[-1] ) */
+
+            paddw       mm3,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+            paddw       mm3,    mm5                 /* Filtval before shift */
+
+            psraw       mm3,    3                   /* FiltVal */
+            movq        mm2,    mm3                 /* make a copy */
+
+            psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+            pxor        mm2,    mm3
+
+            psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+            por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+            movq        mm4,    mm1                 /* make a copy of Flimit */
+            psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+            movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+            psraw       mm1,    15                  /* FFFF or 0000 */
+
+            pxor        mm5,    mm1                 
+            psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+            psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+            pmullw      mm4,    mm3                 /* get the sign back */
+
+            movq        mm1,    [edi+64]            /* p[-1] */
+            movq        mm2,    [edi+80]            /* p[0] */
+            
+            paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+            psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+            pxor        mm6,    mm6                 /* clear mm6 */
+            
+            packuswb    mm1,    mm1                 /* clamping */
+            packuswb    mm2,    mm2                 /* clamping */
+
+            punpcklbw   mm1,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+            punpcklbw   mm2,    mm6                 /* unpack to word */
+            movq        LoopFilteredValuesDown, mm2 /* save the values */
+            
+            
+            /* Let's do the filtering now */										
+            /* p1 = Src[-5] */		
+            /* p2 = Src[+4] */		
+            /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+            
+            movq		mm3,	[edi]			    /* mm3 = [-5] */
+            movq		mm2,	[edi+144]			/* mm2 = [4] */					
+            
+            movq		mm1,	mm3					/* p1 = [-4] */					
+			paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+
+			movq		mm4,	[edi+16]			/* mm4 = x1 */					
+			paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+            
+			paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+			paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+			
+			paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+			paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+			
+			paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+
+            /* where the flag is set: Des[-w4] = (sum + x1) >> 3; */
+			/* otherwise:             Des[-w4] = Src[-w4]; */
+			/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */
+			movq		mm4,	mm3					/* mm4 = mm3 */					
+			movq		mm5,	[edi+16]			/* mm5 = x1 */					
+
+            paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+			psraw		mm4,	3					/* mm4 >>=4 */					
+
+            psubw		mm4,	mm5					/* New Value - old Value */		
+			pand		mm4,	mm0					/* And the flag */				
+			
+			paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi],	mm4					/* Write new x1 */				
+			
+			/* sum += x5 -p1 */														
+			/* Des[-w3]=((sum+x2)>>3 */									
+			
+			movq		mm5,	[edi+32]			/* mm5= x2 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+			movq		mm4,	mm5					/* copy sum */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x2 */				
+			psraw		mm4,	3					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+			psubw		mm4,	mm5					/* new value - old value	*/	
+			
+			pand		mm4,	mm0					/* And the flag */				
+			paddw		mm4,	mm5					/* add the old value back */	
+			
+			movq		[esi+16], mm4				/* write new x2 */				
+			
+			/* sum += x6 - p1 */													
+			/* Des[-w2]=((sum+x[3])>>3 */								
+			
+			movq		mm5,	[edi+48]			/* mm5= x3 */					
+			psubw		mm3,	mm1					/* sum=sum-p1 */				
+			
+			paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+			movq		mm4,	mm5					/* copy x3 */					
+			
+			paddw		mm4,	mm3					/* mm4=sum+x3 */				
+			psraw		mm4,	3					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+
+            psubw		mm4,	mm5					/* new value - old value	*/	
+			pand		mm4,	mm0					/* And the flag */				
+
+            paddw		mm4,	mm5					/* add the old value back */	
+			movq		[esi+32], mm4				/* write new x3 */				
+			
+			/* sum += x7 - p1 */													
+			/* Des[-w1] = (sum+x4)>>3 where the flag is set, else the loop-filtered x4 */
+			
+			movq		mm5,	[edi+64]			/* mm5 = x4 */					
+			psubw		mm3,	mm1					/* sum = sum-p1 */				
+			
+			paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+			movq		mm4,	mm5					/* mm4 = x4 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+			movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+
+            psraw		mm4,	3					/* >>=4 */						
+            psubw		mm4,	mm5					/* -=x4 */						
+
+            pand		mm4,	mm0					/* and flag */					
+            paddw		mm4,	mm5					/* += x4 */						
+
+            movq		[esi+48], mm4				/* write new x4 */				
+			
+			/* sum+= x8-x1 */														
+			/* Des[0] = (sum+x5)>>3 where the flag is set, else the loop-filtered x5 */
+			
+			movq		mm5,	[edi+80]			/* mm5 = x5 */					
+			psubw		mm3,	[edi+16]			/* sum -= x1 */					
+			
+			paddw		mm3,	[edi+128]			/* sub += x8 */					
+			movq		mm4,	mm5					/* mm4 = x5 */					
+			
+			paddw		mm4,	mm3					/* mm4= sum+x5 */				
+			movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+
+            psraw		mm4,	3					/* >>=4 */						
+            psubw		mm4,	mm5					/* -=x5 */						
+
+            pand		mm4,	mm0					/* and flag */					
+            paddw		mm4,	mm5					/* += x5 */										
+
+            movq		[esi+64], mm4				/* write new x5 */				
+			
+			/* sum += p2 - x2 */													
+			/* Des[w1] = ((sum+x6)>>3 */								
+			
+			movq		mm5,	[edi+96]			/* mm5 = x6 */					
+			psubw		mm3,	[edi+32]			/* -= x2 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x6 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+			psraw		mm4,	3					/* >>=3 */						
+
+			psubw		mm4,	mm5					/* -=x6 */						
+			pand		mm4,	mm0					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x6 */						
+			movq		[esi+80], mm4				/* write new x6 */				
+			
+			/* sum += p2 - x3 */													
+			/* Des[w2] = (sum+x7)>>3 */								
+			
+			movq		mm5,	[edi+112]			/* mm5 = x7 */					
+			psubw		mm3,	[edi+48]			/* -= x3 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x7 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+			psraw		mm4,	3					/* >>=3 */						
+
+			psubw		mm4,	mm5					/* -=x7 */						
+			pand		mm4,	mm0					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x7 */						
+			movq		[esi+96], mm4				/* write new x7 */				
+			
+			/* sum += p2 - x4 */													
+			/* Des[w3] = ((sum+x8)>>3 */								
+			
+			movq		mm5,	[edi+128]			/* mm5 = x8 */					
+			psubw		mm3,	[edi+64]			/* -= x4 */						
+			
+			paddw		mm3,	mm2					/* += p2 */						
+			movq		mm4,	mm5					/* mm4 = x8 */					
+			
+			paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+			psraw		mm4,	3					/* >>=3 */						
+			
+            psubw		mm4,	mm5					/* -=x8 */						
+			pand		mm4,	mm0					/* and flag */					
+
+            paddw		mm4,	mm5					/* += x8 */						
+			movq		[esi+112], mm4				/* write new x8 */				
+
+			
+			/* done with right four column */										
+			add			edi,	8					/* shift edi to point x1 */
+			sub			esi,	8					/* shift esi back to x1 */
+
+			mov			ebp, Des					/* the destination */							
+			lea			ebp, [ebp + edx *4]			/* point to des[-w4] */			
+			
+			movq		mm0, [esi]													
+			packuswb	mm0, [esi + 8]												
+			
+			movq		[ebp], mm0					/* write des[-w4] */			
+			
+			movq		mm1, [esi + 16]												
+			packuswb	mm1, [esi + 24]												
+			
+			movq		[ebp+ecx ], mm1				/* write des[-w3] */			
+			
+			movq		mm2, [esi + 32]												
+			packuswb	mm2, [esi + 40]												
+			
+			movq		[ebp+ecx*2 ], mm2			/* write des[-w2] */			
+			
+			movq		mm3, [esi + 48]												
+			packuswb	mm3, [esi + 56]												
+			
+			lea			ebp, [ebp+ecx*4]			/* point to des[0] */			
+			movq		[ebp+edx], mm3				/* write des[-w1] */			
+			
+			movq		mm0, [esi + 64]												
+			packuswb	mm0, [esi + 72]												
+			
+			movq		[ebp ], mm0					/* write des[0] */				
+			
+			movq		mm1, [esi + 80]												
+			packuswb	mm1, [esi + 88]												
+			
+			movq		[ebp+ecx], mm1				/* write des[w1] */				
+			
+			movq		mm2, [esi + 96]												
+			packuswb	mm2, [esi + 104]											
+			
+			movq		[ebp+ecx*2], mm2			/* write des[w2] */				
+			
+			movq		mm3, [esi + 112]											
+			packuswb	mm3, [esi + 120]											
+			
+			lea			ebp, [ebp+ecx*2]			/* point to des[w4] */			
+			movq		[ebp+ecx], mm3				/* write des[w3] */				
+
+
+			pop			edi
+			pop			esi
+			pop			edx
+			pop			ecx
+			pop			ebp
+			pop			eax
+			
+		} /* end of the macro */
+		
+		Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+		Var1 += Variance11[4]+ Variance11[5]+Variance11[6]+Variance11[7];
+		pbi->FragmentVariances[CurrentFrag] += Var1;
+
+		Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+		Var2 += Variance21[4]+ Variance21[5]+Variance21[6]+Variance21[7];
+		pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+
+        if(CurrentFrag==StartFrag)
+			CurrentFrag++;
+		else 
+		{
+
+			Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+			Src=Des;
+
+			for( j=0; j<8;j++)
+			{
+				Rows[j] = (short) (Src[-5+j*PlaneLineStep]);
+				Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);		
+			}
+
+            __asm
+			{
+			/* Save the registers */
+			push		eax
+			push		ebp
+				/* Calculate the FLimit and store FLimit and QStep */					
+				mov			eax,	QStep				/* get QStep */
+				movd		mm0,	eax					/* mm0 = 0, 0, 0, Q */
+
+			push		ecx			
+				
+				punpcklwd	mm0,	mm0					/* mm0 = 0, 0, Q, Q */
+				punpckldq	mm0,	mm0					/* mm0 = Q, Q, Q, Q */
+
+			push		edx
+				
+                movq        mm1,    mm0                 /* mm1 = Q, Q, Q, Q */
+                paddw       mm1,    mm0                                                       
+				
+
+			push		esi
+
+               paddw        mm1,    mm0
+               packuswb     mm0,    mm0
+   
+			push		edi
+				
+                movq		QStepMmx,	mm0				/* write the Q step */
+				psraw		mm1,	2					/* mm1 = FLimit */				
+		
+                packuswb    mm1,    mm1                 /* mm1 = FFFF FFFF */
+                psubb       mm1,    Eight128c           /* F-128 */
+
+                movq		[FLimitMmx], mm1			/* Save FLimit */				
+
+				/* setup the pointers to data */
+
+				mov			eax,	Src					/* eax = Src */
+				xor			edx,	edx					/* clear edx */
+				
+				sub			eax,	4					/* eax = Src-4 */
+				lea			esi,	NewRows				/* esi = NewRows */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				sub			edx,	ecx					/* edx = -Pitch */				
+
+				/* Get the data to the intermediate buffer */
+
+				movq		mm0,	[eax]				/* mm0 = 07 06 05 04 03 02 01 00 */
+				movq		mm1,	[eax+ecx]			/* mm1 = 17 16 15 14 13 12 11 10 */
+
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 27 26 25 24 23 22 21 20 */
+				lea			eax,	[eax+ecx*4]			/* Go down four Rows */	
+
+				movq		mm3,	[eax+edx]			/* mm3 = 37 36 35 34 33 32 31 30 */
+				movq		mm4,	mm0					/* mm4 = 07 06 05 04 03 02 01 00 */
+			
+				punpcklbw	mm0,	mm1					/* mm0 = 13 03 12 02 11 01 10 00 */
+				punpckhbw	mm4,	mm1					/* mm4 = 17 07 16 06 15 05 14 04 */
+
+				movq		mm5,	mm2					/* mm5 = 27 26 25 24 23 22 21 20 */
+				punpcklbw	mm2,	mm3					/* mm2 = 33 23 32 22 31 21 30 20 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 37 27 36 26 35 25 34 24 */
+				movq		mm1,	mm0					/* mm1 = 13 03 12 02 11 01 10 00 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 31 21 11 01 30 20 10 00 */
+				punpckhwd	mm1,	mm2					/* mm1 = 33 23 13 03 32 22 12 02 */
+				
+				movq		mm2,	mm4					/* mm2 = 17 07 16 06 15 05 14 04 */
+				punpckhwd	mm4,	mm5					/* mm4 = 37 27 17 07 36 26 16 06 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 35 25 15 05 34 24 14 04 */
+				pxor		mm7,	mm7					/* clear mm7 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 30 20 10 00 */
+
+				movq		[edi+16], mm0				/* write 00 10 20 30 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 31 21 11 01 */
+
+				movq		mm0,	mm1					/* mm0 =33 23 13 03 32 22 12 02 */
+				movq		[edi+32], mm5				/* write 01 11 21 31 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 32 22 12 02 */
+				punpckhbw	mm0,	mm7					/* mm0 = 33 23 12 03 */
+
+				movq		[edi+48], mm1				/* write 02 12 22 32 */
+				movq		mm3,	mm2					/* mm3 = 35 25 15 05 34 24 14 04 */
+				
+				movq		mm5,	mm4					/* mm5 = 37 27 17 07 36 26 16 06 */
+				movq		[edi+64], mm0				/* write 03 13 23 33 */
+
+
+				punpcklbw	mm2,	mm7					/* mm2 = 34 24 14 04 */
+				punpckhbw	mm3,	mm7					/* mm3 = 35 25 15 05 */
+
+				movq		[edi+80], mm2				/* write 04 14 24 34 */
+				punpcklbw	mm4,	mm7					/* mm4 = 36 26 16 06 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 37 27 17 07 */
+				movq		[edi+96], mm3				/* write 05 15 25 35 */
+			
+				movq		mm0,	[eax]				/* mm0 = 47 46 45 44 43 42 41 40 */
+				movq		mm1,	[eax + ecx ]		/* mm1 = 57 56 55 54 53 52 51 50 */
+
+				movq		[edi+112], mm4				/* write 06 16 26 37 */
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 67 66 65 64 63 62 61 60 */
+
+				lea			eax,	[eax+ ecx*4]		/* Go down four rows */
+				movq		[edi+128], mm5				/* write 07 17 27 37 */
+
+				movq		mm4,	mm0					/* mm4 = 47 46 45 44 43 42 41 40 */
+				movq		mm3,	[eax+edx]			/* mm3 = 77 76 75 74 73 72 71 70 */
+
+				punpcklbw	mm0,	mm1					/* mm0 = 53 43 52 42 51 41 50 40 */
+				punpckhbw	mm4,	mm1					/* mm4 = 57 57 56 46 55 45 54 44 */
+
+				movq		mm5,	mm2					/* mm5 = 67 66 65 64 63 62 61 60 */
+				punpcklbw	mm2,	mm3					/* mm2 = 73 63 72 62 71 61 70 60 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 77 67 76 66 75 65 74 64 */
+				movq		mm1,	mm0					/* mm1 = 53 43 52 42 51 41 50 40 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 71 61 51 41 70 60 50 40 */
+				punpckhwd	mm1,	mm2					/* mm1 = 73 63 53 43 72 62 52 42 */
+				
+				movq		mm2,	mm4					/* mm2 = 57 57 56 46 55 45 54 44 */
+				punpckhwd	mm4,	mm5					/* mm4 = 77 67 57 47 76 66 56 46 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 75 65 55 45 74 64 54 44 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 70 60 50 40 */
+
+				movq		[edi+24], mm0				/* write 40 50 60 70 */
+				punpckhbw	mm5,	mm7					/* mm5 = 71 61 51 41 */
+
+				movq		mm0,	mm1					/* mm0 = 73 63 53 43 72 62 52 42 */
+				movq		[edi+40], mm5				/* write 41 51 61 71 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 72 62 52 42 */
+				punpckhbw	mm0,	mm7					/* mm0 = 73 63 53 43 */
+
+				movq		[edi+56], mm1				/* write 42 52 62 72 */
+				movq		mm3,	mm2					/* mm3 = 75 65 55 45 74 64 54 44 */
+				
+				movq		mm5,	mm4					/* mm5 = 77 67 57 47 76 66 56 46 */
+				movq		[edi+72], mm0				/* write 43 53 63 73 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 74 64 54 44 */
+				punpckhbw	mm3,	mm7					/* mm3 = 75 65 55 45 */
+
+				movq		[edi+88], mm2				/* write 44 54 64 74 */
+				punpcklbw	mm4,	mm7					/* mm4 = 76 66 56 46 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 77 67 57 47 */
+				movq		[edi+104], mm3				/* write 45 55 65 75 */
+			
+				movq		[edi+120], mm4				/* write 46 56 66 76 */
+				movq		[edi+136], mm5				/* write 47 57 67 77 */
+
+
+			    /* Now, compute the variances for Pixel  1-4 and 5-8 */					
+
+                
+                movq        mm0,    [edi]               /* S_5 */
+                movq        mm1,    [edi+16]            /* S_4 */
+
+                movq        mm2,    [edi+32]            /* S_3 */
+                packuswb    mm0,    [edi+8]     
+
+                packuswb    mm1,    [edi+24]
+                packuswb    mm2,    [edi+40]
+
+                movq        mm3,    [edi+48]            /* S_2 */
+                movq        mm4,    [edi+64]            /* S_1 */
+
+                packuswb    mm3,    [edi+56]
+                packuswb    mm4,    [edi+72]
+
+                movq        mm5,    mm1                 /* S_4 */
+                movq        mm6,    mm2                 /* S_3 */
+
+                psubusb     mm5,    mm0                 /* S_4 - S_5 */
+                psubusb     mm0,    mm1                 /* S_5 - S_4 */
+
+                por         mm0,    mm5                 /* abs(S_5-S_4) */
+                psubusb     mm6,    mm1                 /* S_3 - S_4 */
+
+                psubusb     mm1,    mm2                 /* S_4 - S_3 */
+                movq        mm5,    mm3                 /* S_2 */
+
+                por         mm1,    mm6                 /* abs(S_4-S_3) */
+                psubusb     mm5,    mm2                 /* S_2 - S_3 */
+                
+                psubusb     mm2,    mm3                 /* S_3 - S_2 */
+                movq        mm6,    mm4                 /* S_1 */
+
+                por         mm2,    mm5                 /* abs(S_3-S_2) */
+                psubusb     mm6,    mm3                 /* S_1 - S_2 */
+
+                psubusb     mm3,    mm4                 /* S_2 - S_1 */
+                por         mm3,    mm6                 /* abs(S_2-S_1) */
+
+                paddusb      mm0,    mm1                 /* abs(S_5-S_4)+abs(S_4-S_3) */
+                paddusb      mm2,    mm3                 /* abs(S_3-S_2)+abs(S_2-S_1) */
+
+                movq        mm7,    FLimitMmx              /* FFFFF FFFF */
+                paddusb      mm0,    mm2                 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1) */
+                
+                movq        [Variance11], mm0           /* Save the variance */
+
+                movq        mm6,    mm4                 /* S_1 */
+                psubb       mm0,    Eight128c           /* abs(..) - 128 */
+                pcmpgtb     mm7,    mm0                 /* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1)<? */
+                
+				movq        mm5,    [edi+80]            /* S0 */
+                movq        mm1,    [edi+96]            /* S1 */
+
+                movq        mm2,    [edi+112]           /* S2 */
+                packuswb    mm5,    [edi+88]     
+
+                packuswb    mm1,    [edi+104]
+                packuswb    mm2,    [edi+120]
+
+                movq        mm3,    [edi+128]           /* S3 */
+                movq        mm4,    [edi+144]           /* S4 */
+
+                packuswb    mm3,    [edi+136]
+                packuswb    mm4,    [edi+152]
+
+                movq        mm0,    mm5                 /* S0 */
+                psubusb     mm5,    mm6                 /* S0-S_1 */
+
+                psubusb     mm6,    mm0                 /* S_1-S0 */
+                por         mm5,    mm6                 /* abs(S_1-S0) */
+
+                movq        mm6,    QStepMmx            /* QQQQ QQQQ */                
+                psubb       mm5,    Eight128c           /* -128 for using signed compare*/
+
+                psubb       mm6,    Eight128c           /* -128 for using signed compare*/
+                pcmpgtb     mm6,    mm5                 /* abs(S_1-S0)<QStep? */
+
+                movq        mm5,    mm1                 /* S1 */
+                pand        mm7,    mm6                 /* abs(S_1-S0)<QStep &&
+                                                            abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1)<FLimit? */
+                movq        mm6,    mm2                 /* S2 */
+                psubusb     mm5,    mm0                 /* S1 - S0 */
+
+                psubusb     mm0,    mm1                 /* S0 - S1*/
+
+                por         mm0,    mm5                 /* abs(S0-S1) */
+                psubusb     mm6,    mm1                 /* S2 - S1 */
+
+                psubusb     mm1,    mm2                 /* S1 - S2*/
+                movq        mm5,    mm3                 /* S3 */
+
+                por         mm1,    mm6                 /* abs(S1-S2) */
+                psubusb     mm5,    mm2                 /* S3 - S2 */
+                
+                psubusb     mm2,    mm3                 /* S2 - S3 */
+                movq        mm6,    mm4                 /* S4 */
+
+                por         mm2,    mm5                 /* abs(S2-S3) */
+                psubusb     mm6,    mm3                 /* S4 - S3 */
+
+                psubusb     mm3,    mm4                 /* S3 - S4 */
+                por         mm3,    mm6                 /* abs(S3-S4) */
+
+                paddusb      mm0,    mm1                 /* abs(S0-S1)+abs(S1-S2) */
+                paddusb      mm2,    mm3                 /* abs(S2-S3)+abs(S3-S4) */
+
+                movq        mm6,    FLimitMmx           /* FFFFF FFFF */
+                paddusb      mm0,    mm2                 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4) */
+                
+                movq        [Variance21], mm0           /* Save the variance */
+                
+                psubb        mm0,    Eight128c            /* abs(..) - 128 */
+                pcmpgtb     mm6,    mm0                 /* abs(S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4)<FLimit */
+                pand        mm6,    mm7                 /* Flag */
+
+                movq        mm0,    mm6
+                movq        mm7,    mm6 
+            
+                punpckhbw   mm0,    mm6
+                punpcklbw   mm7,    mm6
+
+				/* flag = Variance 1 < Flimit && Variance 2 < Flimit && abs(x4-x5) < QStep */
+				/* mm7 (low four flag bytes) and mm0 (high four flag bytes), widened to words, are now in use */
+                /* find the loop filtered values for the pixels on block boundary */
+                movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+                movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+                movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+                movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+                movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+                psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+                psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+                movq        mm4,    mm5                 /* make a copy */
+
+                paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+                paddw       mm3,    FourFours           /* mm3 + 4 */
+
+                paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+                paddw       mm3,    mm5                 /* Filtval before shift */
+
+                psraw       mm3,    3                   /* FiltVal */
+                movq        mm2,    mm3                 /* make a copy */
+
+                psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+                pxor        mm2,    mm3
+
+                psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+                por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+                movq        mm4,    mm1                 /* make a copy of Flimit */
+                psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+                movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+                psraw       mm1,    15                  /* FFFF or 0000 */
+
+                pxor        mm5,    mm1                 
+                psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      mm4,    mm3                 /* get the sign back */
+
+                movq        mm1,    [edi+64]            /* p[-1] */
+                movq        mm2,    [edi+80]            /* p[0] */
+                
+                paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+                psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+                pxor        mm6,    mm6                 /* clear mm6 */
+                
+                packuswb    mm1,    mm1                 /* clamping */
+                packuswb    mm2,    mm2                 /* clamping */
+
+                punpcklbw   mm1,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+                punpcklbw   mm2,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesDown, mm2 /* save the values */
+
+                /* Let's do the filtering now */										
+                /* p1 = Src[-5] */		
+                /* p2 = Src[+4] */		
+                /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+                
+                movq		mm3,	[edi]			    /* mm3 = [-5] */
+                movq		mm2,	[edi+144]			/* mm2 = [4] */					
+                
+                movq		mm1,	mm3					/* p1 = [-4] */					
+                paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+                
+                movq		mm4,	[edi+16]			/* mm4 = x1 */					
+                paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+                
+                paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+                paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+                
+                paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+                paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+                
+                paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+                
+                /* where the flag is set: Des[-w4] = (sum + x1) >> 3; */
+                /* otherwise:             Des[-w4] = Src[-w4]; */
+                /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] ) */
+                
+                movq		mm4,	mm3					/* mm4 = mm3 */					
+                movq		mm5,	[edi+16]			/* mm5 = x1 */					
+                
+                paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+                psraw		mm4,	3					/* mm4 >>=3 */					
+                
+                psubw		mm4,	mm5					/* New Value - old Value */		
+                pand		mm4,	mm7					/* And the flag */				
+                
+                paddw		mm4,	mm5					/* add the old value back */	
+                movq		[esi],	mm4					/* Write new x1 */				
+                
+                /* sum += x5 -p1 */														
+                /* Des[-w3]=((sum+x2)>>3 */									
+                
+                movq		mm5,	[edi+32]			/* mm5= x2 */					
+                psubw		mm3,	mm1					/* sum=sum-p1 */				
+                
+                paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+                movq		mm4,	mm5					/* copy sum */					
+                
+                paddw		mm4,	mm3					/* mm4=sum+x2 */				
+                psraw		mm4,	3					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+                psubw		mm4,	mm5					/* new value - old value	*/	
+                
+                pand		mm4,	mm7					/* And the flag */				
+                paddw		mm4,	mm5					/* add the old value back */	
+                
+                movq		[esi+16], mm4				/* write new x2 */				
+                
+                /* sum += x6 - p1 */													
+                /* Des[-w2]=((sum+x[3])>>3 */								
+                
+                movq		mm5,	[edi+48]			/* mm5= x3 */					
+                psubw		mm3,	mm1					/* sum=sum-p1 */				
+                
+                paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+                movq		mm4,	mm5					/* copy x3 */					
+                
+                paddw		mm4,	mm3					/* mm4=sum+x3 */				
+                psraw		mm4,	3					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+                
+                psubw		mm4,	mm5					/* new value - old value	*/	
+                pand		mm4,	mm7					/* And the flag */				
+                
+                paddw		mm4,	mm5					/* add the old value back */	
+                movq		[esi+32], mm4				/* write new x3 */				
+                
+                /* sum += x7 - p1 */													
+                /* Des[-w1] = (sum+x4)>>3 where the flag is set, else the loop-filtered x4 */
+                
+                movq		mm5,	[edi+64]			/* mm5 = x4 */					
+                psubw		mm3,	mm1					/* sum = sum-p1 */				
+                
+                paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+                movq		mm4,	mm5					/* mm4 = x4 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+                movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+                
+                psraw		mm4,	3					/* >>=4 */						
+                psubw		mm4,	mm5					/* -=x4 */						
+                
+                pand		mm4,	mm7					/* and flag */					
+                paddw		mm4,	mm5					/* += x4 */						
+                
+                movq		[esi+48], mm4				/* write new x4 */				
+                
+                /* sum+= x8-x1 */														
+                /* Des[0] = (sum+x5)>>3 where the flag is set, else the loop-filtered x5 */
+                
+                movq		mm5,	[edi+80]			/* mm5 = x5 */					
+                psubw		mm3,	[edi+16]			/* sum -= x1 */					
+                
+                paddw		mm3,	[edi+128]			/* sub += x8 */					
+                movq		mm4,	mm5					/* mm4 = x5 */					
+                
+                paddw		mm4,	mm3					/* mm4= sum+x5 */				
+                movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+                
+                psraw		mm4,	3					/* >>=4 */						
+                psubw		mm4,	mm5					/* -=x5 */						
+                
+                pand		mm4,	mm7					/* and flag */					
+                paddw		mm4,	mm5					/* += x5 */										
+                
+                movq		[esi+64], mm4				/* write new x5 */				
+                
+                /* sum += p2 - x2 */													
+                /* Des[w1] = ((sum+x6)>>3 */								
+                
+                movq		mm5,	[edi+96]			/* mm5 = x6 */					
+                psubw		mm3,	[edi+32]			/* -= x2 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x6 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x6 */						
+                pand		mm4,	mm7					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x6 */						
+                movq		[esi+80], mm4				/* write new x6 */				
+                
+                /* sum += p2 - x3 */													
+                /* Des[w2] = (sum+x7)>>3 */								
+                
+                movq		mm5,	[edi+112]			/* mm5 = x7 */					
+                psubw		mm3,	[edi+48]			/* -= x3 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x7 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x7 */						
+                pand		mm4,	mm7					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x7 */						
+                movq		[esi+96], mm4				/* write new x7 */				
+                
+                /* sum += p2 - x4 */													
+                /* Des[w3] = ((sum+x8)>>3 */								
+                
+                movq		mm5,	[edi+128]			/* mm5 = x8 */					
+                psubw		mm3,	[edi+64]			/* -= x4 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x8 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x8 */						
+                pand		mm4,	mm7					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x8 */						
+                movq		[esi+112], mm4				/* write new x8 */				
+                
+                /* done with left four columns */										
+                /* now do the righ four columns */										
+				add			edi,	8					/* shift to right four column */
+				add			esi,	8					/* shift to right four column */
+				
+				/* mm0 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* mm0 now are in use  */										
+                /* find the loop filtered values for the pixels on block boundary */
+                movq        mm1,    LoopFLimitMmx;      /* Get the Flimit values for loop filter */
+                movq        mm3,    [edi + 48]          /* mm3 = x3 = p[-2] */
+
+                movq        mm4,    [edi + 64]          /* mm4 = x4 = p[-1] */
+                movq        mm5,    [edi + 80]          /* mm5 = x5 = p[ 0] */
+
+                movq        mm6,    [edi + 96]          /* mm6 = x6 = p[ 1] */
+                psubw       mm5,    mm4                 /* mm5 = p[ 0] - p[-1] */
+
+                psubw       mm3,    mm6                 /* mm3 = p[-2] - p[ 1] */
+                movq        mm4,    mm5                 /* make a copy */
+
+                paddw       mm4,    mm5                 /* 2 * ( p[0] - p[-1] ) */
+                paddw       mm3,    FourFours           /* mm3 + 4 */
+
+                paddw       mm5,    mm4                 /* 3 * ( p[0] - p[-1] ) */
+                paddw       mm3,    mm5                 /* Filtval before shift */
+
+                psraw       mm3,    3                   /* FiltVal */
+                movq        mm2,    mm3                 /* make a copy */
+
+                psraw       mm3,    15                  /* FFFF->Neg, 0000->Pos */
+                pxor        mm2,    mm3
+
+                psubsw      mm2,    mm3                 /* mm2 = abs(FiltVal) */
+                por         mm3,    FourOnes            /* -1 and 1 for + and - */
+
+                movq        mm4,    mm1                 /* make a copy of Flimit */
+                psubw       mm1,    mm2                 /* mm1= Flimit - abs(FiltVal) */
+
+                movq        mm5,    mm1                 /* copy Flimit - abs(FiltVal) */
+                psraw       mm1,    15                  /* FFFF or 0000 */
+
+                pxor        mm5,    mm1                 
+                psubsw      mm5,    mm1                 /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     mm4,    mm5                 /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      mm4,    mm3                 /* get the sign back */
+
+                movq        mm1,    [edi+64]            /* p[-1] */
+                movq        mm2,    [edi+80]            /* p[0] */
+                
+                paddw       mm1,    mm4                 /* p[-1] + NewFiltVal */
+                psubw       mm2,    mm4                 /* p[0] - NewFiltVal */
+
+                pxor        mm6,    mm6                 /* clear mm6 */
+                
+                packuswb    mm1,    mm1                 /* clamping */
+                packuswb    mm2,    mm2                 /* clamping */
+
+                punpcklbw   mm1,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesUp, mm1   /* save the values */
+
+                punpcklbw   mm2,    mm6                 /* unpack to word */
+                movq        LoopFilteredValuesDown, mm2 /* save the values */
+                
+                
+                /* Let's do the filtering now */										
+                /* p1 = Src[-5] */		
+                /* p2 = Src[+4] */		
+                /* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+                
+                movq		mm3,	[edi]			    /* mm3 = [-5] */
+                movq		mm2,	[edi+144]			/* mm2 = [4] */					
+                
+                movq		mm1,	mm3					/* p1 = [-4] */					
+                paddw		mm3,	mm3					/* mm3 = p1 + p1 */				
+                
+                movq		mm4,	[edi+16]			/* mm4 = x1 */					
+                paddw		mm3,	mm1					/* mm3 = p1 + p1 + p1 */		
+                
+                paddw		mm3,	[edi+32]			/* mm3 = p1+p1+p1+ x2 */		
+                paddw		mm4,	[edi+48]			/* mm4 = x1+x3 */				
+                
+                paddw		mm3,	[edi+64]			/* mm3 += x4 */					
+                paddw		mm4,	FourFours			/* mm4 = x1 + x3 + 4 */			
+                
+                paddw		mm3,	mm4					/* mm3 = 3*p1+x1+x2+x3+x4+4 */	
+                
+                /* Des[-w4] = (((sum + x1) >> 3; */			
+                /* Des[-w4] = Src[-w4]; */												
+                /* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+                
+                movq		mm4,	mm3					/* mm4 = mm3 */					
+                movq		mm5,	[edi+16]			/* mm5 = x1 */					
+                
+                paddw		mm4,	mm5					/* mm4 = sum+x1 */				
+                psraw		mm4,	3					/* mm4 >>=4 */					
+                
+                psubw		mm4,	mm5					/* New Value - old Value */		
+                pand		mm4,	mm0					/* And the flag */				
+                
+                paddw		mm4,	mm5					/* add the old value back */	
+                movq		[esi],	mm4					/* Write new x1 */				
+                
+                /* sum += x5 -p1 */														
+                /* Des[-w3]=((sum+x2)>>3 */									
+                
+                movq		mm5,	[edi+32]			/* mm5= x2 */					
+                psubw		mm3,	mm1					/* sum=sum-p1 */				
+                
+                paddw		mm3,    [edi+80]			/* sum=sum+x5 */				
+                movq		mm4,	mm5					/* copy sum */					
+                
+                paddw		mm4,	mm3					/* mm4=sum+x2 */				
+                psraw		mm4,	3					/* mm4=((sum+x2)<<1-x5+x6)>>4 */
+                psubw		mm4,	mm5					/* new value - old value	*/	
+                
+                pand		mm4,	mm0					/* And the flag */				
+                paddw		mm4,	mm5					/* add the old value back */	
+                
+                movq		[esi+16], mm4				/* write new x2 */				
+                
+                /* sum += x6 - p1 */													
+                /* Des[-w2]=((sum+x[3])>>3 */								
+                
+                movq		mm5,	[edi+48]			/* mm5= x3 */					
+                psubw		mm3,	mm1					/* sum=sum-p1 */				
+                
+                paddw		mm3,    [edi+96]			/* sum=sum+x6 */				
+                movq		mm4,	mm5					/* copy x3 */					
+                
+                paddw		mm4,	mm3					/* mm4=sum+x3 */				
+                psraw		mm4,	3					/* mm4=((sum+x3)<<1-x6+x7)>>4 */
+                
+                psubw		mm4,	mm5					/* new value - old value	*/	
+                pand		mm4,	mm0					/* And the flag */				
+                
+                paddw		mm4,	mm5					/* add the old value back */	
+                movq		[esi+32], mm4				/* write new x3 */				
+                
+                /* sum += x7 - p1 */													
+                /* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+                
+                movq		mm5,	[edi+64]			/* mm5 = x4 */					
+                psubw		mm3,	mm1					/* sum = sum-p1 */				
+                
+                paddw		mm3,	[edi+112]			/* sum = sum+x7 */				
+                movq		mm4,	mm5					/* mm4 = x4 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum + x4 */			
+                movq        mm5,    LoopFilteredValuesUp/* Read the loopfiltered value of x4 */
+                
+                psraw		mm4,	3					/* >>=4 */						
+                psubw		mm4,	mm5					/* -=x4 */						
+                
+                pand		mm4,	mm0					/* and flag */					
+                paddw		mm4,	mm5					/* += x4 */						
+                
+                movq		[esi+48], mm4				/* write new x4 */				
+                
+                /* sum+= x8-x1 */														
+                /* Des[0]=((sum+x5)>>3 */								
+                
+                movq		mm5,	[edi+80]			/* mm5 = x5 */					
+                psubw		mm3,	[edi+16]			/* sum -= x1 */					
+                
+                paddw		mm3,	[edi+128]			/* sub += x8 */					
+                movq		mm4,	mm5					/* mm4 = x5 */					
+                
+                paddw		mm4,	mm3					/* mm4= sum+x5 */				
+                movq        mm5,    LoopFilteredValuesDown/* Read the loopfiltered value of x4 */
+                
+                psraw		mm4,	3					/* >>=4 */						
+                psubw		mm4,	mm5					/* -=x5 */						
+                
+                pand		mm4,	mm0					/* and flag */					
+                paddw		mm4,	mm5					/* += x5 */										
+                
+                movq		[esi+64], mm4				/* write new x5 */				
+                
+                /* sum += p2 - x2 */													
+                /* Des[w1] = ((sum+x6)>>3 */								
+                
+                movq		mm5,	[edi+96]			/* mm5 = x6 */					
+                psubw		mm3,	[edi+32]			/* -= x2 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x6 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x6 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x6 */						
+                pand		mm4,	mm0					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x6 */						
+                movq		[esi+80], mm4				/* write new x6 */				
+                
+                /* sum += p2 - x3 */													
+                /* Des[w2] = (sum+x7)>>3 */								
+                
+                movq		mm5,	[edi+112]			/* mm5 = x7 */					
+                psubw		mm3,	[edi+48]			/* -= x3 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x7 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x7 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x7 */						
+                pand		mm4,	mm0					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x7 */						
+                movq		[esi+96], mm4				/* write new x7 */				
+                
+                /* sum += p2 - x4 */													
+                /* Des[w3] = ((sum+x8)>>3 */								
+                
+                movq		mm5,	[edi+128]			/* mm5 = x8 */					
+                psubw		mm3,	[edi+64]			/* -= x4 */						
+                
+                paddw		mm3,	mm2					/* += p2 */						
+                movq		mm4,	mm5					/* mm4 = x8 */					
+                
+                paddw		mm4,	mm3					/* mm4 = sum+x8 */				
+                psraw		mm4,	3					/* >>=3 */						
+                
+                psubw		mm4,	mm5					/* -=x8 */						
+                pand		mm4,	mm0					/* and flag */					
+                
+                paddw		mm4,	mm5					/* += x8 */						
+                movq		[esi+112], mm4				/* write new x8 */				
+				
+				/* done with right four column */	
+				/* transpose */
+				mov			eax,	Des					/* the destination */			
+				add			edi,	8					/* shift edi to point x1 */
+
+				sub			esi,	8					/* shift esi back to left x1 */
+				sub			eax,	4
+
+				movq		mm0,	[esi]				/* mm0 = 30 20 10 00 */
+				movq		mm1,	[esi+16]			/* mm1 = 31 21 11 01 */
+
+				movq		mm4,	mm0					/* mm4 = 30 20 10 00 */
+				punpcklwd	mm0,	mm1					/* mm0 = 11 10 01 00 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 31 30 21 20 */
+				movq		mm2,	[esi+32]			/* mm2 = 32 22 12 02 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 33 23 13 03 */
+				movq		mm5,	mm2					/* mm5 = 32 22 12 02 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 13 12 03 02 */
+				punpckhwd	mm5,	mm3					/* mm5 = 33 32 23 22 */
+
+				movq		mm1,	mm0					/* mm1 = 11 10 01 00 */
+				punpckldq	mm0,	mm2					/* mm0 = 03 02 01 00 */
+
+				movq		[edi],	mm0					/* write 00 01 02 03 */
+				punpckhdq	mm1,	mm2					/* mm1 = 13 12 11 10 */
+				
+				movq		mm0,	mm4					/* mm0 = 31 30 21 20 */
+				movq		[edi+16], mm1				/* write 10 11 12 13 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 23 22 21 20 */
+				punpckhdq	mm4,	mm5					/* mm4 = 33 32 31 30 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 34 24 14 04 */
+				movq		mm2,	[esi+80]			/* mm2 = 35 25 15 05 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 36 26 16 06 */
+				movq		mm6,	[esi+112]			/* mm6 = 37 27 17 07 */
+								
+				movq		mm3,	mm1					/* mm3 = 34 24 14 04 */
+				movq		mm7,	mm5					/* mm7 = 36 26 16 06 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 15 14 05 04 */
+				punpckhwd	mm3,	mm2					/* mm3 = 35 34 25 24 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 17 16 07 06 */
+				punpckhwd	mm7,	mm6					/* mm7 = 37 36 27 26 */
+
+				movq		mm2,	mm1					/* mm2 = 15 14 05 04 */
+				movq		mm6,	mm3					/* mm6 = 35 34 25 24 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 07 06 05 04 */
+				punpckhdq	mm2,	mm5					/* mm2 = 17 16 15 14 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 27 26 25 24 */
+				punpckhdq	mm6,	mm7					/* mm6 = 37 36 35 34 */
+			
+				movq		mm5,	[edi]				/* mm5 = 03 02 01 00 */
+				packuswb	mm5,	mm1					/* mm5 = 07 06 05 04 03 02 01 00 */
+				
+				movq		[eax],	mm5					/* write 00 01 02 03 04 05 06 07 */
+				movq		mm7,	[edi+16]			/* mm7 = 13 12 11 10 */
+
+				packuswb	mm7,	mm2					/* mm7 = 17 16 15 14 13 12 11 10 */
+				movq		[eax+ecx], mm7				/* write 10 11 12 13 14 15 16 17 */
+
+				packuswb	mm0,	mm3					/* mm0 = 27 26 25 24 23 22 21 20 */
+				packuswb	mm4,	mm6					/* mm4 = 37 36 35 34 33 32 31 30 */
+				
+				movq		[eax+ecx*2], mm0			/* write 20 21 22 23 24 25 26 27 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 30 31 32 33 34 35 36 37 */
+				add			edi, 8						/* move to right four column */
+				add			esi, 8						/* move to right x1 */
+
+				movq		mm0,	[esi]				/* mm0 = 70 60 50 40 */
+				movq		mm1,	[esi+16]			/* mm1 = 71 61 51 41 */
+
+				movq		mm4,	mm0					/* mm4 = 70 60 50 40 */
+				punpcklwd	mm0,	mm1					/* mm0 = 51 50 41 40 */
+
+				punpckhwd	mm4,	mm1					/* mm4 = 71 70 61 60 */
+				movq		mm2,	[esi+32]			/* mm2 = 72 62 52 42 */
+
+				movq		mm3,	[esi+48]			/* mm3 = 73 63 53 43 */
+				movq		mm5,	mm2					/* mm5 = 72 62 52 42 */
+
+				punpcklwd	mm2,	mm3					/* mm2 = 53 52 43 42 */
+				punpckhwd	mm5,	mm3					/* mm5 = 73 72 63 62 */
+
+				movq		mm1,	mm0					/* mm1 = 51 50 41 40 */
+				punpckldq	mm0,	mm2					/* mm0 = 43 42 41 40 */
+
+				movq		[edi],	mm0					/* write 40 41 42 43 */
+				punpckhdq	mm1,	mm2					/* mm1 = 53 52 51 50 */
+				
+				movq		mm0,	mm4					/* mm0 = 71 70 61 60 */
+				movq		[edi+16], mm1				/* write 50 51 52 53 */
+
+				punpckldq	mm0,	mm5					/* mm0 = 63 62 61 60 */
+				punpckhdq	mm4,	mm5					/* mm4 = 73 72 71 70 */
+
+				movq		mm1,	[esi+64]			/* mm1 = 74 64 54 44 */
+				movq		mm2,	[esi+80]			/* mm2 = 75 65 55 45 */				
+
+				movq		mm5,	[esi+96]			/* mm5 = 76 66 56 46 */
+				movq		mm6,	[esi+112]			/* mm6 = 77 67 57 47 */
+								
+				movq		mm3,	mm1					/* mm3 = 74 64 54 44 */
+				movq		mm7,	mm5					/* mm7 = 76 66 56 46 */
+
+				punpcklwd	mm1,	mm2					/* mm1 = 55 54 45 44 */
+				punpckhwd	mm3,	mm2					/* mm3 = 75 74 65 64 */
+
+				punpcklwd	mm5,	mm6					/* mm5 = 57 56 47 46 */
+				punpckhwd	mm7,	mm6					/* mm7 = 77 76 67 66 */
+
+				movq		mm2,	mm1					/* mm2 = 55 54 45 44 */
+				movq		mm6,	mm3					/* mm6 = 75 74 65 64 */
+
+				punpckldq	mm1,	mm5					/* mm1 = 47 46 45 44 */
+				punpckhdq	mm2,	mm5					/* mm2 = 57 56 55 54 */
+
+				punpckldq	mm3,	mm7					/* mm3 = 67 66 65 64 */
+				punpckhdq	mm6,	mm7					/* mm6 = 77 76 75 74 */
+			
+				movq		mm5,	[edi]				/* mm5 = 43 42 41 40 */
+				packuswb	mm5,	mm1					/* mm5 = 47 46 45 44 43 42 41 40 */
+				
+				movq		[eax],	mm5					/* write 40 41 42 43 44 45 46 47 */
+				movq		mm7,	[edi+16]			/* mm7 = 53 52 51 50 */
+
+				packuswb	mm7,	mm2					/* mm7 = 57 56 55 54 53 52 51 50 */
+				movq		[eax+ecx], mm7				/* write 50 51 52 53 54 55 56 57 */
+
+				packuswb	mm0,	mm3					/* mm0 = 67 66 65 64 63 62 61 60 */
+				packuswb	mm4,	mm6					/* mm4 = 77 76 75 74 73 72 71 70 */
+				
+				movq		[eax+ecx*2], mm0			/* write 60 61 62 63 64 65 66 67 */
+				lea			eax,	[eax+ecx*4]			/* mov forward the desPtr */
+
+				movq		[eax+edx],	mm4				/* write 70 71 72 73 74 75 76 77 */
+				
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			ebp
+				pop			eax
+			}//__asm	
+		Var1 = Variance11[0]+ Variance11[1]+Variance11[2]+Variance11[3];
+		Var1 += Variance11[4]+ Variance11[5]+Variance11[6]+Variance11[7];
+		pbi->FragmentVariances[CurrentFrag-1] += Var1;
+
+		Var2 = Variance21[0]+ Variance21[1]+Variance21[2]+Variance21[3];
+		Var2 += Variance21[4]+ Variance21[5]+Variance21[6]+Variance21[7];
+		pbi->FragmentVariances[CurrentFrag] += Var2;
+
+
+        CurrentFrag ++;
+		}//else
+			
+	}//while
+
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PlaneAddNoise_mmx
+ *
+ *  INPUTS        : UINT8 *Start    starting address of buffer to add gaussian
+ *                                  noise to
+ *                  UINT32 Width    width of plane
+ *                  UINT32 Height   height of plane
+ *                  INT32  Pitch    distance between subsequent lines of frame
+ *                  int    q        quantizer used to determine amount of noise 
+ *                                  to add (sigma = 1 + .8*(63-q)/63)
+ *
+ *  OUTPUTS       : None (the plane is modified in place).
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : adds gaussian noise to a plane of pixels
+ *
+ *  SPECIAL NOTES : Each row is processed 8 bytes at a time and the loop test
+ *                  follows the loop body, so at least 8 bytes of every row
+ *                  are touched and, when Width is not a multiple of 8, up to
+ *                  7 bytes beyond Width are modified as well.
+ *
+ *                  rand() is called 2048 times to build the noise pool and
+ *                  once more per row.
+ *
+ *                  NOTE(review): the noise for a row is read from a 2048 byte
+ *                  pool starting at a random offset of 0..255, so rows wider
+ *                  than 2048-255 bytes read past the end of the pool.
+ *
+ *                  NOTE(review): no emms is issued after the MMX loop;
+ *                  presumably the caller clears the MMX state - confirm.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_mmx( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+    unsigned int i;
+
+#if defined(_WIN32_WCE)
+#pragma pack(16)
+	unsigned char blackclamp[16];
+	unsigned char whiteclamp[16];
+	unsigned char bothclamp[16];
+#pragma pack()
+#else
+	__declspec(align(16)) unsigned char blackclamp[16];
+	__declspec(align(16)) unsigned char whiteclamp[16];
+	__declspec(align(16)) unsigned char bothclamp[16];
+#endif 
+    char CharDist[300];     /* noise distribution; entries 0..255 get sampled */
+    char Rand[2048];        /* pool of noise values, completely filled below */
+    double sigma;
+
+    /* empty the MMX state before the floating point code below runs */
+    __asm emms
+    sigma = 1 + .8*(63-q) / 63.0;
+
+    // set up a lookup table of 256 entries that matches 
+    // a gaussian distribution with sigma determined by q.
+    // Each value x in -32..31 is repeated round(256*gaussian(sigma,0,x))
+    // times, lowest value first, so CharDist[0] holds the most negative
+    // noise value that can occur.
+    {
+        double x;
+        int next = 0;
+        int j;
+
+        for(x=-32;x<32;x++)
+        {
+            int a = (int)(.5+256*gaussian(sigma,0,x));
+
+            /* when a <= 0 the loop is skipped, j stays 0 and next is unchanged */
+            for(j=0;j<a;j++)
+            {
+                CharDist[next+j]=(char) x;
+            }
+            next = next+j;
+        }
+
+        /* pad with "no noise" in case rounding left fewer than 256 entries */
+        for(;next<256;next++)
+            CharDist[next] = 0;
+    }
+
+    /* draw the noise pool from the distribution */
+    for(i=0;i<2048;i++)
+    {
+        Rand[i]=CharDist[rand() & 0xff];
+    }
+
+    /* c = -CharDist[0]; the three constants below are used to clamp every
+       pixel into [c, 255-c] before the wrapping byte add of the noise */
+	for(i=0;i<16;i++)
+	{
+		blackclamp[i]=-CharDist[0];
+		whiteclamp[i]=-CharDist[0];
+		bothclamp[i]=-2*CharDist[0];
+	}
+
+    for(i=0;i<Height;i++)
+    {
+        UINT8 *Pos = Start + i *Pitch;              /* start of row i */
+        INT8  *Ref = Rand + (rand() & 0xff);        /* random start in the noise pool */
+
+        __asm
+        {
+			mov ecx, [Width]
+            mov esi,Pos
+            mov edi,Ref
+			xor		    eax,eax               // eax = byte offset within the row
+
+    		nextset:
+            movq        mm1,[esi+eax]         // get the source
+
+			psubusb     mm1,blackclamp        // max(p - c, 0)
+			paddusb     mm1,bothclamp         // min(.. + 2c, 255)
+			psubusb     mm1,whiteclamp        // .. - c  => p clamped to [c, 255-c]
+
+            movq        mm2,[edi+eax]         // get the noise for these 8 pixels
+            paddb       mm1,mm2               // add it in (wrapping add)
+            movq        [esi+eax],mm1         // store the result
+
+            add         eax,8                 // advance to the next 8 pixels of the row
+
+			cmp         eax, ecx
+			jl			nextset
+
+
+        }
+
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c
new file mode 100644
index 00000000..10ff9cee
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deblockwmtopt.c
@@ -0,0 +1,2828 @@
+/****************************************************************************
+ *
+ *   Module Title :     DeblockwmtOpt.c
+ *
+ *   Description  :     Optimized functions for deblocking 
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ *****************************************************************************
+ *  Revision History
+ *
+ *  1.02 YWX 08-Dec-00 Configuration baseline from deblockopt.c
+ *
+ *****************************************************************************
+ */
+ 
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+
+
+#include "postp.h"
+#include "stdlib.h"
+#include <math.h>
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+#if defined(_WIN32_WCE)  /* Windows CE build: none of the constant tables below are defined */
+#else  /* all other builds: 16-byte-aligned tables of identical 16-bit words */
+__declspec(align(16)) static short Eight128s[] = {128, 128, 128, 128,128, 128, 128, 128 };  /* 8 x 128 */
+__declspec(align(16)) static short Eight64s[] = {64, 64, 64, 64, 64, 64, 64, 64  };  /* 8 x 64 */
+__declspec(align(16)) static short EightThrees[]= {3, 3, 3, 3, 3, 3, 3, 3};  /* 8 x 3; loaded with movdqa, multiplied by QStep twice and shifted right by 5 to form FLimit */
+__declspec(align(16)) static short EightFours[]= {4, 4, 4, 4, 4, 4, 4, 4};  /* 8 x 4; the "+ 4" term added into the filter sum */
+__declspec(align(16)) static short Four128s[] = {128, 128, 128, 128};  /* 4 x 128 */
+__declspec(align(16)) static short Four64s[] = {64, 64, 64, 64 };  /* 4 x 64 */
+__declspec(align(16)) static short FourThrees[]= {3, 3, 3, 3};  /* 4 x 3 */
+__declspec(align(16)) static short FourFours[]= {4, 4, 4, 4};  /* 4 x 4 */
+__declspec(align(16)) static short EightOnes[]= { 1, 1, 1, 1, 1, 1, 1, 1};  /* 8 x 1 */
+#endif  /* defined(_WIN32_WCE) */
+
+/****************************************************************************
+ *  Explicit Imports
+ *****************************************************************************
+ */              
+
+extern double gaussian(double sigma, double mu, double x);
+extern UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeblockLoopFilteredBand_WMT
+ *
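+ *  INPUTS        :     pbi, SrcPtr, DesPtr, PlaneLineStep, FragAcross, StartFrag, QuantScale
+ *                               
+ *  OUTPUTS       :     Rows written through DesPtr (filtered, or copied from SrcPtr when QStep <= 3); pbi->FragmentVariances updated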
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Filter both horizontal and vertical edges in a band
+ *
+ *  SPECIAL NOTES :     
+ *
+ *	REFERENCE	  :		
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void DeblockLoopFilteredBand_WMT(
+                                 POSTPROC_INSTANCE *pbi, 
+                                 UINT8 *SrcPtr, 
+                                 UINT8 *DesPtr,
+                                 UINT32 PlaneLineStep, 
+                                 UINT32 FragAcross,
+                                 UINT32 StartFrag,
+                                 UINT32 *QuantScale
+							    )
+{
+	UINT32 j;
+	UINT32 CurrentFrag=StartFrag;
+	UINT32 QStep;
+	UINT8 *Src, *Des;
+	UINT32 Var1, Var2;
+
+#if defined(_WIN32_WCE)
+	return;
+#else
+
+__declspec(align(16)) short QStepWMT[8];
+__declspec(align(16)) short FLimitWMT[8];
+__declspec(align(16)) short Rows[80];
+
+__declspec(align(16)) unsigned short Variance1[8];
+__declspec(align(16)) unsigned short Variance2[8];
+
+
+	Src=SrcPtr;
+	Des=DesPtr;
+
+	while(CurrentFrag < StartFrag + FragAcross )
+    {
+        
+        QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+        if( QStep > 3 )
+        {
+            QStepWMT[0] = (INT16)QStep;
+            QStepWMT[1] = (INT16)QStep;
+            QStepWMT[2] = (INT16)QStep;
+            QStepWMT[3] = (INT16)QStep;
+            QStepWMT[4] = (INT16)QStep;
+            QStepWMT[5] = (INT16)QStep;
+            QStepWMT[6] = (INT16)QStep;
+            QStepWMT[7] = (INT16)QStep;
+
+			__asm 
+			{
+				
+				/* Save the registers */
+				push		eax
+				push		ecx			
+				push		edx
+				push		esi
+				push		edi
+				
+				
+				/* Calculate the FLimit and store FLimit and QStep */					
+				
+				movdqa		xmm0,	QStepWMT			/* xmm0 = QStep */				
+				movdqa		xmm1,	EightThrees			/* mm1 = 03030303 */			
+
+                pmullw		xmm1,	xmm0				/* mm1 = QStep * 3 */			
+				pmullw		xmm1,	xmm0				/* mm1 = QStep * QStep * 3 */	
+				
+				psrlw		xmm1,	5					/* mm1 = FLimit */				
+				movdqa		[FLimitWMT], xmm1			/* Save FLimit */				
+				
+				/* setup the pointers */
+				mov			eax,	Src					/* eax = Src */					
+				xor			edx,	edx					/* clear edx */					
+
+				mov			esi,	Des					/* esi = Des */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */								
+				pxor		xmm7,	xmm7				/* Clear xmm7 */				
+				
+				sub			edx,	ecx					/* edx = -Pitch */								
+				
+				lea			eax,	[eax + edx * 4 ]	/* eax = Src - 4*Pitch */		
+				lea			esi,	[esi + edx * 2 ]	/* esi = Des - 2 * Pitch */
+
+				/* Copy the data to the intermediate buffer */							
+				
+				movq		xmm0,	QWORD PTR [eax + edx]/* xmm0 = Src[-5*Pitch] */		
+				movq		xmm1,	QWORD PTR [eax ]	/* xmm1 = Src[-4*Pitch */
+				
+				punpcklbw	xmm0,	xmm7				/* expand to words */
+				punpcklbw	xmm1,	xmm7				/* expand to words */
+
+				movdqa		[edi],	xmm0				/* write 8 words */
+				movdqa		[edi+16], xmm1				/* write 8 words */
+
+				movq		xmm2,	QWORD PTR [eax+ecx]	/* xmm2 = Src[-3*Pitch] */		
+				movq		xmm3,	QWORD PTR [eax+ecx*2]/* xmm3 = Src[-2*Pitch] */
+
+				punpcklbw	xmm2,	xmm7				/* expand to words */
+				punpcklbw	xmm3,	xmm7				/* expand to words */
+				
+				movdqa		[edi+32], xmm2				/* write 8 words */
+				movdqa		[edi+48], xmm3				/* write 8 words */
+
+				lea			eax,	[eax+ecx*4]			/* eax= Src */
+
+				movq		xmm0,	QWORD PTR [eax + edx]/* xmm0 = Src[-Pitch] */		
+				movq		xmm1,	QWORD PTR [eax ]	/* xmm1 = Src[0] */
+				
+				punpcklbw	xmm0,	xmm7				/* expand to words */
+				punpcklbw	xmm1,	xmm7				/* expand to words */
+
+				movdqa		[edi+64], xmm0				/* write 8 words */
+				movdqa		[edi+80], xmm1				/* write 8 words */
+
+				movq		xmm2,	QWORD PTR [eax+ecx]	/* xmm2 = Src[Pitch] */		
+				movq		xmm3,	QWORD PTR [eax+ecx*2]/* xmm3 = Src[2*Pitch] */
+
+				punpcklbw	xmm2,	xmm7				/* expand to words */
+				punpcklbw	xmm3,	xmm7				/* expand to words */
+				
+				movdqa		[edi+96],  xmm2				/* write 8 words */
+				movdqa		[edi+112], xmm3				/* write 8 words */
+
+				lea			eax,	[eax+ecx*4]			/* eax= Src+4*Pitch */
+
+				movq		xmm0,	QWORD PTR [eax + edx]/* xmm0 = Src[3*Pitch] */		
+				movq		xmm1,	QWORD PTR [eax ]	/* xmm1 = Src[4*Pitch] */
+				
+				punpcklbw	xmm0,	xmm7				/* expand to words */
+				punpcklbw	xmm1,	xmm7				/* expand to words */
+
+				movdqa		[edi+128], xmm0				/* write 8 words */
+				movdqa		[edi+144], xmm1				/* write 8 words */
+
+				
+				/* done with copying everything to intermediate buffer */				
+				/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+		
+				/* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */				
+				/* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */								
+				
+				pcmpeqw		xmm3,	xmm3				/* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */	
+				psllw		xmm3,	15					/* xmm3 = 80008000800080008000800080008000 */	
+				psrlw		xmm3,	8					/* xmm3 = 00800080008000800080008000800080 */
+				
+				movdqa		xmm2,	[edi+16]			/* Pixel 1 */					
+				movdqa		xmm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				movdqa		xmm0,	xmm2				/* xmm0 = pixel 1 */				
+				movdqa		xmm4,	xmm6				/* xmm4 = pixel 5 */				
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel1 * pixel1 */		
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel5 * pixel5 */		
+				
+				movdqa		xmm1,	xmm2				/* xmm1 = pixel1^2 */			
+				movdqa		xmm5,	xmm6				/* xmm5 = pixel5^2 */			
+				
+				movdqa		xmm2,	[edi+32]			/* Pixel 2 */					
+				movdqa		xmm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 2 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 6 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel2^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel6^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel2^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel6^2 */			
+				
+				movdqa		xmm2,	[edi+48]			/* Pixel 3 */					
+				movdqa		xmm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 3 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 7 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel3^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel7^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel3^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel7^2 */			
+				
+				movdqa		xmm2,	[edi+64]			/* Pixel 4 */					
+				movdqa		xmm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 4 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 8 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel4^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel8^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 = pixel4^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 = pixel8^2 */			
+				
+				/* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* xmm1 = x1 + x2 + x3 + x4 */											
+				/* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				/* xmm5 = x5 + x6 + x7 + x8 */											
+				
+				movdqa		xmm7,	xmm3				/* xmm7 = xmm3 */					
+				psrlw		xmm7,	7					/* xmm7 = 00010001000100010001000100010001 */	
+				
+				movdqa		xmm2,	xmm0				/* make copy of sum1 */			
+				movdqa		xmm6,	xmm4				/* make copy of sum2 */			
+				
+				paddw		xmm0,	xmm7				/* (sum1 + 1) */				
+				paddw		xmm4,	xmm7				/* (sum2 + 1) */				
+				
+				psraw		xmm2,	1					/* sum1 /2 */					
+				psraw		xmm6,	1					/* sum2 /2 */					
+				
+				psraw		xmm0,	1					/* (sum1 + 1)/2 */				
+				psraw		xmm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		xmm2,	xmm0				/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		xmm6,	xmm4				/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		xmm1,	xmm2				/* Variance 1 */				
+				psubw		xmm5,	xmm6				/* Variance 2 */				
+				
+				movdqa		xmm7,	FLimitWMT			/* xmm7 = FLimit */				
+				movdqa		xmm2,	xmm1				/* copy of Varinace 1*/
+
+				movdqa		[Variance1], xmm1			/* save the varinace1 */
+				movdqa		[Variance2], xmm5			/* save the varinace2 */
+
+				movdqa		xmm6,	xmm5				/* Variance 2 */
+				psubw		xmm1,	xmm7				/* Variance 1 < Flimit? */		
+				
+				psubw		xmm5,	xmm7				/* Variance 2 < Flimit? */		
+				psraw		xmm2,	15					/* Variance 1 > 32768? */
+
+				psraw		xmm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		xmm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		xmm5,	15					/* FFFF/0000 for true/false */	
+				movdqa		xmm7,	[edi+64]			/* xmm0 = Pixel 4			*/	
+
+				pandn		xmm2,	xmm1				/* Variance1<32678 && 
+															Variance1<Limit			*/
+				pandn		xmm6,	xmm5				/* Variance2<32678 && 
+														   Variance1<Limit			*/
+				
+				movdqa		xmm4,	[edi+80]			/* xmm4 = Pixel 5			*/	
+				pand		xmm6,	xmm2				/* xmm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+
+				movdqa		xmm2,	xmm7				/* make copy of Pixel4		*/	
+
+				psubusw		xmm7,	xmm4				/* 4 - 5 */						
+				psubusw		xmm4,	xmm2				/* 5 - 4 */						
+				
+				por			xmm7,	xmm4				/* abs(4 - 5) */				
+				psubw		xmm7,	QStepWMT			/* abs(4-5)<QStepxmmx ? */		
+				
+				psraw		xmm7,	15					/* FFFF/0000 for True/Flase */
+				pand		xmm7,	xmm6													
+				
+				/* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* xmm7 now are in use  */										
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movdqa		xmm5,	[edi]				/* xmm5 = -5 */					
+				movdqa		xmm4,	[edi + 16]			/* xmm4 = -4 */					
+				
+				movdqa		xmm3,	xmm4				/* copy of -4 */				
+				movdqa		xmm6,	xmm5				/* copy of -5 */				
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [-4] - [-5] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [-5] - [-4] */			
+				
+				por			xmm4,	xmm5				/* abs([-4]-[-5] ) */			
+				psubw		xmm4,	QStepWMT			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm1,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm1,	xmm3				/*							*/	
+				
+				por			xmm1,	xmm4				/* xmm1 = p1				*/	
+				
+				/* now find P2 */														
+				
+				movdqa		xmm4,	[edi+128]			/* xmm4 = [3] */					
+				movdqa		xmm5,	[edi+144]			/* xmm5 = [4] */					
+				
+				movdqa		xmm3,	xmm4				/* copy of 3 */					
+				movdqa		xmm6,	xmm5				/* copy of 4 */					
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [3] - [4] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [4] - [3] */			
+				
+				por			xmm4,	xmm5				/* abs([3]-[4] ) */				
+				psubw		xmm4,	QStepWMT			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm2,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm2,	xmm3				/*							*/	
+				
+				por			xmm2,	xmm4				/* xmm2 = p2				*/	
+
+				/* Data is ready, now do the filtering */
+				
+				pxor		xmm0,	xmm0				/* clear xmm0 */
+
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+
+				
+				movdqa		xmm3,	xmm1				/* xmm3 = p1 */					
+				paddw		xmm3,	xmm3				/* xmm3 = p1 + p1 */				
+				
+				paddw		xmm3,	xmm1				/* xmm3 = p1 + p1 + p1 */		
+				movdqa		xmm4,	[edi+16]			/* xmm4 = x1 */					
+				
+				paddw		xmm3,	[edi+32]			/* xmm3 = p1+p1+p1+ x2 */		
+				paddw		xmm4,	[edi+48]			/* xmm4 = x1+x3 */				
+				
+				paddw		xmm3,	[edi+64]			/* xmm3 += x4 */					
+				paddw		xmm4,	EightFours			/* xmm4 = x1 + x3 + 4 */			
+				
+				paddw		xmm3,	xmm4				/* xmm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movdqa		xmm4,	xmm3				/* xmm4 = xmm3 */					
+				
+				movdqa		xmm5,	[edi+16]			/* xmm5 = x1 */					
+				paddw		xmm4,	xmm5				/* xmm4 = sum+x1 */				
+				
+				psllw		xmm4,	1					/* xmm4 = (sum+x1)<<1 */			
+				psubw		xmm4,	[edi+64]			/* xmm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		xmm4,	[edi+80]			/* xmm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		xmm4,	4					/* xmm4 >>=4 */					
+				
+				psubw		xmm4,	xmm5				/* New Value - old Value */		
+				pand		xmm4,	xmm7				/* And the flag */				
+				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				
+				movq		QWORD PTR [esi+edx*2], xmm4	/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movdqa		xmm5,	[edi+32]			/* xmm5= x2 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+80]			/* sum=sum+x5 */				
+				movdqa		xmm4,	xmm5				/* copy sum */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x2 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+80]			/* xmm4 =(sum+x2)<<1-x5 */		
+				paddw		xmm4,	[edi+96]			/* xmm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movq		QWORD PTR [esi+edx], xmm4	/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movdqa		xmm5,	[edi+48]			/* xmm5= x3 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+96]			/* sum=sum+x6 */				
+				movdqa		xmm4,	xmm5				/* copy x3 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x3 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+96]			/* xmm4 =(sum+x3)<<1-x6 */		
+				paddw		xmm4,	[edi+112]			/* xmm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movq		QWORD PTR [esi],xmm4		/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movdqa		xmm5,	[edi+64]			/* xmm5 = x4 */					
+				psubw		xmm3,	xmm1				/* sum = sum-p1 */				
+				
+				paddw		xmm3,	[edi+112]			/* sum = sum+x7 */				
+				movdqa		xmm4,	xmm5				/* xmm4 = x4 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum + x4 */			
+				paddw		xmm4,	xmm4				/* xmm4 *=2 */					
+				
+				paddw		xmm4,	xmm1				/* += p1 */						
+				psubw		xmm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		xmm4,	[edi+112]			/* -= x7 */						
+				paddw		xmm4,	[edi+128]			/* += x8 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x4 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x4 */						
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movq	    QWORD PTR [esi+ecx], xmm4	/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movdqa		xmm5,	[edi+80]			/* xmm5 = x5 */					
+				psubw		xmm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		xmm3,	[edi+128]			/* sub += x8 */					
+				movdqa		xmm4,	xmm5				/* xmm4 = x5 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4= sum+x5 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2 */					
+				
+				paddw		xmm4,	[edi+16]			/* += x1 */						
+				psubw		xmm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		xmm4,	[edi+128]			/* -= x8 */						
+				paddw		xmm4,	xmm2				/* += p2 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x5 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x5 */						
+				
+				lea			esi,	[esi+ecx*4]			/* esi=des + 2*pitch */
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+
+				movq		QWORD PTR [esi+edx*2], xmm4	/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movdqa		xmm5,	[edi+96]			/* xmm5 = x6 */					
+				psubw		xmm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x6 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x6 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+32]			/* +=x2 */						
+				psubw		xmm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x6 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x6 */						
+				
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movq		QWORD PTR [esi+edx], xmm4	/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movdqa		xmm5,	[edi+112]			/* xmm5 = x7 */					
+				psubw		xmm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x7 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x7 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+48]			/* +=x3 */						
+				psubw		xmm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x7 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x7 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movq		QWORD PTR [esi],xmm4		/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movdqa		xmm5,	[edi+128]			/* xmm5 = x8 */					
+				psubw		xmm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x8 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x8 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+64]			/* +=x4 */						
+				psubw		xmm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x8 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x8 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movq		QWORD PTR [esi+ecx], xmm4				/* write new x8 */				
+
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			eax
+
+                } /* end of the macro */
+    		Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+	    	Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+
+		    pbi->FragmentVariances[CurrentFrag] += Var1;
+		    pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+        
+        }
+        else
+        {
+
+			/* copy from src to des */
+			__asm	
+			{
+				push		esi
+				push		edi
+				push		ecx
+				
+				mov			esi,	Src					/* esi = Src */					
+				mov			edi,	Des					/* edi = Des */				
+
+				push		edx
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				xor			edx,	edx					/* clear edx */					
+				
+				sub			edx,	ecx					/* edx = -Pitch */				
+				lea			esi,	[esi+edx*4]			/* esi=Src-4*Pitch*/
+				
+				movq		mm0,	[esi]				/* first row */
+				movq		[edi+edx*4],	mm0			/* write first row */
+				
+				lea			edi,	[edi+edx*4]			/* edi=Des-4*Pitch*/
+				movq		mm1,	[esi+ecx]			/* Src-3*Pitch */
+
+				movq		[edi+ecx],	mm1				/* write second row */
+				movq		mm2,	[esi+ecx*2]			/* Src-2*Pitch */
+
+				lea			esi,	[esi+ecx*4]			/* Src */
+				movq		[edi+ecx*2], mm2			/* write third row */
+
+				lea			edi,	[edi+ecx*4]			/* Des */
+				movq		mm3,	[esi+edx]			/* Src-Pitch */
+				
+				movq		[edi+edx],	mm3				/* write fourth row */				
+				movq		mm4,	[esi]				/* Src */
+
+				movq		mm5,	[esi+ecx]			/* Src+Pitch */
+				movq		[edi],	mm4					/* write fifth rwo */
+
+				movq		mm6,	[esi+ecx*2]
+				lea			esi,	[esi+ecx*4]			/* Src+pitch*4 */
+
+				movq		[edi+ecx], mm5				/* write the sixth rwo */
+				movq		[edi+ecx*2], mm6			/* write the seventh row */
+
+				movq		mm7,	[esi+edx]
+				lea			edi,	[edi+ecx*4]			/* Des+Pitch*4 */
+
+				movq		[edi+edx], mm7				/* write the last row */
+
+				pop			edx
+				pop			ecx
+				pop			edi
+				pop			esi				
+			}
+
+        }
+		
+		Src += 8;
+		Des += 8;
+		CurrentFrag ++;
+	}
+
+	Des -= ((PlaneLineStep + FragAcross)<<3);
+	Des += 8;
+	Src = Des;
+
+	CurrentFrag = StartFrag ;
+
+	while(CurrentFrag < StartFrag + FragAcross - 1)
+	{
+
+        QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];		
+
+        if( QStep > 3 )
+        {
+            QStepWMT[0] = (INT16)QStep;
+            QStepWMT[1] = (INT16)QStep;
+            QStepWMT[2] = (INT16)QStep;
+            QStepWMT[3] = (INT16)QStep;
+            QStepWMT[4] = (INT16)QStep;
+            QStepWMT[5] = (INT16)QStep;
+            QStepWMT[6] = (INT16)QStep;
+            QStepWMT[7] = (INT16)QStep;
+
+		    for( j=0; j<8;j++)
+		    {
+    			Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+	    		Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);		
+    		}
+
+	    	__asm
+    		{
+				/* Save the registers */
+				push		eax
+				push		ecx			
+				push		edx
+				push		esi
+				push		edi
+				
+				/* Calculate the FLimit and store FLimit and QStep */					
+				
+				movdqa		xmm0,	QStepWMT            /* Get QStep */
+				movdqa		xmm1,	EightThrees			/* mm1 = 03030303 */			
+
+                pmullw		xmm1,	xmm0				/* mm1 = QStep * 3 */							
+				pmullw		xmm1,	xmm0				/* mm1 = QStep * QStep * 3 */					
+				
+                psrlw		xmm1,	5					/* mm1 = FLimit */				
+				movdqa		[FLimitWMT], xmm1			/* Save FLimit */				
+
+				/* setup the pointers to data */
+
+				mov			eax,	Src					/* eax = Src */
+				xor			edx,	edx					/* clear edx */
+				
+				mov			esi,	Des					/* esi = Des */
+				sub			eax,	4					/* eax = Src-4 */
+
+				sub			esi,	4					/* esi = Des-4 */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				sub			edx,	ecx					/* edx = -Pitch */				
+
+				lea			esi,	[esi+ecx*2]			/* esi = Des-4 + 2 * Pitch */
+				
+				/* Get the data to the intermediate buffer */
+
+				movq		mm0,	[eax]				/* mm0 = 07 06 05 04 03 02 01 00 */
+				movq		mm1,	[eax+ecx]			/* mm1 = 17 16 15 14 13 12 11 10 */
+
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 27 26 25 24 23 22 21 20 */
+				lea			eax,	[eax+ecx*4]			/* Go down four Rows */	
+
+				movq		mm3,	[eax+edx]			/* mm3 = 37 36 35 34 33 32 31 30 */
+				movq		mm4,	mm0					/* mm4 = 07 06 05 04 03 02 01 00 */
+			
+				punpcklbw	mm0,	mm1					/* mm0 = 13 03 12 02 11 01 10 00 */
+				punpckhbw	mm4,	mm1					/* mm4 = 17 07 16 06 15 05 14 04 */
+
+				movq		mm5,	mm2					/* mm5 = 27 26 25 24 23 22 21 20 */
+				punpcklbw	mm2,	mm3					/* mm2 = 33 23 32 22 31 21 30 20 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 37 27 36 26 35 25 34 24 */
+				movq		mm1,	mm0					/* mm1 = 13 03 12 02 11 01 10 00 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 31 21 11 01 30 20 10 00 */
+				punpckhwd	mm1,	mm2					/* mm1 = 33 23 13 03 32 22 12 02 */
+				
+				movq		mm2,	mm4					/* mm2 = 17 07 16 06 15 05 14 04 */
+				punpckhwd	mm4,	mm5					/* mm4 = 37 27 17 07 36 26 16 06 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 35 25 15 05 34 24 14 04 */
+				pxor		mm7,	mm7					/* clear mm7 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 30 20 10 00 */
+
+				movq		[edi+16], mm0				/* write 00 10 20 30 */
+				punpckhbw	mm5,	mm7					/* mm5 = 31 21 11 01 */
+
+				movq		mm0,	mm1					/* mm0 =33 23 13 03 32 22 12 02 */
+				movq		[edi+32], mm5				/* write 01 11 21 31 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 32 22 12 02 */
+				punpckhbw	mm0,	mm7					/* mm0 = 33 23 12 03 */
+
+				movq		[edi+48], mm1				/* write 02 12 22 32 */
+				movq		mm3,	mm2					/* mm3 = 35 25 15 05 34 24 14 04 */
+				
+				movq		mm5,	mm4					/* mm5 = 37 27 17 07 36 26 16 06 */
+				movq		[edi+64], mm0				/* write 03 13 23 33 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 34 24 14 04 */
+				punpckhbw	mm3,	mm7					/* mm3 = 35 25 15 05 */
+
+				movq		[edi+80], mm2				/* write 04 14 24 34 */
+				punpcklbw	mm4,	mm7					/* mm4 = 36 26 16 06 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 37 27 17 07 */
+				movq		[edi+96], mm3				/* write 05 15 25 35 */
+			
+				movq		mm0,	[eax]				/* mm0 = 47 46 45 44 43 42 41 40 */
+				movq		mm1,	[eax + ecx ]		/* mm1 = 57 56 55 54 53 52 51 50 */
+
+				movq		[edi+112], mm4				/* write 06 16 26 37 */
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 67 66 65 64 63 62 61 60 */
+
+				lea			eax,	[eax+ ecx*4]		/* Go down four rows */
+				movq		[edi+128], mm5				/* write 07 17 27 37 */
+
+				movq		mm4,	mm0					/* mm4 = 47 46 45 44 43 42 41 40 */
+				movq		mm3,	[eax+edx]			/* mm3 = 77 76 75 74 73 72 71 70 */
+
+				punpcklbw	mm0,	mm1					/* mm0 = 53 43 52 42 51 41 50 40 */
+				punpckhbw	mm4,	mm1					/* mm4 = 57 57 56 46 55 45 54 44 */
+
+				movq		mm5,	mm2					/* mm5 = 67 66 65 64 63 62 61 60 */
+				punpcklbw	mm2,	mm3					/* mm2 = 73 63 72 62 71 61 70 60 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 77 67 76 66 75 65 74 64 */
+				movq		mm1,	mm0					/* mm1 = 53 43 52 42 51 41 50 40 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 71 61 51 41 70 60 50 40 */
+				punpckhwd	mm1,	mm2					/* mm1 = 73 63 53 43 72 62 52 42 */
+				
+				movq		mm2,	mm4					/* mm2 = 57 57 56 46 55 45 54 44 */
+				punpckhwd	mm4,	mm5					/* mm4 = 77 67 57 47 76 66 56 46 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 75 65 55 45 74 64 54 44 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 70 60 50 40 */
+
+				movq		[edi+24], mm0				/* write 40 50 60 70 */
+				punpckhbw	mm5,	mm7					/* mm5 = 71 61 51 41 */
+
+				movq		mm0,	mm1					/* mm0 = 73 63 53 43 72 62 52 42 */
+				movq		[edi+40], mm5				/* write 41 51 61 71 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 72 62 52 42 */
+				punpckhbw	mm0,	mm7					/* mm0 = 73 63 53 43 */
+
+				movq		[edi+56], mm1				/* write 42 52 62 72 */
+				movq		mm3,	mm2					/* mm3 = 75 65 55 45 74 64 54 44 */
+				
+				movq		mm5,	mm4					/* mm5 = 77 67 57 47 76 66 56 46 */
+				movq		[edi+72], mm0				/* write 43 53 63 73 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 74 64 54 44 */
+				punpckhbw	mm3,	mm7					/* mm3 = 75 65 55 45 */
+
+				movq		[edi+88], mm2				/* write 44 54 64 74 */
+				punpcklbw	mm4,	mm7					/* mm4 = 76 66 56 46 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 77 67 57 47 */
+				movq		[edi+104], mm3				/* write 45 55 65 75 */
+			
+				movq		[edi+120], mm4				/* write 46 56 66 76 */
+				movq		[edi+136], mm5				/* write 47 57 67 77 */
+
+				/* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */				
+				/* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */								
+				
+				pcmpeqw		xmm3,	xmm3				/* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */	
+				psllw		xmm3,	15					/* xmm3 = 80008000800080008000800080008000 */	
+				psrlw		xmm3,	8					/* xmm3 = 00800080008000800080008000800080 */
+				
+				movdqa		xmm2,	[edi+16]			/* Pixel 1 */					
+				movdqa		xmm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				movdqa		xmm0,	xmm2				/* xmm0 = pixel 1 */				
+				movdqa		xmm4,	xmm6				/* xmm4 = pixel 5 */				
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel1 * pixel1 */		
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel5 * pixel5 */		
+				
+				movdqa		xmm1,	xmm2				/* xmm1 = pixel1^2 */			
+				movdqa		xmm5,	xmm6				/* xmm5 = pixel5^2 */			
+				
+				movdqa		xmm2,	[edi+32]			/* Pixel 2 */					
+				movdqa		xmm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 2 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 6 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel2^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel6^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel2^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel6^2 */			
+				
+				movdqa		xmm2,	[edi+48]			/* Pixel 3 */					
+				movdqa		xmm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 3 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 7 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel3^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel7^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel3^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel7^2 */			
+				
+				movdqa		xmm2,	[edi+64]			/* Pixel 4 */					
+				movdqa		xmm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 4 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 8 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel4^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel8^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 = pixel4^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 = pixel8^2 */			
+				
+				/* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* xmm1 = x1 + x2 + x3 + x4 */											
+				/* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				/* xmm5 = x5 + x6 + x7 + x8 */											
+				
+				movdqa		xmm7,	xmm3				/* xmm7 = xmm3 */					
+				psrlw		xmm7,	7					/* xmm7 = 00010001000100010001000100010001 */	
+				
+				movdqa		xmm2,	xmm0				/* make copy of sum1 */			
+				movdqa		xmm6,	xmm4				/* make copy of sum2 */			
+				
+				paddw		xmm0,	xmm7				/* (sum1 + 1) */				
+				paddw		xmm4,	xmm7				/* (sum2 + 1) */				
+				
+				psraw		xmm2,	1					/* sum1 /2 */					
+				psraw		xmm6,	1					/* sum2 /2 */					
+				
+				psraw		xmm0,	1					/* (sum1 + 1)/2 */				
+				psraw		xmm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		xmm2,	xmm0				/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		xmm6,	xmm4				/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		xmm1,	xmm2				/* Variance 1 */				
+				psubw		xmm5,	xmm6				/* Variance 2 */				
+				
+				movdqa		xmm7,	FLimitWMT			/* xmm7 = FLimit */				
+				movdqa		xmm2,	xmm1				/* copy of Varinace 1*/
+
+                movdqa		[Variance1], xmm1			/* save the varinace1 */
+				movdqa		[Variance2], xmm5			/* save the varinace2 */
+
+				movdqa		xmm6,	xmm5				/* Variance 2 */
+				psubw		xmm1,	xmm7				/* Variance 1 < Flimit? */		
+				
+				psubw		xmm5,	xmm7				/* Variance 2 < Flimit? */		
+				psraw		xmm2,	15					/* Variance 1 > 32768? */
+
+				psraw		xmm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		xmm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		xmm5,	15					/* FFFF/0000 for true/false */	
+				movdqa		xmm7,	[edi+64]			/* xmm0 = Pixel 4			*/	
+
+				pandn		xmm2,	xmm1				/* Variance1<32678 && 
+															Variance1<Limit			*/
+				pandn		xmm6,	xmm5				/* Variance2<32678 && 
+														   Variance1<Limit			*/
+				
+				movdqa		xmm4,	[edi+80]			/* xmm4 = Pixel 5			*/	
+				pand		xmm6,	xmm2				/* xmm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+
+				movdqa		xmm2,	xmm7				/* make copy of Pixel4		*/	
+
+				psubusw		xmm7,	xmm4				/* 4 - 5 */						
+				psubusw		xmm4,	xmm2				/* 5 - 4 */						
+				
+				por			xmm7,	xmm4				/* abs(4 - 5) */				
+				psubw		xmm7,	QStepWMT			/* abs(4-5)<QStepxmmx ? */		
+				
+				psraw		xmm7,	15					/* FFFF/0000 for True/Flase */
+				pand		xmm7,	xmm6													
+				
+				/* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* xmm7 now are in use  */										
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movdqa		xmm5,	[edi]				/* xmm5 = -5 */					
+				movdqa		xmm4,	[edi + 16]			/* xmm4 = -4 */					
+				
+				movdqa		xmm3,	xmm4				/* copy of -4 */				
+				movdqa		xmm6,	xmm5				/* copy of -5 */				
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [-4] - [-5] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [-5] - [-4] */			
+				
+				por			xmm4,	xmm5				/* abs([-4]-[-5] ) */			
+				psubw		xmm4,	QStepWMT			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm1,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm1,	xmm3				/*							*/	
+				
+				por			xmm1,	xmm4				/* xmm1 = p1				*/	
+				
+				/* now find P2 */														
+				
+				movdqa		xmm4,	[edi+128]			/* xmm4 = [3] */					
+				movdqa		xmm5,	[edi+144]			/* xmm5 = [4] */					
+				
+				movdqa		xmm3,	xmm4				/* copy of 3 */					
+				movdqa		xmm6,	xmm5				/* copy of 4 */					
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [3] - [4] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [4] - [3] */			
+				
+				por			xmm4,	xmm5				/* abs([3]-[4] ) */				
+				psubw		xmm4,	QStepWMT			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm2,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm2,	xmm3				/*							*/	
+				
+				por			xmm2,	xmm4				/* xmm2 = p2				*/	
+
+				/* Data is ready, now do the filtering */
+				
+				pxor		xmm0,	xmm0				/* clear xmm0 */
+
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+
+				
+				movdqa		xmm3,	xmm1				/* xmm3 = p1 */					
+				paddw		xmm3,	xmm3				/* xmm3 = p1 + p1 */				
+				
+				paddw		xmm3,	xmm1				/* xmm3 = p1 + p1 + p1 */		
+				movdqa		xmm4,	[edi+16]			/* xmm4 = x1 */					
+				
+				paddw		xmm3,	[edi+32]			/* xmm3 = p1+p1+p1+ x2 */		
+				paddw		xmm4,	[edi+48]			/* xmm4 = x1+x3 */				
+				
+				paddw		xmm3,	[edi+64]			/* xmm3 += x4 */					
+				paddw		xmm4,	EightFours			/* xmm4 = x1 + x3 + 4 */			
+				
+				paddw		xmm3,	xmm4				/* xmm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movdqa		xmm4,	xmm3				/* xmm4 = xmm3 */					
+				
+				movdqa		xmm5,	[edi+16]			/* xmm5 = x1 */					
+				paddw		xmm4,	xmm5				/* xmm4 = sum+x1 */				
+				
+				psllw		xmm4,	1					/* xmm4 = (sum+x1)<<1 */			
+				psubw		xmm4,	[edi+64]			/* xmm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		xmm4,	[edi+80]			/* xmm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		xmm4,	4					/* xmm4 >>=4 */					
+				
+				psubw		xmm4,	xmm5				/* New Value - old Value */		
+				pand		xmm4,	xmm7				/* And the flag */				
+				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				
+				movdq2q		mm0,	xmm4				/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movdqa		xmm5,	[edi+32]			/* xmm5= x2 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+80]			/* sum=sum+x5 */				
+				movdqa		xmm4,	xmm5				/* copy sum */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x2 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+80]			/* xmm4 =(sum+x2)<<1-x5 */		
+				paddw		xmm4,	[edi+96]			/* xmm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movdq2q		mm1,	xmm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movdqa		xmm5,	[edi+48]			/* xmm5= x3 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+96]			/* sum=sum+x6 */				
+				movdqa		xmm4,	xmm5				/* copy x3 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x3 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+96]			/* xmm4 =(sum+x3)<<1-x6 */		
+				paddw		xmm4,	[edi+112]			/* xmm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movdq2q		mm2,	xmm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8]>>4 */						
+				
+				movdqa		xmm5,	[edi+64]			/* xmm5 = x4 */					
+				psubw		xmm3,	xmm1				/* sum = sum-p1 */				
+				
+				paddw		xmm3,	[edi+112]			/* sum = sum+x7 */				
+				movdqa		xmm4,	xmm5				/* xmm4 = x4 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum + x4 */			
+				paddw		xmm4,	xmm4				/* xmm4 *=2 */					
+				
+				paddw		xmm4,	xmm1				/* += p1 */						
+				psubw		xmm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		xmm4,	[edi+112]			/* -= x7 */						
+				paddw		xmm4,	[edi+128]			/* += x8 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x4 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x4 */						
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movdq2q		mm3,	xmm4				/* write new x4 */				
+				
+
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movdqa		xmm5,	[edi+80]			/* xmm5 = x5 */					
+				psubw		xmm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		xmm3,	[edi+128]			/* sub += x8 */					
+				movdqa		xmm4,	xmm5				/* xmm4 = x5 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4= sum+x5 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2 */					
+				
+				paddw		xmm4,	[edi+16]			/* += x1 */						
+				psubw		xmm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		xmm4,	[edi+128]			/* -= x8 */						
+				paddw		xmm4,	xmm2				/* += p2 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x5 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x5 */						
+				
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movdq2q		mm4,	xmm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movdqa		xmm5,	[edi+96]			/* xmm5 = x6 */					
+				psubw		xmm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x6 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x6 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+32]			/* +=x2 */						
+				psubw		xmm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x6 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x6 */						
+				
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movdq2q		mm5,	xmm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movdqa		xmm5,	[edi+112]			/* xmm5 = x7 */					
+				psubw		xmm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x7 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x7 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+48]			/* +=x3 */						
+				psubw		xmm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x7 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x7 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movdq2q		mm6,	xmm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movdqa		xmm5,	[edi+128]			/* xmm5 = x8 */					
+				psubw		xmm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x8 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x8 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+64]			/* +=x4 */						
+				psubw		xmm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x8 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x8 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movdq2q		mm7,	xmm4				/* write new x8 */				
+
+
+				/* transpose */
+				movq2dq		xmm0,	mm0					/* xmm0 = 70 60 50 40 30 20 10 00 */
+				movq2dq		xmm1,	mm1					/* xmm1 = 71 61 51 41 31 21 11 01 */
+
+				movq2dq		xmm2,	mm2					/* xmm2 = 72 62 52 42 32 22 12 02 */
+				movq2dq		xmm3,	mm3					/* xmm3 = 73 63 53 43 33 23 13 03 */
+
+				punpcklbw	xmm0,	xmm1				/* xmm0 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+				punpcklbw	xmm2,	xmm3				/* xmm2 = 7372 6362 5352 4342 3332 2322 1312 0302 */
+
+				movdqa		xmm1,	xmm0				/* xmm1 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+				punpcklwd	xmm0,	xmm2				/* xmm0 = 33323130 23222120 13121110 03020100 */
+
+				punpckhwd	xmm1,	xmm2				/* xmm1 = 73727170 63626160 53525150 43424140 */
+				
+				movq2dq		xmm4,	mm4					/* xmm4 = 74 64 54 44 34 24 14 04 */
+				movq2dq		xmm5,	mm5					/* xmm5 = 75 65 55 45 35 25 15 05 */				
+
+				movq2dq		xmm6,	mm6 				/* xmm6 = 76 66 56 46 36 26 16 06 */
+				movq2dq		xmm7,	mm7					/* xmm7 = 77 67 57 47 37 27 17 07 */
+								
+				punpcklbw	xmm4,	xmm5				/* xmm4 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+				punpcklbw	xmm6,	xmm7				/* xmm6 = 7776 6766 5756 4746 3736 2726 1716 0706 */
+
+				movdqa		xmm5,	xmm4				/* xmm5 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+				punpcklwd	xmm4,	xmm6				/* xmm4 = 37363534 27262524 17161514 07060504 */
+
+				punpckhwd	xmm5,	xmm6				/* xmm5 = 77767574 67666564 57565554 47464544 */
+				movdqa		xmm2,	xmm0				/* xmm2 = 33323130 23222120 13121110 03020100 */
+
+				punpckldq	xmm0,	xmm4				/* xmm0 = 1716151413121110	0706050403020100 */
+				movq		QWORD PTR [esi+edx*2],xmm0	/* write 00 01 02 03 04 05 06 07 */
+
+				psrldq		xmm0,	8					/* xmm0 = 1716151413121110 */
+				punpckhdq	xmm2,	xmm4				/* xmm2 = 3736353433323130	2726252423222120 */
+
+				movq		QWORD PTR [esi+edx], xmm0	/* write 10 11 12 13 14 15 16 17 */
+				movdqa		xmm3,	xmm1				/* xmm3 = 73727170 63626160 53525150 43424140 */
+				
+				punpckldq	xmm1,	xmm5				/* xmm1 = 5756555453525150 4746454443424140 */
+				movq		QWORD PTR [esi],	xmm2	/* write 20 21 22 23 24 25 26 27 */
+				
+				psrldq		xmm2,	8					/* xmm2 = 3736353433323130 */
+				punpckhdq	xmm3,	xmm5				/* xmm3 = 7776757473727170 6766656463626160 */
+
+				movq		QWORD PTR [esi+ecx], xmm2	/* write 30 31 32 33 34 35 36 37 */
+				lea			esi,	[esi+ecx*4]			/* esi= Des - 4 + 4 *pitch */
+				
+				movq		QWORD PTR [esi+edx*2], xmm1	/* write 40 41 42 43 44 45 46 47 */
+				movq		QWORD PTR [esi],	xmm3	/* write 60 61 62 63 64 65 66 67 */
+
+				psrldq		xmm1,	8					/* xmm1 = 5756555453525150 */
+				psrldq		xmm3,	8					/* xmm3 = 7776757473727170 */
+
+				movq		QWORD PTR [esi+edx], xmm1	/* write 50 51 52 53 54 55 56 57 */
+				movq		QWORD PTR [esi+ecx], xmm3	/* write 70 71 72 73 74 75 76 77 */
+
+
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			eax
+	    	}// end of __asm	
+
+            Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+	    	Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+
+		    pbi->FragmentVariances[CurrentFrag] += Var1;
+		    pbi->FragmentVariances[CurrentFrag + 1] += Var2;
+        }// end of if
+		CurrentFrag ++;
+		Src += 8;
+		Des += 8;		
+	}//end of while
+#endif
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     DeblockNonFilteredBand_WMT
+ *
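+ *  INPUTS        :     POSTPROC_INSTANCE *pbi : Post-processor instance.
+ *                      UINT8 *SrcPtr          : Pointer to the source band.
+ *                      UINT8 *DesPtr          : Pointer to the destination band.
+ *                      UINT32 PlaneLineStep   : Line pitch of the plane.
+ *                      UINT32 FragAcross      : Number of fragments across the band.
+ *                      UINT32 StartFrag       : Index of the first fragment in the band.
+ *                      UINT32 *QuantScale     : Table mapping a fragment Q index to QStep.
+ *                               
+ *  OUTPUTS       :     Filtered pixels are written through DesPtr.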
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Filters both the horizontal and vertical edges in a band.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *	REFERENCE	  :		
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void DeblockNonFilteredBand_WMT(
+                                 POSTPROC_INSTANCE *pbi, 
+                                 UINT8 *SrcPtr, 
+                                 UINT8 *DesPtr,
+                                 UINT32 PlaneLineStep, 
+                                 UINT32 FragAcross,
+                                 UINT32 StartFrag,
+                                 UINT32 *QuantScale
+							    )
+{
+	UINT32 j;
+	UINT32 CurrentFrag=StartFrag;
+	UINT32 QStep;
+    UINT32 LoopFLimit;
+	UINT8 *Src, *Des;
+	UINT32 Var1, Var2;
+#if defined(_WIN32_WCE)
+	return;
+#else
+__declspec(align(16)) short QStepWMT[8];
+__declspec(align(16)) short FLimitWMT[8];
+__declspec(align(16)) short Rows[80];
+__declspec(align(16)) short LoopFLimitWMT[8];
+__declspec(align(16)) short LoopFilteredValuesUp[8];
+__declspec(align(16)) short LoopFilteredValuesDown[8];
+
+__declspec(align(16)) unsigned short Variance1[8];
+__declspec(align(16)) unsigned short Variance2[8];
+
+
+    LoopFLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+    LoopFLimitWMT[0] = (INT16)LoopFLimit;
+    LoopFLimitWMT[1] = (INT16)LoopFLimit;
+    LoopFLimitWMT[2] = (INT16)LoopFLimit;
+    LoopFLimitWMT[3] = (INT16)LoopFLimit;
+    LoopFLimitWMT[4] = (INT16)LoopFLimit;
+    LoopFLimitWMT[5] = (INT16)LoopFLimit;
+    LoopFLimitWMT[6] = (INT16)LoopFLimit;
+    LoopFLimitWMT[7] = (INT16)LoopFLimit;
+
+
+	while(CurrentFrag < StartFrag + FragAcross )
+	{
+
+		Src=SrcPtr+8*(CurrentFrag-StartFrag);
+		Des=DesPtr+8*(CurrentFrag-StartFrag);
+
+		QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
+
+
+		__asm 
+		{
+			
+		    	push		eax
+		        push		ecx			
+		    	push		edx
+		    	push		esi
+		        push		edi
+	
+				/* Calculate the FLimit and store FLimit and QStep */					
+				/* Copy the data to the intermediate buffer */							
+				mov			eax,	    QStep
+				xor			edx,	    edx					/* clear edx */					
+
+				mov			ecx,	    PlaneLineStep		/* ecx = Pitch */				
+			    pcmpeqw		xmm6,	    xmm6				/* xmm6 = FFFFFF... */	
+				
+				
+				movd		mm5,	    eax                 /* mm5 = QStep */
+				psrlw		xmm6,	    14					/* xmm6 = 3, 3, 3, 3, 3, 3, 3, 3*/
+	
+			    punpcklwd	mm5,	    mm5					/* mm5 = QQ */
+            	mov			eax,	    Src					/* eax = Src */												
+	
+                punpckldq	mm5,	    mm5                 /* mm5 = QQQQ */
+            	sub			edx,	    ecx					/* edx = - Pitch */
+				
+                movq2dq     xmm5,       mm5                 /* xmm5 = QQQQ */
+            	punpcklqdq  xmm5,       xmm5                /* xmm5 = QQQQQQQQ */
+	
+            	pmullw		xmm6,	    xmm5			    /* Qstep * 3 */
+				movdqa      QStepWMT,	xmm5
+	
+                lea			edi,	    Rows				/* edi = Rows */				
+				pxor		xmm7,	    xmm7				/* Clear mm7 */					
+
+            	mov         esi,        Des                 /* esi = des */
+				pmullw		xmm6,	    xmm5
+	
+				lea			eax,	    [eax + edx * 4 ]	/* eax = Src - 4*Pitch */		
+            	lea         esi,        [esi + edx * 2]     /* esi = Des - 2*Pitch */
+
+                psraw       xmm6,       5
+                movdqa      FLimitWMT,  xmm6
+
+            	/* Copy the data to the intermediate buffer */
+            	
+				movq		xmm0,	    QWORD PTR [eax + edx]/* xmm0 = Src[-5*Pitch] */		
+				movq		xmm1,	    QWORD PTR [eax ]	/* xmm1 = Src[-4*Pitch */
+				
+				punpcklbw	xmm0,	    xmm7				/* expand to words */
+				punpcklbw	xmm1,	    xmm7				/* expand to words */
+
+				movdqa		[edi],	    xmm0				/* write 8 words */
+				movdqa		[edi+16],   xmm1				/* write 8 words */
+
+				movq		xmm2,	    QWORD PTR [eax+ecx]	/* xmm2 = Src[-3*Pitch] */		
+				movq		xmm3,	    QWORD PTR [eax+ecx*2]/* xmm3 = Src[-2*Pitch] */
+
+				punpcklbw	xmm2,	    xmm7				/* expand to words */
+				punpcklbw	xmm3,	    xmm7				/* expand to words */
+				
+				movdqa		[edi+32],   xmm2				/* write 8 words */
+				movdqa		[edi+48],   xmm3				/* write 8 words */
+
+				lea			eax,	    [eax+ecx*4]			/* eax= Src */
+
+				movq		xmm0,	    QWORD PTR [eax + edx]/* xmm0 = Src[-Pitch] */		
+				movq		xmm1,	    QWORD PTR [eax ]	/* xmm1 = Src[0] */
+				
+				punpcklbw	xmm0,	    xmm7				/* expand to words */
+				punpcklbw	xmm1,	    xmm7				/* expand to words */
+
+				movdqa		[edi+64],   xmm0				/* write 8 words */
+				movdqa		[edi+80],   xmm1				/* write 8 words */
+
+				movq		xmm2,	    QWORD PTR [eax+ecx]	/* xmm2 = Src[Pitch] */		
+				movq		xmm3,	    QWORD PTR [eax+ecx*2]/* xmm3 = Src[2*Pitch] */
+
+				punpcklbw	xmm2,	    xmm7				/* expand to words */
+				punpcklbw	xmm3,	    xmm7				/* expand to words */
+				
+				movdqa		[edi+96],   xmm2				/* write 8 words */
+				movdqa		[edi+112],  xmm3				/* write 8 words */
+
+				lea			eax,	    [eax+ecx*4]			/* eax= Src+4*Pitch */
+
+				movq		xmm0,	    QWORD PTR [eax + edx]/* xmm0 = Src[3*Pitch] */		
+				movq		xmm1,	    QWORD PTR [eax ]	/* xmm1 = Src[4*Pitch] */
+				
+				punpcklbw	xmm0,	    xmm7				/* expand to words */
+				punpcklbw	xmm1,	    xmm7				/* expand to words */
+
+				movdqa		[edi+128],  xmm0				/* write 8 words */
+				movdqa		[edi+144],  xmm1				/* write 8 words */
+
+	
+				/* done with copying everything to intermediate buffer */				
+				/* Now, compute the variances for Pixel  1-4 and 5-8 */					
+		
+				/* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */				
+				/* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */								
+				
+				pcmpeqw		xmm3,	xmm3				/* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */	
+				psllw		xmm3,	15					/* xmm3 = 80008000800080008000800080008000 */	
+				psrlw		xmm3,	8					/* xmm3 = 00800080008000800080008000800080 */
+				
+				movdqa		xmm2,	[edi+16]			/* Pixel 1 */					
+				movdqa		xmm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				movdqa		xmm0,	xmm2				/* xmm0 = pixel 1 */				
+				movdqa		xmm4,	xmm6				/* xmm4 = pixel 5 */				
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel1 * pixel1 */		
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel5 * pixel5 */		
+				
+				movdqa		xmm1,	xmm2				/* xmm1 = pixel1^2 */			
+				movdqa		xmm5,	xmm6				/* xmm5 = pixel5^2 */			
+				
+				movdqa		xmm2,	[edi+32]			/* Pixel 2 */					
+				movdqa		xmm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 2 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 6 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel2^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel6^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel2^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel6^2 */			
+				
+				movdqa		xmm2,	[edi+48]			/* Pixel 3 */					
+				movdqa		xmm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 3 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 7 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel3^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel7^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel3^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel7^2 */			
+				
+				movdqa		xmm2,	[edi+64]			/* Pixel 4 */					
+				movdqa		xmm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 4 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 8 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel4^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel8^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 = pixel4^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 = pixel8^2 */			
+				
+				/* xmm0 = x1^2 + x2^2 + x3^2 + x4^2 */									
+				/* xmm1 = x1 + x2 + x3 + x4 */											
+				/* xmm4 = x5^2 + x6^2 + x7^2 + x8^2 */									
+				/* xmm5 = x5 + x6 + x7 + x8 */											
+				
+				movdqa		xmm7,	xmm3				/* xmm7 = xmm3 */					
+				psrlw		xmm7,	7					/* xmm7 = 00010001000100010001000100010001 */	
+				
+				movdqa		xmm2,	xmm0				/* make copy of sum1 */			
+				movdqa		xmm6,	xmm4				/* make copy of sum2 */			
+				
+				paddw		xmm0,	xmm7				/* (sum1 + 1) */				
+				paddw		xmm4,	xmm7				/* (sum2 + 1) */				
+				
+				psraw		xmm2,	1					/* sum1 /2 */					
+				psraw		xmm6,	1					/* sum2 /2 */					
+				
+				psraw		xmm0,	1					/* (sum1 + 1)/2 */				
+				psraw		xmm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		xmm2,	xmm0				/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		xmm6,	xmm4				/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		xmm1,	xmm2				/* Variance 1 */				
+				psubw		xmm5,	xmm6				/* Variance 2 */				
+				
+				movdqa		xmm7,	FLimitWMT			/* xmm7 = FLimit */				
+				movdqa		xmm2,	xmm1				/* copy of Varinace 1*/
+
+				movdqa		[Variance1], xmm1			/* save the varinace1 */
+				movdqa		[Variance2], xmm5			/* save the varinace2 */
+
+				movdqa		xmm6,	xmm5				/* Variance 2 */
+				psubw		xmm1,	xmm7				/* Variance 1 < Flimit? */		
+				
+				psubw		xmm5,	xmm7				/* Variance 2 < Flimit? */		
+				psraw		xmm2,	15					/* Variance 1 > 32768? */
+
+				psraw		xmm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		xmm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		xmm5,	15					/* FFFF/0000 for true/false */	
+				movdqa		xmm7,	[edi+64]			/* xmm0 = Pixel 4			*/	
+
+				pandn		xmm2,	xmm1				/* Variance1<32678 && 
+															Variance1<Limit			*/
+				pandn		xmm6,	xmm5				/* Variance2<32678 && 
+														   Variance1<Limit			*/
+				
+				movdqa		xmm4,	[edi+80]			/* xmm4 = Pixel 5			*/	
+				pand		xmm6,	xmm2				/* xmm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+
+				movdqa		xmm2,	xmm7				/* make copy of Pixel4		*/	
+
+				psubusw		xmm7,	xmm4				/* 4 - 5 */						
+				psubusw		xmm4,	xmm2				/* 5 - 4 */						
+				
+				por			xmm7,	xmm4				/* abs(4 - 5) */				
+				psubw		xmm7,	QStepWMT			/* abs(4-5)<QStepxmmx ? */		
+				
+				psraw		xmm7,	15					/* FFFF/0000 for True/Flase */
+				pand		xmm7,	xmm6													
+		
+                /* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* xmm7 now are in use  */										
+
+                
+                /* find the loop filtered values for the pixels on block boundary */
+                movdqa      xmm1,       LoopFLimitWMT;   /* Get the Flimit values for loop filter */
+                movdqa      xmm3,       [edi + 48]       /* xmm3 = x3 = p[-2] */
+
+                movdqa      xmm4,       [edi + 64]       /* mm4 = x4 = p[-1] */
+                movdqa      xmm5,       [edi + 80]       /* mm5 = x5 = p[ 0] */
+
+                movdqa      xmm6,       [edi + 96]       /* mm6 = x6 = p[ 1] */
+                psubw       xmm5,       xmm4             /* mm5 = p[ 0] - p[-1] */
+
+                psubw       xmm3,       xmm6             /* mm3 = p[-2] - p[ 1] */
+                movdqa      xmm4,       xmm5             /* make a copy */
+
+                paddw       xmm4,       xmm5             /* 2 * ( p[0] - p[-1] ) */
+                paddw       xmm3,       EightFours       /* mm3 + 4 */
+
+                paddw       xmm5,       xmm4             /* 3 * ( p[0] - p[-1] ) */
+                paddw       xmm3,       xmm5             /* Filtval before shift */
+
+                psraw       xmm3,       3                /* FiltVal */
+                movdqa      xmm2,       xmm3             /* make a copy */
+
+                psraw       xmm3,       15               /* FFFF->Neg, 0000->Pos */
+                pxor        xmm2,       xmm3
+
+                psubsw      xmm2,       xmm3             /* mm2 = abs(FiltVal) */
+                por         xmm3,       EightOnes        /* -1 and 1 for + and - */
+
+                movdqa      xmm4,       xmm1             /* make a copy of Flimit */
+                psubw       xmm1,       xmm2             /* mm1= Flimit - abs(FiltVal) */
+
+                movdqa      xmm5,       xmm1             /* copy Flimit - abs(FiltVal) */
+                psraw       xmm1,       15               /* FFFF or 0000 */
+
+                pxor        xmm5,       xmm1                 
+                psubsw      xmm5,       xmm1             /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     xmm4,       xmm5             /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      xmm4,       xmm3             /* get the sign back */
+
+                movdqa      xmm1,       [edi+64]         /* p[-1] */
+                movdqa      xmm2,       [edi+80]         /* p[0] */
+            
+                paddw       xmm1,       mm4              /* p[-1] + NewFiltVal */
+                psubw       xmm2,       mm4              /* p[0] - NewFiltVal */
+
+                pxor        xmm6,       xmm6             /* clear mm6 */
+                packuswb    xmm1,       xmm1             /* clamping */
+
+                packuswb    xmm2,       xmm2
+                punpcklbw   xmm1,       xmm6             /* unpack to word */
+            
+                movdqa      LoopFilteredValuesUp, xmm1   /* save the values */
+                punpcklbw   xmm2,       xmm6                 /* unpack to word */
+
+                movdqa      LoopFilteredValuesDown, xmm2 /* save the values */
+                
+				/* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movdqa		xmm5,	[edi]				/* xmm5 = -5 */					
+				movdqa		xmm4,	[edi + 16]			/* xmm4 = -4 */					
+				
+				movdqa		xmm3,	xmm4				/* copy of -4 */				
+				movdqa		xmm6,	xmm5				/* copy of -5 */				
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [-4] - [-5] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [-5] - [-4] */			
+				
+				por			xmm4,	xmm5				/* abs([-4]-[-5] ) */			
+				psubw		xmm4,	QStepWMT			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm1,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm1,	xmm3				/*							*/	
+				
+				por			xmm1,	xmm4				/* xmm1 = p1				*/	
+				
+				/* now find P2 */														
+				
+				movdqa		xmm4,	[edi+128]			/* xmm4 = [3] */					
+				movdqa		xmm5,	[edi+144]			/* xmm5 = [4] */					
+				
+				movdqa		xmm3,	xmm4				/* copy of 3 */					
+				movdqa		xmm6,	xmm5				/* copy of 4 */					
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [3] - [4] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [4] - [3] */			
+				
+				por			xmm4,	xmm5				/* abs([3]-[4] ) */				
+				psubw		xmm4,	QStepWMT			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm2,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm2,	xmm3				/*							*/	
+				
+				por			xmm2,	xmm4				/* xmm2 = p2				*/	
+
+				/* Data is ready, now do the filtering */
+				
+				pxor		xmm0,	xmm0				/* clear xmm0 */
+
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+
+				
+				movdqa		xmm3,	xmm1				/* xmm3 = p1 */					
+				paddw		xmm3,	xmm3				/* xmm3 = p1 + p1 */				
+				
+				paddw		xmm3,	xmm1				/* xmm3 = p1 + p1 + p1 */		
+				movdqa		xmm4,	[edi+16]			/* xmm4 = x1 */					
+				
+				paddw		xmm3,	[edi+32]			/* xmm3 = p1+p1+p1+ x2 */		
+				paddw		xmm4,	[edi+48]			/* xmm4 = x1+x3 */				
+				
+				paddw		xmm3,	[edi+64]			/* xmm3 += x4 */					
+				paddw		xmm4,	EightFours			/* xmm4 = x1 + x3 + 4 */			
+				
+				paddw		xmm3,	xmm4				/* xmm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movdqa		xmm4,	xmm3				/* xmm4 = xmm3 */					
+				
+				movdqa		xmm5,	[edi+16]			/* xmm5 = x1 */					
+				paddw		xmm4,	xmm5				/* xmm4 = sum+x1 */				
+				
+				psllw		xmm4,	1					/* xmm4 = (sum+x1)<<1 */			
+				psubw		xmm4,	[edi+64]			/* xmm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		xmm4,	[edi+80]			/* xmm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		xmm4,	4					/* xmm4 >>=4 */					
+				
+				psubw		xmm4,	xmm5				/* New Value - old Value */		
+				pand		xmm4,	xmm7				/* And the flag */				
+				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				
+				movq		QWORD PTR [esi+edx*2], xmm4	/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movdqa		xmm5,	[edi+32]			/* xmm5= x2 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+80]			/* sum=sum+x5 */				
+				movdqa		xmm4,	xmm5				/* copy sum */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x2 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+80]			/* xmm4 =(sum+x2)<<1-x5 */		
+				paddw		xmm4,	[edi+96]			/* xmm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movq		QWORD PTR [esi+edx], xmm4	/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movdqa		xmm5,	[edi+48]			/* xmm5= x3 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+96]			/* sum=sum+x6 */				
+				movdqa		xmm4,	xmm5				/* copy x3 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x3 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+96]			/* xmm4 =(sum+x3)<<1-x6 */		
+				paddw		xmm4,	[edi+112]			/* xmm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movq		QWORD PTR [esi],xmm4		/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+				
+				movdqa		xmm5,	[edi+64]			/* xmm5 = x4 */					
+				psubw		xmm3,	xmm1				/* sum = sum-p1 */				
+				
+				paddw		xmm3,	[edi+112]			/* sum = sum+x7 */				
+				movdqa		xmm4,	xmm5				/* xmm4 = x4 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum + x4 */			
+				paddw		xmm4,	xmm4				/* xmm4 *=2 */					
+				
+				paddw		xmm4,	xmm1				/* += p1 */						
+				psubw		xmm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		xmm4,	[edi+112]			/* -= x7 */						
+				paddw		xmm4,	[edi+128]			/* += x8 */						
+				
+				movdqa      xmm5,   LoopFilteredValuesUp /* Read the loop filtered value of x4 */
+                psraw		xmm4,	4					/* >>=4 */						
+
+				psubw		xmm4,	xmm5				/* -=x4 */						
+				pand		xmm4,	xmm7				/* and flag */					
+
+				paddw		xmm4,	xmm5				/* += x4 */										
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+
+				movq	    QWORD PTR [esi+ecx], xmm4	/* write new x4 */				
+				
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movdqa		xmm5,	[edi+80]			/* xmm5 = x5 */					
+				psubw		xmm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		xmm3,	[edi+128]			/* sub += x8 */					
+				movdqa		xmm4,	xmm5				/* xmm4 = x5 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4= sum+x5 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2 */					
+				
+				paddw		xmm4,	[edi+16]			/* += x1 */						
+				psubw		xmm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		xmm4,	[edi+128]			/* -= x8 */						
+				paddw		xmm4,	xmm2				/* += p2 */						
+				
+                movdqa      xmm5,   LoopFilteredValuesDown /* Read the loop filtered value of x5 */
+				psraw		xmm4,	4					/* >>=4 */						
+
+				psubw		xmm4,	xmm5				/* -=x5 */						
+				pand		xmm4,	xmm7				/* and flag */					
+
+				paddw		xmm4,	xmm5				/* += x5 */						
+				lea			esi,	[esi+ecx*4]			/* esi=des + 2*pitch */
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movq		QWORD PTR [esi+edx*2], xmm4	/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movdqa		xmm5,	[edi+96]			/* xmm5 = x6 */					
+				psubw		xmm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x6 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x6 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+32]			/* +=x2 */						
+				psubw		xmm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x6 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x6 */						
+				
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movq		QWORD PTR [esi+edx], xmm4	/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movdqa		xmm5,	[edi+112]			/* xmm5 = x7 */					
+				psubw		xmm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x7 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x7 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+48]			/* +=x3 */						
+				psubw		xmm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x7 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x7 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movq		QWORD PTR [esi],xmm4		/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movdqa		xmm5,	[edi+128]			/* xmm5 = x8 */					
+				psubw		xmm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x8 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x8 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+64]			/* +=x4 */						
+				psubw		xmm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x8 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x8 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movq		QWORD PTR [esi+ecx], xmm4				/* write new x8 */				
+
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			eax
+
+			
+        } /* end of the macro */
+        
+    	Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+	    Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+        pbi->FragmentVariances[CurrentFrag] += Var1;        
+        pbi->FragmentVariances[CurrentFrag + FragAcross] += Var2;
+        
+
+        if(CurrentFrag==StartFrag)
+			CurrentFrag++;
+		else
+		{
+			
+			Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
+			Src=Des;
+
+			QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];		
+			QStepWMT[0] = (INT16)QStep;
+            QStepWMT[1] = (INT16)QStep;
+            QStepWMT[2] = (INT16)QStep;
+            QStepWMT[3] = (INT16)QStep;
+            QStepWMT[4] = (INT16)QStep;
+            QStepWMT[5] = (INT16)QStep;
+            QStepWMT[6] = (INT16)QStep;
+            QStepWMT[7] = (INT16)QStep;
+
+		    for( j=0; j<8;j++)
+		    {
+    			Rows[j] = (short) (Src[-5 +j*PlaneLineStep]);
+	    		Rows[72+j] = (short)(Src[4+j*PlaneLineStep]);		
+    		}
+
+			__asm
+			{
+				/* Save the registers */
+				push		eax
+				push		ecx			
+				push		edx
+				push		esi
+				push		edi
+				
+				/* Calculate the FLimit and store FLimit and QStep */					
+				
+				movdqa		xmm0,	QStepWMT            /* Get QStep */
+				movdqa		xmm1,	EightThrees			/* mm1 = 03030303 */			
+
+                pmullw		xmm1,	xmm0				/* mm1 = QStep * 3 */							
+				pmullw		xmm1,	xmm0				/* mm1 = QStep * QStep * 3 */					
+				
+                psrlw		xmm1,	5					/* mm1 = FLimit */				
+				movdqa		[FLimitWMT], xmm1			/* Save FLimit */				
+
+				/* setup the pointers to data */
+
+				mov			eax,	Src					/* eax = Src */
+				xor			edx,	edx					/* clear edx */
+				
+				mov			esi,	Des					/* esi = Des */
+				sub			eax,	4					/* eax = Src-4 */
+
+				sub			esi,	4					/* esi = Des-4 */
+				lea			edi,	Rows				/* edi = Rows */				
+
+				mov			ecx,	PlaneLineStep		/* ecx = Pitch */				
+				sub			edx,	ecx					/* edx = -Pitch */				
+
+				lea			esi,	[esi+ecx*2]			/* esi = Des-4 + 2 * Pitch */
+				
+				/* Get the data to the intermediate buffer */
+
+				movq		mm0,	[eax]				/* mm0 = 07 06 05 04 03 02 01 00 */
+				movq		mm1,	[eax+ecx]			/* mm1 = 17 16 15 14 13 12 11 10 */
+
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 27 26 25 24 23 22 21 20 */
+				lea			eax,	[eax+ecx*4]			/* Go down four Rows */	
+
+				movq		mm3,	[eax+edx]			/* mm3 = 37 36 35 34 33 32 31 30 */
+				movq		mm4,	mm0					/* mm4 = 07 06 05 04 03 02 01 00 */
+			
+				punpcklbw	mm0,	mm1					/* mm0 = 13 03 12 02 11 01 10 00 */
+				punpckhbw	mm4,	mm1					/* mm4 = 17 07 16 06 15 05 14 04 */
+
+				movq		mm5,	mm2					/* mm5 = 27 26 25 24 23 22 21 20 */
+				punpcklbw	mm2,	mm3					/* mm2 = 33 23 32 22 31 21 30 20 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 37 27 36 26 35 25 34 24 */
+				movq		mm1,	mm0					/* mm1 = 13 03 12 02 11 01 10 00 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 31 21 11 01 30 20 10 00 */
+				punpckhwd	mm1,	mm2					/* mm1 = 33 23 13 03 32 22 12 02 */
+				
+				movq		mm2,	mm4					/* mm2 = 17 07 16 06 15 05 14 04 */
+				punpckhwd	mm4,	mm5					/* mm4 = 37 27 17 07 36 26 16 06 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 35 25 15 05 34 24 14 04 */
+				pxor		mm7,	mm7					/* clear mm7 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 30 20 10 00 */
+
+				movq		[edi+16], mm0				/* write 00 10 20 30 */
+				punpckhbw	mm5,	mm7					/* mm5 = 31 21 11 01 */
+
+				movq		mm0,	mm1					/* mm0 =33 23 13 03 32 22 12 02 */
+				movq		[edi+32], mm5				/* write 01 11 21 31 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 32 22 12 02 */
+				punpckhbw	mm0,	mm7					/* mm0 = 33 23 12 03 */
+
+				movq		[edi+48], mm1				/* write 02 12 22 32 */
+				movq		mm3,	mm2					/* mm3 = 35 25 15 05 34 24 14 04 */
+				
+				movq		mm5,	mm4					/* mm5 = 37 27 17 07 36 26 16 06 */
+				movq		[edi+64], mm0				/* write 03 13 23 33 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 34 24 14 04 */
+				punpckhbw	mm3,	mm7					/* mm3 = 35 25 15 05 */
+
+				movq		[edi+80], mm2				/* write 04 14 24 34 */
+				punpcklbw	mm4,	mm7					/* mm4 = 36 26 16 06 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 37 27 17 07 */
+				movq		[edi+96], mm3				/* write 05 15 25 35 */
+			
+				movq		mm0,	[eax]				/* mm0 = 47 46 45 44 43 42 41 40 */
+				movq		mm1,	[eax + ecx ]		/* mm1 = 57 56 55 54 53 52 51 50 */
+
+				movq		[edi+112], mm4				/* write 06 16 26 37 */
+				movq		mm2,	[eax+ecx*2]			/* mm2 = 67 66 65 64 63 62 61 60 */
+
+				lea			eax,	[eax+ ecx*4]		/* Go down four rows */
+				movq		[edi+128], mm5				/* write 07 17 27 37 */
+
+				movq		mm4,	mm0					/* mm4 = 47 46 45 44 43 42 41 40 */
+				movq		mm3,	[eax+edx]			/* mm3 = 77 76 75 74 73 72 71 70 */
+
+				punpcklbw	mm0,	mm1					/* mm0 = 53 43 52 42 51 41 50 40 */
+				punpckhbw	mm4,	mm1					/* mm4 = 57 57 56 46 55 45 54 44 */
+
+				movq		mm5,	mm2					/* mm5 = 67 66 65 64 63 62 61 60 */
+				punpcklbw	mm2,	mm3					/* mm2 = 73 63 72 62 71 61 70 60 */
+
+				punpckhbw	mm5,	mm3					/* mm5 = 77 67 76 66 75 65 74 64 */
+				movq		mm1,	mm0					/* mm1 = 53 43 52 42 51 41 50 40 */
+
+				punpcklwd	mm0,	mm2					/* mm0 = 71 61 51 41 70 60 50 40 */
+				punpckhwd	mm1,	mm2					/* mm1 = 73 63 53 43 72 62 52 42 */
+				
+				movq		mm2,	mm4					/* mm2 = 57 57 56 46 55 45 54 44 */
+				punpckhwd	mm4,	mm5					/* mm4 = 77 67 57 47 76 66 56 46 */
+
+				punpcklwd	mm2,	mm5					/* mm2 = 75 65 55 45 74 64 54 44 */
+
+				movq		mm5,	mm0					/* make a copy */
+				punpcklbw	mm0,	mm7					/* mm0 = 70 60 50 40 */
+
+				movq		[edi+24], mm0				/* write 40 50 60 70 */
+				punpckhbw	mm5,	mm7					/* mm5 = 71 61 51 41 */
+
+				movq		mm0,	mm1					/* mm0 = 73 63 53 43 72 62 52 42 */
+				movq		[edi+40], mm5				/* write 41 51 61 71 */
+				
+				punpcklbw	mm1,	mm7					/* mm1 = 72 62 52 42 */
+				punpckhbw	mm0,	mm7					/* mm0 = 73 63 53 43 */
+
+				movq		[edi+56], mm1				/* write 42 52 62 72 */
+				movq		mm3,	mm2					/* mm3 = 75 65 55 45 74 64 54 44 */
+				
+				movq		mm5,	mm4					/* mm5 = 77 67 57 47 76 66 56 46 */
+				movq		[edi+72], mm0				/* write 43 53 63 73 */
+
+				punpcklbw	mm2,	mm7					/* mm2 = 74 64 54 44 */
+				punpckhbw	mm3,	mm7					/* mm3 = 75 65 55 45 */
+
+				movq		[edi+88], mm2				/* write 44 54 64 74 */
+				punpcklbw	mm4,	mm7					/* mm4 = 76 66 56 46 */
+
+				punpckhbw	mm5,	mm7					/* mm5 = 77 67 57 47 */
+				movq		[edi+104], mm3				/* write 45 55 65 75 */
+			
+				movq		[edi+120], mm4				/* write 46 56 66 76 */
+				movq		[edi+136], mm5				/* write 47 57 67 77 */
+
+				/* we use xmm0,xmm1,xmm2 for 1234 and xmm4, xmm5, xmm6 for 5-8 */				
+				/* xmm7 = 0, xmm3 = {128, 128, 128, 128, 128, 128, 128, 128} */								
+				
+				pcmpeqw		xmm3,	xmm3				/* xmm3 = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF */	
+				psllw		xmm3,	15					/* xmm3 = 80008000800080008000800080008000 */	
+				psrlw		xmm3,	8					/* xmm3 = 00800080008000800080008000800080 */
+				
+				movdqa		xmm2,	[edi+16]			/* Pixel 1 */					
+				movdqa		xmm6,	[edi+80]			/* Pixel 5 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				movdqa		xmm0,	xmm2				/* xmm0 = pixel 1 */				
+				movdqa		xmm4,	xmm6				/* xmm4 = pixel 5 */				
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel1 * pixel1 */		
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel5 * pixel5 */		
+				
+				movdqa		xmm1,	xmm2				/* xmm1 = pixel1^2 */			
+				movdqa		xmm5,	xmm6				/* xmm5 = pixel5^2 */			
+				
+				movdqa		xmm2,	[edi+32]			/* Pixel 2 */					
+				movdqa		xmm6,	[edi+96]			/* Pixel 6 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 2 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 6 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel2^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel6^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel2^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel6^2 */			
+				
+				movdqa		xmm2,	[edi+48]			/* Pixel 3 */					
+				movdqa		xmm6,	[edi+112]			/* Pixel 7 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 3 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 7 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel3^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel7^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 += pixel3^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 += pixel7^2 */			
+				
+				movdqa		xmm2,	[edi+64]			/* Pixel 4 */					
+				movdqa		xmm6,	[edi+128]			/* Pixel 8 */					
+				
+				psubw		xmm2,	xmm3				/* xmm2 -=128 */					
+				psubw		xmm6,	xmm3				/* xmm6 -=128 */					
+				
+				paddw		xmm0,	xmm2				/* xmm0 += pixel 4 */			
+				paddw		xmm4,	xmm6				/* xmm4 += pixel 8 */			
+				
+				pmullw		xmm2,	xmm2				/* xmm2 = pixel4^2 */			
+				pmullw		xmm6,	xmm6				/* xmm6 = pixel8^2 */			
+				
+				paddw		xmm1,	xmm2				/* xmm1 = pixel4^2 */			
+				paddw		xmm5,	xmm6				/* xmm5 = pixel8^2 */			
+				
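+				/* xmm0 = x1 + x2 + x3 + x4             (pixels biased by -128) */
+				/* xmm1 = x1^2 + x2^2 + x3^2 + x4^2     (squares of the biased pixels) */
+				/* xmm4 = x5 + x6 + x7 + x8             (pixels biased by -128) */
+				/* xmm5 = x5^2 + x6^2 + x7^2 + x8^2     (squares of the biased pixels) */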
+				
+				movdqa		xmm7,	xmm3				/* xmm7 = xmm3 */					
+				psrlw		xmm7,	7					/* xmm7 = 00010001000100010001000100010001 */	
+				
+				movdqa		xmm2,	xmm0				/* make copy of sum1 */			
+				movdqa		xmm6,	xmm4				/* make copy of sum2 */			
+				
+				paddw		xmm0,	xmm7				/* (sum1 + 1) */				
+				paddw		xmm4,	xmm7				/* (sum2 + 1) */				
+				
+				psraw		xmm2,	1					/* sum1 /2 */					
+				psraw		xmm6,	1					/* sum2 /2 */					
+				
+				psraw		xmm0,	1					/* (sum1 + 1)/2 */				
+				psraw		xmm4,	1					/* (sum2 + 1)/2 */				
+				
+				pmullw		xmm2,	xmm0				/* (sum1)/2*(sum1+1)/2 */		
+				pmullw		xmm6,	xmm4				/* (sum2)/2*(sum2+1)/2 */		
+				
+				psubw		xmm1,	xmm2				/* Variance 1 */				
+				psubw		xmm5,	xmm6				/* Variance 2 */				
+				
+				movdqa		xmm7,	FLimitWMT			/* xmm7 = FLimit */				
+				movdqa		xmm2,	xmm1				/* copy of Varinace 1*/
+
+                movdqa		[Variance1], xmm1			/* save the varinace1 */
+				movdqa		[Variance2], xmm5			/* save the varinace2 */
+
+				movdqa		xmm6,	xmm5				/* Variance 2 */
+				psubw		xmm1,	xmm7				/* Variance 1 < Flimit? */		
+				
+				psubw		xmm5,	xmm7				/* Variance 2 < Flimit? */		
+				psraw		xmm2,	15					/* Variance 1 > 32768? */
+
+				psraw		xmm6,	15					/* Vaiance  2 > 32768? */	
+				psraw		xmm1,	15					/* FFFF/0000 for true/false */	
+				
+				psraw		xmm5,	15					/* FFFF/0000 for true/false */	
+				movdqa		xmm7,	[edi+64]			/* xmm0 = Pixel 4			*/	
+
+				pandn		xmm2,	xmm1				/* Variance1<32768 && 
+															Variance1<Limit			*/
+				pandn		xmm6,	xmm5				/* Variance2<32768 && 
+														   Variance2<Limit			*/
+				
+				movdqa		xmm4,	[edi+80]			/* xmm4 = Pixel 5			*/	
+				pand		xmm6,	xmm2				/* xmm6 = Variance1 < Flimit */	
+														/*     &&Variance2 < Flimit */	
+
+				movdqa		xmm2,	xmm7				/* make copy of Pixel4		*/	
+
+				psubusw		xmm7,	xmm4				/* 4 - 5 */						
+				psubusw		xmm4,	xmm2				/* 5 - 4 */						
+				
+				por			xmm7,	xmm4				/* abs(4 - 5) */				
+				psubw		xmm7,	QStepWMT			/* abs(4-5)<QStepxmmx ? */		
+				
+				psraw		xmm7,	15					/* FFFF/0000 for True/Flase */
+				pand		xmm7,	xmm6													
+				
+				/* xmm7 = Variance 1< Flimit && Variance 2<Flimit && abs(4-5)<QStep */	
+				/* xmm7 now are in use  */										
+                /* find the loop filtered values for the pixels on block boundary */
+                movdqa      xmm1,       LoopFLimitWMT;   /* Get the Flimit values for loop filter */
+                movdqa      xmm3,       [edi + 48]       /* xmm3 = x3 = p[-2] */
+
+                movdqa      xmm4,       [edi + 64]       /* mm4 = x4 = p[-1] */
+                movdqa      xmm5,       [edi + 80]       /* mm5 = x5 = p[ 0] */
+
+                movdqa      xmm6,       [edi + 96]       /* mm6 = x6 = p[ 1] */
+                psubw       xmm5,       xmm4             /* mm5 = p[ 0] - p[-1] */
+
+                psubw       xmm3,       xmm6             /* mm3 = p[-2] - p[ 1] */
+                movdqa      xmm4,       xmm5             /* make a copy */
+
+                paddw       xmm4,       xmm5             /* 2 * ( p[0] - p[-1] ) */
+                paddw       xmm3,       EightFours       /* mm3 + 4 */
+
+                paddw       xmm5,       xmm4             /* 3 * ( p[0] - p[-1] ) */
+                paddw       xmm3,       xmm5             /* Filtval before shift */
+
+                psraw       xmm3,       3                /* FiltVal */
+                movdqa      xmm2,       xmm3             /* make a copy */
+
+                psraw       xmm3,       15               /* FFFF->Neg, 0000->Pos */
+                pxor        xmm2,       xmm3
+
+                psubsw      xmm2,       xmm3             /* mm2 = abs(FiltVal) */
+                por         xmm3,       EightOnes        /* -1 and 1 for + and - */
+
+                movdqa      xmm4,       xmm1             /* make a copy of Flimit */
+                psubw       xmm1,       xmm2             /* mm1= Flimit - abs(FiltVal) */
+
+                movdqa      xmm5,       xmm1             /* copy Flimit - abs(FiltVal) */
+                psraw       xmm1,       15               /* FFFF or 0000 */
+
+                pxor        xmm5,       xmm1                 
+                psubsw      xmm5,       xmm1             /* abs(Flimit - abs(FiltVal)) */
+
+                psubusw     xmm4,       xmm5             /* Flimit-abs(Flimit - abs(FiltVal)) */
+                pmullw      xmm4,       xmm3             /* get the sign back */
+
+                movdqa      xmm1,       [edi+64]         /* p[-1] */
+                movdqa      xmm2,       [edi+80]         /* p[0] */
+            
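+                paddw       xmm1,       mm4              /* p[-1] + NewFiltVal -- NOTE(review): operand is mm4, but NewFiltVal was just computed in xmm4; confirm the intended register */
+                psubw       xmm2,       mm4              /* p[0] - NewFiltVal -- NOTE(review): same mm4 vs xmm4 question as the line above */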
+
+                pxor        xmm6,       xmm6             /* clear mm6 */
+                packuswb    xmm1,       xmm1             /* clamping */
+
+                packuswb    xmm2,       xmm2
+                punpcklbw   xmm1,       xmm6             /* unpack to word */
+            
+                movdqa      LoopFilteredValuesUp, xmm1   /* save the values */
+                punpcklbw   xmm2,       xmm6                 /* unpack to word */
+
+                movdqa      LoopFilteredValuesDown, xmm2 /* save the values */
+
+                /* Let's do the filtering now */										
+				/* p1 = (abs(Src[-4] - Src[-5]) < QStep ) ?  Src[-5] : Src[-4]; */		
+				/* p2 = (abs(Src[+3] - Src[+4]) < QStep ) ?  Src[+4] : Src[+3]; */		
+				
+				movdqa		xmm5,	[edi]				/* xmm5 = -5 */					
+				movdqa		xmm4,	[edi + 16]			/* xmm4 = -4 */					
+				
+				movdqa		xmm3,	xmm4				/* copy of -4 */				
+				movdqa		xmm6,	xmm5				/* copy of -5 */				
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [-4] - [-5] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [-5] - [-4] */			
+				
+				por			xmm4,	xmm5				/* abs([-4]-[-5] ) */			
+				psubw		xmm4,	QStepWMT			/* abs([-4]-[-5] )<QStep? */	
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm1,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm1,	xmm3				/*							*/	
+				
+				por			xmm1,	xmm4				/* xmm1 = p1				*/	
+				
+				/* now find P2 */														
+				
+				movdqa		xmm4,	[edi+128]			/* xmm4 = [3] */					
+				movdqa		xmm5,	[edi+144]			/* xmm5 = [4] */					
+				
+				movdqa		xmm3,	xmm4				/* copy of 3 */					
+				movdqa		xmm6,	xmm5				/* copy of 4 */					
+				
+				psubusw		xmm4,	xmm6				/* xmm4 = [3] - [4] */			
+				psubusw		xmm5,	xmm3				/* xmm5 = [4] - [3] */			
+				
+				por			xmm4,	xmm5				/* abs([3]-[4] ) */				
+				psubw		xmm4,	QStepWMT			/* abs([3]-[4] )<QStep? */		
+				
+				psraw		xmm4,	15					/* FFFF/0000 for True/False */	
+				movdqa		xmm2,	xmm4				/* copy of the xmm4 */			
+				
+				pand		xmm4,	xmm6				/*							*/	
+				pandn		xmm2,	xmm3				/*							*/	
+				
+				por			xmm2,	xmm4				/* xmm2 = p2				*/	
+
+				/* Data is ready, now do the filtering */
+				
+				pxor		xmm0,	xmm0				/* clear xmm0 */
+
+				/* sum = p1 + p1 + p1 + x1 + x2 + x3 + x4 + 4; */				
+				/* Des[-w4] = (((sum + x1) << 1) - (x4 - x5)) >> 4; */			
+				/* Des[-w4] = Src[-w4]; */												
+				/* which is equivalent to Src[-w4] + flag * ( newvalue - Src[-w4] */	
+
+				
+				movdqa		xmm3,	xmm1				/* xmm3 = p1 */					
+				paddw		xmm3,	xmm3				/* xmm3 = p1 + p1 */				
+				
+				paddw		xmm3,	xmm1				/* xmm3 = p1 + p1 + p1 */		
+				movdqa		xmm4,	[edi+16]			/* xmm4 = x1 */					
+				
+				paddw		xmm3,	[edi+32]			/* xmm3 = p1+p1+p1+ x2 */		
+				paddw		xmm4,	[edi+48]			/* xmm4 = x1+x3 */				
+				
+				paddw		xmm3,	[edi+64]			/* xmm3 += x4 */					
+				paddw		xmm4,	EightFours			/* xmm4 = x1 + x3 + 4 */			
+				
+				paddw		xmm3,	xmm4				/* xmm3 = 3*p1+x1+x2+x3+x4+4 */	
+				movdqa		xmm4,	xmm3				/* xmm4 = xmm3 */					
+				
+				movdqa		xmm5,	[edi+16]			/* xmm5 = x1 */					
+				paddw		xmm4,	xmm5				/* xmm4 = sum+x1 */				
+				
+				psllw		xmm4,	1					/* xmm4 = (sum+x1)<<1 */			
+				psubw		xmm4,	[edi+64]			/* xmm4 = (sum+x1)<<1-x4 */		
+				
+				paddw		xmm4,	[edi+80]			/* xmm4 = (sum+x1)<<1-x4+x5 */	
+				psraw		xmm4,	4					/* xmm4 >>=4 */					
+				
+				psubw		xmm4,	xmm5				/* New Value - old Value */		
+				pand		xmm4,	xmm7				/* And the flag */				
+				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				
+				movdq2q		mm0,	xmm4				/* Write new x1 */				
+				
+				/* sum += x5 -p1 */														
+				/* Des[-w3]=((sum+x2)<<1-x5+x6)>>4 */									
+				
+				movdqa		xmm5,	[edi+32]			/* xmm5= x2 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+80]			/* sum=sum+x5 */				
+				movdqa		xmm4,	xmm5				/* copy sum */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x2 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+80]			/* xmm4 =(sum+x2)<<1-x5 */		
+				paddw		xmm4,	[edi+96]			/* xmm4 =(sum+x2)<<1-x5+x6 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x2)<<1-x5+x6)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movdq2q		mm1,	xmm4				/* write new x2 */				
+				
+				/* sum += x6 - p1 */													
+				/* Des[-w2]=((sum+x[3])<<1-x[6]+x[7])>>4 */								
+				
+				movdqa		xmm5,	[edi+48]			/* xmm5= x3 */					
+				psubw		xmm3,	xmm1				/* sum=sum-p1 */				
+				
+				paddw		xmm3,    [edi+96]			/* sum=sum+x6 */				
+				movdqa		xmm4,	xmm5				/* copy x3 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4=sum+x3 */				
+				paddw		xmm4,	xmm4				/* xmm4 <<= 1 */					
+				
+				psubw		xmm4,	[edi+96]			/* xmm4 =(sum+x3)<<1-x6 */		
+				paddw		xmm4,	[edi+112]			/* xmm4 =(sum+x3)<<1-x6+x7 */	
+				
+				psraw		xmm4,	4					/* xmm4=((sum+x3)<<1-x6+x7)>>4 */
+				psubw		xmm4,	xmm5				/* new value - old value	*/	
+				
+				pand		xmm4,	xmm7				/* And the flag */				
+				paddw		xmm4,	xmm5				/* add the old value back */	
+				
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+				movdq2q		mm2,	xmm4				/* write new x3 */				
+				
+				/* sum += x7 - p1 */													
+				/* Des[-w1]=((sum+x4)<<1+p1-x1-x7+x8)>>4 */
+				
+				movdqa		xmm5,	[edi+64]			/* xmm5 = x4 */					
+				psubw		xmm3,	xmm1				/* sum = sum-p1 */				
+				
+				paddw		xmm3,	[edi+112]			/* sum = sum+x7 */				
+				movdqa		xmm4,	xmm5				/* xmm4 = x4 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum + x4 */			
+				paddw		xmm4,	xmm4				/* xmm4 *=2 */					
+				
+				paddw		xmm4,	xmm1				/* += p1 */						
+				psubw		xmm4,	[edi+16]			/* -= x1 */						
+				
+				psubw		xmm4,	[edi+112]			/* -= x7 */						
+				paddw		xmm4,	[edi+128]			/* += x8 */						
+				
+				movdqa      xmm5,   LoopFilteredValuesUp /* Read the loop filtered value of x4 */
+                psraw		xmm4,	4					/* >>=4 */						
+
+                psubw		xmm4,	xmm5				/* -=x4 */						
+				pand		xmm4,	xmm7				/* and flag */					
+
+                paddw		xmm4,	xmm5				/* += x4 */						
+				packuswb	xmm4,	xmm0				/* pack it to bytes */
+
+                movdq2q		mm3,	xmm4				/* write new x4 */				
+				
+
+				/* sum+= x8-x1 */														
+				/* Des[0]=((sum+x5)<<1+x1-x2-x8+p2)>>4 */								
+				
+				movdqa		xmm5,	[edi+80]			/* xmm5 = x5 */					
+				psubw		xmm3,	[edi+16]			/* sum -= x1 */					
+				
+				paddw		xmm3,	[edi+128]			/* sub += x8 */					
+				movdqa		xmm4,	xmm5				/* xmm4 = x5 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4= sum+x5 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2 */					
+				
+				paddw		xmm4,	[edi+16]			/* += x1 */						
+				psubw		xmm4,	[edi+32]			/* -= x2 */						
+				
+				psubw		xmm4,	[edi+128]			/* -= x8 */						
+				paddw		xmm4,	xmm2				/* += p2 */						
+
+				movdqa      xmm5,   LoopFilteredValuesDown /*  Read the loop filtered value of x5 */
+   				psraw		xmm4,	4					/* >>=4 */						
+
+                psubw		xmm4,	xmm5				/* -=x5 */						
+				pand		xmm4,	xmm7				/* and flag */					
+
+                paddw		xmm4,	xmm5				/* += x5 */						
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+
+                movdq2q		mm4,	xmm4				/* write new x5 */				
+				
+				/* sum += p2 - x2 */													
+				/* Des[w1] = ((sum+x6)<<1 + x2-x3)>>4 */								
+				
+				movdqa		xmm5,	[edi+96]			/* xmm5 = x6 */					
+				psubw		xmm3,	[edi+32]			/* -= x2 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x6 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x6 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+32]			/* +=x2 */						
+				psubw		xmm4,	[edi+48]			/* -=x3 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x6 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x6 */						
+				
+				packuswb	xmm4,	xmm0				/* pack to bytes */
+				movdq2q		mm5,	xmm4				/* write new x6 */				
+				
+				/* sum += p2 - x3 */													
+				/* Des[w2] = ((sum+x7)<<1 + x3-x4)>>4 */								
+				
+				movdqa		xmm5,	[edi+112]			/* xmm5 = x7 */					
+				psubw		xmm3,	[edi+48]			/* -= x3 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x7 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x7 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+48]			/* +=x3 */						
+				psubw		xmm4,	[edi+64]			/* -=x4 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x7 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x7 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movdq2q		mm6,	xmm4				/* write new x7 */				
+				
+				/* sum += p2 - x4 */													
+				/* Des[w3] = ((sum+x8)<<1 + x4-x5)>>4 */								
+				
+				movdqa		xmm5,	[edi+128]			/* xmm5 = x8 */					
+				psubw		xmm3,	[edi+64]			/* -= x4 */						
+				
+				paddw		xmm3,	xmm2				/* += p2 */						
+				movdqa		xmm4,	xmm5				/* xmm4 = x8 */					
+				
+				paddw		xmm4,	xmm3				/* xmm4 = sum+x8 */				
+				paddw		xmm4,	xmm4				/* xmm4 *= 2*/					
+				
+				paddw		xmm4,	[edi+64]			/* +=x4 */						
+				psubw		xmm4,	[edi+80]			/* -=x5 */						
+				
+				psraw		xmm4,	4					/* >>=4 */						
+				psubw		xmm4,	xmm5				/* -=x8 */						
+				
+				pand		xmm4,	xmm7				/* and flag */					
+				paddw		xmm4,	xmm5				/* += x8 */						
+
+				packuswb	xmm4,	xmm0				/* pack to bytes */				
+				movdq2q		mm7,	xmm4				/* write new x8 */				
+
+
+				/* transpose */
+				movq2dq		xmm0,	mm0					/* xmm0 = 70 60 50 40 30 20 10 00 */
+				movq2dq		xmm1,	mm1					/* xmm1 = 71 61 51 41 31 21 11 01 */
+
+				movq2dq		xmm2,	mm2					/* xmm2 = 72 62 52 42 32 22 12 02 */
+				movq2dq		xmm3,	mm3					/* xmm3 = 73 63 53 43 33 23 13 03 */
+
+				punpcklbw	xmm0,	xmm1				/* xmm0 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+				punpcklbw	xmm2,	xmm3				/* xmm2 = 7372 6362 5352 4342 3332 2322 1312 0302 */
+
+				movdqa		xmm1,	xmm0				/* xmm1 = 7170 6160 5150 4140 3130 2120 1110 0100 */
+				punpcklwd	xmm0,	xmm2				/* xmm0 = 33323130 23222120 13121110 03020100 */
+
+				punpckhwd	xmm1,	xmm2				/* xmm1 = 73727170 63626160 53525150 43424140 */
+				
+				movq2dq		xmm4,	mm4					/* xmm4 = 74 64 54 44 34 24 14 04 */
+				movq2dq		xmm5,	mm5					/* xmm5 = 75 65 55 45 35 25 15 05 */				
+
+				movq2dq		xmm6,	mm6 				/* xmm6 = 76 66 56 46 36 26 16 06 */
+				movq2dq		xmm7,	mm7					/* xmm7 = 77 67 57 47 37 27 17 07 */
+								
+				punpcklbw	xmm4,	xmm5				/* xmm4 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+				punpcklbw	xmm6,	xmm7				/* xmm6 = 7776 6766 5756 4746 3736 2726 1716 0706 */
+
+				movdqa		xmm5,	xmm4				/* xmm5 = 7574 6564 5554 4544 3534 2524 1514 0504 */
+				punpcklwd	xmm4,	xmm6				/* xmm4 = 37363534 27262524 17161514 07060504 */
+
+				punpckhwd	xmm5,	xmm6				/* xmm5 = 77767574 67666564 57565554 47464544 */
+				movdqa		xmm2,	xmm0				/* xmm2 = 33323130 23222120 13121110 03020100 */
+
+				punpckldq	xmm0,	xmm4				/* xmm0 = 1716151413121110	0706050403020100 */
+				movq		QWORD PTR [esi+edx*2],xmm0	/* write 00 01 02 03 04 05 06 07 */
+
+				psrldq		xmm0,	8					/* xmm0 = 1716151413121110 */
+				punpckhdq	xmm2,	xmm4				/* xmm2 = 3736353433323130	2726252423222120 */
+
+				movq		QWORD PTR [esi+edx], xmm0	/* write 10 11 12 13 14 15 16 17 */
+				movdqa		xmm3,	xmm1				/* xmm3 = 73727170 63626160 53525150 43424140 */
+				
+				punpckldq	xmm1,	xmm5				/* xmm1 = 5756555453525150 4746454443424140 */
+				movq		QWORD PTR [esi],	xmm2	/* write 20 21 22 23 24 25 26 27 */
+				
+				psrldq		xmm2,	8					/* xmm2 = 3736353433323130 */
+				punpckhdq	xmm3,	xmm5				/* xmm3 = 7776757473727170 6766656463626160 */
+
+				movq		QWORD PTR [esi+ecx], xmm2	/* write 30 31 32 33 34 35 36 37 */
+				lea			esi,	[esi+ecx*4]			/* esi= Des - 4 + 4 *pitch */
+				
+				movq		QWORD PTR [esi+edx*2], xmm1	/* write 40 41 42 43 44 45 46 47 */
+				movq		QWORD PTR [esi],	xmm3	/* write 60 61 62 63 64 65 66 67 */
+
+				psrldq		xmm1,	8					/* xmm1 = 5756555453525150 */
+				psrldq		xmm3,	8					/* xmm3 = 7776757473727170 */
+
+				movq		QWORD PTR [esi+edx], xmm1	/* write 50 51 52 53 54 55 56 57 */
+				movq		QWORD PTR [esi+ecx], xmm3	/* write 70 71 72 73 74 75 76 77 */
+
+
+				pop			edi
+				pop			esi
+				pop			edx
+				pop			ecx
+				pop			eax
+	    	}// end of __asm	
+
+    		Var1=Variance1[0]+Variance1[1]+Variance1[2]+Variance1[3]+Variance1[4]+Variance1[5]+Variance1[6]+Variance1[7];
+	    	Var2=Variance2[0]+Variance2[1]+Variance2[2]+Variance2[3]+Variance2[4]+Variance2[5]+Variance2[6]+Variance2[7];
+			pbi->FragmentVariances[CurrentFrag-1] += Var1;
+			pbi->FragmentVariances[CurrentFrag] += Var2;
+			CurrentFrag ++;
+
+		}//else
+			
+	}//while
+#endif
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PlaneAddNoise_wmt
+ *
+ *  INPUTS        : UINT8 *Start    starting address of buffer to add gaussian
+ *                                  noise to
+ *                  UINT32 Width    width of plane
+ *                  UINT32 Height   height of plane
+ *                  INT32  Pitch    distance between subsequent lines of frame
+ *                  int    q        quantizer used to determine amount of noise 
+ *                                  to add (sigma = 1 + .8*(63-q)/63)
+ *
+ *  OUTPUTS       : The plane at Start is modified in place.
+ *
+ *  RETURNS       : void.   
+ * 
+ *  FUNCTION      : adds gaussian noise to a plane of pixels
+ *
+ *  SPECIAL NOTES : Does nothing when _WIN32_WCE is defined.
+ *
+ *                  Pixels are handled 16 at a time with unaligned SSE2
+ *                  loads/stores, so every row is read and rewritten up to
+ *                  the next multiple of 16 bytes, and a Width of 0 still
+ *                  touches one group of 16 bytes per row.
+ *
+ *                  Each row is processed in runs of at most 1792 pixels,
+ *                  every run taking a fresh random start into the 2048
+ *                  entry noise table, so the 16-byte noise loads never run
+ *                  past the end of that table on wide planes. For planes
+ *                  no wider than 1792 pixels there is exactly one run (and
+ *                  one rand() call) per row.
+ *
+ *                  Uses rand(), so the result depends on the C library's
+ *                  random state.
+ *
+ ****************************************************************************/
+void PlaneAddNoise_wmt( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
+{
+#if defined(_WIN32_WCE)
+    return;
+#else
+    enum
+    {
+        DIST_ENTRIES    = 256,      /* entries of CharDist sampled via rand() & 0xff */
+        DIST_CAPACITY   = 300,      /* CharDist storage (larger than the sampled part) */
+        NOISE_ENTRIES   = 2048,     /* entries in the noise table Rand */
+        NOISE_START_MAX = 0xff,     /* largest random start offset into Rand */
+
+        /* Longest run of pixels whose final 16-byte noise load still ends   */
+        /* inside Rand for any start offset. 1792 is a multiple of 16, so    */
+        /* consecutive runs never overlap.                                   */
+        MAX_RUN         = NOISE_ENTRIES - (NOISE_START_MAX + 1)
+    };
+
+    __declspec(align(16)) unsigned char blackclamp[16];
+    __declspec(align(16)) unsigned char whiteclamp[16];
+    __declspec(align(16)) unsigned char bothclamp[16];
+    char CharDist[DIST_CAPACITY];
+    char Rand[NOISE_ENTRIES];
+    double sigma;
+    unsigned int i;
+
+    __asm emms                      /* clear MMX state before the floating point maths below */
+    sigma = 1 + .8*(63-q) / 63.0;
+
+    // Set up a lookup table whose first 256 entries follow a gaussian
+    // distribution with sigma determined by q: each value x in [-32,32)
+    // is repeated round(256 * gaussian(sigma,0,x)) times.
+    {
+        double x;
+        int next = 0;
+        int j;
+
+        for(x=-32;x<32;x++)
+        {
+            int a = (int)(.5+256*gaussian(sigma,0,x));
+
+            // Never write past the end of CharDist, whatever gaussian() returns.
+            if(a > DIST_CAPACITY - next)
+                a = DIST_CAPACITY - next;
+
+            for(j=0;j<a;j++)
+                CharDist[next+j]=(char) x;
+
+            next += j;
+        }
+
+        // Pad with zero noise if rounding left the sampled part short.
+        for(;next<DIST_ENTRIES;next++)
+            CharDist[next] = 0;
+    }
+
+    // Draw the noise table from the distribution.
+    for(i=0;i<NOISE_ENTRIES;i++)
+        Rand[i]=CharDist[rand() & 0xff];
+
+    // CharDist[0] holds the first (most negative) value emitted above, so
+    // -CharDist[0] is the clamp margin applied to the pixels before the
+    // noise is added.
+    for(i=0;i<16;i++)
+    {
+        blackclamp[i]=-CharDist[0];
+        whiteclamp[i]=-CharDist[0];
+        bothclamp[i]=-2*CharDist[0];
+    }
+
+    for(i=0;i<Height;i++)
+    {
+        UINT8 *RowStart = Start + i *Pitch;
+        UINT32 Done = 0;            /* pixels of this row already processed */
+
+        do
+        {
+            UINT32 RunWidth = Width - Done;
+            UINT8 *Pos;
+            INT8  *Ref;
+
+            if(RunWidth > MAX_RUN)
+                RunWidth = MAX_RUN;
+
+            Pos = RowStart + Done;
+            Ref = Rand + (rand() & NOISE_START_MAX);
+
+            __asm
+            {
+                mov         ecx,RunWidth
+                mov         esi,Pos
+                mov         edi,Ref
+                xor         eax,eax
+
+            nextset:
+                movdqu      xmm1,[esi+eax]         // get the source
+
+                // subtract c, add 2c, subtract c (all saturating) leaves the
+                // pixel in [c, 255-c], where c is the clamp margin
+                psubusb     xmm1,blackclamp
+                paddusb     xmm1,bothclamp
+                psubusb     xmm1,whiteclamp
+
+                movdqu      xmm2,[edi+eax]         // get the noise for these pixels
+                paddb       xmm1,xmm2              // add it in (wrapping byte add)
+                movdqu      [esi+eax],xmm1         // store the result
+
+                add         eax,16                 // move to the next 16 pixels
+
+                cmp         eax,ecx
+                jb          nextset                // both counts are unsigned
+            }
+
+            Done += RunWidth;
+        } while(Done < Width);
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c
new file mode 100644
index 00000000..35a39265
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringopt.c
@@ -0,0 +1,2529 @@
+/****************************************************************************
+ *
+ *   Module Title :     DeRingingOpt.c
+ *
+ *   Description  :     Optimized functions for PostProcessor
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning(disable:4799)
+#pragma warning(disable:4731)
+#pragma warning(disable:4305)
+
+/****************************************************************************
+*  Module constants.
+****************************************************************************/        
+#if defined(_WIN32_WCE)    /* Constant tables used as memory operands by the MMX code in this
+                            * file.  Both branches define the same values; only the way
+                            * alignment is requested differs.  NOTE(review): #pragma pack is
+                            * documented as controlling struct/union member packing, so it may
+                            * not align these file-scope arrays at all -- confirm for the CE
+                            * toolchain. */
+#pragma pack(16)
+static unsigned short FourOnes[] = {  1,   1,   1,   1 };
+static unsigned short Four128s[] = { 128, 128, 128, 128 };
+static unsigned short Four64s[]  = { 64,  64,  64,  64};
+
+static char eight64s [] = { 64, 64, 64, 64, 64, 64, 64, 64 };
+static char eight32s [] = { 32, 32, 32, 32, 32, 32, 32, 32 };
+static char eight127s []= { 127, 127, 127, 127, 127, 127, 127, 127 };
+static char eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128 };
+static unsigned char eight223s[] = { 223, 223, 223, 223, 223, 223, 223, 223 };
+static unsigned char eight231s[] = { 231, 231, 231, 231, 231, 231, 231, 231 };
+#pragma pack()
+#else
+/* Non-CE build: every table is explicitly 16-byte aligned. */
+__declspec(align(16)) static unsigned short FourOnes[] = {  1,   1,   1,   1 };      /* four 16-bit words of 1 */
+__declspec(align(16)) static unsigned short Four128s[] = { 128, 128, 128, 128 };    /* total weight: centre weight = 128 - (au+ad+al+ar) */
+__declspec(align(16)) static unsigned short Four64s[]  = { 64,  64,  64,  64};      /* rounding term added before the >>7; shifted left 8 it is the 16384 clamp bias */
+
+__declspec(align(16)) static char eight64s [] = { 64, 64, 64, 64, 64, 64, 64, 64 };            /* added to 32+QValue for the sharpen threshold compare */
+__declspec(align(16)) static char eight32s [] = { 32, 32, 32, 32, 32, 32, 32, 32 };            /* added to QValue to form 32+QValue */
+__declspec(align(16)) static char eight127s []= { 127, 127, 127, 127, 127, 127, 127, 127 };    /* 0x7f bytes, combined with pandn to build ClampHigh */
+__declspec(align(16)) static char eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128 };    /* 128 is outside signed char range; used as 0x80 bytes, XORed in before the signed pcmpgtb */
+__declspec(align(16)) static unsigned char eight223s[] = { 223, 223, 223, 223, 223, 223, 223, 223 };    /* 255-32: paddusb then psubusb with this limits a byte to at most 32 */
+__declspec(align(16)) static unsigned char eight231s[] = { 231, 231, 231, 231, 231, 231, 231, 231 };    /* 255-24; presumably the matching limit of 24 -- use not visible here, confirm */
+
+#endif
+/****************************************************************************
+*  Imports
+****************************************************************************/              
+extern UINT32 SharpenModifier[];
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlockStrong_MMX
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
+ *                  const UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DstPtr                : Pointer to output image.
+ *                  const INT32 Pitch            : Image stride.
+ *                  UINT32 FragQIndex            : Q-index block encoded with.
+ *                  UINT32 *QuantScale           : Array of quantization scale factors.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filtering a block for de-ringing purpose.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockStrong_MMX
+( 
+    const POSTPROC_INSTANCE *pbi, 
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+
+#if defined(_WIN32_WCE)
+	#pragma pack(16)
+	short UDMod[72];
+	short	LRMod[128];
+	#pragma pack()
+#else
+	__declspec(align(16)) short UDMod[72];
+	__declspec(align(16)) short	LRMod[128];
+#endif
+	unsigned int PlaneLineStep = Pitch;
+	const unsigned char *Src = SrcPtr;
+	unsigned char *Des       = DstPtr;
+    
+	short *UDPointer = UDMod;
+	short *LRPointer = LRMod;
+    
+    UINT32 QStep  = QuantScale[FragQIndex];
+	INT32 Sharpen = SharpenModifier[FragQIndex];
+	(void) pbi;
+
+	__asm 
+	{
+		push		esi
+		push		edi
+		
+		mov			esi,	Src						/* Source Pointer */
+		mov			edi,	UDPointer				/* UD modifier pointer */
+
+		push		ecx
+		push		edx
+
+		mov			ecx,	PlaneLineStep			/* Pitch Step */
+        xor         edx,    edx
+
+		push		eax
+		push		ebx
+
+		mov			eax,	QStep					/* QValue */
+		mov			ebx,	Sharpen					/* Sharpen */
+
+		movd		mm0,	eax						/* QValue */
+		movd		mm2,	ebx						/* sharpen */
+
+		punpcklbw	mm0,	mm0						/* 00 00 00 QQ */
+        sub         edx,    ecx                     /* Negative Pitch */
+
+		punpcklbw	mm2,	mm2						/* 00 00 00 SS */
+        pxor        mm7,    mm7                     /* clear mm7 for unpacks */
+
+		punpcklbw	mm0,	mm0						/* 00 00 qq qq */
+		mov			eax,	LRPointer				/* Left and Right Modifier */                
+
+		punpcklbw	mm2,	mm2						/* 00 00 ss ss */
+		lea         ebx,    [esi+ecx*8]             /* Source Pointer of last row */        
+
+		punpcklbw	mm0,	mm0						/* qq qq qq qq */
+		movq        mm1,    mm0;                    /* make a copy */
+		
+		punpcklbw	mm2,	mm2						/* ss ss ss ss */
+		paddb		mm1,	mm0						/* QValue * 2 */
+
+        paddb       mm1,    mm0                     /* High = 3 * Qvalue */
+        paddusb		mm1,	eight223s				/* clamping high to 32 */	
+
+		paddb       mm0,    eight32s                /* 32+QValues */
+		psubusb		mm1,	eight223s				/* Get the real value back */
+
+        movq		mm3,	eight127s				/* 7f 7f 7f 7f 7f 7f 7f 7f */
+        pandn       mm1,    mm3                     /* ClampHigh */
+
+        /* mm0,mm1,mm2,mm7 are in use  */
+        /* mm0---> QValue+32           */
+        /* mm1---> ClampHigh		   */
+		/* mm2---> Sharpen             */
+		/* mm7---> Cleared for unpack  */
+
+FillModLoop1:
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [edi], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [edi+8], mm4          /* writeout UDmod, high four */
+
+        
+        /* left Mod */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi-1]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [eax], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [eax+8], mm4          /* writeout UDmod, high four */
+
+
+
+        /* Right Mod */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+1]       /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [eax+128], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [eax+136], mm4          /* writeout UDmod, high four */
+        add         esi,    ecx
+        
+        
+        add         edi,    16                  
+        add         eax,    16      
+
+        cmp         esi,    ebx
+        jne         FillModLoop1
+        
+        /* last UDMod */
+
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [edi], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [edi+8], mm4          /* writeout UDmod, high four */
+
+		mov			esi,	Src
+		mov			edi,	Des
+		
+		mov			eax,	UDPointer
+		mov			ebx,	LRPointer
+
+		/* First Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+		/* Second Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Third Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+
+
+        /* Fourth Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Fifth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Sixth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Seventh Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+        /* Eighth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+        pop         ebx
+        pop         eax
+
+        pop         edx
+        pop         ecx
+
+        pop         edi
+        pop         esi
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlockWeak_MMX
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
+ *                  const UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DstPtr                : Pointer to output image.
+ *                  const INT32 Pitch            : Image stride.
+ *                  UINT32 FragQIndex            : Q-index block encoded with.
+ *                  UINT32 *QuantScale           : Array of quantization scale factors.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters a block for de-ringing purposes.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void DeringBlockWeak_MMX
+( 
+    const POSTPROC_INSTANCE *pbi, 
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+	#pragma pack(16)
+	short UDMod[72];
+	short	LRMod[128];
+	#pragma pack()
+#else
+	__declspec(align(16)) short UDMod[72];
+	__declspec(align(16)) short	LRMod[128];
+#endif
+    
+	unsigned int PlaneLineStep = Pitch;
+	const unsigned char *Src   = SrcPtr;
+	unsigned char *Des         = DstPtr;
+    
+	short *UDPointer = UDMod;
+	short *LRPointer = LRMod;
+    
+    UINT32 QStep  = QuantScale[FragQIndex];
+	INT32 Sharpen = SharpenModifier[FragQIndex];
+	(void) pbi;
+
+	__asm 
+	{
+		push		esi
+		push		edi
+		
+		mov			esi,	Src						/* Source Pointer */
+		mov			edi,	UDPointer				/* UD modifier pointer */
+
+		push		ecx
+		push		edx
+
+		mov			ecx,	PlaneLineStep			/* Pitch Step */
+        xor         edx,    edx
+
+		push		eax
+		push		ebx
+
+		mov			eax,	QStep					/* QValue */
+		mov			ebx,	Sharpen					/* Sharpen */
+
+		movd		mm0,	eax						/* QValue */
+		movd		mm2,	ebx						/* sharpen */
+
+		punpcklbw	mm0,	mm0						/* 00 00 00 QQ */
+        sub         edx,    ecx                     /* Negative Pitch */
+
+		punpcklbw	mm2,	mm2						/* 00 00 00 SS */
+        pxor        mm7,    mm7                     /* clear mm7 for unpacks */
+
+		punpcklbw	mm0,	mm0						/* 00 00 qq qq */
+		mov			eax,	LRPointer				/* Left and Right Modifier */                
+
+		punpcklbw	mm2,	mm2						/* 00 00 ss ss */
+		lea         ebx,    [esi+ecx*8]             /* Source Pointer of last row */        
+
+		punpcklbw	mm0,	mm0						/* qq qq qq qq */
+		movq        mm1,    mm0;                    /* make a copy */
+		
+		punpcklbw	mm2,	mm2						/* ss ss ss ss */
+		paddb		mm1,	mm0						/* QValue * 2 */
+
+        paddb       mm1,    mm0                     /* High = 3 * Qvalue */
+        paddusb		mm1,	eight231s				/* clamping high to 24 */	
+
+		paddb       mm0,    eight32s                /* 32+QValues */
+		psubusb		mm1,	eight231s				/* Get the real value back */
+
+        movq		mm3,	eight127s				/* 7f 7f 7f 7f 7f 7f 7f 7f */
+        pandn       mm1,    mm3                     /* ClampHigh */
+
+        /* mm0,mm1,mm2,mm7 are in use  */
+        /* mm0---> QValue+32           */
+        /* mm1---> ClampHigh		   */
+		/* mm2---> Sharpen             */
+		/* mm7---> Cleared for unpack  */
+
+FillModLoop1:
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+		paddusb		mm3,	mm3						/* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [edi], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [edi+8], mm4          /* writeout UDmod, high four */
+
+        
+        /* left Mod */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi-1]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+		paddusb		mm3,	mm3						/* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [eax], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [eax+8], mm4          /* writeout UDmod, high four */
+
+
+
+        /* Right Mod */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+1]       /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+		paddusb		mm3,	mm3						/* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [eax+128], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [eax+136], mm4          /* writeout UDmod, high four */
+        add         esi,    ecx
+        
+        
+        add         edi,    16                  
+        add         eax,    16      
+
+        cmp         esi,    ebx
+        jne         FillModLoop1
+        
+        /* last UDMod */
+
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+
+        movq        mm5,    mm3                     /* make a copy of p */
+        psubusb     mm3,    mm4                     /* p-pu */
+        
+        psubusb     mm4,    mm5                     /* pu-p */
+        por         mm3,    mm4                     /* abs(p-pu) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+		paddusb		mm3,	mm3						/* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clampled TmpMod */
+
+		movq		mm5,	eight128s				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64s				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a sign number */
+		pxor		mm3,	mm5						/* convert to a sign number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+		pxor		mm5,	mm5						/* clear mm5 */
+
+		pxor		mm4,	mm4						/* clear mm4 */
+   		punpcklbw	mm5,	mm6						/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		mm5,	8						/* sign extended */
+		movq        QWORD PTR [edi], mm5            /* writeout UDmod, low four */
+		
+		punpckhbw	mm4,	mm6
+		psraw		mm4,	8
+
+        movq        QWORD PTR [edi+8], mm4          /* writeout UDmod, high four */
+
+		mov			esi,	Src
+		mov			edi,	Des
+		
+		mov			eax,	UDPointer
+		mov			ebx,	LRPointer
+
+		/* First Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+		/* Second Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Third Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+
+
+        /* Fourth Row */
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Fifth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Sixth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+
+        /* Seventh Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+		add			esi,	ecx				/* Src += Pitch */
+		add			edi,	ecx				/* Des += Pitch */
+
+		add			eax,	16				/* UDPointer += 8 */
+        add         ebx,    16              /* LPointer +=8 */
+		
+        /* Eighth Row */
+
+		movq		mm0,	[esi+edx]		/* mm0 = Pixels above */
+		pxor		mm7,	mm7				/* clear mm7 */
+
+		movq		mm1,	mm0				/* make a copy of mm0 */			
+		punpcklbw	mm0,	mm7				/* lower four pixels */
+		
+		movq		mm4,	[eax]			/* au */
+		punpckhbw	mm1,	mm7				/* high four pixels */
+		
+		movq		mm5,	[eax+8]			/* au */
+		
+		pmullw		mm0,	mm4				/* pu*au */
+		movq		mm2,	[esi+ecx]		/* mm2 = pixels below */
+		
+		pmullw		mm1,	mm5				/* pu*au */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* lower four */
+		movq		mm6,	[eax+16]		/* ad */
+
+		punpckhbw	mm3,	mm7				/* higher four */			
+		paddw		mm4,	mm6				/* au+ad */
+		
+		pmullw		mm2,	mm6				/* au*pu+ad*pd */
+		movq		mm6,	[eax+24]		/* ad */
+
+		paddw		mm0,	mm2			
+		paddw		mm5,	mm6				/* au+ad */
+		
+		pmullw		mm3,	mm6				/* ad*pd */
+		movq		mm2,	[esi-1]			/* pixel to the left */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd */
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx]			/* al */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */
+		paddw		mm4,	mm6				/* au + ad + al */
+		
+		pmullw		mm2,	mm6				/* pl * al */
+		movq		mm6,	[ebx+8]			/* al */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl */
+		paddw		mm5,	mm6				/* au+ad+al */
+		
+		pmullw		mm3,	mm6				/* al*pl */
+		movq		mm2,	[esi+1]			/* pixel to the right */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl */			
+		movq		mm3,	mm2				/* make a copy of mm2 */
+		
+		punpcklbw	mm2,	mm7				/* four left pixels */
+		movq		mm6,	[ebx+128]			/* ar */
+
+		punpckhbw	mm3,	mm7				/* four right pixels */			
+		paddw		mm4,	mm6				/* au + ad + al + ar */
+		
+		pmullw		mm2,	mm6				/* pr * ar */
+		movq		mm6,	[ebx+136]		/* ar */
+
+		paddw		mm0,	mm2				/* au*pu+ad*pd+al*pl+pr*ar */
+		paddw		mm5,	mm6				/* au+ad+al+ar */
+		
+		pmullw		mm3,	mm6				/* ar*pr */
+		movq		mm2,	[esi]			/* p */
+
+		paddw		mm1,	mm3				/* au*pu+ad*pd+al*pl+ar*pr */
+		movq		mm3,	mm2				/* make a copy of the pixel */
+		
+		/* mm0, mm1 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* mm4, mm5	---	 au + ad + al + ar */
+		
+		punpcklbw	mm2,	mm7				/* left four pixels */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		punpckhbw	mm3,	mm7				/* right four pixels */
+		psubw		mm6,	mm4				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm2,	mm6				/* p*(128-(au+ad+al+ar)) */
+		movq		mm6,	Four128s		/* 0080  0080 0080 0080 */
+
+		paddw		mm0,	mm2				/* sum */
+		psubw		mm6,	mm5				/* 128-(au+ad+al+ar) */
+		
+		pmullw		mm3,	mm6				/* p*(128-(au+ad+al+ar)) */ 
+		movq		mm6,	Four64s			/* {64, 64, 64, 64 } */
+
+		movq		mm7,	mm6				/* {64, 64, 64, 64} */
+		paddw		mm0,	mm6				/* sum+B */
+
+		paddw		mm1,	mm3				/* sum */
+		psllw		mm7,	8				/* {16384, .. } */
+
+		paddw		mm0,	mm7				/* clamping */
+		paddw		mm1,	mm6				/* sum+B */
+
+		paddw		mm1,	mm7				/* clamping */
+		psubusw		mm0,	mm7				/* clamping */
+
+		psubusw		mm1,	mm7				/* clamping */
+		psrlw		mm0,	7				/* (sum+B)>>7 */
+
+		psrlw		mm1,	7				/* (sum+B)>>7 */
+		packuswb	mm0,	mm1				/* pack to 8 bytes */
+		
+		movq		[edi],	mm0				/* write to destination */
+
+        pop         ebx
+        pop         eax
+
+        pop         edx
+        pop         ecx
+
+        pop         edi
+        pop         esi
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c
new file mode 100644
index 00000000..574ac1ba
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/deringwmtopt.c
@@ -0,0 +1,748 @@
+/****************************************************************************
+ *
+ *   Module Title :     DeRingingWmtOpt.c
+ *
+ *   Description  :     Optimized functions for PostProcessor
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#pragma warning(disable:4305)
+#pragma warning(disable:4731)
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/        
+#if defined(_WIN32_WCE)
+#else
+__declspec(align(16)) static unsigned short eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128};   /* words of 128: total filter weight */
+__declspec(align(16)) static unsigned short eight64s[]  = { 64,  64,  64,  64, 64,  64,  64,  64};     /* words of 64: rounding term before >>7 */
+__declspec(align(16)) static char eight64c [] = { 64, 64, 64,64,64,64,64,64};                          /* bytes of 64: added to 32+QValue for the sharpen threshold */
+__declspec(align(16)) static char eight32c [] = { 32,32,32,32,32,32,32,32};                            /* bytes of 32: added to QValue */
+__declspec(align(16)) static char eight127c []= { 127, 127, 127, 127, 127, 127, 127, 127};             /* bytes of 0x7f: mask used to build ClampHigh */
+__declspec(align(16)) static unsigned char eight128c []= { 128, 128, 128, 128, 128, 128, 128, 128};    /* bytes of 0x80: sign-flip mask; unsigned so 128 fits without truncation */
+__declspec(align(16)) static unsigned char eight223c[] = { 223,223,223,223,223,223,223,223};           /* 255-32: saturating add/sub clamps a byte to 32 */
+__declspec(align(16)) static unsigned char eight231c[] = { 231,231,231,231,231,231,231,231};           /* 255-24: saturating add/sub clamps a byte to 24 */
+#endif
+/****************************************************************************
+*  Imports
+****************************************************************************/              
+extern UINT32 SharpenModifier[];
+
+/****************************************************************************
+ *
+ *  ROUTINE       : DeringBlockStrong_WMT
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused here).
+ *                  const UINT8 *SrcPtr          : Pointer to top-left pixel of the 8x8 input block.
+ *                  UINT8 *DstPtr                : Pointer to top-left pixel of the 8x8 output block.
+ *                  const INT32 Pitch            : Image stride, applied to both source and destination.
+ *                  UINT32 FragQIndex            : Index into QuantScale[] and SharpenModifier[].
+ *                  UINT32 *QuantScale           : Array of quantization scale factors.
+ *
+ *  OUTPUTS       : Eight rows of eight filtered bytes stored at DstPtr.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-rings an 8x8 block. Pass 1 derives per-pixel modifiers from the absolute
+ *                  difference to the pixel above, left and right. Pass 2 writes, clamped to 0..255,
+ *                  (au*pu + ad*pd + al*pl + ar*pr + (128-(au+ad+al+ar))*p + 64) >> 7.
+ *  SPECIAL NOTES : Reads one row above/below and one column left/right of the block.
+ *                  Uses MMX registers and executes no EMMS. Does nothing on _WIN32_WCE.
+ ****************************************************************************/
+void DeringBlockStrong_WMT
+(
+    const POSTPROC_INSTANCE *pbi,
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+    return;
+#else
+
+    __declspec(align(16)) short UDMod[72];     /* 9 rows x 8 up/down modifiers */
+    __declspec(align(16)) short LRMod[128];    /* 8 rows x 8 left modifiers, then 8 rows x 8 right modifiers */
+
+    unsigned int PlaneLineStep = Pitch;
+    const unsigned char *Src   = SrcPtr;
+    unsigned char *Des         = DstPtr;
+
+    short *UDPointer = UDMod;
+    short *LRPointer = LRMod;
+
+    UINT32 QStep  = QuantScale[FragQIndex];
+    INT32 Sharpen = SharpenModifier[FragQIndex];
+    (void) pbi;
+
+    __asm
+    {
+        push        esi
+        push        edi
+
+        mov         esi,    Src                     /* Source Pointer */
+        mov         edi,    UDPointer               /* UD modifier pointer */
+
+        push        ecx
+        push        edx
+
+        mov         ecx,    PlaneLineStep           /* Pitch Step */
+        xor         edx,    edx
+
+        push        eax
+        push        ebx
+
+        mov         eax,    QStep                   /* QValue */
+        mov         ebx,    Sharpen                 /* Sharpen */
+
+        movd        mm0,    eax                     /* QValue */
+        movd        mm2,    ebx                     /* sharpen */
+
+        push        ebp                             /* saved: ebp becomes the row counter once all locals are loaded */
+
+        punpcklbw   mm0,    mm0                     /* 00 00 00 QQ */
+        sub         edx,    ecx                     /* Negative Pitch */
+
+        punpcklbw   mm2,    mm2                     /* 00 00 00 SS */
+        pxor        mm7,    mm7                     /* clear mm7 for unpacks */
+
+        punpcklbw   mm0,    mm0                     /* 00 00 qq qq */
+        mov         eax,    LRPointer               /* Left and Right Modifier */
+
+        punpcklbw   mm2,    mm2                     /* 00 00 ss ss */
+        lea         ebx,    [esi+ecx*8]             /* Source Pointer of last row */
+
+        punpcklbw   mm0,    mm0                     /* qq qq qq qq */
+        movq        mm1,    mm0                     /* make a copy */
+
+        punpcklbw   mm2,    mm2                     /* ss ss ss ss */
+        paddb       mm1,    mm0                     /* QValue * 2 */
+
+        paddb       mm1,    mm0                     /* High = 3 * Qvalue */
+        paddusb     mm1,    eight223c               /* clamping high to 32 */
+
+        paddb       mm0,    eight32c                /* 32+QValues */
+        psubusb     mm1,    eight223c               /* Get the real value back */
+
+        movq        mm3,    eight127c               /* 7f 7f 7f 7f 7f 7f 7f 7f */
+        pandn       mm1,    mm3                     /* ClampHigh = ~High & 7f */
+
+        /* mm0,mm1,mm2,mm7 are in use  */
+        /* mm0---> QValue+32           */
+        /* mm1---> ClampHigh           */
+        /* mm2---> Sharpen             */
+        /* mm7---> Cleared for unpack  */
+
+FillModLoop1:
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        psubusb     mm3,    mm4                     /* p-pu */
+        psubusb     mm4,    mm5                     /* pu-p */
+
+        por         mm3,    mm4                     /* abs(p-pu) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq        mm4,    mm0                     /* 32+QValues */
+        psubusb     mm6,    mm3                     /* zero clamped TmpMod */
+
+        movq        mm5,    eight128c               /* 80 80 80 80 80 80 80 80 */
+        paddb       mm4,    eight64c                /* 32+QValues + 64 */
+
+        pxor        mm4,    mm5                     /* convert to a sign number */
+        pxor        mm3,    mm5                     /* convert to a sign number */
+
+        pcmpgtb     mm3,    mm4                     /* abs(p-pu) > 32+QValue+64 ? */
+        pand        mm3,    mm2                     /* use sharpen */
+
+        paddsb      mm6,    mm1                     /* clamping to high */
+        psubsb      mm6,    mm1                     /* offset back */
+
+        por         mm6,    mm3                     /* Mod value to be stored */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+
+        movq2dq     xmm0,   mm6
+        movq        mm4,    QWORD PTR [esi-1]       /* Pixels to the left pl */
+
+        punpcklbw   xmm7,   xmm0                    /* extended to words */
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        psraw       xmm7,   8                       /* sign extended */
+        psubusb     mm3,    mm4                     /* p-pl */
+
+        movdqa      [edi],  xmm7                    /* write out UDMod row */
+        psubusb     mm4,    mm5                     /* pl-p */
+
+        por         mm3,    mm4                     /* abs(p-pl) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq        mm4,    mm0                     /* 32+QValues */
+        psubusb     mm6,    mm3                     /* zero clamped TmpMod */
+
+        movq        mm5,    eight128c               /* 80 80 80 80 80 80 80 80 */
+        paddb       mm4,    eight64c                /* 32+QValues + 64 */
+
+        pxor        mm4,    mm5                     /* convert to a sign number */
+        pxor        mm3,    mm5                     /* convert to a sign number */
+
+        pcmpgtb     mm3,    mm4                     /* abs(p-pl) > 32+QValue+64 ? */
+        pand        mm3,    mm2                     /* use sharpen */
+
+        paddsb      mm6,    mm1                     /* clamping to high */
+        psubsb      mm6,    mm1                     /* offset back */
+
+        por         mm6,    mm3                     /* Mod value to be stored */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+        movq        mm4,    QWORD PTR [esi+1]       /* Pixels to the right pr */
+
+        movq2dq     xmm0,   mm6
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        punpcklbw   xmm7,   xmm0                    /* extended to shorts */
+        psubusb     mm3,    mm4                     /* p-pr */
+
+        psraw       xmm7,   8                       /* sign extended */
+        psubusb     mm4,    mm5                     /* pr-p */
+
+        movdqa      [eax],  xmm7                    /* write out left LRMod row */
+        por         mm3,    mm4                     /* abs(p-pr) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+        movq        mm4,    mm0                     /* 32+QValues */
+        psubusb     mm6,    mm3                     /* zero clamped TmpMod */
+
+        movq        mm5,    eight128c               /* 80 80 80 80 80 80 80 80 */
+        paddb       mm4,    eight64c                /* 32+QValues + 64 */
+
+        pxor        mm4,    mm5                     /* convert to a sign number */
+        pxor        mm3,    mm5                     /* convert to a sign number */
+
+        pcmpgtb     mm3,    mm4                     /* abs(p-pr) > 32+QValue+64 ? */
+        pand        mm3,    mm2                     /* use sharpen */
+
+        paddsb      mm6,    mm1                     /* clamping to high */
+        psubsb      mm6,    mm1                     /* offset back */
+
+        por         mm6,    mm3                     /* Mod value to be stored */
+        add         esi,    ecx
+
+        movq2dq     xmm0,   mm6
+        add         edi,    16
+
+        punpcklbw   xmm7,   xmm0                    /* extended to shorts (source is xmm0 filled by movq2dq, not mm0) */
+        add         eax,    16
+
+        psraw       xmm7,   8                       /* sign extended */
+        cmp         esi,    ebx
+
+        movdqa      [eax+112], xmm7                 /* write out right LRMod row: 128 bytes past the left row, eax already advanced by 16 */
+        jne         FillModLoop1
+
+        /* last UDMod */
+
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        psubusb     mm3,    mm4                     /* p-pu */
+        psubusb     mm4,    mm5                     /* pu-p */
+
+        por         mm3,    mm4                     /* abs(p-pu) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        movq        mm4,    mm0                     /* 32+QValues */
+        psubusb     mm6,    mm3                     /* zero clamped TmpMod */
+
+        movq        mm5,    eight128c               /* 80 80 80 80 80 80 80 80 */
+        paddb       mm4,    eight64c                /* 32+QValues + 64 */
+
+        pxor        mm4,    mm5                     /* convert to a sign number */
+        pxor        mm3,    mm5                     /* convert to a sign number */
+
+        pcmpgtb     mm3,    mm4                     /* abs(p-pu) > 32+QValue+64 ? */
+        pand        mm3,    mm2                     /* use sharpen */
+
+        paddsb      mm6,    mm1                     /* clamping to high */
+        psubsb      mm6,    mm1                     /* offset back */
+
+        por         mm6,    mm3                     /* Mod value to be stored */
+        movq2dq     xmm6,   mm6
+
+        punpcklbw   xmm7,   xmm6                    /* 03 xx 02 xx 01 xx 00 xx */
+
+        psraw       xmm7,   8                       /* sign extended */
+        movdqa      [edi],  xmm7                    /* write out ninth UDMod row */
+
+        mov         esi,    Src
+        mov         edi,    Des
+
+        mov         eax,    UDPointer
+        mov         ebx,    LRPointer
+
+        mov         ebp,    8                       /* row counter; no locals are referenced by name after this point */
+
+FilterLoop1:
+
+        movq        xmm0,   QWORD PTR [esi+edx]     /* xmm0 = Pixels above */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+        movdqa      xmm4,   [eax]                   /* au */
+        punpcklbw   xmm0,   xmm7                    /* extended to shorts */
+
+        movq        xmm2,   QWORD PTR [esi+ecx]     /* xmm2 = pixels below */
+        pmullw      xmm0,   xmm4                    /* pu*au */
+
+        movdqa      xmm6,   [eax+16]                /* ad (next row of UDMod) */
+        punpcklbw   xmm2,   xmm7                    /* extended to shorts */
+
+        movq        xmm1,   QWORD PTR [esi-1]       /* pixel to the left */
+        pmullw      xmm2,   xmm6                    /* ad*pd */
+
+        movdqa      xmm3,   [ebx]                   /* al */
+        punpcklbw   xmm1,   xmm7                    /* extended to shorts */
+
+        movq        xmm5,   QWORD PTR [esi+1]       /* pixel to the right */
+        pmullw      xmm1,   xmm3                    /* al * pl */
+
+        paddw       xmm4,   xmm6                    /* au+ad */
+        punpcklbw   xmm5,   xmm7                    /* extended to shorts */
+
+        movdqa      xmm6,   [ebx+128]               /* ar */
+        pmullw      xmm5,   xmm6                    /* ar * pr */
+
+        paddw       xmm0,   xmm2                    /* au*pu + ad*pd */
+        paddw       xmm4,   xmm3                    /* au+ad+al */
+
+        paddw       xmm0,   xmm1                    /* au*pu+ad*pd+al*pl */
+        paddw       xmm4,   xmm6                    /* au+ad+al+ar */
+
+        movq        xmm2,   QWORD PTR [esi]         /* p */
+        paddw       xmm0,   xmm5                    /* au*pu+ad*pd+al*pl+ar*pr */
+
+
+        /* xmm0 ---  au*pu+ad*pd+al*pl+ar*pr */
+        /* xmm4 ---  au + ad + al + ar */
+
+        movdqa      xmm1,   eight128s               /* 0080 0080 0080 0080 0080 0080 0080 0080 */
+        punpcklbw   xmm2,   xmm7                    /* extended to shorts */
+
+        psubw       xmm1,   xmm4                    /* 128-(au+ad+al+ar) */
+        pmullw      xmm2,   xmm1                    /* p*(128-(au+ad+al+ar)) */
+
+        add         esi,    ecx                     /* Src += Pitch */
+        movdqa      xmm6,   eight64s                /* 64, 64, 64, 64, 64, 64, 64, 64 */
+
+        movdqa      xmm7,   xmm6                    /* 64, 64, 64, 64, 64, 64, 64, 64 */
+        add         eax,    16                      /* UDPointer += 8 */
+
+        psllw       xmm7,   8                       /* {16384, .. } */
+        paddw       xmm0,   xmm2                    /* sum */
+
+        add         edi,    ecx                     /* Des += Pitch */
+        paddw       xmm0,   xmm6                    /* sum+B */
+
+        add         ebx,    16                      /* LPointer +=8 */
+        paddw       xmm0,   xmm7                    /* clamping: bias by 16384 ... */
+
+        psubusw     xmm0,   xmm7                    /* clamping: ... so the unsigned saturating subtract floors at 0 */
+        dec         ebp                             /* sets ZF for the jnz below; the SIMD ops in between leave flags alone */
+
+        psrlw       xmm0,   7                       /* (sum+B)>>7 */
+        packuswb    xmm0,   xmm7                    /* pack to 8 bytes */
+
+        movq        QWORD PTR [edi+edx],    xmm0    /* write to destination (edi already advanced, edx = -Pitch) */
+        jnz         FilterLoop1
+
+
+        pop         ebp
+
+        pop         ebx
+        pop         eax
+
+        pop         edx
+        pop         ecx
+
+        pop         edi
+        pop         esi
+    }
+#endif
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : DeringBlockWeak_WMT
+ *
+ *  INPUTS        : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
+ *                  const UINT8 *SrcPtr          : Pointer to input image.
+ *                  UINT8 *DstPtr                : Pointer to output image.
+ *                  const INT32 Pitch            : Image stride.
+ *                  UINT32 FragQIndex            : Q-index block encoded with.
+ *                  UINT32 *QuantScale           : Array of quantization scale factors.
+ *                               
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-rings a block: builds per-pixel up/down and left/right
+ *                  modifier tables, then writes 8 filtered rows of 8 pixels.
+ *  SPECIAL NOTES : Reads the rows/columns adjacent to the block. Uses MMX
+ *                  registers and issues no EMMS. Returns at once on _WIN32_WCE.
+ ****************************************************************************/
+void DeringBlockWeak_WMT
+( 
+    const POSTPROC_INSTANCE *pbi, 
+    const UINT8 *SrcPtr,
+    UINT8 *DstPtr,
+    const INT32 Pitch,
+    UINT32 FragQIndex,
+    UINT32 *QuantScale
+)
+{
+#if defined(_WIN32_WCE)
+	return;
+#else
+	/* Scratch tables: UDMod = 9 rows of 8 shorts; LRMod = 8 left rows followed by 8 right rows */
+	__declspec(align(16)) short UDMod[72];
+	__declspec(align(16)) short	LRMod[128];
+    
+	unsigned int PlaneLineStep = Pitch;
+	const unsigned char *Src   = SrcPtr;
+	unsigned char *Des         = DstPtr;
+    
+	short *UDPointer = UDMod;
+	short *LRPointer = LRMod;
+    
+    UINT32 QStep  = QuantScale[FragQIndex];
+	INT32 Sharpen = SharpenModifier[FragQIndex];
+	(void) pbi;
+
+	__asm 
+	{
+		push		esi
+		push		edi
+		
+		mov			esi,	Src						/* Source Pointer */
+		mov			edi,	UDPointer				/* UD modifier pointer */
+
+		push		ecx
+		push		edx
+
+		mov			ecx,	PlaneLineStep			/* Pitch Step */
+        xor         edx,    edx
+
+		push		eax
+		push		ebx
+
+		mov			eax,	QStep					/* QValue */
+		mov			ebx,	Sharpen					/* Sharpen */
+
+		movd		mm0,	eax						/* QValue */
+		movd		mm2,	ebx						/* sharpen */
+
+        push        ebp
+
+		punpcklbw	mm0,	mm0						/* 00 00 00 QQ */
+        sub         edx,    ecx                     /* Negative Pitch */
+
+		punpcklbw	mm2,	mm2						/* 00 00 00 SS */
+        pxor        mm7,    mm7                     /* clear mm7 for unpacks */
+
+		punpcklbw	mm0,	mm0						/* 00 00 qq qq */
+		mov			eax,	LRPointer				/* Left and Right Modifier */                
+
+		punpcklbw	mm2,	mm2						/* 00 00 ss ss */
+		lea         ebx,    [esi+ecx*8]             /* Source Pointer of last row */        
+
+		punpcklbw	mm0,	mm0						/* qq qq qq qq */
+		movq        mm1,    mm0;                    /* make a copy */
+		
+		punpcklbw	mm2,	mm2						/* ss ss ss ss */
+		paddb		mm1,	mm0						/* QValue * 2 */
+
+        paddb       mm1,    mm0                     /* High = 3 * Qvalue */
+        paddusb		mm1,	eight231c				/* clamping high to 24 */	
+
+		paddb       mm0,    eight32c                /* 32+QValues */
+		psubusb		mm1,	eight231c				/* Get the real value back */
+
+        movq		mm3,	eight127c				/* 7f 7f 7f 7f 7f 7f 7f 7f */
+        pandn       mm1,    mm3                     /* ClampHigh */
+
+        /* mm0,mm1,mm2,mm7 are in use  */
+        /* mm0---> QValue+32           */
+        /* mm1---> ClampHigh		   */
+		/* mm2---> Sharpen             */
+		/* mm7---> Cleared for unpack  */
+
+FillModLoop1:
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */ 
+
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        psubusb     mm3,    mm4                     /* p-pu */       
+        psubusb     mm4,    mm5                     /* pu-p */
+
+        por         mm3,    mm4                     /* abs(p-pu) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        paddusb     mm3,    mm3                     /* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clamped TmpMod */
+
+		movq		mm5,	eight128c				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64c				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a signed number */
+		pxor		mm3,	mm5						/* convert to a signed number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+
+        movq2dq     xmm0,   mm6                     
+        movq        mm4,    QWORD PTR [esi-1]       /* Pixels to the left pl */
+
+        punpcklbw	xmm7,	xmm0					/* extended to words */
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        psraw		xmm7,	8						/* sign extended */
+        psubusb     mm3,    mm4                     /* p-pl */
+
+        movdqa      [edi],  xmm7                    /* writeout UDmod*/
+        psubusb     mm4,    mm5                     /* pl-p */
+
+        por         mm3,    mm4                     /* abs(p-pl) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        paddusb     mm3,    mm3                     /* 2*abs(p-pl) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clamped TmpMod */
+
+		movq		mm5,	eight128c				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64c				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a signed number */
+		pxor		mm3,	mm5						/* convert to a signed number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pl) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+        movq        mm4,    QWORD PTR [esi+1]       /* Pixels to the right pr */
+
+   		movq2dq 	xmm0,	mm6						
+        movq        mm5,    mm3                     /* make a copy of p */
+
+        punpcklbw   xmm7,   xmm0                    /* extended to shorts */
+        psubusb     mm3,    mm4                     /* p-pr */
+
+		psraw		xmm7,	8						/* sign extended */
+        psubusb     mm4,    mm5                     /* pr-p */
+
+        movdqa      [eax],  xmm7                    /* writeout left LRmod */
+        por         mm3,    mm4                     /* abs(p-pr) */
+
+        movq        mm6,    mm0                     /* 32+QValues */
+        paddusb     mm3,    mm3                     /* 2*abs(p-pr) */
+
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clamped TmpMod */
+
+		movq		mm5,	eight128c				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64c				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a signed number */
+		pxor		mm3,	mm5						/* convert to a signed number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pr) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+        add         esi,    ecx
+        
+        movq2dq     xmm0,   mm6
+        add         edi,    16                  
+
+        punpcklbw	xmm7,	xmm0					/* extended to shorts; source was written as mm0, but PUNPCKLBW has no xmm,mm form */
+        add         eax,    16      
+
+        psraw		xmm7,	8						/* sign extended */
+        cmp         esi,    ebx
+
+        movdqa      [eax+112], xmm7                 /* writeout right LRmod (eax already advanced by 16, so this is old eax+128) */		
+        jne         FillModLoop1
+        
+        /* last UDMod: ninth row, row at esi (== ebx, 8 rows past Src) against the row above it */
+
+        movq        mm3,    QWORD PTR [esi]         /* read 8 pixels p  */
+        pxor        xmm7,   xmm7                    /* clear xmm7 */
+
+
+        movq        mm4,    QWORD PTR [esi+edx]     /* Pixels on top pu */
+        movq        mm5,    mm3                     /* make a copy of p */
+        
+        psubusb     mm3,    mm4                     /* p-pu */
+        psubusb     mm4,    mm5                     /* pu-p */
+
+        por         mm3,    mm4                     /* abs(p-pu) */
+        movq        mm6,    mm0                     /* 32+QValues */
+
+        paddusb     mm3,    mm3                     /* 2*abs(p-pu) */
+
+        movq		mm4,	mm0						/* 32+QValues */
+		psubusb		mm6,    mm3                     /* zero clamped TmpMod */
+
+		movq		mm5,	eight128c				/* 80 80 80 80 80 80 80 80 */
+		paddb		mm4,	eight64c				/* 32+QValues + 64 */
+
+		pxor		mm4,	mm5						/* convert to a signed number */
+		pxor		mm3,	mm5						/* convert to a signed number */
+
+		pcmpgtb		mm3,	mm4						/* 32+QValue- 2*abs(p-pu) <-64 ? */
+		pand		mm3,	mm2						/* use sharpen */
+
+        paddsb		mm6,    mm1						/* clamping to high */
+		psubsb		mm6,	mm1						/* offset back */
+
+		por			mm6,	mm3						/* Mod value to be stored */
+        movq2dq     xmm6,   mm6                     
+
+        punpcklbw   xmm7,	xmm6					/* 03 xx 02 xx 01 xx 00 xx */
+
+		psraw		xmm7,	8						/* sign extended */
+		movdqa      [edi],  xmm7                    /* writeout UDmod */
+
+		mov			esi,	Src
+		mov			edi,	Des
+		
+		mov			eax,	UDPointer
+		mov			ebx,	LRPointer
+
+        mov         ebp,    8                       /* row counter; NOTE(review): no C local is referenced again until ebp is popped */
+
+FilterLoop1:        
+
+        movq		xmm0,	QWORD PTR [esi+edx]		/* xmm0 = pixels above */
+		pxor		xmm7,	xmm7				    /* clear xmm7 */
+
+		movdqa		xmm4,	[eax]			        /* au */
+        punpcklbw	xmm0,	xmm7				    /* extended to shorts */
+		
+		movq		xmm2,	QWORD PTR [esi+ecx]		/* xmm2 = pixels below */
+        pmullw		xmm0,	xmm4				    /* pu*au */
+		
+		movdqa		xmm6,	[eax+16]		        /* ad */
+        punpcklbw	xmm2,	xmm7				    /* extended to shorts */
+		
+		movq		xmm1,	QWORD PTR [esi-1]		/* pixel to the left */
+        pmullw		xmm2,	xmm6				    /* ad*pd */
+        
+        movdqa      xmm3,   [ebx]                   /* al */
+        punpcklbw   xmm1,   xmm7                    /* extended to shorts */
+
+        movq        xmm5,   QWORD PTR [esi+1]       /* pixel to the right */
+        pmullw      xmm1,   xmm3                    /* al * pl */
+
+        paddw		xmm4,	xmm6				    /* au+ad */
+        punpcklbw   xmm5,   xmm7                    /* extended to shorts */
+        
+        movdqa      xmm6,   [ebx+128]               /* ar */
+        pmullw      xmm5,   xmm6                    /* ar * pr */
+        
+        paddw		xmm0,	xmm2			        /* au*pu + ad*pd */
+        paddw       xmm4,   xmm3                    /* au+ad+al */
+
+        paddw       xmm0,   xmm1                    /* au*pu+ad*pd+al*pl */
+        paddw       xmm4,   xmm6                    /* au+ad+al+ar */
+
+        movq		xmm2,	QWORD PTR [esi]			/* p */
+        paddw       xmm0,   xmm5                    /* au*pu+ad*pd+al*pl+ar*pr */
+
+		
+		/* xmm0 ---  au*pu+ad*pd+al*pl+ar*pr */
+		/* xmm4 ---	 au + ad + al + ar */
+		
+		movdqa		xmm1,	eight128s		        /* 0080 0080 0080 0080 0080 0080 0080 0080 */
+        punpcklbw	xmm2,	xmm7				    /* extended to shorts */
+
+		psubw		xmm1,	xmm4				    /* 128-(au+ad+al+ar) */		
+		pmullw		xmm2,	xmm1				    /* p*(128-(au+ad+al+ar)) */
+        
+		add			esi,	ecx				        /* Src += Pitch */
+		movdqa		xmm6,	eight64s			    /* 64, 64, 64, 64, 64, 64, 64, 64 */
+
+		movdqa      xmm7,   xmm6                    /* 64, 64, 64, 64, 64, 64, 64, 64 */
+        add			eax,	16				        /* UDPointer += 8 */
+
+        psllw		xmm7,	8				        /* {16384, .. } */
+        paddw		xmm0,	xmm2				    /* sum */
+
+        add			edi,	ecx				        /* Des += Pitch */
+        paddw		xmm0,	xmm6				    /* sum+B */
+
+        add         ebx,    16                      /* LPointer +=8 */
+		paddw		xmm0,	xmm7				    /* clamping */
+
+		psubusw		xmm0,	xmm7				    /* clamping */
+		dec         ebp
+
+        psrlw		xmm0,	7				        /* (sum+B)>>7 */
+		packuswb	xmm0,	xmm7				    /* pack to 8 bytes */		
+
+        movq		QWORD PTR [edi+edx],	xmm0	/* write to destination (edi was already advanced, edx = -Pitch) */
+        jnz         FilterLoop1
+        
+
+        pop         ebp
+
+        pop         ebx
+        pop         eax
+
+        pop         edx
+        pop         ecx
+
+        pop         edi
+        pop         esi
+    }
+#endif
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c
new file mode 100644
index 00000000..633174a8
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/doptsystemdependant.c
@@ -0,0 +1,211 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/ 
+#include "postp.h"
+ 
+/****************************************************************************
+*  Imports
+*****************************************************************************/
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+// c imports
+extern void FilteringVert_12_C(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void FilteringHoriz_12_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringVert_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringHoriz_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
+extern void DeblockLoopFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+// mmx imports
+extern void FilteringVert_12_MMX(UINT32 QValue,UINT8 * Src, INT32 Pitch);
+extern void FilteringHoriz_12_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringVert_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void FilteringHoriz_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
+extern void HorizontalLine_1_2_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_3_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void HorizontalLine_4_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
+extern void VerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void VerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void LastVerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
+extern void FilterHoriz_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterHoriz_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern void FilterVert_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
+extern INT32 *SetupBoundingValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern INT32 *SetupDeblockValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
+extern void DeringBlockWeak_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
+extern void DeblockLoopFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBandNewFilter_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void FillLoopFilterLimitValues_MMX(void);
+extern INT16 *LoopFilterLimitValuesV2_MMX;             
+extern void PlaneAddNoise_mmx( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+
+// wmt imports
+extern void DeblockLoopFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeblockNonFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
+extern void DeringBlockWeak_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void DeringBlockStrong_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
+extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void MmxFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void WmtFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
+extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void ClampLevels_wmt( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void ClampLevels_mmx( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
+extern void PlaneAddNoise_wmt( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : PostProcMachineSpecificConfig
+ *
+ *  INPUTS        : UINT32 Version : Codec version number (not referenced in the body).
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reads the processor flags and points every post-processor
+ *                  function pointer at its WMT, MMX or plain 'C' routine.
+ *
+ *  SPECIAL NOTES : The WMT configuration uses the MMX routine for every
+ *                  pointer that has no WMT-specific assignment.
+ *
+ ****************************************************************************/
+void PostProcMachineSpecificConfig ( UINT32 Version )
+{
+	INT32 HasMmx;
+	INT32 HasXmm;
+	INT32 HasWmt;
+
+	GetProcessorFlags( &HasMmx, &HasXmm, &HasWmt );
+
+	if ( !HasWmt && !HasMmx )
+	{
+		// Neither WMT nor MMX reported: use the original 'C' versions throughout.
+		// Loop filter and its value-array setup.
+		FilterHoriz                     = FilterHoriz_Generic;
+		FilterVert                      = FilterVert_Generic;
+		SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
+		SetupDeblockValueArray          = SetupDeblockValueArray_Generic;
+
+		// Dering block routines.
+		DeringBlockWeak                 = DeringBlockWeak_C;
+		DeringBlockStrong               = DeringBlockStrong_C;
+
+		// Deblock band routines.
+		DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
+		DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
+		DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;
+
+		// Simple filter routines.
+		FilterHoriz_Simple              = FilterHoriz_Simple_C;
+		FilterVert_Simple               = FilterVert_Simple_C;
+
+		// 1:2, 3:5 and 4:5 scaling routines.
+		HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
+		VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
+		LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;
+		HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
+		VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
+		LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;
+		HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
+		VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
+		LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;
+
+		// 8 and 12 variants of the filtering routines.
+		FilteringHoriz_8                = FilteringHoriz_8_C;
+		FilteringVert_8                 = FilteringVert_8_C;
+		FilteringHoriz_12               = FilteringHoriz_12_C;
+		FilteringVert_12                = FilteringVert_12_C;
+
+		// De-interlace, level clamping and noise routines.
+		FastDeInterlace                 = CFastDeInterlace;
+		ClampLevels                     = ClampLevels_C;
+		PlaneAddNoise                   = PlaneAddNoise_C;
+		return;
+	}
+
+	// MMX or Willamette: both configurations start with the same call and share the MMX routines below.
+	FillLoopFilterLimitValues_MMX();
+	// Loop filter and its value-array setup.
+	FilterHoriz                     = FilterHoriz_MMX;
+	FilterVert                      = FilterVert_MMX;
+	SetupBoundingValueArray         = SetupBoundingValueArray_ForMMX;
+	SetupDeblockValueArray          = SetupDeblockValueArray_ForMMX;
+
+	// New-filter deblock and simple filter routines.
+	DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_MMX;
+	FilterHoriz_Simple              = FilterHoriz_Simple_MMX;
+	FilterVert_Simple               = FilterVert_Simple_MMX;
+
+	// 1:2, 3:5 and 4:5 scaling routines.
+	HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_MMX;
+	HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_MMX;
+	HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_MMX;
+	VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_MMX;
+	LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_MMX;
+	VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_MMX;
+	LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_MMX;
+	VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_MMX;
+	LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_MMX;
+
+	// 8 and 12 variants of the filtering routines.
+	FilteringHoriz_8                = FilteringHoriz_8_MMX;
+	FilteringVert_8                 = FilteringVert_8_MMX;
+	FilteringHoriz_12               = FilteringHoriz_12_MMX;
+	FilteringVert_12                = FilteringVert_12_MMX;
+
+	// The remaining seven pointers are the only ones that differ between the two sets.
+	if ( HasWmt )
+	{
+		DeringBlockWeak                 = DeringBlockWeak_WMT;
+		DeringBlockStrong               = DeringBlockStrong_WMT;
+		DeblockLoopFilteredBand         = DeblockLoopFilteredBand_WMT;
+		DeblockNonFilteredBand          = DeblockNonFilteredBand_WMT;
+		FastDeInterlace                 = WmtFastDeInterlace;
+		ClampLevels                     = ClampLevels_wmt;
+		PlaneAddNoise                   = PlaneAddNoise_wmt;
+	}
+	else
+	{
+		DeringBlockWeak                 = DeringBlockWeak_MMX;
+		DeringBlockStrong               = DeringBlockStrong_MMX;
+		DeblockLoopFilteredBand         = DeblockLoopFilteredBand_MMX;
+		DeblockNonFilteredBand          = DeblockNonFilteredBand_MMX;
+		FastDeInterlace                 = MmxFastDeInterlace;
+		ClampLevels                     = ClampLevels_mmx;
+		PlaneAddNoise                   = PlaneAddNoise_mmx;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c
new file mode 100644
index 00000000..68f779a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/loopf_asm.c
@@ -0,0 +1,540 @@
+/****************************************************************************
+*
+*   Module Title :     loopf_asm.c
+*
+*   Description  :     Optimized version of the loop filter.
+*
+****************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <memory.h>
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#pragma warning (disable:4799)
+#pragma warning (disable:4731)
+
+#define LIMIT_OFFSET        0
+#define FOURONES_OFFSET     8
+#define LFABS_OFFSET        16
+#define TRANS_OFFSET        24
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SetupBoundingValueArray_ForMMX
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance.
+ *                  INT32 FLimit           : Filter limiting value.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : INT32*: Pointer to bounding value array.
+ *
+ *  FUNCTION      : Sets up bounding value array used in filtering operations.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+INT32 *SetupBoundingValueArray_ForMMX ( POSTPROC_INSTANCE *pbi, INT32 FLimit )
+{
+    /* FiltBoundingValue is otherwise only used by the generic filter, so its  */
+    /* storage is recycled here: limit at byte 0, word ones at 8, word fours   */
+    /* at 16, then 32 bytes of transpose workspace (see the *_OFFSET macros).  */
+    UINT32 RawAddress = (UINT32)(&pbi->FiltBoundingValue[256]);
+    INT32 *MmxConstants = (INT32 *)(RawAddress & 0xffffffe0);   /* round down to 32 bytes */
+
+    /* Each constant is written to two consecutive INT32s, i.e. one MMX qword. */
+    MmxConstants[1] = FLimit * 0x00010001;
+    MmxConstants[0] = MmxConstants[1];
+    MmxConstants[3] = 0x00010001;
+    MmxConstants[2] = MmxConstants[3];
+    MmxConstants[5] = 0x00040004;
+    MmxConstants[4] = MmxConstants[5];
+    return MmxConstants;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterHoriz_MMX
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi  : Pointer to post-processing instance.
+ *                  UINT8 *PixelPtr         : Pointer to input frame.
+ *                  INT32 LineLength        : Length of line in input frame.
+ *                  INT32 *BoundingValuePtr : Pointer to bounding value array.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the vertical edge (i.e. horizontally).
+ *
+ *  SPECIAL NOTES : This version attempts to fix the DC_misalign stalls.
+ *
+ ****************************************************************************/
+void FilterHoriz_MMX
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+	(void) pbi;                                 /* not referenced by this implementation */
+    /* MMX left edge filter: for 8 rows, filters the edge between PixelPtr[1] and PixelPtr[2], */
+    /* using PixelPtr[0..3]; the dword loads touch PixelPtr[-2..5] of every row.               */
+    __asm 
+    {
+        mov         eax,[BoundingValuePtr]      //constants + scratch, addressed via the *_OFFSET macros
+        mov         edx,[LineLength]            //stride
+
+        mov         ebx,[PixelPtr]
+        mov         ecx,[LineLength]            //stride
+
+        movd        mm0,[ebx + -2]              //xx xx xx xx 01 00 xx xx
+    ;-
+
+        movd        mm4,[ebx + 2]               //xx xx xx xx xx xx 03 02
+        psrld       mm0,16                      //xx xx xx xx 00 00 01 00
+
+        movd        mm1,[ebx + ecx + -2]        //xx xx xx xx 11 10 xx xx
+        punpcklwd   mm0,mm4                     //xx xx xx xx 03 02 01 00
+
+        movd        mm4,[ebx + ecx + 2]         //xx xx xx xx xx xx 13 12
+        psrld       mm1,16                      //xx xx xx xx 00 00 11 10
+
+        punpcklwd   mm1,mm4                     //xx xx xx xx 13 12 11 10
+        lea         edx,[edx + edx*2]           //stride * 3
+
+        movd        mm2,[ebx + ecx*2 + -2]      //xx xx xx xx 21 20 xx xx
+        punpcklbw   mm0,mm1                     //13 03 12 02 11 01 10 00
+
+        movd        mm4,[ebx + ecx*2 + 2]       //xx xx xx xx xx xx 23 22
+        psrld       mm2,16                      //xx xx xx xx 00 00 21 20
+
+        movd        mm1,[ebx + edx + -2]        //xx xx xx xx 31 30 xx xx
+        punpcklwd   mm2,mm4                     //xx xx xx xx 23 22 21 20
+
+        movd        mm4,[ebx + edx + 2]         //xx xx xx xx xx xx 33 32
+        psrld       mm1,16                      //xx xx xx xx 00 00 31 30
+
+        punpcklwd   mm1,mm4                     //xx xx xx xx 33 32 31 30
+        pxor        mm4,mm4
+
+        punpcklbw   mm2,mm1                     //33 23 32 22 31 21 30 20
+        movq        mm1,mm0
+
+        punpcklwd   mm0,mm2                     //31 21 11 01 30 20 10 00
+        lea         ebx,[ebx + ecx*4]           //base + (stride * 4)
+
+        punpckhwd   mm1,mm2                     //33 23 13 03 32 22 12 02
+        movq        mm6,mm0                     //xx xx xx xx 30 20 10 00
+
+        movq        [eax + TRANS_OFFSET + 0],mm0    //scratch: columns 0 and 1 of rows 0-3
+        movq        mm2,mm1
+
+        movq        [eax + TRANS_OFFSET + 8],mm1    //scratch: columns 2 and 3 of rows 0-3
+        psrlq       mm0,32                      //xx xx xx xx 31 21 11 01
+
+;-----------
+        movd        mm7,[ebx + -2]              //xx xx xx xx 41 40 xx xx
+        punpcklbw   mm1,mm4                     //convert to words
+
+        movd        mm4,[ebx + 2]               //xx xx xx xx xx xx 43 42
+        psrld       mm7,16                      //xx xx xx xx 00 00 41 40
+
+        movd        mm5,[ebx + ecx + -2]        //xx xx xx xx 51 50 xx xx
+        punpcklwd   mm7,mm4                     //xx xx xx xx 43 42 41 40
+
+        movd        mm4,[ebx + ecx + 2]         //xx xx xx xx xx xx 53 52
+        psrld       mm5,16
+
+        punpcklwd   mm5,mm4
+        pxor        mm4,mm4
+
+        punpcklbw   mm0,mm4
+;-
+
+        psrlq       mm2,32                      //xx xx xx xx 33 23 13 03
+        psubw       mm1,mm0                     //x = p[0] - p[ms]
+
+        punpcklbw   mm7,mm5                     //53 43 52 42 51 41 50 40
+        movq        mm3,mm1
+;-------------------
+        punpcklbw   mm6,mm4
+        paddw       mm3,mm1
+
+        punpcklbw   mm2,mm4
+        paddw       mm1,mm3
+
+        paddw       mm1,[eax + LFABS_OFFSET]    //x += LoopFilterAdjustBeforeShift
+        psubw       mm6,mm2
+
+        movd        mm2,[ebx + ecx*2 + -2]      //xx xx xx xx 61 60 xx xx
+        paddw       mm6,mm1
+
+        movd        mm4,[ebx + ecx*2 + 2]       //xx xx xx xx xx xx 63 62
+        psrld       mm2,16
+
+        movd        mm5,[ebx + edx + -2]        //xx xx xx xx 71 70 xx xx
+        punpcklwd   mm2,mm4                     //xx xx xx xx 63 62 61 60
+
+        movd        mm4,[ebx + edx + 2]         //xx xx xx xx xx xx 73 72
+        psrld       mm5,16                      //xx xx xx xx 00 00 71 70
+
+        mov         ebx,[PixelPtr]              //restore PixelPtr
+        punpcklwd   mm5,mm4                     //xx xx xx xx 73 72 71 70
+
+        psraw       mm6,3                       //values to be clipped
+        pxor        mm4,mm4
+
+        punpcklbw   mm2,mm5                     //73 63 72 62 71 61 70 60
+        movq        mm5,mm7                     //53 43 52 42 51 41 50 40
+
+        movq        mm1,mm6
+        punpckhwd   mm5,mm2                     //73 63 53 43 72 62 52 42
+
+
+        movq        [eax + TRANS_OFFSET + 24],mm5   //save for later
+        punpcklwd   mm7,mm2                     //71 61 51 41 70 60 50 40
+
+        movq        [eax + TRANS_OFFSET + 16],mm7   //save for later
+        psraw       mm6,15
+
+        movq        mm2,[eax + LIMIT_OFFSET]        //get the limit value
+        movq        mm0,mm7                         //xx xx xx xx 70 60 50 40
+
+        psrlq       mm7,32                          //xx xx xx xx 71 61 51 41
+        pxor        mm1,mm6
+
+        psubsw      mm1,mm6                         //abs(i)
+        punpcklbw   mm5,mm4
+
+        por         mm6,[eax + FOURONES_OFFSET]     //now have -1 or 1 
+        movq        mm3,mm2
+
+        punpcklbw   mm7,mm4
+        psubw       mm3,mm1                         //limit - abs(i)
+
+        movq        mm4,mm3
+        psraw       mm3,15
+
+        push        ebp                             //ebp is used as a scratch register below; restored by the pop
+    ;-
+
+        psubw       mm5,mm7                         //x = p[0] - p[ms]
+        pxor        mm4,mm3
+
+        psubsw      mm4,mm3                         //abs(limit - abs(i))
+        pxor        mm3,mm3
+
+        movd        mm1,[eax + TRANS_OFFSET + 28]  //xx xx xx xx 73 63 53 43 (dword load: only the low 4 bytes are unpacked, and a qword here would run past the scratch area)
+        psubusw     mm2,mm4                     //limit - abs(limit - abs(i))
+
+        punpcklbw   mm0,mm3
+        movq        mm7,mm5
+
+        paddw       mm7,mm5
+        pmullw      mm2,mm6                     //new y -- wait 3 cycles
+
+        punpcklbw   mm1,mm3
+        paddw       mm5,mm7
+
+        paddw       mm5,[eax + LFABS_OFFSET]             //x += LoopFilterAdjustBeforeShift
+        psubw       mm0,mm1
+
+        paddw       mm0,mm5
+        pxor        mm6,mm6     
+
+        movd        mm7,[eax + TRANS_OFFSET + 8]  //xx xx xx xx 32 22 12 02
+        psraw       mm0,3                       //values to be clipped
+
+        movd        mm3,[eax + TRANS_OFFSET + 4]  //xx xx xx xx 31 21 11 01
+        punpcklbw   mm7,mm6
+
+        psubw       mm7,mm2                     //p[0] - y
+        punpcklbw   mm3,mm6
+
+        paddw       mm3,mm2                     //p[ms] + y
+        packuswb    mm7,mm7                     //clamp[ p[0] - y]
+
+        packuswb    mm3,mm3                     //clamp[ p[ms] + y]
+        movq        mm1,mm0
+
+        movq        mm2,[eax + LIMIT_OFFSET]                 //get the limit value
+        psraw       mm0,15
+
+        //values to write out
+        punpcklbw   mm3,mm7                     //32 31 22 21 12 11 02 01                    
+        movq        mm7,mm0                     //save sign
+
+        movd        ebp,mm3                     //12 11 02 01
+        pxor        mm1,mm0
+
+        //xor bp,bp
+
+        mov         WORD PTR[ebx + 1],bp                //02 01
+        psubsw      mm1,mm0                     //abs(i)
+
+        shr         ebp,16
+        movq        mm5,mm2
+
+        mov         WORD PTR[ebx + ecx + 1],bp
+        psrlq       mm3,32                      //xx xx xx xx 32 31 22 21
+
+        por         mm7,[eax + FOURONES_OFFSET]                //now have -1 or 1 
+        psubw       mm5,mm1                     //limit - abs(i)
+
+        movd        ebp,mm3                     //32 31 22 21
+        movq        mm4,mm5
+
+        mov         [ebx + ecx*2 + 1],bp
+        psraw       mm5,15
+
+        shr         ebp,16
+        pxor        mm4,mm5
+
+        mov         [ebx + edx + 1],bp
+        psubsw      mm4,mm5                     //abs(limit - abs(i))
+
+        movd        mm5,[eax + TRANS_OFFSET + 24]  //xx xx xx xx 72 62 52 42
+        psubusw     mm2,mm4                     //limit - abs(limit - abs(i))
+
+        pmullw      mm2,mm7                     //new y
+        pxor        mm6,mm6
+
+        movd        mm3,[eax + TRANS_OFFSET + 20]  //xx xx xx xx 71 61 51 41
+        punpcklbw   mm5,mm6
+
+        lea         ebx,[ebx + ecx*4]
+        punpcklbw   mm3,mm6
+
+        paddw       mm3,mm2                     //p[ms] + y
+        psubw       mm5,mm2                     //p[0] - y
+
+        packuswb    mm3,mm3                     //clamp[ p[ms] + y]
+        pop         ebp                         //restore ebp saved by the push above
+    ;-
+
+//
+//NOTE: optimize the following somehow
+//
+        packuswb    mm5,mm5                     //clamp[ p[0] - y]
+    ;-
+        punpcklbw   mm3,mm5                     //72 71 62 61 52 51 42 41
+    ;-
+
+        movd        eax,mm3                     //52 51 42 41
+        psrlq       mm3,32                      //xx xx xx xx 72 71 62 61
+
+        mov         [ebx + 1],ax
+    ;-
+        shr         eax,16
+    ;-
+
+        mov         [ebx + ecx + 1],ax
+    ;-
+
+
+        movd        eax,mm3
+    ;-
+
+        mov         [ebx + ecx*2 + 1],ax
+    ;-
+
+        shr         eax,16
+    ;-
+
+        mov         [ebx + edx + 1],ax
+    ;-
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterVert_MMX
+ *
+ *  INPUTS        : POSTPROC_INSTANCE *pbi  : Pointer to post-processing instance.
+ *                  UINT8 *PixelPtr         : Pointer to input frame.
+ *                  INT32 LineLength        : Length of line in input frame.
+ *                  INT32 *BoundingValuePtr : Pointer to bounding value array.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a loop filter to the horizontal edge (i.e. vertically).
+ *
+ *  SPECIAL NOTES : This version attempts to fix the DC_misalign stalls.
+ *
+ ****************************************************************************/
+void FilterVert_MMX
+(
+    POSTPROC_INSTANCE *pbi, 
+    UINT8 *PixelPtr, 
+    INT32 LineLength, 
+    INT32 *BoundingValuePtr
+)
+{
+    INT32 ms = -LineLength;                     /* offset to the row above PixelPtr */
+	(void) pbi;                                 /* not referenced by this implementation */
+    /* Filters 8 pixels across the edge between row p[ms] and row p[0], as two groups of 4; */
+    /* A somewhat optimized MMX version of the top edge filter. */
+    __asm 
+    {
+        mov         eax,[BoundingValuePtr]      //constants, addressed via the *_OFFSET macros
+    ;-
+
+        mov         ebx,[PixelPtr]
+        mov         ecx,[ms]                    //negative stride
+
+        movd        mm1,[ebx + 0]               //p[0]   
+        pxor        mm4,mm4
+
+        movd        mm0,[ebx + ecx]             //get row above -- p[ms]
+        punpcklbw   mm1,mm4                     //convert to words
+
+        mov         edx,[LineLength]
+        punpcklbw   mm0,mm4
+
+        movd        mm6,[ebx + ecx*2]           //p[ms2]
+        psubw       mm1,mm0                     //x = p[0] - p[ms]
+
+        movq        mm2,[ebx + edx]             //p[stride] (qword load; only the low 4 bytes are unpacked below)
+        movq        mm3,mm1
+
+        punpcklbw   mm6,mm4
+        paddw       mm3,mm1
+
+        punpcklbw   mm2,mm4
+        paddw       mm1,mm3
+
+        paddw       mm1,[eax + LFABS_OFFSET]             //x += LoopFilterAdjustBeforeShift
+        psubw       mm6,mm2
+
+        movq        mm2,[eax + LIMIT_OFFSET]                 //get the limit value
+        paddw       mm6,mm1
+
+        movd        mm5,[ebx + 4]               //p[0], pixels 4-7
+        psraw       mm6,3                       //values to be clipped
+
+        movq        mm1,mm6
+        psraw       mm6,15
+
+        movd        mm7,[ebx + ecx + 4]         //p[ms]
+        pxor        mm1,mm6
+
+        psubsw      mm1,mm6                     //abs(i)
+        pxor        mm0,mm0
+
+        punpcklbw   mm5,mm0
+        movq        mm3,mm2
+
+        por         mm6,[eax + FOURONES_OFFSET]                //now have -1 or 1 
+        punpcklbw   mm7,mm0
+
+        psubw       mm3,mm1                     //limit - abs(i)
+        psubw       mm5,mm7                     //x = p[0] - p[ms]
+
+        movq        mm4,mm3
+        psraw       mm3,15
+
+        movd        mm0,[ebx + ecx*2 + 4]       //p[ms2]
+        pxor        mm4,mm3
+
+        movd        mm1,[ebx + edx +4]          //p[stride]
+        psubsw      mm4,mm3                     //abs(limit - abs(i))
+
+        pxor        mm3,mm3
+        psubusw     mm2,mm4                     //limit - abs(limit - abs(i))
+
+        punpcklbw   mm0,mm3
+        movq        mm7,mm5
+
+        paddw       mm7,mm5
+        pmullw      mm2,mm6                     //new y -- wait 3 cycles
+
+        punpcklbw   mm1,mm3
+        paddw       mm5,mm7
+
+        paddw       mm5,[eax + LFABS_OFFSET]             //x += LoopFilterAdjustBeforeShift
+        psubw       mm0,mm1
+
+        paddw       mm0,mm5
+        pxor        mm6,mm6     
+
+        movd        mm7,[ebx + 0]               //p[0]   
+        psraw       mm0,3                       //values to be clipped
+
+        movd        mm3,[ebx + ecx]             //get row above -- p[ms]
+        punpcklbw   mm7,mm6
+
+        psubw       mm7,mm2                     //p[0] - y
+        punpcklbw   mm3,mm6
+
+        paddw       mm3,mm2                     //p[ms] + y
+        packuswb    mm7,mm7                     //clamp[ p[0] - y]
+
+        packuswb    mm3,mm3                     //clamp[ p[ms] + y]
+        movq        mm1,mm0
+
+        movd        [ebx + 0],mm7               //write p[0]
+        psraw       mm0,15
+
+        movq        mm7,mm0                     //save sign
+        pxor        mm1,mm0
+
+;
+;
+        movq        mm2,[eax + LIMIT_OFFSET]                 //get the limit value
+;
+;
+
+        psubsw      mm1,mm0                     //abs(i)
+        movq        mm5,mm2
+
+        por         mm7,[eax + FOURONES_OFFSET]                //now have -1 or 1 
+        psubw       mm5,mm1                     //limit - abs(i)
+
+        movq        mm4,mm5
+        psraw       mm5,15
+
+        movd        [ebx + ecx],mm3             //write p[ms]
+        pxor        mm4,mm5
+
+        psubsw      mm4,mm5                     //abs(limit - abs(i))
+        pxor        mm6,mm6
+
+        movd        mm5,[ebx + 4]               //p[0], pixels 4-7
+        psubusw     mm2,mm4                     //limit - abs(limit - abs(i))
+
+        movd        mm3,[ebx + ecx + 4]         //p[ms]
+        pmullw      mm2,mm7                     //new y
+
+        punpcklbw   mm5,mm6
+    ;-
+
+        punpcklbw   mm3,mm6
+    ;-
+
+        paddw       mm3,mm2                     //p[ms] + y
+        psubw       mm5,mm2                     //p[0] - y
+
+        packuswb    mm3,mm3                     //clamp[ p[ms] + y]
+    ;-
+
+        packuswb    mm5,mm5                     //clamp[ p[0] - y]
+    ;-
+
+        movd        [ebx + ecx + 4],mm3         //write p[ms]
+;
+
+        movd        [ebx + 4],mm5               //write p[0]
+    }
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c
new file mode 100644
index 00000000..f33df64a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/newlooptest_asm.c
@@ -0,0 +1,1123 @@
+/****************************************************************************
+ *
+ *   Module Title :     newlooptest_asm.c 
+ *
+ *   Description  :     Codec specific functions
+ *
+ ***************************************************************************/ 
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include <math.h>
+#include "postp.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
+
+/****************************************************************************
+*  Imports
+****************************************************************************/ 
+extern UINT32 *LoopFilterLimitValuesV2;             
+
+/****************************************************************************
+*  Exports
+****************************************************************************/ 
+INT16 LoopFilterLimitValuesV2_MMX[64*4];             
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FillLoopFilterLimitValues_MMX
+ *
+ *  INPUTS        : None.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Sets-up array of limit values for use in loop-filter.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/                       
+void FillLoopFilterLimitValues_MMX ( void )
+{
+    /* Copy each of the 64 entries of LoopFilterLimitValuesV2 into four      */
+    /* adjacent INT16 slots of LoopFilterLimitValuesV2_MMX (entry i -> 4i..). */
+    INT16 *Lane = LoopFilterLimitValuesV2_MMX;
+    int QIndex;
+
+    for ( QIndex = 0; QIndex < 64; QIndex++, Lane += 4 )
+    {
+        Lane[0] = Lane[1] = Lane[2] = Lane[3] = LoopFilterLimitValuesV2[QIndex];
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringHoriz_8_MMX
+ *
+ *  INPUTS        : UINT32 QIndex : Quantization index.
+ *                  UINT8 *Src    : Pointer to source block.
+ *                  INT32 Pitch   : Pitch of input image.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the vertical block edge inside a prediction
+ *                  block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/                       
+void FilteringHoriz_8_MMX ( UINT32 QIndex, UINT8 *Src, INT32 Pitch ) 
+{    
+    INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];     /* four INT16 limit slots for QIndex */
+    
+    __declspec(align(16)) const short fourFours[] = {4, 4, 4, 4};  /* rounding term added before the >>3 */
+    __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1};  /* OR-ed with the sign mask to give +1 / -1 */
+    __declspec(align(16)) unsigned char Temp[32];                  /* transposed pixel columns, four qwords */
+
+    __asm 
+    {
+        mov         eax,    FLimitPtr
+        mov         edx,    Pitch
+        
+        mov         esi,    Src
+        lea         edi,    Temp
+
+        mov         ecx,    edx                     //stride
+        movd        mm0,    [esi + -4]              //xx xx xx xx 01 00 xx xx
+
+        movd        mm4,    [esi]                   //xx xx xx xx xx xx 03 02
+        psrld       mm0,    16                      //xx xx xx xx 00 00 01 00
+
+        movd        mm1,    [esi + ecx + -4]        //xx xx xx xx 11 10 xx xx
+        punpcklwd   mm0,    mm4                     //xx xx xx xx 03 02 01 00
+
+        movd        mm4,    [esi + ecx]             //xx xx xx xx xx xx 13 12
+        psrld       mm1,    16                      //xx xx xx xx 00 00 11 10
+
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 13 12 11 10
+        lea         edx,    [edx + edx*2]           //stride * 3
+
+        movd        mm2,    [esi + ecx*2 + -4]      //xx xx xx xx 21 20 xx xx
+        punpcklbw   mm0,    mm1                     //13 03 12 02 11 01 10 00
+
+        movd        mm4,    [esi + ecx*2]           //xx xx xx xx xx xx 23 22
+        psrld       mm2,    16                      //xx xx xx xx 00 00 21 20
+
+        movd        mm1,    [esi + edx + -4]        //xx xx xx xx 31 30 xx xx
+        punpcklwd   mm2,    mm4                     //xx xx xx xx 23 22 21 20
+
+        movd        mm4,    [esi + edx]             //xx xx xx xx xx xx 33 32
+        psrld       mm1,    16                      //xx xx xx xx 00 00 31 30
+
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 33 32 31 30
+        pxor        mm4,    mm4                     // clear mm4
+
+        punpcklbw   mm2,    mm1                     //33 23 32 22 31 21 30 20
+        movq        mm1,    mm0                     //13 03 12 02 11 01 10 00
+
+        punpcklwd   mm0,    mm2                     //31 21 11 01 30 20 10 00
+        lea         esi,    [esi + ecx*4]           //base + (stride * 4)
+
+        punpckhwd   mm1,    mm2                     //33 23 13 03 32 22 12 02
+        movq        mm6,    mm0                     //xx xx xx xx 30 20 10 00
+
+        movq        [edi],  mm0                     // save to memory
+        movq        mm2,    mm1                     // make a copy 
+
+        movq        [edi+8],  mm1                   // save to memory
+        psrlq       mm0,    32                      //xx xx xx xx 31 21 11 01
+
+        movd        mm7,    [esi + -4]              //xx xx xx xx 41 40 xx xx
+        punpcklbw   mm1,    mm4                     //xx 32 xx 22 xx 12 xx 02
+
+        movd        mm4,    [esi]                   //xx xx xx xx xx xx 43 42
+        psrld       mm7,    16                      //xx xx xx xx 00 00 41 40
+
+        movd        mm5,    [esi + ecx + -4]        //xx xx xx xx 51 50 xx xx
+        punpcklwd   mm7,    mm4                     //xx xx xx xx 43 42 41 40
+
+        movd        mm4,    [esi + ecx]             //xx xx xx xx xx xx 53 52
+        psrld       mm5,    16                      //xx xx xx xx xx xx 51 50
+
+        punpcklwd   mm5,    mm4                     //xx xx xx xx 53 52 51 50
+        pxor        mm4,    mm4                     // clear mm4
+
+        punpcklbw   mm0,    mm4                     //xx 31 xx 21 xx 11 xx 01
+
+        psrlq       mm2,    32                      //xx xx xx xx 33 23 13 03
+        psubw       mm1,    mm0                     //x = p[0] - p[ms]
+
+        punpcklbw   mm7,    mm5                     //53 43 52 42 51 41 50 40
+        movq        mm3,    mm1                     // make a copy of x
+
+        punpcklbw   mm6,    mm4                     //xx 30 xx 20 xx 10 xx 00    
+        paddw       mm3,    mm1                     //x = 2*(p[0] - p[ms])
+
+        punpcklbw   mm2,    mm4                     //xx 33 xx 23 xx 13 xx 03
+        paddw       mm1,    mm3                     //mm1 = 3*(p[0] - p[-1])
+
+        paddw       mm1,    fourFours               //mm1 += LoopFilterAdjustBeforeShift
+        psubw       mm6,    mm2                     //mm6 = (p[-2]-p[1])
+
+        movd        mm2,    [esi + ecx*2 + -4]      //xx xx xx xx 61 60 xx xx
+        paddw       mm6,    mm1                     //mm6 = 3*(p[0] - p[-1]) +(p[-2]-p[1]) + 4
+
+        movd        mm4,    [esi + ecx*2]           //xx xx xx xx xx xx 63 62
+        psrld       mm2,    16                      //xx xx xx xx xx xx 61 60
+
+        movd        mm5,    [esi + edx + -4]        //xx xx xx xx 71 70 xx xx
+        punpcklwd   mm2,    mm4                     //xx xx xx xx 63 62 61 60
+
+        movd        mm4,    [esi + edx]             //xx xx xx xx xx xx 73 72
+        psrld       mm5,    16                      //xx xx xx xx 00 00 71 70
+
+        mov         esi,    Src                     //restore PixelPtr
+        punpcklwd   mm5,    mm4                     //xx xx xx xx 73 72 71 70
+
+        psraw       mm6,    3                       //values to be clipped
+        pxor        mm4,    mm4                     // clear mm4
+
+        punpcklbw   mm2,    mm5                     //73 63 72 62 71 61 70 60
+        movq        mm5,    mm7                     //53 43 52 42 51 41 50 40
+
+        movq        mm1,    mm6                     // make a copy of results
+        punpckhwd   mm5,    mm2                     //73 63 53 43 72 62 52 42
+
+
+        movq        [edi+24],  mm5                  //save for later
+        punpcklwd   mm7,    mm2                     //71 61 51 41 70 60 50 40
+
+        movq        [edi+16],  mm7                  //save for later
+        psraw       mm6,    15                      // FFFF or 0000 
+
+        movq        mm2,    [eax]                   //get the limit value
+        movq        mm0,    mm7                     //xx xx xx xx 70 60 50 40
+
+        psrlq       mm7,    32                      //xx xx xx xx 71 61 51 41
+        pxor        mm1,    mm6
+
+        psubsw      mm1,    mm6                     //abs(i)
+        punpcklbw   mm5,    mm4
+
+        por         mm6,    fourOnes                //now have -1 or 1 
+        movq        mm3,    mm2
+
+        punpcklbw   mm7,    mm4
+        psubw       mm3,    mm1                     //limit - abs(i)
+
+        movq        mm4,    mm3
+        psraw       mm3,    15
+
+        psubw       mm5,    mm7                     //x = p[0] - p[ms]
+        pxor        mm4,    mm3
+
+        psubsw      mm4,    mm3                     //abs(limit - abs(i))
+        pxor        mm3,    mm3
+
+        movd        mm1,    [edi + 28]              //xx xx xx xx 73 63 53 43
+        psubusw     mm2,    mm4                     //limit - abs(limit - abs(i))
+
+        punpcklbw   mm0,    mm3
+        movq        mm7,    mm5
+
+        paddw       mm7,    mm5
+        pmullw      mm2,    mm6                     //new y -- wait 3 cycles
+
+        punpcklbw   mm1,    mm3
+        paddw       mm5,    mm7
+
+        paddw       mm5,    fourFours               //x += LoopFilterAdjustBeforeShift
+        psubw       mm0,    mm1
+
+        paddw       mm0,    mm5
+        pxor        mm6,    mm6     
+
+        movd        mm7,    [edi + 8]               //xx xx xx xx 32 22 12 02
+        psraw       mm0,    3                       //values to be clipped
+
+        movd        mm3,    [edi + 4]               //xx xx xx xx 31 21 11 01
+        punpcklbw   mm7,    mm6
+
+        psubw       mm7,    mm2                     //p[0] - y
+        punpcklbw   mm3,    mm6
+
+        paddw       mm3,    mm2                     //p[ms] + y
+        packuswb    mm7,    mm7                     //clamp[ p[0] - y]
+
+        packuswb    mm3,    mm3                     //clamp[ p[ms] + y]
+        movq        mm1,    mm0
+
+        movq        mm2,    [eax]                   //get the limit value
+        psraw       mm0,    15
+
+        punpcklbw   mm3,    mm7                     //32 31 22 21 12 11 02 01                    
+        movq        mm7,    mm0                     //save sign
+
+        movd        eax,    mm3                     //12 11 02 01 (eax no longer holds FLimitPtr from here on)
+        pxor        mm1,    mm0
+
+
+        mov         WORD PTR [esi - 1],ax           //02 01
+        psubsw      mm1,    mm0                     //abs(i)
+
+        shr         eax,    16
+        movq        mm5,    mm2
+
+        mov         WORD PTR [esi + ecx - 1],ax
+        psrlq       mm3,    32                      //xx xx xx xx 32 31 22 21
+
+        por         mm7,    fourOnes                //now have -1 or 1 
+        psubw       mm5,    mm1                     //limit - abs(i)
+
+        movd        eax,    mm3                     //32 31 22 21
+        movq        mm4,    mm5
+
+        mov         [esi + ecx*2 - 1],ax
+        psraw       mm5,    15
+
+        shr         eax,    16
+        pxor        mm4,    mm5
+
+        mov         [esi + edx - 1],ax
+        psubsw      mm4,    mm5                     //abs(limit - abs(i))
+
+        movd        mm5,    [edi + 24]              //xx xx xx xx 72 62 52 42
+        psubusw     mm2,    mm4                     //limit - abs(limit - abs(i))
+
+        pmullw      mm2,    mm7                     //new y
+        pxor        mm6,    mm6
+
+        movd        mm3,    [edi + 20]              //xx xx xx xx 71 61 51 41
+        punpcklbw   mm5,    mm6
+
+        lea         esi,    [esi + ecx*4]
+        punpcklbw   mm3,    mm6
+
+        paddw       mm3,    mm2                     //p[ms] + y
+        psubw       mm5,    mm2                     //p[0] - y
+
+        packuswb    mm3,    mm3                     //clamp[ p[ms] + y]
+
+
+        packuswb    mm5,    mm5                     //clamp[ p[0] - y]
+        punpcklbw   mm3,    mm5                     //72 71 62 61 52 51 42 41
+
+        movd        eax,    mm3                     //52 51 42 41
+        psrlq       mm3,    32                      //xx xx xx xx 72 71 62 61
+
+        mov         [esi - 1],ax
+        shr         eax,    16
+
+        mov         [esi + ecx - 1],ax
+        movd        eax,    mm3
+
+        mov         [esi + ecx*2 - 1],ax
+        shr         eax,16
+
+        mov         [esi + edx - 1],ax
+    
+    } 
+/* Commented-out C version of the filter; it uses names (QValue, Length, Dest, SrcPitch) that are not parameters of this routine.
+    INT32 j;
+	INT32 FiltVal;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+    UINT32 FLimit;
+
+    FLimit = LoopFilterLimitValuesV2[QValue];
+
+	for ( j=0; j<Length; j++ )
+	{            
+        // set up blur kernel for differences
+		FiltVal =  (( Src[-2]     ) - 
+			        ( Src[-1] * 3 ) +
+			        ( Src[ 0] * 3 ) - 
+			        ( Src[ 1]     ) + 4 ) >> 3;
+
+        FiltVal = Bound ( FLimit, FiltVal );
+
+		Dest[-1] = LimitTable[(INT32)Src[-1] + FiltVal];
+		Dest[ 0] = LimitTable[(INT32)Src[ 0] - FiltVal];
+		
+        Src  += SrcPitch;
+        Dest += DestPitch;
+	}
+*/
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringVert_8_MMX
+ *
+ *  INPUTS        : UINT32 QIndex   : Quantization index.
+ *                  UINT8 *PixelPtr : Pointer to source block.
+ *                  INT32 Pitch     : Pitch of input image.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the horizontal block edge inside a prediction
+ *                  block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/                       
+void FilteringVert_8_MMX ( UINT32 QIndex, UINT8 *PixelPtr, INT32 Pitch ) 
+{    
+    INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];    // four limit words selected by QIndex
+    // Filters rows PixelPtr[-Pitch] and PixelPtr[0] in place for 8 columns, four columns per pass; p[k] below means the row k*Pitch from PixelPtr.
+    __declspec(align(16)) const short fourFours[] = { 4, 4, 4, 4 };    // rounding term added before the >> 3
+    __declspec(align(16)) const short fourOnes[]  = { 1, 1, 1, 1 };    // OR-ed with the sign mask to give +1 / -1
+
+    __asm
+    {  
+        mov         eax,    FLimitPtr               // 4 FLimit Values in shorts
+        mov         edx,    Pitch                   // Pitch
+   
+        xor         ecx,    ecx                     // clear ecx to get negative Pitch
+        sub         ecx,    edx                     // Negative Pitch
+
+        mov         esi,    PixelPtr                // Src and Dest pointer
+        movd        mm0,    [esi]                   // p[0], columns 0..3
+
+        pxor        mm7,    mm7                     // mm7 = 0, used for every byte->word unpack below
+        movd        mm1,    [esi+ecx]               // p[-1], four pixels
+
+        punpcklbw   mm0,    mm7                     // unpack to short
+        movd        mm2,    [esi+edx]               // p[1], four pixels
+
+        punpcklbw   mm1,    mm7                     // unpack p[-1] to shorts
+        movd        mm3,    [esi+ecx*2]             // p[-2], four pixels
+
+        movq        mm5,    mm0                     // copy of unpacked p[0]
+        movq        mm6,    mm1                     // copy of unpacked p[-1]
+
+        psubw       mm0,    mm1                     // p[0] - p[-1]
+        punpcklbw   mm2,    mm7                     // unpack p[1]
+
+        movq        mm1,    mm0                     // make a copy of p[0]-p[-1]
+        punpcklbw   mm3,    mm7                     // unpack p[-2]
+
+        paddw       mm0,    mm1                     // (p[0]-p[-1]) * 2
+        psubw       mm3,    mm2                     // (p[-2]-p[1])
+
+        paddw       mm1,    mm0                     // (p[0]-p[-1]) * 3
+        paddw       mm3,    mm1                     // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+        paddw       mm3,    fourFours               // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+        movq        mm0,    [eax]                   // FLimit (four words)
+
+        psraw       mm3,    3                       // FiltVal = sum >> 3 (arithmetic shift)
+        movq        mm1,    mm3                     // copy of FiltVal
+
+        psraw       mm3,    15                      // sign mask: FFFF-> Neg, 0->Pos
+        pxor        mm1,    mm3                     // invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm3                     // subtract the mask (-1 for negatives) => abs(FiltVal)
+        por         mm3,    fourOnes                // -1 or 1, corresponding the sign
+
+        movq        mm2,    mm0                     // Copy of FLimit
+        psubw       mm0,    mm1                     // FLimit - abs(FiltVal)
+
+        movq        mm4,    mm0                     // copy FLimit-abs(FiltVal)
+        psraw       mm0,    15                      // FFFF->Neg, 0->Pos
+
+        pxor        mm4,    mm0                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm0                     // abs(FLimit-abs(FiltVal))
+
+        psubusw     mm2,    mm4                     // FLimit-abs(FLimit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm3                     // multiply by +1/-1: bounded FiltVal with its sign back
+
+        psubw       mm5,    mm2                     // p[0] - FiltVal
+        paddw       mm6,    mm2                     // p[-1] + FiltVal
+
+        packuswb    mm5,    mm5                     // clamp to 0..255 (unsigned saturating pack)
+        packuswb    mm6,    mm6                     // clamp to 0..255 (unsigned saturating pack)
+        
+        movd        [esi],  mm5                     // write p[0]
+        movd        [esi+ecx], mm6                  // write p[-1]
+
+        movd        mm0,    [esi+4]                   // second pass: p[0], columns 4..7
+        movd        mm1,    [esi+ecx+4]               // p[-1], four pixels
+
+        punpcklbw   mm0,    mm7                     // unpack to short (mm7 is still zero)
+        movd        mm2,    [esi+edx+4]               // p[1], four pixels
+
+        punpcklbw   mm1,    mm7                     // unpack p[-1] to shorts
+        movd        mm3,    [esi+ecx*2+4]             // p[-2], four pixels
+
+        movq        mm5,    mm0                     // copy of unpacked p[0]
+        movq        mm6,    mm1                     // copy of unpacked p[-1]
+
+        psubw       mm0,    mm1                     // p[0] - p[-1]
+        punpcklbw   mm2,    mm7                     // unpack p[1]
+
+        movq        mm1,    mm0                     // make a copy of p[0]-p[-1]
+        punpcklbw   mm3,    mm7                     // unpack p[-2]
+
+        paddw       mm0,    mm1                     // (p[0]-p[-1]) * 2
+        psubw       mm3,    mm2                     // (p[-2]-p[1])
+
+        paddw       mm1,    mm0                     // (p[0]-p[-1]) * 3
+        paddw       mm3,    mm1                     // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+        paddw       mm3,    fourFours               // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+        movq        mm0,    [eax]                   // FLimit (same four words as the first pass)
+
+        psraw       mm3,    3                       // FiltVal = sum >> 3 (arithmetic shift)
+        movq        mm1,    mm3                     // copy of FiltVal
+
+        psraw       mm3,    15                      // sign mask: FFFF-> Neg, 0->Pos
+        pxor        mm1,    mm3                     // invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm3                     // subtract the mask (-1 for negatives) => abs(FiltVal)
+        por         mm3,    fourOnes                // -1 or 1, corresponding the sign
+
+        movq        mm2,    mm0                     // Copy of FLimit
+        psubw       mm0,    mm1                     // FLimit - abs(FiltVal)
+
+        movq        mm4,    mm0                     // copy FLimit-abs(FiltVal)
+        psraw       mm0,    15                      // FFFF->Neg, 0->Pos
+
+        pxor        mm4,    mm0                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm0                     // abs(FLimit-abs(FiltVal))
+
+        psubusw     mm2,    mm4                     // FLimit-abs(FLimit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm3                     // multiply by +1/-1: bounded FiltVal with its sign back
+
+        psubw       mm5,    mm2                     // p[0] - FiltVal
+        paddw       mm6,    mm2                     // p[-1] + FiltVal
+
+        packuswb    mm5,    mm5                     // clamp to 0..255 (unsigned saturating pack)
+        packuswb    mm6,    mm6                     // clamp to 0..255 (unsigned saturating pack)
+        
+        movd        [esi+4],  mm5                   // write p[0]
+        movd        [esi+ecx+4], mm6                // write p[-1]
+
+    }
+    // NOTE(review): no emms is executed in this function; presumably the caller clears the MMX state -- confirm.
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringHoriz_12_MMX
+ *
+ *  INPUTS        : UINT32 QIndex : Quantization index.
+ *                  UINT8 *Src    : Pointer to source block.
+ *                  INT32 Pitch   : Pitch of input image.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the vertical block edge inside a prediction
+ *                  block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ *
+ ****************************************************************************/                       
+void FilteringHoriz_12_MMX ( UINT32 QIndex, UINT8 *Src, INT32 Pitch ) 
+{    
+    INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];    // four limit words selected by QIndex
+    // Filters Src[-1] and Src[0] in place on 12 consecutive rows, four rows per pass, by transposing 4x4 byte groups (columns Src[-2..1]) through Temp.
+    __declspec(align(16)) const short fourFours[] = {4, 4, 4, 4};     // rounding term added before the >> 3
+    __declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1};     // OR-ed with the sign mask to give +1 / -1
+    __declspec(align(16)) unsigned char Temp[32];                     // transposed pixels: bytes 0..15 rows 0..3 (reused for rows 8..11), bytes 16..31 rows 4..7
+
+    __asm 
+    {
+        mov         eax,    FLimitPtr
+        mov         edx,    Pitch
+        
+        mov         esi,    Src
+        lea         edi,    Temp
+
+        mov         ecx,    edx                     //stride
+        movd        mm0,    [esi + -4]              //xx xx xx xx 01 00 xx xx
+
+        movd        mm4,    [esi]                   //xx xx xx xx xx xx 03 02
+        psrld       mm0,    16                      //xx xx xx xx 00 00 01 00
+
+        movd        mm1,    [esi + ecx + -4]        //xx xx xx xx 11 10 xx xx
+        punpcklwd   mm0,    mm4                     //xx xx xx xx 03 02 01 00
+
+        movd        mm4,    [esi + ecx]             //xx xx xx xx xx xx 13 12
+        psrld       mm1,    16                      //xx xx xx xx 00 00 11 10
+
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 13 12 11 10
+        lea         edx,    [edx + edx*2]           //stride * 3
+
+        movd        mm2,    [esi + ecx*2 + -4]      //xx xx xx xx 21 20 xx xx
+        punpcklbw   mm0,    mm1                     //13 03 12 02 11 01 10 00
+
+        movd        mm4,    [esi + ecx*2]           //xx xx xx xx xx xx 23 22
+        psrld       mm2,    16                      //xx xx xx xx 00 00 21 20
+
+        movd        mm1,    [esi + edx + -4]        //xx xx xx xx 31 30 xx xx
+        punpcklwd   mm2,    mm4                     //xx xx xx xx 23 22 21 20
+
+        movd        mm4,    [esi + edx]             //xx xx xx xx xx xx 33 32
+        psrld       mm1,    16                      //xx xx xx xx 00 00 31 30
+
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 33 32 31 30
+        pxor        mm4,    mm4                     //zero for byte->word unpacking
+
+        punpcklbw   mm2,    mm1                     //33 23 32 22 31 21 30 20
+        movq        mm1,    mm0                     //13 03 12 02 11 01 10 00
+
+        punpcklwd   mm0,    mm2                     //31 21 11 01 30 20 10 00
+        lea         esi,    [esi + ecx*4]           //base + (stride * 4)
+
+        punpckhwd   mm1,    mm2                     //33 23 13 03 32 22 12 02
+        movq        mm6,    mm0                     //xx xx xx xx 30 20 10 00
+
+        movq        [edi],  mm0                     //save columns 0 and 1 of rows 0..3
+        movq        mm2,    mm1                     //33 23 13 03 32 22 12 02
+
+        movq        [edi+8],  mm1                   //save columns 2 and 3 of rows 0..3
+        psrlq       mm0,    32                      //xx xx xx xx 31 21 11 01
+
+        movd        mm7,    [esi + -4]              //xx xx xx xx 41 40 xx xx
+        punpcklbw   mm1,    mm4                     //convert to words
+
+        movd        mm4,    [esi]                   //xx xx xx xx xx xx 43 42
+        psrld       mm7,    16                      //xx xx xx xx 00 00 41 40
+
+        movd        mm5,    [esi + ecx + -4]        //xx xx xx xx 51 50 xx xx
+        punpcklwd   mm7,    mm4                     //xx xx xx xx 43 42 41 40
+
+        movd        mm4,    [esi + ecx]             //xx xx xx xx xx xx 53 52
+        psrld       mm5,    16                      //xx xx xx xx 00 00 51 50
+
+        punpcklwd   mm5,    mm4                     //xx xx xx xx 53 52 51 50
+        pxor        mm4,    mm4                     //zero for byte->word unpacking
+
+        punpcklbw   mm0,    mm4                     //p[ms] (column 1) as words
+
+        psrlq       mm2,    32                      //xx xx xx xx 33 23 13 03
+        psubw       mm1,    mm0                     //x = p[0] - p[ms]
+
+        punpcklbw   mm7,    mm5                     //53 43 52 42 51 41 50 40
+        movq        mm3,    mm1                     //copy of x
+
+        punpcklbw   mm6,    mm4                     //column 0 as words
+        paddw       mm3,    mm1                     //x * 2
+
+        punpcklbw   mm2,    mm4                     //column 3 as words
+        paddw       mm1,    mm3                     //x * 3
+
+        paddw       mm1,    fourFours               //x += LoopFilterAdjustBeforeShift
+        psubw       mm6,    mm2                     //column 0 - column 3
+
+        movd        mm2,    [esi + ecx*2 + -4]      //xx xx xx xx 61 60 xx xx
+        paddw       mm6,    mm1                     //full filter sum for rows 0..3
+
+        movd        mm4,    [esi + ecx*2]           //xx xx xx xx xx xx 63 62
+        psrld       mm2,    16                      //xx xx xx xx 00 00 61 60
+
+        movd        mm5,    [esi + edx + -4]        //xx xx xx xx 71 70 xx xx
+        punpcklwd   mm2,    mm4                     //xx xx xx xx 63 62 61 60
+
+        movd        mm4,    [esi + edx]             //xx xx xx xx xx xx 73 72
+        psrld       mm5,    16                      //xx xx xx xx 00 00 71 70
+
+        mov         esi,    Src                     //restore PixelPtr
+        punpcklwd   mm5,    mm4                     //xx xx xx xx 73 72 71 70
+
+        psraw       mm6,    3                       //values to be clipped
+        pxor        mm4,    mm4                     //zero for byte->word unpacking
+
+        punpcklbw   mm2,    mm5                     //73 63 72 62 71 61 70 60
+        movq        mm5,    mm7                     //53 43 52 42 51 41 50 40
+
+        movq        mm1,    mm6                     //copy of the rows 0..3 filter value
+        punpckhwd   mm5,    mm2                     //73 63 53 43 72 62 52 42
+
+
+        movq        [edi+24],  mm5                  //save for later
+        punpcklwd   mm7,    mm2                     //71 61 51 41 70 60 50 40
+
+        movq        [edi+16],  mm7                  //save for later
+        psraw       mm6,    15                      //sign mask: FFFF negative, 0000 positive
+
+        movq        mm2,    [eax]                   //get the limit value
+        movq        mm0,    mm7                     //xx xx xx xx 70 60 50 40
+
+        psrlq       mm7,    32                      //xx xx xx xx 71 61 51 41
+        pxor        mm1,    mm6                     //invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm6                     //abs(i)
+        punpcklbw   mm5,    mm4                     //column 2 of rows 4..7 as words
+
+        por         mm6,    fourOnes                //now have -1 or 1 
+        movq        mm3,    mm2                     //copy of the limit
+
+        punpcklbw   mm7,    mm4                     //column 1 of rows 4..7 as words
+        psubw       mm3,    mm1                     //limit - abs(i)
+
+        movq        mm4,    mm3                     //copy of limit - abs(i)
+        psraw       mm3,    15                      //sign mask of limit - abs(i)
+
+        psubw       mm5,    mm7                     //x = p[0] - p[ms]
+        pxor        mm4,    mm3                     //invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm4,    mm3                     //abs(limit - abs(i))
+        pxor        mm3,    mm3                     //zero for byte->word unpacking
+
+        movd        mm1,    [edi + 28]              //xx xx xx xx 73 63 53 43
+        psubusw     mm2,    mm4                     //limit - abs(limit - abs(i))
+
+        punpcklbw   mm0,    mm3                     //column 0 of rows 4..7 as words
+        movq        mm7,    mm5                     //copy of x
+
+        paddw       mm7,    mm5                     //x * 2
+        pmullw      mm2,    mm6                     //new y -- wait 3 cycles
+
+        punpcklbw   mm1,    mm3                     //column 3 of rows 4..7 as words
+        paddw       mm5,    mm7                     //x * 3
+
+        paddw       mm5,    fourFours               //x += LoopFilterAdjustBeforeShift
+        psubw       mm0,    mm1                     //column 0 - column 3
+
+        paddw       mm0,    mm5                     //full filter sum for rows 4..7
+        pxor        mm6,    mm6                     //zero for byte->word unpacking
+
+        movd        mm7,    [edi + 8]               //xx xx xx xx 32 22 12 02
+        psraw       mm0,    3                       //values to be clipped
+
+        movd        mm3,    [edi + 4]               //xx xx xx xx 31 21 11 01
+        punpcklbw   mm7,    mm6                     //column 2 (p[0]) of rows 0..3 as words
+
+        psubw       mm7,    mm2                     //p[0] - y
+        punpcklbw   mm3,    mm6                     //column 1 (p[ms]) of rows 0..3 as words
+
+        paddw       mm3,    mm2                     //p[ms] + y
+        packuswb    mm7,    mm7                     //clamp[ p[0] - y]
+
+        packuswb    mm3,    mm3                     //clamp[ p[ms] + y]
+        movq        mm1,    mm0                     //copy of the rows 4..7 filter value
+
+        movq        mm2,    [eax]                   //get the limit value
+        psraw       mm0,    15                      //sign mask: FFFF negative, 0000 positive
+
+        punpcklbw   mm3,    mm7                     //32 31 22 21 12 11 02 01
+        movq        mm7,    mm0                     //save sign
+
+        movd        eax,    mm3                     //12 11 02 01
+        pxor        mm1,    mm0                     //invert the bits of negative lanes (abs, step 1)
+
+        mov         [esi - 1],ax                    //02 01
+        psubsw      mm1,    mm0                     //abs(i)
+
+        shr         eax,    16                      //xx xx 12 11
+        movq        mm5,    mm2                     //copy of the limit
+
+        mov         [esi + ecx - 1],ax              //12 11
+        psrlq       mm3,    32                      //xx xx xx xx 32 31 22 21
+
+        por         mm7,    fourOnes                //now have -1 or 1 
+        psubw       mm5,    mm1                     //limit - abs(i)
+
+        movd        eax,    mm3                     //32 31 22 21
+        movq        mm4,    mm5                     //copy of limit - abs(i)
+
+        mov         [esi + ecx*2 - 1],ax            //22 21
+        psraw       mm5,    15                      //sign mask of limit - abs(i)
+
+        shr         eax,    16                      //xx xx 32 31
+        pxor        mm4,    mm5                     //invert the bits of negative lanes (abs, step 1)
+
+        mov         [esi + edx - 1],ax              //32 31
+        psubsw      mm4,    mm5                     //abs(limit - abs(i))
+
+        movd        mm5,    [edi + 24]              //xx xx xx xx 72 62 52 42
+        psubusw     mm2,    mm4                     //limit - abs(limit - abs(i))
+
+        pmullw      mm2,    mm7                     //new y
+        pxor        mm6,    mm6                     //zero for byte->word unpacking
+
+        movd        mm3,    [edi + 20]              //xx xx xx xx 71 61 51 41
+        punpcklbw   mm5,    mm6                     //column 2 (p[0]) of rows 4..7 as words
+
+        lea         esi,    [esi + ecx*4]           //advance to row 4
+        punpcklbw   mm3,    mm6                     //column 1 (p[ms]) of rows 4..7 as words
+
+        paddw       mm3,    mm2                     //p[ms] + y
+        psubw       mm5,    mm2                     //p[0] - y
+
+        packuswb    mm3,    mm3                     //clamp[ p[ms] + y]
+        packuswb    mm5,    mm5                     //clamp[ p[0] - y]
+
+        punpcklbw   mm3,    mm5                     //72 71 62 61 52 51 42 41
+        movd        eax,    mm3                     //52 51 42 41
+
+        psrlq       mm3,    32                      //xx xx xx xx 72 71 62 61
+        mov         [esi - 1],ax                    //42 41
+
+        shr         eax,    16                      //xx xx 52 51
+        mov         [esi + ecx - 1],ax              //52 51
+
+        movd        eax,    mm3                     //72 71 62 61
+        mov         [esi + ecx*2 - 1],ax            //62 61
+
+        shr         eax,16                          //xx xx 72 71
+        mov         [esi + edx - 1],ax              //72 71
+
+        mov         eax,    FLimitPtr               // reload: eax was used as scratch for the stores above
+        lea         esi,    [esi+ ecx * 4]          // four lines further down: rows 8..11 (labelled 0..3 again below)
+
+        movd        mm0,    [esi + -4]              //xx xx xx xx 01 00 xx xx
+        movd        mm4,    [esi]                   //xx xx xx xx xx xx 03 02
+
+        psrld       mm0,    16                      //xx xx xx xx 00 00 01 00
+        movd        mm1,    [esi + ecx + -4]        //xx xx xx xx 11 10 xx xx
+
+        punpcklwd   mm0,    mm4                     //xx xx xx xx 03 02 01 00
+        movd        mm4,    [esi + ecx]             //xx xx xx xx xx xx 13 12
+
+        psrld       mm1,    16                      //xx xx xx xx 00 00 11 10
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 13 12 11 10
+
+        movd        mm2,    [esi + ecx*2 + -4]      //xx xx xx xx 21 20 xx xx
+        punpcklbw   mm0,    mm1                     //13 03 12 02 11 01 10 00
+
+        movd        mm4,    [esi + ecx*2]           //xx xx xx xx xx xx 23 22
+        psrld       mm2,    16                      //xx xx xx xx 00 00 21 20
+
+        movd        mm1,    [esi + edx + -4]        //xx xx xx xx 31 30 xx xx
+        punpcklwd   mm2,    mm4                     //xx xx xx xx 23 22 21 20
+
+        movd        mm4,    [esi + edx]             //xx xx xx xx xx xx 33 32
+        psrld       mm1,    16                      //xx xx xx xx 00 00 31 30
+
+        punpcklwd   mm1,    mm4                     //xx xx xx xx 33 32 31 30
+        pxor        mm4,    mm4                     //clear mm4 for unpacking
+
+        punpcklbw   mm2,    mm1                     //33 23 32 22 31 21 30 20
+        movq        mm1,    mm0                     //13 03 12 02 11 01 10 00
+
+        punpcklwd   mm0,    mm2                     //31 21 11 01 30 20 10 00
+        punpckhwd   mm1,    mm2                     //33 23 13 03 32 22 12 02
+
+        movq        mm6,    mm0                     //xx xx xx xx 30 20 10 00
+        movq        [edi],  mm0                     //save columns 0 and 1 (overwrites the rows 0..3 copy)
+        
+        movq        mm2,    mm1                     //33 23 13 03 32 22 12 02
+        movq        [edi+8],  mm1                   //save columns 2 and 3
+
+        psrlq       mm0,    32                      //xx xx xx xx 31 21 11 01
+        punpcklbw   mm1,    mm4                     //-- 32 -- 22 -- 12 -- 02
+
+        punpcklbw   mm0,    mm4                     //-- 31 -- 21 -- 11 -- 01
+        psrlq       mm2,    32                      //xx xx xx xx 33 23 13 03
+
+        psubw       mm1,    mm0                     // mm1 = p[0] - p[ms]
+        movq        mm3,    mm1                     // mm3 = p[0] - p[ms]
+
+        punpcklbw   mm6,    mm4                     //-- 30 -- 20 -- 10 -- 00
+        paddw       mm3,    mm1                     // mm3 = (p[0] - p[ms])*2
+
+        punpcklbw   mm2,    mm4                     //-- 33 -- 23 -- 13 -- 03
+        paddw       mm1,    mm3                     // mm1 = (p[0] - p[ms])*3
+
+        paddw       mm1,    fourFours               // mm1 = (p[0] - p[ms])*3 + 4
+        psubw       mm6,    mm2                     // mm6 = (p[ms2]-p[1])
+        
+        paddw       mm6,    mm1                     // mm6 = (p[0] - p[ms])*3 + 4 + (p[ms2]-p[1])
+        psraw       mm6,    3                       // mm6 = mm6 >> 3 (arithmetic shift)
+
+        movq        mm1,    mm6                     // make a copy of initial FiltVal
+        psraw       mm6,    15                      // FFFF for negative, 0000 for positive
+
+        pxor        mm1,    mm6                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm1,    mm6                     // abs(FiltVal)
+
+        por         mm6,    fourOnes                // -1 or 1 for negative or positive
+        movq        mm2,    [eax]                   // mm2 = FLimit
+        
+        movq        mm3,    mm2                     // mm3 = FLimit
+        psubw       mm3,    mm1                     // mm3 = FLimit - abs(FiltVal)
+
+        movq        mm4,    mm3                     // Make a copy of FLimit - abs(FiltVal)
+        psraw       mm3,    15                      // FFFF and 0000 for - and +
+
+        pxor        mm4,    mm3                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm3                     // abs(Limit-abs(FiltVal))
+    
+        psubusw     mm2,    mm4                     // Limit - abs(Limit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm6                     // get the sign back
+
+        pxor        mm5,    mm5                     // clear mm5 for unpacking
+        movd        mm7,    [edi+8]                 // xx xx xx xx 32 22 12 02
+
+        punpcklbw   mm7,    mm5                     // -- 32 -- 22 -- 12 -- 02
+        movd        mm3,    [edi+4]                 // xx xx xx xx 31 21 11 01
+
+        psubw       mm7,    mm2                     // p[0] - FiltVal
+        punpcklbw   mm3,    mm5                     // -- 31 -- 21 -- 11 -- 01
+        
+        paddw       mm3,    mm2                     // p[ms] + FiltVal
+        packuswb    mm7,    mm7                     // clamping
+
+        packuswb    mm3,    mm3                     // clamping
+        punpcklbw   mm3,    mm7                     // 32 31 22 21 12 11 02 01
+
+        movd        eax,    mm3                     // 12 11 02 01
+        psrlq       mm3,    32                      // xx xx xx xx 32 31 22 21
+
+        mov         [esi-1], ax                     // write 01 02
+        shr         eax,    16                      // xx xx 12 11
+        
+        mov         [esi+ecx -1], ax                // write 11 12
+        movd        eax,    mm3                     // 32 31 22 21
+
+        mov         [esi+ecx*2 -1], ax              // write 21 22
+        shr         eax,    16                      // xx xx 32 31
+
+        mov         [esi+edx-1], ax                 // write 31 32
+
+    } 
+    // NOTE(review): no emms is executed in this function; presumably the caller clears the MMX state -- confirm.
+/* Scalar C form of the per-row filter, kept for reference only (QValue, Length, Dest, SrcPitch and DestPitch are not parameters of this function).
+    INT32 j;
+	INT32 FiltVal;
+    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
+    UINT32 FLimit;
+    FLimit = LoopFilterLimitValuesV2[QValue];
+	for ( j=0; j<Length; j++ )
+	{            
+        // set up blur kernel for differences
+		FiltVal =  (( Src[-2]     ) - 
+			        ( Src[-1] * 3 ) +
+			        ( Src[ 0] * 3 ) - 
+			        ( Src[ 1]     ) + 4 ) >> 3;
+
+        FiltVal = Bound ( FLimit, FiltVal );
+
+		Dest[-1] = LimitTable[(INT32)Src[-1] + FiltVal];
+		Dest[ 0] = LimitTable[(INT32)Src[ 0] - FiltVal];
+		
+        Src  += SrcPitch;
+        Dest += DestPitch;
+	}
+*/
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilteringVert_12_MMX
+ *
+ *  INPUTS        : UINT32 QIndex   : Quantization index.
+ *                  UINT8 *PixelPtr : Pointer to source block.
+ *                  INT32 Pitch     : Pitch of input image.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Filters the horizontal block edge inside a prediction
+ *                  block.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/                       
+
+void FilteringVert_12_MMX ( UINT32 QIndex, UINT8 *PixelPtr, INT32 Pitch ) 
+{    
+    INT16 *FLimitPtr = &LoopFilterLimitValuesV2_MMX[QIndex*4];    // four limit words selected by QIndex
+    // Filters rows PixelPtr[-Pitch] and PixelPtr[0] in place for 12 columns, four columns per pass; p[k] below means the row k*Pitch from PixelPtr.
+    __declspec(align(16)) const short fourFours[] = { 4, 4, 4, 4 };    // rounding term added before the >> 3
+    __declspec(align(16)) const short fourOnes[]  = { 1, 1, 1, 1 };    // OR-ed with the sign mask to give +1 / -1
+
+    __asm
+    {
+        mov         eax,    FLimitPtr               // 4 FLimit Values in shorts
+        mov         edx,    Pitch                   // Pitch
+   
+        xor         ecx,    ecx                     // clear ecx to get negative Pitch
+        sub         ecx,    edx                     // Negative Pitch
+
+        mov         esi,    PixelPtr                // Src and Dest pointer
+        movd        mm0,    [esi]                   // p[0], columns 0..3
+
+        pxor        mm7,    mm7                     // mm7 = 0, used for every byte->word unpack below
+        movd        mm1,    [esi+ecx]               // p[-1], four pixels
+
+        punpcklbw   mm0,    mm7                     // unpack to short
+        movd        mm2,    [esi+edx]               // p[1], four pixels
+
+        punpcklbw   mm1,    mm7                     // unpack p[-1] to shorts
+        movd        mm3,    [esi+ecx*2]             // p[-2], four pixels
+
+        movq        mm5,    mm0                     // copy of unpacked p[0]
+        movq        mm6,    mm1                     // copy of unpacked p[-1]
+
+        psubw       mm0,    mm1                     // p[0] - p[-1]
+        punpcklbw   mm2,    mm7                     // unpack p[1]
+
+        movq        mm1,    mm0                     // make a copy of p[0]-p[-1]
+        punpcklbw   mm3,    mm7                     // unpack p[-2]
+
+        paddw       mm0,    mm1                     // (p[0]-p[-1]) * 2
+        psubw       mm3,    mm2                     // (p[-2]-p[1])
+
+        paddw       mm1,    mm0                     // (p[0]-p[-1]) * 3
+        paddw       mm3,    mm1                     // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+        paddw       mm3,    fourFours               // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+        movq        mm0,    [eax]                   // FLimit (four words)
+
+        psraw       mm3,    3                       // FiltVal = sum >> 3 (arithmetic shift)
+        movq        mm1,    mm3                     // copy of FiltVal
+
+        psraw       mm3,    15                      // sign mask: FFFF-> Neg, 0->Pos
+        pxor        mm1,    mm3                     // invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm3                     // subtract the mask (-1 for negatives) => abs(FiltVal)
+        por         mm3,    fourOnes                // -1 or 1, corresponding the sign
+
+        movq        mm2,    mm0                     // Copy of FLimit
+        psubw       mm0,    mm1                     // FLimit - abs(FiltVal)
+
+        movq        mm4,    mm0                     // copy FLimit-abs(FiltVal)
+        psraw       mm0,    15                      // FFFF->Neg, 0->Pos
+
+        pxor        mm4,    mm0                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm0                     // abs(FLimit-abs(FiltVal))
+
+        psubusw     mm2,    mm4                     // FLimit-abs(FLimit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm3                     // multiply by +1/-1: bounded FiltVal with its sign back
+
+        psubw       mm5,    mm2                     // p[0] - FiltVal
+        paddw       mm6,    mm2                     // p[-1] + FiltVal
+
+        packuswb    mm5,    mm5                     // clamp to 0..255 (unsigned saturating pack)
+        packuswb    mm6,    mm6                     // clamp to 0..255 (unsigned saturating pack)
+        
+        movd        [esi],  mm5                     // write p[0]
+        movd        [esi+ecx], mm6                  // write p[-1]
+
+        movd        mm0,    [esi+4]                   // second pass: p[0], columns 4..7
+        movd        mm1,    [esi+ecx+4]               // p[-1], four pixels
+
+        punpcklbw   mm0,    mm7                     // unpack to short (mm7 is still zero)
+        movd        mm2,    [esi+edx+4]               // p[1], four pixels
+
+        punpcklbw   mm1,    mm7                     // unpack p[-1] to shorts
+        movd        mm3,    [esi+ecx*2+4]             // p[-2], four pixels
+
+        movq        mm5,    mm0                     // copy of unpacked p[0]
+        movq        mm6,    mm1                     // copy of unpacked p[-1]
+
+        psubw       mm0,    mm1                     // p[0] - p[-1]
+        punpcklbw   mm2,    mm7                     // unpack p[1]
+
+        movq        mm1,    mm0                     // make a copy of p[0]-p[-1]
+        punpcklbw   mm3,    mm7                     // unpack p[-2]
+
+        paddw       mm0,    mm1                     // (p[0]-p[-1]) * 2
+        psubw       mm3,    mm2                     // (p[-2]-p[1])
+
+        paddw       mm1,    mm0                     // (p[0]-p[-1]) * 3
+        paddw       mm3,    mm1                     // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+        paddw       mm3,    fourFours               // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+        movq        mm0,    [eax]                   // FLimit (same four words as the first pass)
+
+        psraw       mm3,    3                       // FiltVal = sum >> 3 (arithmetic shift)
+        movq        mm1,    mm3                     // copy of FiltVal
+
+        psraw       mm3,    15                      // sign mask: FFFF-> Neg, 0->Pos
+        pxor        mm1,    mm3                     // invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm3                     // subtract the mask (-1 for negatives) => abs(FiltVal)
+        por         mm3,    fourOnes                // -1 or 1, corresponding the sign
+
+        movq        mm2,    mm0                     // Copy of FLimit
+        psubw       mm0,    mm1                     // FLimit - abs(FiltVal)
+
+        movq        mm4,    mm0                     // copy FLimit-abs(FiltVal)
+        psraw       mm0,    15                      // FFFF->Neg, 0->Pos
+
+        pxor        mm4,    mm0                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm0                     // abs(FLimit-abs(FiltVal))
+
+        psubusw     mm2,    mm4                     // FLimit-abs(FLimit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm3                     // multiply by +1/-1: bounded FiltVal with its sign back
+
+        psubw       mm5,    mm2                     // p[0] - FiltVal
+        paddw       mm6,    mm2                     // p[-1] + FiltVal
+
+        packuswb    mm5,    mm5                     // clamp to 0..255 (unsigned saturating pack)
+        packuswb    mm6,    mm6                     // clamp to 0..255 (unsigned saturating pack)
+        
+        movd        [esi+4],  mm5                   // write p[0]
+        movd        [esi+ecx+4], mm6                // write p[-1]
+
+        movd        mm0,    [esi+8]                   // third pass: p[0], columns 8..11
+        movd        mm1,    [esi+ecx+8]               // p[-1], four pixels
+
+        punpcklbw   mm0,    mm7                     // unpack to short (mm7 is still zero)
+        movd        mm2,    [esi+edx+8]               // p[1], four pixels
+
+        punpcklbw   mm1,    mm7                     // unpack p[-1] to shorts
+        movd        mm3,    [esi+ecx*2+8]             // p[-2], four pixels
+
+        movq        mm5,    mm0                     // copy of unpacked p[0]
+        movq        mm6,    mm1                     // copy of unpacked p[-1]
+
+        psubw       mm0,    mm1                     // p[0] - p[-1]
+        punpcklbw   mm2,    mm7                     // unpack p[1]
+
+        movq        mm1,    mm0                     // make a copy of p[0]-p[-1]
+        punpcklbw   mm3,    mm7                     // unpack p[-2]
+
+        paddw       mm0,    mm1                     // (p[0]-p[-1]) * 2
+        psubw       mm3,    mm2                     // (p[-2]-p[1])
+
+        paddw       mm1,    mm0                     // (p[0]-p[-1]) * 3
+        paddw       mm3,    mm1                     // p[-2]-3*p[-1]+3*p[0]-p[1]
+
+        paddw       mm3,    fourFours               // p[-2]-3*p[-1]+3*p[0]-p[1]+4
+        movq        mm0,    [eax]                   // FLimit (same four words as the first pass)
+
+        psraw       mm3,    3                       // FiltVal = sum >> 3 (arithmetic shift)
+        movq        mm1,    mm3                     // copy of FiltVal
+
+        psraw       mm3,    15                      // sign mask: FFFF-> Neg, 0->Pos
+        pxor        mm1,    mm3                     // invert the bits of negative lanes (abs, step 1)
+
+        psubsw      mm1,    mm3                     // subtract the mask (-1 for negatives) => abs(FiltVal)
+        por         mm3,    fourOnes                // -1 or 1, corresponding the sign
+
+        movq        mm2,    mm0                     // Copy of FLimit
+        psubw       mm0,    mm1                     // FLimit - abs(FiltVal)
+
+        movq        mm4,    mm0                     // copy FLimit-abs(FiltVal)
+        psraw       mm0,    15                      // FFFF->Neg, 0->Pos
+
+        pxor        mm4,    mm0                     // invert the bits of negative lanes (abs, step 1)
+        psubsw      mm4,    mm0                     // abs(FLimit-abs(FiltVal))
+
+        psubusw     mm2,    mm4                     // FLimit-abs(FLimit-abs(FiltVal)), unsigned-saturated at 0
+        pmullw      mm2,    mm3                     // multiply by +1/-1: bounded FiltVal with its sign back
+
+        psubw       mm5,    mm2                     // p[0] - FiltVal
+        paddw       mm6,    mm2                     // p[-1] + FiltVal
+
+        packuswb    mm5,    mm5                     // clamp to 0..255 (unsigned saturating pack)
+        packuswb    mm6,    mm6                     // clamp to 0..255 (unsigned saturating pack)
+        
+        movd        [esi+8],  mm5                   // write p[0]
+        movd        [esi+ecx+8], mm6                // write p[-1]
+    }
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c
new file mode 100644
index 00000000..e0aa3c57
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/scaleopt.c
@@ -0,0 +1,1267 @@
+/****************************************************************************
+*        
+*   Module Title :     scaleopt.c
+*
+*   Description  :     Optimized scaling functions
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Module Statics
+****************************************************************************/        
+__declspec(align(16)) const static unsigned short oneFifth[]  = { 51, 51, 51, 51 };            /* 1/5 as n/256 (products are >> 8 after use) */
+__declspec(align(16)) const static unsigned short twoFifths[] = { 102, 102, 102, 102 };        /* 2/5 as n/256 */
+__declspec(align(16)) const static unsigned short threeFifths[] = { 154, 154, 154, 154 };      /* 3/5 as n/256; 102 + 154 == 256 */
+__declspec(align(16)) const static unsigned short fourFifths[] = { 205, 205, 205, 205 };       /* 4/5 as n/256; 51 + 205 == 256 */
+__declspec(align(16)) const static unsigned short roundValues[] = { 128, 128, 128, 128 };      /* rounding term added before the >> 8 */
+__declspec(align(16)) const static unsigned short fourOnes[]= { 1, 1, 1, 1};                   /* NOTE(review): no use visible in the routines reviewed here */
+__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102,  51 };         /* 4:5 horizontal: per-lane weights applied to the right-hand neighbour */
+__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 };         /* 4:5 horizontal: per-lane weights applied to the left-hand pixel */
+__declspec(align(16)) const static unsigned char  mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0};      /* keeps byte 6 only; used by the 4:5 horizontal tail to repeat the last pixel */
+__declspec(align(16)) const static unsigned short const35_2[] = { 154,  51, 205, 102 };        /* 3:5 horizontal: per-lane weights applied to the right-hand neighbour */
+__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205,  51, 154 };        /* 3:5 horizontal: per-lane weights applied to the left-hand pixel */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : HorizontalLine_3_5_Scale_MMX
+ *
+ *  INPUTS        : const unsigned char *source : First pixel of the line to scale.
+ *                  unsigned int sourceWidth    : Number of source pixels (bytes).
+ *                  unsigned char *dest         : Where the scaled line is written.
+ *                  unsigned int destWidth      : Unused.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 3 to 5 up-scaling of a horizontal line of pixels.
+ *
+ *  SPECIAL NOTES : The loop body runs before its bound is tested, so one loop group
+ *                  plus the tail group always run. The tail loads 4 bytes for 3 pixels.
+ ****************************************************************************/
+void HorizontalLine_3_5_Scale_MMX 
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    (void) destWidth;   /* parameter is not used by this routine */
+
+    __asm
+    {
+        // Byte diagrams in the comments list bytes from lowest address to highest.
+        push ebx                            // ebx is used as scratch below
+
+        mov         esi,    source          // esi = read pointer
+        mov         edi,    dest            // edi = write pointer
+        
+        mov         ecx,    sourceWidth
+        lea         edx,    [esi+ecx-3];    // edx = source + sourceWidth - 3 (loop bound)
+
+        movq        mm5,    const35_1       // mm5 = 66 xx cd xx 33 xx 9a xx
+        movq        mm6,    const35_2       // mm6 = 9a xx 33 xx cd xx 66 xx
+        
+        movq        mm4,    roundValues     // mm4 = 80 xx 80 xx 80 xx 80 xx
+        pxor        mm7,    mm7             // clear mm7
+
+HorizLine_3_5_Loop:
+        // Each pass turns 3 source pixels (plus the next one, 03) into 5 output pixels.
+        mov        eax,    DWORD PTR [esi] // eax = 00 01 02 03 
+        mov        ebx,    eax             
+        
+        and         ebx,    0xffff00        // ebx = xx 01 02 xx 
+        mov         ecx,    eax             // ecx = 00 01 02 03
+        
+        and         eax,    0xffff0000      // eax = xx xx 02 03
+        xor         ecx,    eax             // ecx = 00 01 xx xx
+
+        shr         ebx,    8               // ebx = 01 02 xx xx
+        or          eax,    ebx             // eax = 01 02 02 03
+
+        shl         ebx,    16              // ebx = xx xx 01 02
+        movd        mm1,    eax             // mm1 = 01 02 02 03 xx xx xx xx 
+        
+        or          ebx,    ecx             // ebx = 00 01 01 02
+        punpcklbw   mm1,    mm7             // mm1 = 01 xx 02 xx 02 xx 03 xx
+        
+        movd        mm0,    ebx             // mm0 = 00 01 01 02
+        pmullw      mm1,    mm6             // right-hand pixels * const35_2 weights
+
+        punpcklbw   mm0,    mm7             // mm0 = 00 xx 01 xx 01 xx 02 xx
+        pmullw      mm0,    mm5             // left-hand pixels * const35_1 weights
+
+        mov         [edi],  ebx             // 4-byte store; only byte 0 (output 0 = pixel 00) survives the movd below
+        add         esi,    3
+
+        add         edi,    5
+        paddw       mm0,    mm1             // sum of the two weighted pixels per lane
+        
+        paddw       mm0,    mm4             // + 128 for rounding
+        psrlw       mm0,    8               // divide by 256
+
+        cmp         esi,    edx             // sets the flags consumed by the jl below
+        packuswb    mm0,    mm7             // pack the four words to bytes
+        
+        movd        DWORD Ptr [edi-4], mm0  // outputs 1..4 of this group (edi already advanced by 5)
+        jl          HorizLine_3_5_Loop      // signed compare of esi against edx
+
+//Exit: last group; there is no right-hand pixel 03, so pixel 02 is used in its place
+        mov         eax,    DWORD PTR [esi] // eax = 00 01 02 03 
+        mov         ebx,    eax             
+        
+        and         ebx,    0xffff00        // ebx = xx 01 02 xx 
+        mov         ecx,    eax             // ecx = 00 01 02 03
+        
+        and         eax,    0xffff0000      // eax = xx xx 02 03
+        xor         ecx,    eax             // ecx = 00 01 xx xx
+
+        shr         ebx,    8               // ebx = 01 02 xx xx
+        or          eax,    ebx             // eax = 01 02 02 03
+
+        shl         eax,    8               // eax = xx 01 02 02
+        and         eax,    0xffff0000      // eax = xx xx 02 02
+        // byte 03 has now been shifted out, so the 4th byte loaded above never reaches the output
+        or          eax,    ebx             // eax = 01 02 02 02
+
+        shl         ebx,    16              // ebx = xx xx 01 02
+        movd        mm1,    eax             // mm1 = 01 02 02 02 xx xx xx xx 
+        
+        or          ebx,    ecx             // ebx = 00 01 01 02
+        punpcklbw   mm1,    mm7             // mm1 = 01 xx 02 xx 02 xx 02 xx
+        
+        movd        mm0,    ebx             // mm0 = 00 01 01 02
+        pmullw      mm1,    mm6             // right-hand pixels * const35_2 weights
+
+        punpcklbw   mm0,    mm7             // mm0 = 00 xx 01 xx 01 xx 02 xx
+        pmullw      mm0,    mm5             // left-hand pixels * const35_1 weights
+
+        mov         [edi],  ebx             // 4-byte store; only byte 0 (output 0) survives the movd below
+        paddw       mm0,    mm1             // last lane is 02*154 + 02*102 = 02*256, i.e. output 4 = pixel 02
+
+        paddw       mm0,    mm4             // + 128 for rounding
+        psrlw       mm0,    8               // divide by 256
+
+        packuswb    mm0,    mm7        
+        movd        DWORD Ptr [edi+1], mm0  // outputs 1..4 of the last group
+
+        pop ebx
+        // NOTE(review): no emms is executed in this routine - confirm the caller handles MMX state.
+    }
+
+    /* C reference for the arithmetic performed by the assembly above (not compiled):
+    const unsigned char *src = source;
+    unsigned char *des = dest;
+    unsigned int a, b, c ;
+    unsigned int i;
+    (void) destWidth;
+
+    for ( i=0; i<sourceWidth-3; i+=3 )
+    {     
+        a = src[0];
+        b = src[1];
+        des [0] = (UINT8) (a);
+        // 2 * left + 3 * right /5 
+        des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
+        c = src[2] ;
+        // 4 * left + 1 * right /5
+        des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
+        // 1 * left + 4 * right /5
+        des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
+
+        a = src[3];
+        // 3 * left + 2 * right /5 
+        des [4] = (UINT8) (( c * 154 + a * 102 + 128 ) >> 8);
+
+        src += 3;
+        des += 5;
+    }
+
+    a = src[0];
+    b = src[1];
+    des [0] = (UINT8) (a);
+    // 2 * left + 3 * right /5 
+    des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
+    c = src[2] ;
+    // 4 * left + 1 * right /5
+    des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
+    // 1 * left + 4 * right /5
+    des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
+    
+    des [4] = (UINT8) (c);
+*/
+}        
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : HorizontalLine_4_5_Scale_MMX
+ *
+ *  INPUTS        : const unsigned char *source : First pixel of the line to scale.
+ *                  unsigned int sourceWidth    : Number of source pixels (bytes).
+ *                  unsigned char *dest         : Where the scaled line is written.
+ *                  unsigned int destWidth      : Unused.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 4 to 5 up-scaling of a horizontal line of pixels.
+ *
+ *  SPECIAL NOTES : Each pass converts 8 source pixels to 10; one loop pass plus the
+ *                  tail always run. The tail repeats the last pixel at the edge.
+ ****************************************************************************/
+void HorizontalLine_4_5_Scale_MMX 
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+    (void)destWidth;    /* parameter is not used by this routine */
+
+    __asm
+    {
+        // Byte diagrams in the comments list bytes from lowest address to highest.
+        mov         esi,    source          // esi = read pointer
+        mov         edi,    dest            // edi = write pointer
+        
+        mov         ecx,    sourceWidth
+        lea         edx,    [esi+ecx-8];    // edx = source + sourceWidth - 8 (loop bound)
+
+        movq        mm5,    const45_1       // mm5 = 33 xx 66 xx 9a xx cd xx   
+        movq        mm6,    const45_2       // mm6 = cd xx 9a xx 66 xx 33 xx
+        
+        movq        mm4,    roundValues     // mm4 = 80 xx 80 xx 80 xx 80 xx
+        pxor        mm7,    mm7             // clear mm7
+
+HorizLine_4_5_Loop:
+        // Each pass handles two 4:5 groups: source pixels 00..07 (plus 08) -> outputs 0..9.
+        movq        mm0,    QWORD PTR [esi]           // mm0 = 00 01 02 03 04 05 06 07
+        movq        mm1,    QWORD PTR [esi+1];        // mm1 = 01 02 03 04 05 06 07 08
+
+        movq        mm2,    mm0             // mm2 = 00 01 02 03 04 05 06 07
+        movq        mm3,    mm1             // mm3 = 01 02 03 04 05 06 07 08
+        
+        movd        DWORD PTR [edi],  mm0             // 4-byte store; only output 0 (= pixel 00) survives the later stores
+        punpcklbw   mm0,    mm7             // mm0 = 00 xx 01 xx 02 xx 03 xx
+        
+        punpcklbw   mm1,    mm7             // mm1 = 01 xx 02 xx 03 xx 04 xx         
+        pmullw      mm0,    mm5             // 00* 51 01*102 02*154 03*205
+
+        pmullw      mm1,    mm6             // 01*205 02*154 03*102 04* 51
+        punpckhbw   mm2,    mm7             // mm2 = 04 xx 05 xx 06 xx 07 xx
+     
+        movd        DWORD PTR [edi+5], mm2            // 4-byte store; only output 5 (= pixel 04) survives the later stores
+        pmullw      mm2,    mm5             // 04* 51 05*102 06*154 07*205
+
+        punpckhbw   mm3,    mm7             // mm3 = 05 xx 06 xx 07 xx 08 xx
+        pmullw      mm3,    mm6             // 05*205 06*154 07*102 08* 51    
+        
+        paddw       mm0,    mm1             // sum of the two weighted pixels per lane
+        paddw       mm0,    mm4             // added round values
+
+        psrlw       mm0,    8               // divide by 256: outputs 1..4 as words
+        packuswb    mm0,    mm7     
+
+        movd        DWORD PTR [edi+1], mm0  // write outputs 1 2 3 4
+        add         edi,    10
+
+        add         esi,    8                   
+        paddw       mm2,    mm3             // sum of the two weighted pixels per lane
+
+        paddw       mm2,    mm4             // added round values
+        cmp         esi,    edx             // sets the flags consumed by the jl below
+
+        psrlw       mm2,    8
+        packuswb    mm2,    mm7
+
+        movd        DWORD PTR [edi-4], mm2 // write outputs 6 7 8 9 (edi already advanced by 10)
+        jl         HorizLine_4_5_Loop      // signed compare of esi against edx
+
+//Exit: last 8 pixels; there is no pixel 08, so pixel 07 is repeated in its place
+        movq        mm0,    [esi]           // mm0 = 00 01 02 03 04 05 06 07
+        movq        mm1,    mm0             // mm1 = 00 01 02 03 04 05 06 07
+
+        movq        mm2,    mm0             // mm2 = 00 01 02 03 04 05 06 07
+        psrlq       mm1,    8               // mm1 = 01 02 03 04 05 06 07 00
+
+        movq        mm3,    mask45          // mm3 = 00 00 00 00 00 00 ff 00
+        pand        mm3,    mm1             // mm3 = 00 00 00 00 00 00 07 00    
+        
+        psllq       mm3,    8               // mm3 = 00 00 00 00 00 00 00 07
+        por         mm1,    mm3             // mm1 = 01 02 03 04 05 06 07 07
+        
+        movq        mm3,    mm1             // mm3 = 01 02 03 04 05 06 07 07
+
+        movd        DWORD PTR [edi],  mm0   // 4-byte store; only output 0 (= pixel 00) survives the later stores
+        punpcklbw   mm0,    mm7             // mm0 = 00 xx 01 xx 02 xx 03 xx
+        
+        punpcklbw   mm1,    mm7             // mm1 = 01 xx 02 xx 03 xx 04 xx         
+        pmullw      mm0,    mm5             // 00* 51 01*102 02*154 03*205
+
+        pmullw      mm1,    mm6             // 01*205 02*154 03*102 04* 51
+        punpckhbw   mm2,    mm7             // mm2 = 04 xx 05 xx 06 xx 07 xx
+     
+        movd        DWORD PTR [edi+5], mm2  // 4-byte store; only output 5 (= pixel 04) survives the later stores
+        pmullw      mm2,    mm5             // 04* 51 05*102 06*154 07*205
+
+        punpckhbw   mm3,    mm7             // mm3 = 05 xx 06 xx 07 xx 07 xx
+        pmullw      mm3,    mm6             // 05*205 06*154 07*102 07* 51    
+        
+        paddw       mm0,    mm1             // sum of the two weighted pixels per lane
+        paddw       mm0,    mm4             // added round values
+
+        psrlw       mm0,    8               // divide by 256: outputs 1..4 as words
+        packuswb    mm0,    mm7             // outputs 1 2 3 4 xx xx xx xx
+
+        movd        DWORD PTR [edi+1], mm0  // write outputs 1 2 3 4
+        paddw       mm2,    mm3             // last lane is 07*205 + 07*51 = 07*256, i.e. output 9 = pixel 07
+
+        paddw       mm2,    mm4             // added round values        
+        psrlw       mm2,    8
+        
+        packuswb    mm2,    mm7
+        movd        DWORD PTR [edi+6], mm2  // write outputs 6 7 8 9
+
+        // NOTE(review): no emms is executed in this routine - confirm the caller handles MMX state.
+    }
+/* C reference for the arithmetic (not compiled); it handles one 4:5 group per pass, the assembly above two:
+    const unsigned char *src = source;
+    unsigned char *des = dest;
+    unsigned int a, b, c ;
+    unsigned i;
+	(void) destWidth;
+    
+    for ( i=0; i<sourceWidth-4; i+=4 )
+    {
+        a = src[0];
+        b = src[1];
+        des [0] = (UINT8) a;
+        des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
+        c = src[2] * 154;
+        a = src[3];
+        des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
+        des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
+        b = src[4];
+        des [4] = (UINT8) (( a * 205 + 51 * b + 128) >> 8);
+
+        src += 4;
+        des += 5;
+    }
+
+    a = src[0];
+    b = src[1];
+    des [0] = (UINT8) (a);
+    des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
+    c = src[2] * 154;
+    a = src[3];
+    des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
+    des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
+    des [4] = (UINT8) (a);
+*/
+}        
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VerticalBand_4_5_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    : Top line of the band; scaled in place.
+ *                  unsigned int destPitch : Byte offset between successive lines.
+ *                  unsigned int destWidth : Pixels per line; handled 8 at a time.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 4 to 5 up-scaling of a 4 pixel high band of pixels.
+ *
+ *  SPECIAL NOTES : The routine uses the first line of the band below 
+ *                  the current band. The function also has a "C" only
+ *                  version.
+ *                  Lines 0-3 and 5 (from dest) are read; lines 1-4 are written.
+ ****************************************************************************/
+void VerticalBand_4_5_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm 
+    {
+        // a, b, c, d = lines 0..3 of the band (src[0..3]); an = line 5 (src[0] of the next band).
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+        
+        lea         edi,    [esi+ecx*2]             // two lines below
+        add         edi,    ecx                     // three lines below
+
+        pxor        mm7,    mm7                     // clear out mm7        
+        mov         edx,    destWidth               // Loop counter
+
+VS_4_5_loop:
+        // Each pass processes one 8-pixel wide column of the band.
+        movq        mm0,    QWORD ptr [esi]         // src[0];
+        movq        mm1,    QWORD ptr [esi+ecx]     // src[1];
+
+        movq        mm2,    mm0                     // Make a copy 
+        punpcklbw   mm0,    mm7                     // unpack low to word
+    
+        movq        mm5,    oneFifth                
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm0,    mm5                     // a * 1/5
+        
+        movq        mm3,    mm1                     // make a copy 
+        punpcklbw   mm1,    mm7                     // unpack low to word
+
+        pmullw      mm2,    mm5                     // a * 1/5
+        movq        mm6,    fourFifths               // constant 4/5
+
+        movq        mm4,    mm1                     // copy of low b
+        pmullw      mm4,    mm6                     // b * 4/5
+
+        punpckhbw   mm3,    mm7                     // unpack high to word 
+        movq        mm5,    mm3                     // copy of high b
+
+        pmullw      mm5,    mm6                     // b * 4/5
+        paddw       mm0,    mm4                     // a * 1/5 + b * 4/5
+
+        paddw       mm2,    mm5                     // a * 1/5 + b * 4/5
+        paddw       mm0,    roundValues             // + 128
+
+        paddw       mm2,    roundValues             // + 128
+        psrlw       mm0,    8
+
+        psrlw       mm2,    8
+        packuswb    mm0,    mm2                     // des [1]
+
+        movq        QWORD ptr [esi+ecx], mm0        // write des[1] (b is still held unpacked in mm1/mm3)
+        movq        mm0,    [esi+ecx*2]             // mm0 = src[2]
+
+        // mm1, mm3 --- Src[1]
+        // mm0 --- Src[2]
+        // mm7 for unpacking
+
+        movq        mm5,    twoFifths                 
+        movq        mm2,    mm0                     // make a copy 
+
+        pmullw      mm1,    mm5                     // b * 2/5 
+        movq        mm6,    threeFifths             
+
+
+        punpcklbw   mm0,    mm7                     // unpack low to word
+        pmullw      mm3,    mm5                     // b * 2/5
+
+        movq        mm4,    mm0                     // make copy of c
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm4,    mm6                     // c * 3/5
+        movq        mm5,    mm2                     
+
+        pmullw      mm5,    mm6                     // c * 3/5
+        paddw       mm1,    mm4                     // b * 2/5 + c * 3/5
+        
+        paddw       mm3,    mm5                     // b * 2/5 + c * 3/5
+        paddw       mm1,    roundValues             // + 128
+
+        paddw       mm3,    roundValues             // + 128
+        psrlw       mm1,    8
+
+        psrlw       mm3,    8
+        packuswb    mm1,    mm3                     // des[2]
+
+        movq        QWORD ptr [esi+ecx*2], mm1      // write des[2] (c is still held unpacked in mm0/mm2)
+        movq        mm1,    [edi]                   // mm1=Src[3];
+
+        // mm0, mm2 --- Src[2]
+        // mm1 --- Src[3]
+        // mm6 --- 3/5
+        // mm7 for unpacking
+
+        pmullw      mm0,    mm6                     // c * 3/5
+        movq        mm5,    twoFifths               // mm5 = 2/5
+
+        movq        mm3,    mm1                     // make a copy
+        pmullw      mm2,    mm6                     // c * 3/5 
+
+        punpcklbw   mm1,    mm7                     // unpack low
+        movq        mm4,    mm1                     // make a copy 
+
+        punpckhbw   mm3,    mm7                     // unpack high
+        pmullw      mm4,    mm5                     // d * 2/5
+
+        movq        mm6,    mm3                     // make a copy
+        pmullw      mm6,    mm5                     // d * 2/5
+    
+        paddw       mm0,    mm4                     // c * 3/5 + d * 2/5
+        paddw       mm2,    mm6                     // c * 3/5 + d * 2/5
+
+        paddw       mm0,    roundValues             // + 128
+        paddw       mm2,    roundValues             // + 128
+                  
+        psrlw       mm0,    8
+        psrlw       mm2,    8
+
+        packuswb    mm0,    mm2                     // des[3]
+        movq        QWORD ptr [edi], mm0            // write des[3] (d is still held unpacked in mm1/mm3)
+
+        //  mm1, mm3 --- Src[3]
+        //  mm7 -- cleared for unpacking
+
+        movq        mm0,    [edi+ecx*2]             // mm0, Src[0] of the next group
+
+        movq        mm5,    fourFifths              // mm5 = 4/5
+        pmullw      mm1,    mm5                     // d * 4/5
+
+        movq        mm6,    oneFifth                // mm6 = 1/5
+        movq        mm2,    mm0                     // make a copy     
+
+        pmullw      mm3,    mm5                     // d * 4/5
+        punpcklbw   mm0,    mm7                     // unpack low
+        
+        pmullw      mm0,    mm6                     // an * 1/5
+        punpckhbw   mm2,    mm7                     // unpack high
+
+        paddw       mm1,    mm0                     // d * 4/5 + an * 1/5
+        pmullw      mm2,    mm6                     // an * 1/5
+
+        paddw       mm3,    mm2                     // d * 4/5 + an * 1/5
+        paddw       mm1,    roundValues             // + 128
+
+        paddw       mm3,    roundValues             // + 128
+        psrlw       mm1,    8
+        
+        psrlw       mm3,    8
+        packuswb    mm1,    mm3                     // des[4]
+
+        movq        QWORD ptr [edi+ecx], mm1        // write des[4]
+
+        add         edi,    8                       // advance both pointers to the next 8 columns
+        add         esi,    8
+
+        sub         edx,    8                       // 8 pixels done; the test comes after the body, so one pass always runs
+        jg         VS_4_5_loop                      // signed test: loop while the remaining count is > 0
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : LastVerticalBand_4_5_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    : Top line of the band; scaled in place.
+ *                  unsigned int destPitch : Byte offset between successive lines.
+ *                  unsigned int destWidth : Pixels per line; handled 8 at a time.
+ *                  
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 4 to 5 up-scaling of the last 4-pixel high band in an image. 
+ *
+ *  SPECIAL NOTES : No line below the band is read: the last output line
+ *                  (des[4]) is a straight copy of src[3]. Lines 0-3 are read
+ *                  and lines 1-4 are written. A "C" only version also exists.
+ *
+ ****************************************************************************/
+void LastVerticalBand_4_5_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm 
+    {
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+        
+        lea         edi,    [esi+ecx*2]             // two lines below
+        add         edi,    ecx                     // three lines below
+
+        pxor        mm7,    mm7                     // clear out mm7        
+        mov         edx,    destWidth               // Loop counter
+
+LastVS_4_5_loop:
+        // Each pass processes one 8-pixel wide column; a, b, c, d = lines 0..3 (src[0..3]).
+        movq        mm0,    QWORD ptr [esi]         // src[0];
+        movq        mm1,    QWORD ptr [esi+ecx]     // src[1];
+
+        movq        mm2,    mm0                     // Make a copy 
+        punpcklbw   mm0,    mm7                     // unpack low to word
+    
+        movq        mm5,    oneFifth                
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm0,    mm5                     // a * 1/5
+        
+        movq        mm3,    mm1                     // make a copy 
+        punpcklbw   mm1,    mm7                     // unpack low to word
+
+        pmullw      mm2,    mm5                     // a * 1/5
+        movq        mm6,    fourFifths               // constant 4/5
+
+        movq        mm4,    mm1                     // copy of low b
+        pmullw      mm4,    mm6                     // b * 4/5
+
+        punpckhbw   mm3,    mm7                     // unpack high to word 
+        movq        mm5,    mm3                     // copy of high b
+
+        pmullw      mm5,    mm6                     // b * 4/5
+        paddw       mm0,    mm4                     // a * 1/5 + b * 4/5
+
+        paddw       mm2,    mm5                     // a * 1/5 + b * 4/5
+        paddw       mm0,    roundValues             // + 128
+
+        paddw       mm2,    roundValues             // + 128
+        psrlw       mm0,    8
+
+        psrlw       mm2,    8
+        packuswb    mm0,    mm2                     // des [1]
+
+        movq        QWORD ptr [esi+ecx], mm0        // write des[1] (b is still held unpacked in mm1/mm3)
+        movq        mm0,    [esi+ecx*2]             // mm0 = src[2]
+
+        // mm1, mm3 --- Src[1]
+        // mm0 --- Src[2]
+        // mm7 for unpacking
+
+        movq        mm5,    twoFifths                 
+        movq        mm2,    mm0                     // make a copy 
+
+        pmullw      mm1,    mm5                     // b * 2/5 
+        movq        mm6,    threeFifths             
+
+
+        punpcklbw   mm0,    mm7                     // unpack low to word
+        pmullw      mm3,    mm5                     // b * 2/5
+
+        movq        mm4,    mm0                     // make copy of c
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm4,    mm6                     // c * 3/5
+        movq        mm5,    mm2                     
+
+        pmullw      mm5,    mm6                     // c * 3/5
+        paddw       mm1,    mm4                     // b * 2/5 + c * 3/5
+        
+        paddw       mm3,    mm5                     // b * 2/5 + c * 3/5
+        paddw       mm1,    roundValues             // + 128
+
+        paddw       mm3,    roundValues             // + 128
+        psrlw       mm1,    8
+
+        psrlw       mm3,    8
+        packuswb    mm1,    mm3                     // des[2]
+
+        movq        QWORD ptr [esi+ecx*2], mm1      // write des[2] (c is still held unpacked in mm0/mm2)
+        movq        mm1,    [edi]                   // mm1=Src[3];
+
+        movq        QWORD ptr [edi+ecx], mm1        // write des[4] = unmodified copy of src[3]
+
+        // mm0, mm2 --- Src[2]
+        // mm1 --- Src[3]
+        // mm6 --- 3/5
+        // mm7 for unpacking
+
+        pmullw      mm0,    mm6                     // c * 3/5
+        movq        mm5,    twoFifths               // mm5 = 2/5
+
+        movq        mm3,    mm1                     // make a copy
+        pmullw      mm2,    mm6                     // c * 3/5 
+
+        punpcklbw   mm1,    mm7                     // unpack low
+        movq        mm4,    mm1                     // make a copy 
+
+        punpckhbw   mm3,    mm7                     // unpack high
+        pmullw      mm4,    mm5                     // d * 2/5
+
+        movq        mm6,    mm3                     // make a copy
+        pmullw      mm6,    mm5                     // d * 2/5
+    
+        paddw       mm0,    mm4                     // c * 3/5 + d * 2/5
+        paddw       mm2,    mm6                     // c * 3/5 + d * 2/5
+
+        paddw       mm0,    roundValues             // + 128
+        paddw       mm2,    roundValues             // + 128
+
+        psrlw       mm0,    8
+        psrlw       mm2,    8
+
+        packuswb    mm0,    mm2                     // des[3]
+        movq        QWORD ptr [edi], mm0            // write des[3] (src[3] was already copied to des[4] above)
+
+        //  advance both pointers to the next 8 columns;
+        //  mm7 is still zero for the next pass
+        add         edi,    8
+        add         esi,    8
+
+        sub         edx,    8                       // 8 pixels done; the test comes after the body, so one pass always runs
+        jg          LastVS_4_5_loop                 // signed test: loop while the remaining count is > 0
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VerticalBand_3_5_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    : Top line of the band; scaled in place.
+ *                  unsigned int destPitch : Byte offset between successive lines.
+ *                  unsigned int destWidth : Pixels per line; handled 8 at a time.
+ *                   
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 3 to 5 up-scaling of a 3-pixel high band of pixels.
+ *
+ *  SPECIAL NOTES : The routine uses the first line of the band below 
+ *                  the current band. The function also has an "C" only
+ *                  version.
+ *                  Lines 0-2 and 5 (from dest) are read; lines 1-4 are written.
+ ****************************************************************************/
+void VerticalBand_3_5_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm 
+    {
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+        
+        lea         edi,    [esi+ecx*2]             // two lines below
+        add         edi,    ecx                     // three lines below
+
+        pxor        mm7,    mm7                     // clear out mm7        
+        mov         edx,    destWidth               // Loop counter
+
+VS_3_5_loop:
+        // Each pass processes one 8-pixel wide column; a, b, c = lines 0..2, an = line 5.
+        movq        mm0,    QWORD ptr [esi]         // src[0];
+        movq        mm1,    QWORD ptr [esi+ecx]     // src[1];
+
+        movq        mm2,    mm0                     // Make a copy 
+        punpcklbw   mm0,    mm7                     // unpack low to word
+    
+        movq        mm5,    twoFifths               // mm5 = 2/5                
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm0,    mm5                     // a * 2/5
+        
+        movq        mm3,    mm1                     // make a copy 
+        punpcklbw   mm1,    mm7                     // unpack low to word
+
+        pmullw      mm2,    mm5                     // a * 2/5
+        movq        mm6,    threeFifths             // mm6 = 3/5
+
+        movq        mm4,    mm1                     // copy of low b
+        pmullw      mm4,    mm6                     // b * 3/5
+
+        punpckhbw   mm3,    mm7                     // unpack high to word 
+        movq        mm5,    mm3                     // copy of high b
+
+        pmullw      mm5,    mm6                     // b * 3/5
+        paddw       mm0,    mm4                     // a * 2/5 + b * 3/5
+
+        paddw       mm2,    mm5                     // a * 2/5 + b * 3/5
+        paddw       mm0,    roundValues             // + 128
+
+        paddw       mm2,    roundValues             // + 128
+        psrlw       mm0,    8
+
+        psrlw       mm2,    8
+        packuswb    mm0,    mm2                     // des [1]
+
+        movq        QWORD ptr [esi+ecx], mm0        // write des[1] (b is still held unpacked in mm1/mm3)
+        movq        mm0,    [esi+ecx*2]             // mm0 = src[2]
+
+        // mm1, mm3 --- Src[1]
+        // mm0 --- Src[2]
+        // mm7 for unpacking
+
+        movq        mm4,    mm1                     // b low
+        pmullw      mm1,    fourFifths              // b * 4/5 low    
+
+        movq        mm5,    mm3                     // b high
+        pmullw      mm3,    fourFifths              // b * 4/5 high
+        
+        movq        mm2,    mm0                     // c
+        pmullw      mm4,    oneFifth                // b * 1/5
+
+        punpcklbw   mm0,    mm7                     // c low
+        pmullw      mm5,    oneFifth                // b * 1/5
+
+        movq        mm6,    mm0                     // make copy of c low
+        punpckhbw   mm2,    mm7                     // c high
+        // From here mm7 is borrowed as a scratch register; it is zeroed again before the last step.
+        pmullw      mm6,    oneFifth                // c * 1/5 low
+        movq        mm7,    mm2                     // make copy of c high
+
+        pmullw      mm7,    oneFifth                // c * 1/5 high
+        paddw       mm1,    mm6                     // b * 4/5 + c * 1/5 low
+
+        paddw       mm3,    mm7                     // b * 4/5 + c * 1/5 high
+        movq        mm6,    mm0                     // make copy of c low
+
+        pmullw      mm6,    fourFifths              // c * 4/5 low
+        movq        mm7,    mm2                     // make copy of c high
+
+        pmullw      mm7,    fourFifths              // c * 4/5 high
+
+        paddw       mm4,    mm6                     // b * 1/5 + c * 4/5 low
+        paddw       mm5,    mm7                     // b * 1/5 + c * 4/5 high
+
+        paddw       mm1,    roundValues             // + 128
+        paddw       mm3,    roundValues             // + 128
+
+        psrlw       mm1,    8
+        psrlw       mm3,    8
+
+        packuswb    mm1,    mm3                     // des[2]
+        movq        QWORD ptr [esi+ecx*2], mm1      // write des[2] (c is still held unpacked in mm0/mm2)
+
+        paddw       mm4,    roundValues             // + 128
+        paddw       mm5,    roundValues             // + 128
+
+        psrlw       mm4,    8
+        psrlw       mm5,    8
+
+        packuswb    mm4,    mm5                     // des[3]
+        movq        QWORD ptr [edi], mm4            // write des[3]
+
+        //  mm0, mm2 --- Src[2] (c), unpacked low / high
+        
+        pxor        mm7,    mm7                     // clear mm7 for unpacking
+        movq        mm1,    [edi+ecx*2]             // mm1 = Src[0] of the next group
+
+        movq        mm5,    threeFifths             // mm5 = 3/5
+        pmullw      mm0,    mm5                     // c * 3/5
+
+        movq        mm6,    twoFifths                // mm6 = 2/5
+        movq        mm3,    mm1                     // make a copy     
+
+        pmullw      mm2,    mm5                     // c * 3/5
+        punpcklbw   mm1,    mm7                     // unpack low
+        
+        pmullw      mm1,    mm6                     // an * 2/5
+        punpckhbw   mm3,    mm7                     // unpack high
+
+        paddw       mm0,    mm1                     // c * 3/5 + an * 2/5
+        pmullw      mm3,    mm6                     // an * 2/5
+
+        paddw       mm2,    mm3                     // c * 3/5 + an * 2/5
+        paddw       mm0,    roundValues             // + 128
+
+        paddw       mm2,    roundValues             // + 128
+        psrlw       mm0,    8
+        
+        psrlw       mm2,    8
+        packuswb    mm0,    mm2                     // des[4]
+
+        movq        QWORD ptr [edi+ecx], mm0        // write des[4]
+
+        add         edi,    8                       // advance both pointers to the next 8 columns
+        add         esi,    8
+
+        sub         edx,    8                       // 8 pixels done; the test comes after the body, so one pass always runs
+        jg          VS_3_5_loop                     // signed test: loop while the remaining count is > 0
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : LastVerticalBand_3_5_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    :
+ *                  unsigned int destPitch :
+ *                  unsigned int destWidth :
+ *                   
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 3 to 5 up-scaling of a 3-pixel high band of pixels.
+ *
+ *  SPECIAL NOTES : No line of a following band is read: the last output
+ *                  line (des[4]) is a plain copy of src[2]. The function
+ *                  also has a "C" only version.
+ *
+ ****************************************************************************/
+void LastVerticalBand_3_5_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm 
+    {
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+        
+        lea         edi,    [esi+ecx*2]             // two lines below
+        add         edi,    ecx                     // three lines below
+
+        mov         edx,    destWidth               // Loop counter
+
+LastVS_3_5_loop:
+        // mm7 is reused as scratch later in the body (copies of c high), so the
+        // zero needed by the unpacks must be restored on every pass.
+        pxor        mm7,    mm7                     // clear out mm7
+        movq        mm0,    QWORD ptr [esi]         // src[0];
+        movq        mm1,    QWORD ptr [esi+ecx]     // src[1];
+
+        movq        mm2,    mm0                     // Make a copy 
+        punpcklbw   mm0,    mm7                     // unpack low to word
+    
+        movq        mm5,    twoFifths               // mm5 = 2/5                
+        punpckhbw   mm2,    mm7                     // unpack high to word
+
+        pmullw      mm0,    mm5                     // a * 2/5
+        
+        movq        mm3,    mm1                     // make a copy 
+        punpcklbw   mm1,    mm7                     // unpack low to word
+
+        pmullw      mm2,    mm5                     // a * 2/5
+        movq        mm6,    threeFifths             // mm6 = 3/5
+
+        movq        mm4,    mm1                     // copy of low b
+        pmullw      mm4,    mm6                     // b * 3/5
+
+        punpckhbw   mm3,    mm7                     // unpack high to word 
+        movq        mm5,    mm3                     // copy of high b
+
+        pmullw      mm5,    mm6                     // b * 3/5
+        paddw       mm0,    mm4                     // a * 2/5 + b * 3/5
+
+        paddw       mm2,    mm5                     // a * 2/5 + b * 3/5
+        paddw       mm0,    roundValues             // + 128
+
+        paddw       mm2,    roundValues             // + 128
+        psrlw       mm0,    8
+
+        psrlw       mm2,    8
+        packuswb    mm0,    mm2                     // des [1]
+
+        movq        QWORD ptr [esi+ecx], mm0        // write des[1]
+        movq        mm0,    [esi+ecx*2]             // mm0 = src[2]
+
+        // Last band: des[4] is stored below as an unmodified copy of src[2].
+
+        // mm1, mm3 --- Src[1]
+        // mm0 --- Src[2]
+        // mm7 for unpacking
+
+        movq        mm4,    mm1                     // b low
+        pmullw      mm1,    fourFifths              // b * 4/5 low    
+        
+        movq        QWORD ptr [edi+ecx], mm0        // write des[4]
+
+        movq        mm5,    mm3                     // b high
+        pmullw      mm3,    fourFifths              // b * 4/5 high
+        
+        movq        mm2,    mm0                     // c
+        pmullw      mm4,    oneFifth                // b * 1/5
+
+        punpcklbw   mm0,    mm7                     // c low
+        pmullw      mm5,    oneFifth                // b * 1/5
+
+        movq        mm6,    mm0                     // make copy of c low
+        punpckhbw   mm2,    mm7                     // c high
+
+        pmullw      mm6,    oneFifth                // c * 1/5 low
+        movq        mm7,    mm2                     // make copy of c high
+
+        pmullw      mm7,    oneFifth                // c * 1/5 high
+        paddw       mm1,    mm6                     // b * 4/5 + c * 1/5 low
+
+        paddw       mm3,    mm7                     // b * 4/5 + c * 1/5 high
+        movq        mm6,    mm0                     // make copy of c low
+
+        pmullw      mm6,    fourFifths              // c * 4/5 low
+        movq        mm7,    mm2                     // make copy of c high
+
+        pmullw      mm7,    fourFifths              // c * 4/5 high
+
+        paddw       mm4,    mm6                     // b * 1/5 + c * 4/5 low
+        paddw       mm5,    mm7                     // b * 1/5 + c * 4/5 high
+
+        paddw       mm1,    roundValues             // + 128
+        paddw       mm3,    roundValues             // + 128
+
+        psrlw       mm1,    8
+        psrlw       mm3,    8
+
+        packuswb    mm1,    mm3                     // des[2]
+        movq        QWORD ptr [esi+ecx*2], mm1      // write des[2]
+
+        paddw       mm4,    roundValues             // + 128
+        paddw       mm5,    roundValues             // + 128
+
+        psrlw       mm4,    8
+        psrlw       mm5,    8
+
+        packuswb    mm4,    mm5                     // des[3]
+        movq        QWORD ptr [edi], mm4            // write des[3]
+
+        //  mm0, mm2 --- Src[2] unpacked (low, high); not needed past this point
+        
+        add         edi,    8
+        add         esi,    8
+
+        sub         edx,    8
+        jg          LastVS_3_5_loop
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : VerticalBand_1_2_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    :
+ *                  unsigned int destPitch :
+ *                  unsigned int destWidth :
+ *                   
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 1 to 2 up-scaling of a band of pixels.
+ *
+ *  SPECIAL NOTES : The routine uses the first line of the band below 
+ *                  the current band. The function also has an "C" only
+ *                  version.
+ *
+ ****************************************************************************/
+void VerticalBand_1_2_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm
+    {
+        // Fills line dest+pitch with the rounded average of lines dest and dest+2*pitch.
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+
+        pxor        mm7,    mm7                     // clear out mm7        
+        mov         edx,    destWidth               // Loop counter
+
+VS_1_2_loop:
+
+        movq        mm0,    [esi]                   // get Src[0]
+        movq        mm1,    [esi + ecx * 2]         // get Src[1]
+
+        movq        mm2,    mm0                     // make copy before unpack
+        movq        mm3,    mm1                     // make copy before unpack
+
+        punpcklbw   mm0,    mm7                     // low Src[0]
+        movq        mm6,    fourOnes                // mm6= 1, 1, 1, 1
+
+        punpcklbw   mm1,    mm7                     // low Src[1]
+        paddw       mm0,    mm1                     // low (a + b)
+        
+        punpckhbw   mm2,    mm7                     // high Src[0]
+        paddw       mm0,    mm6                     // low (a + b + 1)
+
+        punpckhbw   mm3,    mm7                     // high Src[1]
+        paddw       mm2,    mm3                     // high (a + b )
+
+        psraw       mm0,    1                       // low (a + b +1 )/2
+        paddw       mm2,    mm6                     // high (a + b + 1)
+
+        psraw       mm2,    1                       // high (a + b + 1)/2
+        packuswb    mm0,    mm2                     // pack results
+
+        movq        [esi+ecx], mm0                  // write out eight bytes
+        add         esi,    8                       // next group of eight bytes
+
+        sub         edx,    8                       // eight bytes done; sets flags for jg
+        jg          VS_1_2_loop                     // signed test after the body: at least one pass
+    }
+ 
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : LastVerticalBand_1_2_Scale_MMX
+ *
+ *  INPUTS        : unsigned char *dest    :
+ *                  unsigned int destPitch :
+ *                  unsigned int destWidth :
+ *                   
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 1 to 2 up-scaling of band of pixels.
+ *
+ *  SPECIAL NOTES : No line of a following band is read; the line at dest
+ *                  is copied unchanged to the line one pitch below it.
+ *                  The function also has a "C" only version.
+ *
+ ****************************************************************************/
+void LastVerticalBand_1_2_Scale_MMX
+(
+    unsigned char *dest,
+    unsigned int destPitch,
+    unsigned int destWidth
+)
+{
+    __asm
+    {
+        mov         esi,    dest                    // Get the source and destination pointer
+        mov         ecx,    destPitch               // Get the pitch size
+        // No interpolation here: the line at dest is duplicated one pitch below.
+        mov         edx,    destWidth               // Loop counter
+
+LastVS_1_2_loop:
+
+        movq        mm0,    [esi]                   // get Src[0]
+        movq        [esi+ecx], mm0                  // write out eight bytes
+
+        add         esi,    8                       // next group of eight bytes
+        sub         edx,    8                       // eight bytes done; sets flags for jg
+
+        jg         LastVS_1_2_loop                  // signed test after the body: at least one pass
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : HorizontalLine_1_2_Scale_MMX
+ *
+ *  INPUTS        : const unsigned char *source :
+ *                  unsigned int sourceWidth    :
+ *                  unsigned char *dest         :
+ *                  unsigned int destWidth      :
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 1 to 2 up-scaling of a horizontal line of pixels.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void HorizontalLine_1_2_Scale_MMX 
+(
+    const unsigned char *source,
+    unsigned int sourceWidth,
+    unsigned char *dest,
+    unsigned int destWidth
+)
+{
+	(void) destWidth;
+
+    __asm
+    {
+        mov         esi,    source                  // esi = read pointer
+        mov         edi,    dest                    // edi = write pointer
+
+        pxor        mm7,    mm7                     // zero, used for byte -> word unpacking
+        movq        mm6,    fourOnes                // rounding term for the averages
+
+        mov         ecx,    sourceWidth             // pixels still to process
+        cmp         ecx,    8                       // one group or less: the main loop would
+        jle         HS_1_2_Last                     // read and write a second, non-existent group
+HS_1_2_Loop:
+        movq        mm0,    [esi]                   // eight pixels p[0..7]
+        movq        mm1,    [esi+1]                 // right neighbours p[1..8] (one byte into the next group)
+
+        movq        mm2,    mm0                     // copy for the high half
+        movq        mm3,    mm1                     // copy for the high half
+
+        movq        mm4,    mm0                     // keep the original pixels for interleaving
+        punpcklbw   mm0,    mm7                     // low four p[i] as words
+
+        punpcklbw   mm1,    mm7                     // low four p[i+1] as words
+        paddw       mm0,    mm1                     // low (a + b)
+
+        paddw       mm0,    mm6                     // low (a + b + 1)
+        punpckhbw   mm2,    mm7                     // high four p[i] as words
+
+        punpckhbw   mm3,    mm7                     // high four p[i+1] as words
+        paddw       mm2,    mm3                     // high (a + b)
+
+        paddw       mm2,    mm6                     // high (a + b + 1)
+        psraw       mm0,    1                       // low (a + b + 1)/2
+
+        psraw       mm2,    1                       // high (a + b + 1)/2
+        packuswb    mm0,    mm2                     // eight interpolated pixels
+
+        movq        mm2,    mm4                     // original pixels
+        punpcklbw   mm2,    mm0                     // interleave: original, interpolated (low four)
+
+        movq        [edi],  mm2                     // first eight output pixels
+        punpckhbw   mm4,    mm0                     // interleave: original, interpolated (high four)
+
+        movq        [edi+8], mm4                    // second eight output pixels
+        add         esi,    8                       // eight source pixels consumed
+
+        add         edi,    16                      // sixteen output pixels produced
+        sub         ecx,    8
+
+        cmp         ecx,    8                       // leave the final group to the tail below
+        jg          HS_1_2_Loop
+
+// last eight pixels: the final pixel has no right neighbour, so it is replicated
+HS_1_2_Last:
+        movq        mm0,    [esi]                   // eight pixels p[0..7]
+        movq        mm1,    mm0
+        
+        movq        mm2,    mm0                     // copy for the high half
+        movq        mm3,    mm1
+
+        psrlq       mm1,    8                       // p[1..7] in the low seven bytes, top byte zero
+        psrlq       mm3,    56                      // isolate p[7] in the low byte
+        
+        psllq       mm3,    56                      // move p[7] back to the top byte
+        por         mm1,    mm3                     // neighbours = p[1..7], p[7]
+
+        movq        mm3,    mm1                     // copy for the high half
+        movq        mm4,    mm0                     // keep the original pixels for interleaving
+
+        punpcklbw   mm0,    mm7                     // low four p[i] as words
+        punpcklbw   mm1,    mm7                     // low four neighbours as words
+
+        paddw       mm0,    mm1                     // low (a + b)
+        paddw       mm0,    mm6                     // low (a + b + 1)
+
+        punpckhbw   mm2,    mm7                     // high four p[i] as words
+        punpckhbw   mm3,    mm7                     // high four neighbours as words
+
+        paddw       mm2,    mm3                     // high (a + b)
+        paddw       mm2,    mm6                     // high (a + b + 1)
+
+        psraw       mm0,    1                       // low (a + b + 1)/2
+        psraw       mm2,    1                       // high (a + b + 1)/2
+
+        packuswb    mm0,    mm2                     // eight interpolated pixels
+        movq        mm2,    mm4                     // original pixels
+
+        punpcklbw   mm2,    mm0                     // interleave: original, interpolated (low four)
+        movq        [edi],  mm2                     // first eight output pixels
+
+        punpckhbw   mm4,    mm0                     // interleave: original, interpolated (high four)
+        movq        [edi+8], mm4                    // second eight output pixels
+    }
+}  
+    
+#if defined(__cplusplus)
+extern "C" {
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c b/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c
new file mode 100644
index 00000000..063c15d3
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vppp/win32/simpledeblock_asm.c
@@ -0,0 +1,733 @@
+/****************************************************************************
+ *
+ *   Module Title :     simpledeblock_asm.c
+ *
+ *   Description  :     Simple deblocking filter for low end machines
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include "postp.h"
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/        
+__declspec(align(16)) const unsigned char eightNOnes[]= {255, 255, 255, 255, 255, 255, 255, 255};   /* 8 x 0xFF: -1 per byte for the signed "< -1" compares */
+__declspec(align(16)) const short fourFours[] = {4, 4, 4, 4};   /* rounding term added before the >> 3 */
+__declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1};   /* OR-ed into a 0000/FFFF sign mask to give +1 / -1 */
+__declspec(align(16)) const unsigned char eightFours[] = {4, 4, 4, 4, 4, 4, 4, 4};   /* referenced by the disabled byte-wise variant in FilterVert_Simple_MMX */
+__declspec(align(16)) const unsigned char eightOnes[] = {1, 1, 1, 1, 1, 1, 1, 1};   /* 1 per byte for the signed "> 1" compares */
+__declspec(align(16)) const unsigned char eight128s[] = {128, 128, 128, 128, 128, 128, 128, 128};   /* 0x80 bias between unsigned and signed bytes */
+
+/****************************************************************************
+* Imports
+****************************************************************************/              
+extern UINT32 LoopFilterLimitValuesV1[];
+extern UINT32 *DeblockLimitValuesV2;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterHoriz_Simple_MMX
+ *
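+ *  INPUTS        :     pbi (FrameQIndex selects the limit), PixelPtr,
+ *                      LineLength (pitch), BoundingValuePtr (unused)
+ *  OUTPUTS       :     Filtered pixels are written back through PixelPtr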
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Applies a loop filter to the vertical edge horizontally
+ *
+ *  SPECIAL NOTES :     
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterHoriz_Simple_MMX(
+						POSTPROC_INSTANCE *pbi, 
+						UINT8 * PixelPtr, 
+						INT32 LineLength, 
+						INT32 *BoundingValuePtr
+						)
+{	
+	/*************************************************************	
+		The following code in comments is the C version of the 
+		function, provided here for reference  
+	 *************************************************************
+
+	INT32 j;
+	INT32 FiltVal;
+    UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+
+
+
+	for ( j = 0; j < 8; j++ )
+	{            
+        INT32 UseHighVariance;
+		
+		FiltVal =  ( PixelPtr[2] * 3 ) - 
+			( PixelPtr[1] * 3 );
+
+        UseHighVariance =  abs(PixelPtr[0] - PixelPtr[1]) > 1 ||
+                 abs(PixelPtr[2] - PixelPtr[3]) > 1;
+
+        if(UseHighVariance)
+        {
+            FiltVal +=  ( PixelPtr[0] ) - 
+	    		( PixelPtr[3] );
+        }
+
+        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+		
+		PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
+		PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];
+
+        if(!UseHighVariance)
+        {
+            FiltVal >>= 1;
+
+            PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
+		    PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
+        }
+		
+		PixelPtr += LineLength;
+	}
+	************************************************************/
+
+	UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+	__declspec(align(16)) unsigned char WorkingBuffer[32];
+	(void)BoundingValuePtr;	
+	// Gather 8 rows x 4 pixels into four column vectors p[0..3], filter them, then scatter them back.
+	__asm 
+	{
+		mov			eax,		FLimit					// Flimit
+		xor			ecx,		ecx						// clear ecx
+
+		mov			edx,		LineLength				// pitch
+		mov			esi,		PixelPtr				// src and des pointer
+
+		sub			ecx,		edx						// negative pitch
+		lea			esi,		[esi + edx]				// next line
+
+		movd		mm0,		[esi + ecx + -2]		// xx xx xx xx 01 00 xx xx	
+		movd		mm4,		[esi + -2]				// xx xx xx xx 11 10 xx xx
+
+		movd		mm2,		[esi + ecx + 2]			// xx xx xx xx xx xx 03 02
+		punpcklbw   mm0,		mm4						// 11 01 10 00 xx xx xx xx
+
+		movd		mm3,		[esi +  2]				// xx xx xx xx xx xx 13 12
+		punpcklbw	mm2,		mm3						// xx xx xx	xx 13 03 12 02
+
+		movd		mm1,		[esi+ edx + -2]			// xx xx xx xx 21 20 xx xx
+		movd		mm5,		[esi+ edx *2 + -2]		// xx xx xx xx 31 30 xx xx
+
+		movd		mm6,		[esi+ edx + 2]			// xx xx xx xx xx xx 23 22 	
+		punpcklbw	mm1,		mm5						// 31 21 30 20 xx xx xx xx 
+		
+		movd		mm7,		[esi+ edx*2 + 2]		// xx xx xx xx xx xx 33 32
+		punpckhwd	mm0,		mm1						// 31 21 11 01 30 20 10 00
+		
+		punpcklbw	mm6,		mm7						// xx xx xx xx 33 23 32 22
+		lea			edi,		WorkingBuffer
+
+		punpcklwd	mm2,		mm6						// 33 23 13 03 32 22 12 02
+		lea			esi,		[esi+edx*4]				// four lines below
+
+		movd		mm4,		[esi+ecx + -2]			// xx xx xx xx 41 40 xx xx
+		movd		mm1,		[esi + -2]				// xx xx xx xx 51 50 xx xx
+		
+		movd		mm3,		[esi+ecx + 2]			// xx xx xx xx xx xx 43 42
+		punpcklbw	mm4,		mm1						// 51 41 50 40 xx xx xx xx
+
+		movd		mm6,		[esi + 2]				// xx xx xx xx xx xx 53 52
+		movd		mm1,		[esi + edx + -2]		// xx xx xx xx 61 60 xx xx
+
+		punpcklbw	mm3,		mm6						// xx xx xx xx 53 43 52 42
+		movd		mm5,		[esi + edx*2 -2]		// xx xx xx xx 71 70 xx xx (movd: only the low dword is unpacked)
+
+		movd		mm6,		[esi + edx +2]			// xx xx xx xx xx xx 63 62 (movd: avoids reading 4 bytes too many)
+		punpcklbw	mm1,		mm5						// 71 61 70 60 xx xx xx xx
+		
+		movd		mm7,		[esi + edx*2 + 2]		// xx xx xx xx xx xx 73 72 (movd: avoids reading 4 bytes too many)
+		punpckhwd	mm4,		mm1						// 71 61 51 41 70 60 50 40
+
+		punpcklbw	mm6,		mm7						// xx xx xx xx 73 63 72 62
+		movq		mm1,		mm0						// 31 21 11 01 30 20 10 00
+
+		punpcklwd	mm3,		mm6						// 73 63 53 43 72 62 52 42
+		movq		mm7,		mm2						// 33 23 13 03 32 22 12 02
+
+		punpckldq	mm0,		mm4						// 70 60 50 40 30 20 10 00
+		movq		[edi],		mm0						// save	p[0]					
+
+		punpckhdq	mm1,		mm4						// 71 61 51 41 31 21 11 01
+		movq		mm4,		mm0						// copy of p[0]
+
+		movq		[edi+8],	mm1						// save p[1]
+		punpckldq	mm2,		mm3						// 72 62 52 42 32 22 12 02
+
+		movq		mm5,		mm1						// copy of p[1]
+		movq		[edi+16],	mm2						// save p[2]
+
+		punpckhdq	mm7,		mm3						// 73 63 53 43 33 23 13 03
+		movq		mm6,		mm2						// copy of p[2]
+
+		movq		[edi+24],	mm7						// save p[3]
+
+		//	mm0, 4 ---> p[0]
+		//  mm1, 5 ---> p[1]		
+		//  mm2, 6 ---> p[2]
+		//  mm7, 3 ---> p[3]
+
+		movq		mm1,		eightNOnes				// mm1 = FFFFFFFFFFFFFFFFF
+		psubb		mm0,		mm5						// p[0]-p[1]
+
+		movq		mm7,		eightOnes				// mm7 = 0101010101010101
+		pcmpgtb		mm1,		mm0						// p[0]-p[1]<-1?
+
+		pcmpgtb		mm0,		mm7						// p[0]-p[1]>1?
+		movq		mm3,		eightNOnes				// mm3 = FFFFFFFFFFFFFFFFF
+
+		por			mm0,		mm1						// abs(p[0]-p[1])>1?
+		movq		mm1,		mm7						// mm1 = 0101010101010101
+
+		movq		mm7,		[edi+24]				// p[3]
+		psubb		mm2,		mm7						// p[2]-p[3]
+
+		pcmpgtb		mm3,		mm2						// p[2]-p[3]<-1?
+		pcmpgtb		mm2,		mm1						// p[2]-p[3]>1?
+
+		por			mm2,		mm3						// abs(p[3]-p[2])>1?
+		movq		mm3,		eight128s				// mm3 = 8080808080808080
+
+		por			mm0,		mm2						// mm0 = UseHighVariance
+
+		// mm0 = UseHighVariance
+		// mm4 = P[0]
+		// mm5 = P[1]
+		// mm6 = P[2]
+		// mm7 = P[3]
+		// mm3 = 8080808080808080 (overwritten below before it is read)
+
+		pxor		mm1,		mm1						// clear mm1 for unpack
+		movq		mm2,		mm5						// copy p[1]
+
+		movq		mm3,		mm6						// copy of p[2]
+		punpcklbw	mm2,		mm1						// low four p[1]
+
+		punpcklbw	mm3,		mm1						// low four p[2]
+		psubw		mm3,		mm2						// low four p[2]-p[1]
+
+		punpckhbw	mm5,		mm1						// high four p[1]
+		movq		mm2,		mm3						// low p[2]-p[1]
+
+		punpckhbw	mm6,		mm1						// high four p[2]
+		paddw		mm3,		mm3						// 2*(p[2]-p[1]) low four
+
+		psubw		mm6,		mm5						// high four p[2]-p[1]
+		paddw		mm2,		mm3						// 3*(p[2]-p[1]) low four
+
+		movq		mm5,		mm6						// high four p[2]-p[1]
+		movq		mm3,		mm4						// copy of p[0]
+
+		paddw		mm6,		mm6						// 2*(p[2]-p[1]) highfour
+		punpcklbw	mm3,		mm1						// low four p[0]
+		
+		paddw		mm5,		mm6						// 3*(p[2]-p[1]) highfour
+		punpckhbw	mm4,		mm1						// high four p[0]
+
+		movq		mm6,		mm7						// copy of p[3]		
+		punpcklbw	mm7,		mm1						// low four p[3]
+
+		punpckhbw	mm6,		mm1						// high four p[3]
+		psubw		mm3,		mm7						// low four p[0]-p[3]
+
+		punpcklbw	mm1,		mm0						// UseHighVariance Low four
+		pxor		mm7,		mm7						// clear mm7 for unpack 
+
+		psraw		mm1,		8						// FFFF or 0000
+		punpckhbw	mm7,		mm0						// UseHighVariance high four
+
+		psubw		mm4,		mm6						// high four p[0]-p[3]
+		psraw		mm7,		8						// FFFF or 0000
+
+		pand		mm3,		mm1						// And UseHighVariance
+		pand		mm4,		mm7						// And UseHighVariance
+
+		paddw		mm2,		mm3						// Low four 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+		paddw		mm4,		mm5						// High four 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+
+		paddw		mm2,		fourFours				// adjust before shift
+		movd		mm1,		eax						// Flimit
+
+		paddw		mm4,		fourFours				// adjust before shift
+		psraw		mm2,		3						// shift
+		
+		psraw		mm4,		3						// shift
+		movq		mm3,		mm2						// copy of low four
+
+		punpcklwd	mm1,		mm1						// Flimit Flimit
+		movq		mm5,		mm4						// copy of Highfour
+
+		punpckldq	mm1,		mm1						// Four Flimit
+		psraw		mm2,		15						// FFFF or 0000
+
+		movq		mm6,		mm1						// copy of FLimit
+		psraw		mm4,		15						// FFFF or 0000
+
+		pxor		mm3,		mm2						
+		psubsw		mm3,		mm2						// abs(FiltVal) for Low
+
+		pxor		mm5,		mm4
+		psubsw		mm5,		mm4						// abs(FiltVal) for High
+
+		por			mm2,		fourOnes				// -1 or +1 for sign
+		por			mm4,		fourOnes				// -1 or +1 for sign
+
+		//   mm0 = UseHIghVariance?
+		//   mm1 = FLimit in shorts
+		//	 mm2 = sign for lower four FiltVal
+		//	 mm3 = abs for lower four FiltVal
+		//	 mm4 = sign for higher four FiltVal
+		//	 mm5 = abs for higher four FiltVal
+		movq		mm6,		mm1						// copy of Flimit
+		psubusw		mm1,		mm3						// Flimit - abs(FiltVal)
+
+		psubusw		mm3,		mm6						// abs(Filtval) -FLimit
+		por			mm3,		mm1						// abs(Flimit-abs(FiltVal)
+
+		movq		mm1,		mm6						// Flimit
+		psubusw		mm1,		mm3						// Flimit-abs(FLimit-abs(FiltVal)
+
+		movq		mm3,		mm6						// copy of the Flimit
+		pmullw		mm1,		mm2						// Get the sign back
+
+		psubusw		mm3,		mm5						// Flimit-abs(Filtval)
+		psubusw		mm5,		mm6						// abs(Filtval)-Flimit)
+
+		por			mm5,		mm3						// abs(Flimit-abs(FiltVal)
+		movq		mm3,		mm6						// Flimit
+
+		psubusw		mm3,		mm5						// Flimit-abs(FLimit-abs(FiltVal)
+		pmullw		mm4,		mm3						// Get the sign back
+
+		movq		mm2,		mm4
+
+		// mm0 = UseHighVariance
+		// mm1 = low four
+		// mm2 = high four
+
+		movq		mm5,		[edi+8]					// p[1]
+		movq		mm3,		mm1						// copy of low four
+
+		movq		mm4,		eight128s				// 128 for offset
+		packsswb	mm1,		mm2						// pack to chars
+
+		movq		mm6,		[edi+16]				// p[2]
+		psubb		mm5,		mm4						// unsigned -> signed
+
+		psubb		mm6,		mm4						// unsigned -> signed
+		paddsb		mm5,		mm1						// p[1]+delta
+
+		psubsb		mm6,		mm1						// p[2]-delta
+		paddb		mm5,		mm4						// offset back
+		
+		paddb		mm6,		mm4						// offset back
+		movq		mm1,		[edi]					// p[0]
+		
+		psraw		mm3,		1						// delta/2
+		psraw		mm2,		1						// delta/2
+		
+		movq		mm7,		[edi+24]				// p[3]
+		packsswb	mm3,		mm2						// pack to chars
+
+		psubb		mm1,		mm4						// unsigned -> signed
+		pandn		mm0,		mm3						// and !UseHighVariance
+		
+		psubb		mm7,		mm4						// unsigned -> signed
+		psubsb		mm7,		mm0						// p[3] - (delta/2 where !UseHighVariance)
+
+		paddsb		mm0,		mm1						// p[0] + (delta/2 where !UseHighVariance)
+		paddb		mm7,		mm4						// offset back
+
+		paddb		mm0,		mm4						// offset back
+		lea			esi,		[esi+ecx*4]				// esi now point to the second line
+
+		//done with calculation, now write back the results
+		// mm0 -> 7060504030201000
+		// mm5 -> 7161514131211101
+		// mm6 -> 7262524232221202
+		// mm7 -> 7363534333231303
+
+		movq		mm4,		mm0						// 7060504030201000
+		punpcklbw	mm0,		mm5						// 3130212011100100
+
+		punpckhbw	mm4,		mm5						// 7170616051504140
+		movq		mm2,		mm6						// 7262524232221202
+
+		punpcklbw	mm2,		mm7						// 3332232213120302
+		punpckhbw	mm6,		mm7						// 7372636253524342
+
+		movq		mm1,		mm0						// 3130212011100100
+		punpcklwd	mm0,		mm2						// 1312111003020100
+
+		movd		[esi+ecx],	mm0						// write 03020100
+		punpckhwd	mm1,		mm2						// 3332313023222120
+
+		psrlq		mm0,		32						// xxxxxxxx13121110
+		movd		[esi],		mm0						// write 13121110
+
+		movq		mm5,		mm4						// 7170616051504140		
+		punpcklwd	mm4,		mm6						// 5352515043424140
+		
+		movd		[esi+edx],	mm1						// write 23222120
+		psrlq		mm1,		32						// xxxxxxxx33323130
+
+		punpckhwd	mm5,		mm6						// 7372717063626160
+		movd		[esi+edx*2],mm1						// write 33323130
+
+		lea			esi,		[esi+edx*4]				// sixth line (row 5)
+		movd		[esi+ecx],	mm4						// write 43424140
+
+		psrlq		mm4,		32						// xxxxxxxx53525150
+		movd		[esi],		mm4						// write 53525150
+
+		movd		[esi+edx],	mm5						// write 63626160
+		psrlq		mm5,		32						// xxxxxxxx73727170
+
+		movd		[esi+edx*2], mm5					// write 73727170
+	
+	}
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterVert_Simple_MMX
+ *
+ *  INPUTS        :     None
+ *                               
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Applies a loop filter to a horizontal edge vertically
+ *
+ *  SPECIAL NOTES :     
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterVert_Simple_MMX(
+						   POSTPROC_INSTANCE *pbi, 
+						   UINT8 * PixelPtr,
+						   INT32 Pitch,
+							INT32 *BoundingValuePtr
+
+						   )
+{
+
+	/************************************************************	
+		The following code in comments is the C version of the 
+		function, provided here for reference  
+	 ************************************************************
+	
+	INT32 j;
+	INT32 FiltVal;
+    UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
+	for ( j = 0; j < 8; j++ )
+	{            
+        INT32 UseHighVariance;
+
+        FiltVal = ( ( (INT32) PixelPtr[0] * 3 ) - 
+			( (INT32)PixelPtr[- LineLength] * 3 ));
+
+        UseHighVariance =  abs(PixelPtr[- (2 * LineLength)] - PixelPtr[- LineLength]) > 1 ||
+                 abs(PixelPtr[0] - PixelPtr[LineLength]) > 1;
+
+        if(UseHighVariance)
+        {
+		    FiltVal += ( (INT32)PixelPtr[- (2 * LineLength)] ) - 
+			    ( (INT32)PixelPtr[LineLength] );
+        }
+
+        
+		FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
+		
+		PixelPtr[- LineLength] = LimitTable[(INT32)PixelPtr[- LineLength] + FiltVal];
+		PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] - FiltVal];
+		
+        if(!UseHighVariance)
+        {
+            FiltVal >>=1 ;
+            
+            PixelPtr[- 2* LineLength] = LimitTable[(INT32)PixelPtr[- 2 * LineLength] + FiltVal];
+            PixelPtr[LineLength] = LimitTable[(INT32)PixelPtr[LineLength] - FiltVal];
+        }
+
+        PixelPtr ++;
+	}
+	*************************************************************/
+
+
+	UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
+	(void)BoundingValuePtr;	
+    __asm
+    {
+    
+		mov			eax,		FLimit					// Flimit Values
+		xor			ecx,		ecx						// clear ecx for negative pitch
+
+		mov			edx,		Pitch					// Pitch 
+		mov			esi,		PixelPtr				// Pointer to Src and Destination
+
+		sub			ecx,		edx						// negative pitch
+		movq		mm2,		[esi]					// p[2]
+
+		movq		mm7,		eightOnes				// mm7 = 0101010101010101
+		movq		mm0,		[esi+ecx*2]				// p[0]
+
+		movq		mm6,		mm2						// Make a copy 
+		movq		mm5,		[esi+ecx]				// p[1]
+
+		movq		mm4,		mm0						// Make a copy
+		movq		mm1,		eightNOnes				// mm1 = FFFFFFFFFFFFFFFFF
+
+		psubb		mm0,		mm5						// p[0]-p[1]
+		pcmpgtb		mm1,		mm0						// p[0]-p[1]<-1?
+
+		pcmpgtb		mm0,		mm7						// p[0]-p[1]>1?
+		movq		mm3,		eightNOnes				// mm1 = FFFFFFFFFFFFFFFFF
+
+		por			mm0,		mm1						// abs(p[0]-p[1])>1?
+		movq		mm1,		mm7						// mm1 = 0101010101010101
+
+		movq		mm7,		[esi+edx]				// p[3]
+		psubb		mm2,		mm7						// p[2]-p[3]
+
+		pcmpgtb		mm3,		mm2						// p[2]-p[3]<-1?
+		pcmpgtb		mm2,		mm1						// p[2]-p[3]>1?
+
+		por			mm2,		mm3						// abs(p[3]-p[2])>1?
+		movq		mm3,		eight128s				// mm3 = 8080808080808080
+
+		por			mm0,		mm2						// mm0 = UseHighVariance
+
+		// mm0 = UseHighVariance
+		// mm4 = P[0]
+		// mm5 = P[1]
+		// mm6 = P[2]
+		// mm7 = P[3]
+		// mm3 = 8080808080808080
+
+		pxor		mm1,		mm1						// clear mm1 for unpack
+		movq		mm2,		mm5						// copy p[1]
+
+		movq		mm3,		mm6						// ocpy of p[2]
+		punpcklbw	mm2,		mm1						// low four p[1]
+
+		punpcklbw	mm3,		mm1						// low four p[2]
+		psubw		mm3,		mm2						// low four p[2]-p[1]
+
+		punpckhbw	mm5,		mm1						// high four p[1]
+		movq		mm2,		mm3						// low p[2]-p[1]
+
+		punpckhbw	mm6,		mm1						// high four p[2]
+		paddw		mm3,		mm3						// 2*(p[2]-p[1]) low four
+
+		psubw		mm6,		mm5						// high four p[2]-p[1]
+		paddw		mm2,		mm3						// 3*(p[2]-p[1]) low four
+
+		movq		mm5,		mm6						// high four p[2]-p[1]
+		movq		mm3,		mm4						// copy of p[0]
+
+		paddw		mm6,		mm6						// 2*(p[2]-p[1]) highfour
+		punpcklbw	mm3,		mm1						// low four p[0]
+		
+		paddw		mm5,		mm6						// 3*(p[2]-p[1]) highfour
+		punpckhbw	mm4,		mm1						// high four p[0]
+
+		movq		mm6,		mm7						// copy of p[3]		
+		punpcklbw	mm7,		mm1						// low four p[3]
+
+		punpckhbw	mm6,		mm1						// high four p[3]
+		psubw		mm3,		mm7						// low four p[0]-p[3]
+
+		punpcklbw	mm1,		mm0						// UseHighVariance Low four
+		pxor		mm7,		mm7						// clear mm7 for unpack 
+
+		psraw		mm1,		8						// FFFF or 0000
+		punpckhbw	mm7,		mm0						// UseHighVaraince high four
+
+		psubw		mm4,		mm6						// high four p[0]-p[3]
+		psraw		mm7,		8						// FFFF or 0000
+
+		pand		mm3,		mm1						// And UseHighVariance
+		pand		mm4,		mm7						// And UseHighVariance
+
+		paddw		mm2,		mm3						// Low four 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+		paddw		mm4,		mm5						// High four 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+
+		paddw		mm2,		fourFours				// adjust before shift
+		paddw		mm4,		fourFours				// adjust before shift
+		
+		movd		mm1,		eax						// Flimit
+		psraw		mm2,		3						// shift
+		
+		psraw		mm4,		3						// shift
+		movq		mm3,		mm2						// copy of low four
+
+		punpcklwd	mm1,		mm1						// Flimit Flimit
+		movq		mm5,		mm4						// copy of Highfour
+
+		punpckldq	mm1,		mm1						// Four Flimit
+		psraw		mm2,		15						// FFFF or 0000
+
+		movq		mm6,		mm1						// copy of FLimit
+		psraw		mm4,		15						// FFFF or 0000
+
+		pxor		mm3,		mm2						
+		psubsw		mm3,		mm2						// abs(FiltVal) for Low
+
+		pxor		mm5,		mm4
+		psubsw		mm5,		mm4						// abs(FiltVal) for Low
+
+		por			mm2,		fourOnes				// -1 or -1 for sign
+		por			mm4,		fourOnes				// -1 or +1 for sign
+	
+		/*
+		THE FOLLOWING CODE TRIED TO DO IT IN CHARS, BUT GENERATES DIFFERENT RESULTS
+		THAN THE C VERSION BECAUSE OF OVERFLOW IN VERY RARE CASES
+
+		pxor		mm4,		mm3						// offset all the pixels by 128		
+		pxor		mm5,		mm3
+
+		pxor		mm6,		mm3
+		pxor		mm7,		mm3
+		
+		psubsb		mm6,		mm5						// p[2]-p[1]
+		psubsb		mm4,		mm7						// p[0]-p[3]
+
+		movq		mm2,		mm6						// Make a copy p[2] - p[1]
+		paddsb		mm6,		mm6						// 2 * p[2] - p[1]
+
+		pand		mm4,		mm0						// UseHighVariance * (p[0]-p[3])
+		paddsb		mm2,		mm6						// 3*(p[2]-p[1])
+
+		paddsb		mm4,		mm2						// 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
+		paddsb		mm4,		eightFours				// adjust before shift
+		
+		pxor		mm7,		mm7						// clear mm7 for unpack
+		movd		mm1,		eax						// FLimit
+
+		pxor 		mm2,		mm2						// make a copy
+		punpcklwd	mm1,		mm1						// FLimit FLimit
+
+		punpcklbw	mm2,		mm4						// Unpack to shorts
+		punpckldq	mm1,		mm1						// 4 Flimit in short
+
+		punpckhbw	mm7,		mm4						// Unpcak to shorts
+		psraw		mm2,		11						// >> 3-> FiltVal low four
+
+		psraw		mm7,		11						// >> 3-> FiltVal High four
+		movq		mm3,		mm2						// make a copy of Low 4
+
+		movq		mm4,		mm7
+		pxor		mm7,		mm7
+
+		movq		mm5,		mm4						// make a copy of high 4
+		psraw		mm2,		15						// FFFF or 0000
+
+		movq		mm6,		mm1						// copy of FLimit
+		psraw		mm4,		15						// FFFF or 0000
+
+		pxor		mm3,		mm2						
+		psubsw		mm3,		mm2						// abs(FiltVal) for Low
+
+		pxor		mm5,		mm4
+		psubsw		mm5,		mm4						// abs(FiltVal) for Low
+
+		por			mm2,		fourOnes				// -1 or -1 for sign
+		por			mm4,		fourOnes				// -1 or +1 for sign
+	
+		*/
+		//   mm0 = UseHIghVariance?
+		//   mm1 = FLimit in shorts
+		//	 mm2 = sign for lower four FiltVal
+		//	 mm3 = abs for lower four FiltVal
+		//	 mm4 = sign for higher four FiltVal
+		//	 mm5 = abs for higher four FiltVal
+		
+		movq		mm6,		mm1						// copy of Flimit
+		psubusw		mm1,		mm3						// Flimit - abs(FiltVal)
+
+		psubusw		mm3,		mm6						// abs(Filtval) -FLimit
+		por			mm3,		mm1						// abs(Flimit-abs(FiltVal)
+
+		movq		mm1,		mm6						// Flimit
+		psubusw		mm1,		mm3						// Flimit-abs(FLimit-abs(FiltVal)
+
+		movq		mm3,		mm6						// copy of the Flimit
+		pmullw		mm2,		mm1						// Get the sign back
+
+		psubusw		mm3,		mm5						// Flimit-abs(Filtval)
+		psubusw		mm5,		mm6						// abs(Filtval)-Flimit)
+
+		por			mm5,		mm3						// abs(Flimit-abs(FiltVal)
+		movq		mm3,		mm6						// Flimit
+
+		psubusw		mm3,		mm5						// Flimit-abs(FLimit-abs(FiltVal)
+		pmullw		mm4,		mm3						// Get the sign back
+
+		// mm0 = UserHighVaraince
+		// mm2 = Final value with sign for lower four
+		// mm4 = Final value with sing for higher four
+		movq		mm5,		[esi+ecx]				// p[1]
+		movq		mm1,		mm2						// make a copy of low four
+
+		movq		mm7,		eight128s				// 128 for offset
+		packsswb	mm2,		mm4						// pack to chars for operation
+
+		movq		mm6,		[esi]					// p[2]
+		psubb		mm5,		mm7						// unsigned -> signed
+
+		psubb		mm6,		mm7						// unsgined -> signed
+		paddsb		mm5,		mm2						// p[1] + Delta
+						
+		psubsb		mm6,		mm2						// p[2] - Delta
+		paddb		mm5,		mm7						// offset back
+
+		paddb		mm6,		mm7						// offset back 
+		movq		[esi+ecx],	mm5						// write out p[1]
+		psraw		mm1,		1						// Delta/2
+
+		psraw		mm4,		1						// Delta/2
+		movq		[esi],		mm6						// write out p[2]
+
+		movq		mm2,		[esi+ecx*2]				// p[0]
+		packsswb	mm1,		mm4						// pack to chars
+		
+		movq		mm3,		[esi+edx]				// p[3]
+		pandn		mm0,		mm1						// and !UseHighVaraince
+
+		psubb		mm2,		mm7						// unsigned -> signed 
+		psubb		mm3,		mm7						// unsigned -> signed 
+
+		paddsb		mm2,		mm0						//  
+		paddb		mm2,		mm7						// offset back
+
+		movq		[esi+ecx*2],	mm2					// write p[0]
+		psubsb		mm3,		mm0						//
+
+		paddb		mm3,		mm7						// offset back
+		movq		[esi+edx],	mm3						// write p[3]
+
+	}
+
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/Makefile b/Src/libvpShared/corelibs/cdxv/vputil/Makefile
new file mode 100644
index 00000000..54b763d0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/Makefile
@@ -0,0 +1,61 @@
+## Target to build (base name of the archive; see ARTARGET below)
+
+TARGET 			=libvputil
+
+## Tools (ecc is used as both the compiler and the linker driver)
+CC      		= ecc
+LD      		= ecc
+AR      		= ar
+OBJDUMP 		= objdump
+RM      		= rm -f
+
+## Directories (Windows-style paths, all rooted at TOPDIR)
+TOPDIR  		=C:\DuckSoft
+PRIVATEINCLUDE  =${TOPDIR}\private\include
+CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
+CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include 
+VPPPINCLUDE     =${TOPDIR}\private\corelibs\cdxv\vputil\include 
+CURRENTDIR 		=${TOPDIR}\private\corelibs\cdxv\vputil
+LIBDIR			=${TOPDIR}\private\corelibs\lib\mapca 
+
+## Compile Flags (note: DEBUG currently carries the optimisation level, -O2)
+ALLINCLUDES     =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
+VP6DEFINES		=-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
+ETIDEFINES      =-DMAPCA
+ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
+DEBUG			=-O2
+CFLAGS 			=-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
+				-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
+				-magen_interroutine_padding
+ALLFLAGS 		=$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
+
+
+## Files (objects that go into the archive; SRCS is OBJS with .o replaced by .c)
+OBJS			=generic\fdct.o				\
+				generic\idctpart.o			\
+				generic\reconstruct.o		\
+				generic\vputil.o			\
+				bsp\bspFdct.o				\
+				bsp\bspIDct.o				\
+				bsp\bsprecon.o				\
+				bsp\bspvputil.o				\
+				bsp\uoptsystemdependant.o
+
+
+SRCS			=$(OBJS:.o=.c)
+
+ARTARGET		=${TARGET}.a
+
+# archive: bundle the objects into ${ARTARGET}, then move it into ${LIBDIR}
+# NOTE(review): the target below is the literal word ARTARGET, not ${ARTARGET} - confirm intent.
+ARTARGET:${OBJS}
+	${AR} -cr ${ARTARGET} ${OBJS}
+	mv ${ARTARGET} ${LIBDIR}
+# objects: every object lists all of ${SRCS} as prerequisites; each is built from $*.c
+${OBJS} : ${SRCS}
+	$(CC) $(ALLFLAGS) -c $*.c -o $*.o
+# clean: removes the objects and any ${ARTARGET} left in this directory (not the copy in ${LIBDIR})
+clean:
+	${RM} ${OBJS} ${ARTARGET}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
new file mode 100644
index 00000000..91ddba73
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
@@ -0,0 +1,312 @@
+/****************************************************************************
+*
+*   Module Title :     fdct.c
+*
+*   Description  :     Fast 8x8 DCT C-Implementation.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "dct.h"
+
+/****************************************************************************
+*  Macros: DOROUND adds 0xffff to a negative X so a following >> 16 truncates toward 0
+****************************************************************************/
+#define SIGNBITDUPPED(X) ( (signed )(((X) & 0x80000000)) >> 31 )   // all ones if bit 31 of X is set (arithmetic shift assumed), else 0
+#define DOROUND(X) (X) = ( (SIGNBITDUPPED(X) & (0xffff)) + (X) );   // expands to a full statement: callers may omit the ';'
+
+/****************************************************************************
+*  Module statics: read-only multipliers; every product using them is shifted >> 16
+****************************************************************************/
+static const INT32 xC1S7 = 64277;
+static const INT32 xC2S6 = 60547;
+static const INT32 xC3S5 = 54491;
+static const INT32 xC4S4 = 46341;
+static const INT32 xC5S3 = 36410;
+static const INT32 xC6S2 = 25080;
+static const INT32 xC7S1 = 12785;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : fdct_short_C_orig
+ *
+ *  INPUTS        : INT16 *InputData  : 8x8 block of 16-bit input values, read
+ *                                      as 8 consecutive rows of 8 (64 values).
+ *  OUTPUTS       : INT16 *OutputData : 64 16-bit transform coefficients, written
+ *                                      in the same row-major 8x8 layout.
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs an 8x8 2-D fast DCT: a row pass into a 32-bit
+ *                  scratch block, then a column pass into OutputData.
+ *                  The algorithm used is derived from the flowgraph for
+ *                  the Vetterli and Ligtenberg fast 1-D dct given in the
+ *                  JPEG reference book by Pennebaker and Mitchell.
+ *
+ *  SPECIAL NOTES : Every xC?S? product is passed through DOROUND, then >> 16.
+ *
+ ****************************************************************************/
+void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
+{
+	int    loop;
+	INT32  is07, is12, is34, is56;
+	INT32  is0734, is1256;
+	INT32  id07, id12, id34, id56; 
+	INT32  irot_input_x, irot_input_y;
+	INT32  icommon_product1;            // Re-used product  (c4s4 * (s12 - s56)). 
+	INT32  icommon_product2;            // Re-used product  (c4s4 * (d12 + d56)).
+	INT32  temp1, temp2;	            // scratch: hold each product while it is rounded and shifted
+	INT32  InterData[64];               // row-pass results, consumed by the column pass
+
+    INT32 *ip = InterData;
+	INT16 *op = OutputData;
+	
+    for ( loop=0; loop<8; loop++ )
+	{
+		// Pre calculate some common sums and differences.
+		is07 = InputData[0] + InputData[7];
+		is12 = InputData[1] + InputData[2];
+		is34 = InputData[3] + InputData[4];
+		is56 = InputData[5] + InputData[6];
+
+		id07 = InputData[0] - InputData[7];
+		id12 = InputData[1] - InputData[2];
+		id34 = InputData[3] - InputData[4];
+		id56 = InputData[5] - InputData[6];
+	
+		is0734 = is07 + is34;
+		is1256 = is12 + is56;
+		
+		// Pre-Calculate some common product terms.
+		icommon_product1 = xC4S4*(is12 - is56); 
+		DOROUND ( icommon_product1 )
+		icommon_product1 >>= 16;
+		
+		icommon_product2 = xC4S4*(id12 + id56);
+		DOROUND ( icommon_product2 )
+		icommon_product2 >>= 16;
+
+		ip[0] = (xC4S4*(is0734 + is1256));
+		DOROUND ( ip[0] );
+		ip[0] >>= 16;
+
+		ip[4] = (xC4S4*(is0734 - is1256));
+		DOROUND ( ip[4] );
+		ip[4] >>= 16;
+
+		// Define inputs to rotation for outputs 2 and 6 
+		irot_input_x = id12 - id56;
+		irot_input_y = is07 - is34;
+
+		// Apply rotation for outputs 2 and 6. 
+		temp1 = xC6S2*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[2] = temp1 + temp2;
+
+		temp1 = xC6S2*irot_input_y;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_x;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[6] = temp1 -temp2;
+
+		// Define inputs to rotation for outputs 1 and 7 
+		irot_input_x = icommon_product1 + id07;
+		irot_input_y = -( id34 + icommon_product2 );
+
+		// Apply rotation for outputs 1 and 7. 
+		temp1 = xC1S7*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC7S1*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[1] = temp1 - temp2;
+
+		temp1 = xC7S1*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC1S7*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[7] = temp1 + temp2;
+		
+		// Define inputs to rotation for outputs 3 and 5 
+		irot_input_x = id07 - icommon_product1;
+		irot_input_y = id34 - icommon_product2;
+
+		// Apply rotation for outputs 3 and 5. 
+		temp1 = xC3S5 * irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC5S3*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[3] = temp1 - temp2;
+
+		temp1 = xC5S3*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC3S5*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[5] = temp1 + temp2;
+		
+		// Increment data pointer for next row. 
+		InputData += 8;
+		ip += 8;		// advance pointer to next row 
+	}
+
+	//	Rows are done; now transform the columns of the intermediate block
+	ip = InterData;
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Pre calculate some common sums and differences. 
+		is07 = ip[0 * 8] + ip[7 * 8];
+		is12 = ip[1 * 8] + ip[2 * 8];
+		is34 = ip[3 * 8] + ip[4 * 8];
+		is56 = ip[5 * 8] + ip[6 * 8];
+
+		id07 = ip[0 * 8] - ip[7 * 8];
+		id12 = ip[1 * 8] - ip[2 * 8];
+		id34 = ip[3 * 8] - ip[4 * 8];
+		id56 = ip[5 * 8] - ip[6 * 8];
+	
+		is0734 = is07 + is34;
+		is1256 = is12 + is56;
+		
+		// Pre-Calculate some common product terms.
+		icommon_product1 = xC4S4*(is12 - is56); 
+		icommon_product2 = xC4S4*(id12 + id56);
+		DOROUND ( icommon_product1 )
+		DOROUND ( icommon_product2 )
+		icommon_product1 >>= 16;
+		icommon_product2 >>= 16;
+
+		temp1 = xC4S4*(is0734 + is1256);
+		temp2 = xC4S4*(is0734 - is1256);
+		DOROUND ( temp1 );
+		DOROUND ( temp2 );
+		temp1 >>= 16;
+		temp2 >>= 16;
+		op[0*8] = (INT16)temp1;
+		op[4*8] = (INT16)temp2;
+
+		// Define inputs to rotation for outputs 2 and 6 
+		irot_input_x = id12 - id56;
+		irot_input_y = is07 - is34;
+
+		// Apply rotation for outputs 2 and 6. 
+		temp1 = xC6S2*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[2*8] = (INT16)(temp1 + temp2);
+
+		temp1 = xC6S2*irot_input_y;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_x;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[6*8] = (INT16)(temp1 -temp2);
+
+		// Define inputs to rotation for outputs 1 and 7 
+		irot_input_x = icommon_product1 + id07;
+		irot_input_y = -( id34 + icommon_product2 );
+
+		// Apply rotation for outputs 1 and 7. 
+		temp1 = xC1S7*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC7S1*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[1*8] = (INT16) (temp1 - temp2);
+
+		temp1 = xC7S1*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC1S7*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[7*8] = (INT16)(temp1 + temp2);
+
+		// Define inputs to rotation for outputs 3 and 5 
+		irot_input_x = id07 - icommon_product1;
+		irot_input_y = id34 - icommon_product2;
+
+		// Apply rotation for outputs 3 and 5. 
+		temp1 = xC3S5*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC5S3*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[3*8] = (INT16)(temp1 - temp2);
+
+		temp1 = xC5S3*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC3S5*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[5*8] = (INT16) (temp1 + temp2);
+
+		// Increment data pointer for next column. 
+		ip ++;
+		op ++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : fdct_short_C
+ *
+ *  INPUTS        : INT16 *DCTDataBuffer : 64 16-bit input values. NOTE: scaled
+ *                                         in place, so the caller's data changes.
+ *  OUTPUTS       : INT16 *DCT_codes     : 64 16-bit transform coefficients.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs an 8x8 2-D fast DCT.
+ *
+ *                  Raises the precision of the FDCT input by the number of bits
+ *                  defined by FDCT_PRECISION_BITS, then strips them from the output.
+ *
+ *  SPECIAL NOTES : The input is scaled by multiplication rather than "<<"
+ *                  because left-shifting a negative value is undefined in C.
+ ****************************************************************************/
+void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
+{
+
+    INT32 i;
+
+	// Increase precision on input to fdct (multiply: "<<" on a negative sample is undefined)
+	for ( i = 0; i < 64; i++ )
+		DCTDataBuffer[i] = (INT16)(DCTDataBuffer[i] * (1 << FDCT_PRECISION_BITS));
+
+	// Transform the error signal using the forward DCT to get set of transform coefficients
+	fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
+
+	// Strip off the extra bits from the DCT output.
+	// This should ultimately be merged into the quantize process but there are also
+	// implications for DC prediction that would then need to be sorted
+	for ( i = 0; i < 64; i++ )
+	{	
+		// signed shift modified so behaves like "/" (truncates towards 0 for + and -)
+		if ( DCT_codes[i]  >= 0 )
+			DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
+		else
+			DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
+	}
+
+}
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
new file mode 100644
index 00000000..980e4fc0
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
@@ -0,0 +1,921 @@
+/****************************************************************************
+*
+*   Module Title :     idctpart.c
+*
+*   Description  :     IDCT with multiple versions based on # of non 0 coeffs
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+
+#include "dct.h"
+#include "string.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define int32 int
+#define int16 short
+#define IdctAdjustBeforeShift 8   // added before the final >> 4 in IDctSlow (half of 16)
+
+#define xC1S7 64277
+#define xC2S6 60547
+#define xC3S5 54491
+#define xC4S4 46341
+#define xC5S3 36410
+#define xC6S2 25080
+#define xC7S1 12785
+// The xC?S? values are fixed-point multipliers: IDctSlow shifts each product right by 16.
+/****************************************************************************
+*  Module statics: dequant_index[n] = raster index (row*8 + col) of zig-zag coefficient n
+****************************************************************************/
+static const UINT32 dequant_index[64] = 
+{	
+    0,  1,  8,  16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+#if 0   // AWG CODE NO LONGER USED IN CODEBASE.
+/*	Cos and Sin constant multipliers used during DCT and IDCT (floating-point form) */
+const double C1S7 = (double)0.9807852804032;
+const double C2S6 = (double)0.9238795325113;
+const double C3S5 = (double)0.8314696123025;
+const double C4S4 = (double)0.7071067811865;
+const double C5S3 = (double)0.5555702330196;
+const double C6S2 = (double)0.3826834323651;
+const double C7S1 = (double)0.1950903220161;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+
+// DCT lookup tables: each *_TablePtr points at the middle of its table so it can take a signed index
+INT32 * C4S4_TablePtr;
+INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
+
+INT32 * C6S2_TablePtr;
+INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C2S6_TablePtr;
+INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C1S7_TablePtr;
+INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C7S1_TablePtr;
+INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C3S5_TablePtr;
+INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C5S3_TablePtr;
+INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InitDctTables
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None (fills the global C?S?_Table arrays and pointers).
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initialises lookup tables used in IDCT.
+ *                      Entry i holds i * constant, rounded half away from zero.
+ *  SPECIAL NOTES :     NO LONGER USED IN CODEBASE. 
+ *                      The last element of each table is never written here.
+ ****************************************************************************/
+void InitDctTables ( void )
+{
+    INT32 i;
+
+    C4S4_TablePtr = &C4S4_Table[COEFF_MAX*2];
+    for( i = -(2 * COEFF_MAX); i < (2 * COEFF_MAX); i++ )
+    {
+        if ( i < 0 )
+            C4S4_TablePtr[i] = (INT32)((i * C4S4) - 0.5);
+        else
+            C4S4_TablePtr[i] = (INT32)((i * C4S4) + 0.5);
+    }
+
+    C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C6S2_TablePtr[i] = (INT32)((i * C6S2) - 0.5);
+        else
+            C6S2_TablePtr[i] = (INT32)((i * C6S2) + 0.5);
+    }
+
+    C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C2S6_TablePtr[i] = (INT32)((i * C2S6) - 0.5);
+        else
+            C2S6_TablePtr[i] = (INT32)((i * C2S6) + 0.5);
+    }
+
+    C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C1S7_TablePtr[i] = (INT32)((i * C1S7) - 0.5);
+        else
+            C1S7_TablePtr[i] = (INT32)((i * C1S7) + 0.5);
+    }
+
+    C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C7S1_TablePtr[i] = (INT32)((i * C7S1) - 0.5);
+        else
+            C7S1_TablePtr[i] = (INT32)((i * C7S1) + 0.5);
+    }
+
+    C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C3S5_TablePtr[i] = (INT32)((i * C3S5) - 0.5);
+        else
+            C3S5_TablePtr[i] = (INT32)((i * C3S5) + 0.5);
+    }
+
+    C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C5S3_TablePtr[i] = (INT32)((i * C5S3) - 0.5);
+        else
+            C5S3_TablePtr[i] = (INT32)((i * C5S3) + 0.5);
+    }
+}
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : dequant_slow
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
+ *                  INT16 *quantized_list : Pointer to quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : Pointer to 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-quantizes an 8x8 block of quantized DCT coeffs.
+ *                  Entry n of both inputs is multiplied; 64 products are stored.
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering. 
+ *
+ ****************************************************************************/
+void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+    // Loop fully expanded for maximum speed: one multiply and one scattered store per coefficient
+    DCT_block[dequant_index[0]]  = quantized_list[0]  * dequant_coeffs[0];
+    DCT_block[dequant_index[1]]  = quantized_list[1]  * dequant_coeffs[1];
+    DCT_block[dequant_index[2]]  = quantized_list[2]  * dequant_coeffs[2];
+    DCT_block[dequant_index[3]]  = quantized_list[3]  * dequant_coeffs[3];
+    DCT_block[dequant_index[4]]  = quantized_list[4]  * dequant_coeffs[4];
+    DCT_block[dequant_index[5]]  = quantized_list[5]  * dequant_coeffs[5];
+    DCT_block[dequant_index[6]]  = quantized_list[6]  * dequant_coeffs[6];
+    DCT_block[dequant_index[7]]  = quantized_list[7]  * dequant_coeffs[7];
+    DCT_block[dequant_index[8]]  = quantized_list[8]  * dequant_coeffs[8];
+    DCT_block[dequant_index[9]]  = quantized_list[9]  * dequant_coeffs[9];
+    DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
+    DCT_block[dequant_index[11]] = quantized_list[11] * dequant_coeffs[11];
+    DCT_block[dequant_index[12]] = quantized_list[12] * dequant_coeffs[12];
+    DCT_block[dequant_index[13]] = quantized_list[13] * dequant_coeffs[13];
+    DCT_block[dequant_index[14]] = quantized_list[14] * dequant_coeffs[14];
+    DCT_block[dequant_index[15]] = quantized_list[15] * dequant_coeffs[15];
+    DCT_block[dequant_index[16]] = quantized_list[16] * dequant_coeffs[16];
+    DCT_block[dequant_index[17]] = quantized_list[17] * dequant_coeffs[17];
+    DCT_block[dequant_index[18]] = quantized_list[18] * dequant_coeffs[18];
+    DCT_block[dequant_index[19]] = quantized_list[19] * dequant_coeffs[19];
+    DCT_block[dequant_index[20]] = quantized_list[20] * dequant_coeffs[20];
+    DCT_block[dequant_index[21]] = quantized_list[21] * dequant_coeffs[21];
+    DCT_block[dequant_index[22]] = quantized_list[22] * dequant_coeffs[22];
+    DCT_block[dequant_index[23]] = quantized_list[23] * dequant_coeffs[23];
+    DCT_block[dequant_index[24]] = quantized_list[24] * dequant_coeffs[24];
+    DCT_block[dequant_index[25]] = quantized_list[25] * dequant_coeffs[25];
+    DCT_block[dequant_index[26]] = quantized_list[26] * dequant_coeffs[26];
+    DCT_block[dequant_index[27]] = quantized_list[27] * dequant_coeffs[27];
+    DCT_block[dequant_index[28]] = quantized_list[28] * dequant_coeffs[28];
+    DCT_block[dequant_index[29]] = quantized_list[29] * dequant_coeffs[29];
+    DCT_block[dequant_index[30]] = quantized_list[30] * dequant_coeffs[30];
+    DCT_block[dequant_index[31]] = quantized_list[31] * dequant_coeffs[31];
+    DCT_block[dequant_index[32]] = quantized_list[32] * dequant_coeffs[32];
+    DCT_block[dequant_index[33]] = quantized_list[33] * dequant_coeffs[33];
+    DCT_block[dequant_index[34]] = quantized_list[34] * dequant_coeffs[34];
+    DCT_block[dequant_index[35]] = quantized_list[35] * dequant_coeffs[35];
+    DCT_block[dequant_index[36]] = quantized_list[36] * dequant_coeffs[36];
+    DCT_block[dequant_index[37]] = quantized_list[37] * dequant_coeffs[37];
+    DCT_block[dequant_index[38]] = quantized_list[38] * dequant_coeffs[38];
+    DCT_block[dequant_index[39]] = quantized_list[39] * dequant_coeffs[39];
+    DCT_block[dequant_index[40]] = quantized_list[40] * dequant_coeffs[40];
+    DCT_block[dequant_index[41]] = quantized_list[41] * dequant_coeffs[41];
+    DCT_block[dequant_index[42]] = quantized_list[42] * dequant_coeffs[42];
+    DCT_block[dequant_index[43]] = quantized_list[43] * dequant_coeffs[43];
+    DCT_block[dequant_index[44]] = quantized_list[44] * dequant_coeffs[44];
+    DCT_block[dequant_index[45]] = quantized_list[45] * dequant_coeffs[45];
+    DCT_block[dequant_index[46]] = quantized_list[46] * dequant_coeffs[46];
+    DCT_block[dequant_index[47]] = quantized_list[47] * dequant_coeffs[47];
+    DCT_block[dequant_index[48]] = quantized_list[48] * dequant_coeffs[48];
+    DCT_block[dequant_index[49]] = quantized_list[49] * dequant_coeffs[49];
+    DCT_block[dequant_index[50]] = quantized_list[50] * dequant_coeffs[50];
+    DCT_block[dequant_index[51]] = quantized_list[51] * dequant_coeffs[51];
+    DCT_block[dequant_index[52]] = quantized_list[52] * dequant_coeffs[52];
+    DCT_block[dequant_index[53]] = quantized_list[53] * dequant_coeffs[53];
+    DCT_block[dequant_index[54]] = quantized_list[54] * dequant_coeffs[54];
+    DCT_block[dequant_index[55]] = quantized_list[55] * dequant_coeffs[55];
+    DCT_block[dequant_index[56]] = quantized_list[56] * dequant_coeffs[56];
+    DCT_block[dequant_index[57]] = quantized_list[57] * dequant_coeffs[57];
+    DCT_block[dequant_index[58]] = quantized_list[58] * dequant_coeffs[58];
+    DCT_block[dequant_index[59]] = quantized_list[59] * dequant_coeffs[59];
+    DCT_block[dequant_index[60]] = quantized_list[60] * dequant_coeffs[60];
+    DCT_block[dequant_index[61]] = quantized_list[61] * dequant_coeffs[61];
+    DCT_block[dequant_index[62]] = quantized_list[62] * dequant_coeffs[62];
+    DCT_block[dequant_index[63]] = quantized_list[63] * dequant_coeffs[63];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDctSlow
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients
+ *                                       (handed to dequant_slow as the zig-zag list).
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's input 8x8 block
+ *                  to reproduce prediction error.
+ *
+ *  SPECIAL NOTES : All-zero rows are skipped in the row pass; an all-zero
+ *                  column writes eight zeros to the output.
+ ****************************************************************************/
+void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+	int   loop;
+	int32 t1, t2;
+    int32 IntermediateData[64];
+	int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
+	int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
+	
+    int32 *ip = IntermediateData;
+	int16 *op = OutputData;
+	
+	// dequantize the input into the 32-bit work block (raster order)
+	dequant_slow ( QuantMatrix, InputData, IntermediateData );
+
+	// Inverse DCT on the rows now (results over-write the work block in place)
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Check for non-zero values; an all-zero row is left untouched
+		if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t2 = (int32)(xC7S1 * ip[7]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_A = t1 + t2;
+
+			t1 = (int32)(xC7S1 * ip[1]);
+			t2 = (int32)(xC1S7 * ip[7]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_B = t1 - t2;
+
+			t1 = (int32)(xC3S5 * ip[3]);
+			t2 = (int32)(xC5S3 * ip[5]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_C = t1 + t2;
+
+			t1 = (int32)(xC3S5 * ip[5]);
+			t2 = (int32)(xC5S3 * ip[3]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_D = t1 - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * (ip[0] + ip[4]));
+            t1 >>= 16;
+			_E = t1;
+
+			t1 = (int32)(xC4S4 * (ip[0] - ip[4]));
+            t1 >>= 16;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2]);
+			t2 = (int32)(xC6S2 * ip[6]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_G = t1 + t2;
+
+			t1 = (int32)(xC6S2 * ip[2]);
+			t2 = (int32)(xC2S6 * ip[6]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_H = t1 - t2;
+
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			// Final sequence of operations over-write original inputs (each value is cast to int16 first).
+			ip[0] = (int16)((_Gd + _Cd )  >> 0);
+			ip[7] = (int16)((_Gd - _Cd )  >> 0);
+
+			ip[1] = (int16)((_Add + _Hd ) >> 0);
+			ip[2] = (int16)((_Add - _Hd ) >> 0);
+
+			ip[3] = (int16)((_Ed + _Dd )  >> 0);
+			ip[4] = (int16)((_Ed - _Dd )  >> 0);
+
+			ip[5] = (int16)((_Fd + _Bdd ) >> 0);
+			ip[6] = (int16)((_Fd - _Bdd ) >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = IntermediateData;	// rewind for the column pass
+
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Check for non-zero values (bitwise | faster than logical ||)
+		if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
+			 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] )
+		{
+
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t2 = (int32)(xC7S1 * ip[7*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_A = t1 + t2;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+			t2 = (int32)(xC1S7 * ip[7*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_B = t1 - t2;
+
+			t1 = (int32)(xC3S5 * ip[3*8]);
+			t2 = (int32)(xC5S3 * ip[5*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_C = t1 + t2;
+
+			t1 = (int32)(xC3S5 * ip[5*8]);
+			t2 = (int32)(xC5S3 * ip[3*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_D = t1 - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * (ip[0*8] + ip[4*8]));
+            t1 >>= 16;
+			_E = t1;
+
+			t1 = (int32)(xC4S4 * (ip[0*8] - ip[4*8]));
+            t1 >>= 16;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2*8]);
+			t2 = (int32)(xC6S2 * ip[6*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_G = t1 + t2;
+
+			t1 = (int32)(xC6S2 * ip[2*8]);
+			t2 = (int32)(xC2S6 * ip[6*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_H = t1 - t2;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			_Gd += IdctAdjustBeforeShift;	// one adjust per output pair, applied before the >> 4 below
+			_Add += IdctAdjustBeforeShift;
+			_Ed += IdctAdjustBeforeShift;
+			_Fd += IdctAdjustBeforeShift;
+
+			// Final sequence of operations writes the column to the output block.
+			op[0*8] = (int16)((_Gd + _Cd )  >> 4);
+			op[7*8] = (int16)((_Gd - _Cd )  >> 4);
+
+			op[1*8] = (int16)((_Add + _Hd ) >> 4);
+			op[2*8] = (int16)((_Add - _Hd ) >> 4);
+
+			op[3*8] = (int16)((_Ed + _Dd )  >> 4);
+			op[4*8] = (int16)((_Ed - _Dd )  >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
+			op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
+		}
+		else
+		{
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;			// next column
+        op++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : dequant_slow10
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
+ *                  INT16 *quantized_list : Pointer to quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : Pointer to 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-quantizes an 8x8 block of quantized DCT coeffs that
+ *                  only has non-zero coefficients in the first 10, i.e.
+ *                  only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
+ *
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering. 
+ *                  Only the first 128 bytes of DCT_block are cleared.
+ ****************************************************************************/
+void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+	memset(DCT_block,0, 128);	// 128 bytes = DCT_block[0..31] if INT32 is 4 bytes - TODO confirm; rest untouched
+
+	// Loop fully expanded for maximum speed
+    DCT_block[dequant_index[0]]  = quantized_list[0]  * dequant_coeffs[0];
+    DCT_block[dequant_index[1]]  = quantized_list[1]  * dequant_coeffs[1];
+    DCT_block[dequant_index[2]]  = quantized_list[2]  * dequant_coeffs[2];
+    DCT_block[dequant_index[3]]  = quantized_list[3]  * dequant_coeffs[3];
+    DCT_block[dequant_index[4]]  = quantized_list[4]  * dequant_coeffs[4];
+    DCT_block[dequant_index[5]]  = quantized_list[5]  * dequant_coeffs[5];
+    DCT_block[dequant_index[6]]  = quantized_list[6]  * dequant_coeffs[6];
+    DCT_block[dequant_index[7]]  = quantized_list[7]  * dequant_coeffs[7];
+    DCT_block[dequant_index[8]]  = quantized_list[8]  * dequant_coeffs[8];
+    DCT_block[dequant_index[9]]  = quantized_list[9]  * dequant_coeffs[9];
+    DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
+	// NOTE(review): index 10 is an 11th coefficient and dequant_index[10] is 32 - confirm this store is intended.
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDct10
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's input 8x8 block
+ *                  with non-zero coeffs only in DC & the first 9 AC coeffs.
+ *                  i.e. non-zeros ONLY in the following 10 positions:
+ *                  
+ *                          x  x  x  x  0  0  0  0
+ *                          x  x  x  0  0  0  0  0
+ *                          x  x  0  0  0  0  0  0
+ *                          x  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : Output data is in raster, not zig-zag, order.
+ *                  Only rows 0-3 of the intermediate block are read; the row pass takes its inputs from columns 0-3.
+ ****************************************************************************/
+void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+	int   loop;
+	int32 t1, t2;
+	int32 IntermediateData[64];
+	int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
+	int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
+
+    int32 *ip = IntermediateData;
+	int16 *op = OutputData;
+	
+	// Dequantize the input into IntermediateData (2-D raster order)
+	dequant_slow10 ( QuantMatrix, InputData, IntermediateData );
+
+	// Inverse DCT on the rows now; only the first 4 rows are transformed
+	for ( loop=0; loop<4; loop++ )
+	{
+		// Check for non-zero values in columns 0-3; an all-zero row is left untouched
+		if ( ip[0] | ip[1] | ip[2] | ip[3] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t1 >>= 16;
+			_A = t1; 
+
+			t1 = (int32)(xC7S1 * ip[1]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC3S5 * ip[3]);
+            t1 >>= 16;
+			_C = t1; 
+
+			t2 = (int32)(xC5S3 * ip[3]);
+            t2 >>= 16;
+			_D = -t2; 
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * ip[0] );
+            t1 >>= 16;
+			_E = t1;
+			// _E and _F share one value: ip[0] is their only input here
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2]);
+            t1 >>= 16;
+			_G = t1; 
+
+			t1 = (int32)(xC6S2 * ip[2]);
+            t1 >>= 16;
+			_H = t1 ;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			// Final sequence of operations over-write original inputs (row pass is unscaled: >> 0).
+			ip[0] = (int16)((_Gd + _Cd )   >> 0);
+			ip[7] = (int16)((_Gd - _Cd )   >> 0);
+
+			ip[1] = (int16)((_Add + _Hd )  >> 0);
+			ip[2] = (int16)((_Add - _Hd )  >> 0);
+
+			ip[3] = (int16)((_Ed + _Dd )   >> 0);
+			ip[4] = (int16)((_Ed - _Dd )   >> 0);
+
+			ip[5] = (int16)((_Fd + _Bdd )  >> 0);
+			ip[6] = (int16)((_Fd - _Bdd )  >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = IntermediateData;	// rewind to the first row for the column pass
+	// Inverse DCT on the columns; results are written to OutputData through op
+	for ( loop=0; loop<8; loop++ )
+	{	
+		// Check for non-zero values in rows 0-3 (bitwise or faster than ||)
+		if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] )
+		{
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t1 >>= 16;
+			_A = t1 ;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC3S5 * ip[3*8]);
+            t1 >>= 16;
+			_C = t1 ;
+
+			t2 = (int32)(xC5S3 * ip[3*8]);
+            t2 >>= 16;
+			_D = - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * ip[0*8]);
+            t1 >>= 16;
+			_E = t1;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2*8]);
+            t1 >>= 16;
+			_G = t1;
+
+			t1 = (int32)(xC6S2 * ip[2*8]);
+            t1 >>= 16;
+			_H = t1;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+			// IdctAdjustBeforeShift (defined elsewhere) is added ahead of the final >> 4
+			_Gd += IdctAdjustBeforeShift;
+			_Add += IdctAdjustBeforeShift;
+			_Ed += IdctAdjustBeforeShift;
+			_Fd += IdctAdjustBeforeShift;
+
+			// Final sequence of operations writes the shifted results to the output block.
+			op[0*8] = (int16)((_Gd + _Cd )  >> 4);
+			op[7*8] = (int16)((_Gd - _Cd )  >> 4);
+
+			op[1*8] = (int16)((_Add + _Hd ) >> 4);
+			op[2*8] = (int16)((_Add - _Hd ) >> 4);
+
+			op[3*8] = (int16)((_Ed + _Dd )  >> 4);
+			op[4*8] = (int16)((_Ed - _Dd )  >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
+			op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
+		}
+		else
+		{	// all four inputs of this column are zero: the output column is set to zero
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;	// next column
+        op++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDct1
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's an 8x8 block whose
+ *                  only non-zero coeff is in the DC position:
+ *                          x   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : Only InputData[0] and QuantMatrix[0] are read; all 64
+ *                  outputs are set to (InputData[0]*QuantMatrix[0] + 15) >> 5.
+ *
+ ****************************************************************************/
+void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
+{
+    INT16 *Dest  = OutputData;
+    INT16 *End   = OutputData + 64;
+    INT16  DcVal = (INT16)((INT32)(InputData[0]*QuantMatrix[0]+15)>>5);
+
+    // Flat block: every one of the 64 output samples takes the same value.
+    while ( Dest < End )
+        *Dest++ = DcVal;
+}
+
+
+#if 0
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDct4
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 DCT coefficients.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse DCT's input 8x8 block with at most four non-zero
+ *                  coeffs in the following positions:
+ *                  
+ *                          x   x   0  0  0  0  0  0
+ *                          x   x   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
+ *                  InputData is modified in place (un-zigzag step and row pass).
+ ****************************************************************************/
+void IDct4 ( int16 *InputData, int16 *OutputData )
+{
+	int32 t1;
+	int loop;
+	int32 _Add, _Fd;
+	int32 _A, _B, _Ad, _Bd, _Cd, _Dd, _E;
+
+    int16 *ip = InputData;
+	int16 *op = OutputData;
+
+	// Unzigzag the coefficients: entries 2 and 4 of the list move to the start of row 1
+	ip[8] = ip[2];
+	ip[9] = ip[4];
+	ip[2] = 0;
+	ip[5] = 0;	// NOTE(review): ip[4] is the slot just copied from, yet ip[5] is cleared -- confirm before re-enabling
+
+	// Inverse DCT on the rows now
+	for ( loop = 0; loop < 2; loop++)
+	{
+		// Check for non-zero values
+		if ( ip[0] | ip[1] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t1 >>= 16;
+			_A = t1; 
+
+			t1 = (int32)(xC7S1 * ip[1]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC4S4 * _A );
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * _B );
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A ;
+			_Dd = _B ;
+
+			t1 = (int32)(xC4S4 * ip[0] );
+            t1 >>= 16;
+			_E = t1;
+
+			_Add = _E + _Ad;
+			
+			_Fd = _E - _Ad;
+	
+			// Final sequence of operations over-write original inputs.
+			ip[0] = (int16)((_E + _Cd )   >> 0);
+			ip[7] = (int16)((_E - _Cd )   >> 0);
+
+			ip[1] = (int16)((_Add + _Bd ) >> 0);
+			ip[2] = (int16)((_Add - _Bd ) >> 0);
+
+			ip[3] = (int16)((_E + _Dd )   >> 0);
+			ip[4] = (int16)((_E - _Dd )   >> 0);
+
+			ip[5] = (int16)((_Fd + _Bd )  >> 0);
+			ip[6] = (int16)((_Fd - _Bd )  >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = InputData;
+
+	for ( loop=0; loop<8; loop++ )
+	{	
+		// Check for non-zero values (bitwise or faster than ||)
+		if ( ip[0 * 8] | ip[1 * 8] )
+		{
+
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t1 >>= 16;
+			_A = t1 ;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC4S4 * _A );
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * _B );
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A ;
+			_Dd = _B ;
+
+			t1 = (int32)(xC4S4 * ip[0*8]);
+            t1 >>= 16;
+			_E = t1;
+
+			_Add = _E + _Ad;
+			
+			_Fd = _E - _Ad;
+	
+			_Add += IdctAdjustBeforeShift;
+			_E   += IdctAdjustBeforeShift;
+			_Fd  += IdctAdjustBeforeShift;
+
+			// Final sequence of operations writes the shifted results to the output block.
+			op[0*8] = (int16)((_E + _Cd )   >> 4);
+			op[7*8] = (int16)((_E - _Cd )   >> 4);
+
+			op[1*8] = (int16)((_Add + _Bd ) >> 4);
+			op[2*8] = (int16)((_Add - _Bd ) >> 4);
+
+			op[3*8] = (int16)((_E + _Dd )   >> 4);
+			op[4*8] = (int16)((_E - _Dd )   >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bd )   >> 4);
+			op[6*8] = (int16)((_Fd - _Bd )   >> 4);
+		}
+		else
+		{
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;	// next column
+        op++;
+	}
+}
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
new file mode 100644
index 00000000..0db4652a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
@@ -0,0 +1,243 @@
+/****************************************************************************
+*
+*   Module Title :     Reconstruct.c
+*
+*   Description  :     Block reconstruction functions.
+*
+****************************************************************************/
+#define STRICT              // Strict type checking 
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "reconstruct.h"
+#include "codec_common.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SatUnsigned8
+ *
+ *  INPUTS        : INT16 *DataBlock      : Pointer to 8x8 input block.
+ *                  UINT32 ResultLineStep : Stride of output block.
+ *                  UINT32 DataLineStep   : Stride of input block.
+ *
+ *  OUTPUTS       : UINT8 *ResultPtr      : Pointer to 8x8 output block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Saturates each input sample to unsigned 8 bits and stores it.
+ *
+ *  SPECIAL NOTES : LIMIT (defined elsewhere) is relied on for the clamping; its
+ *                  result is narrowed straight to UINT8, never via plain char.
+ *
+ ****************************************************************************/
+void SatUnsigned8 ( UINT8 *ResultPtr, INT16 *DataBlock, UINT32 ResultLineStep, UINT32 DataLineStep )
+{
+    INT32 i;
+
+    // Partly expanded loop: one row of eight samples per iteration
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        ResultPtr[0] = (UINT8) LIMIT(DataBlock[0]);
+        ResultPtr[1] = (UINT8) LIMIT(DataBlock[1]);
+        ResultPtr[2] = (UINT8) LIMIT(DataBlock[2]);
+        ResultPtr[3] = (UINT8) LIMIT(DataBlock[3]);
+        ResultPtr[4] = (UINT8) LIMIT(DataBlock[4]);
+        ResultPtr[5] = (UINT8) LIMIT(DataBlock[5]);
+        ResultPtr[6] = (UINT8) LIMIT(DataBlock[6]);
+        ResultPtr[7] = (UINT8) LIMIT(DataBlock[7]);
+
+        DataBlock += DataLineStep;
+        ResultPtr += ResultLineStep;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconIntra
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT16 *ChangePtr    : Pointer to 8x8 intra prediction block.
+ *                  UINT32 LineStep      : Stride of reconstruction block.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an intra block (adds 128, then saturates to 8 bits).
+ *
+ *  SPECIAL NOTES : TmpDataBuffer is overwritten with the unsaturated values.
+ *
+ ****************************************************************************/
+void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *RowOut = TmpDataBuffer;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Offset the eight samples of this row by 128. The bound is the
+        // literal 8 because exactly eight samples per row are converted,
+        // independent of BLOCK_HEIGHT_WIDTH.
+        for ( Col=0; Col<8; Col++ )
+        {
+            RowOut[Col] = (INT16) ( ChangePtr[Col] + 128 );
+        }
+
+        RowOut    += BLOCK_HEIGHT_WIDTH;
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconInter
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr        : Pointer to 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference and output blocks.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block: each reference sample has
+ *                  its prediction error added, and the sums are saturated to
+ *                  8 bits into ReconPtr by SatUnsigned8.
+ *
+ *  SPECIAL NOTES : TmpDataBuffer receives the unsaturated sums.
+ *
+ ****************************************************************************/
+void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *RowOut = TmpDataBuffer;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Prediction plus error for the eight samples of this row. The bound
+        // is the literal 8 because exactly eight samples per row are formed,
+        // independent of BLOCK_HEIGHT_WIDTH.
+        for ( Col=0; Col<8; Col++ )
+        {
+            RowOut[Col] = (INT16)(RefPtr[Col] + ChangePtr[Col]);
+        }
+
+        // Next row of Block: error and temporary blocks step by
+        // BLOCK_HEIGHT_WIDTH, the reference steps by LineStep.
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+        RowOut    += BLOCK_HEIGHT_WIDTH;
+        RefPtr    += LineStep;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconInterHalfPixel2
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr1       : Pointer to first 8x8 reference block.
+ *                  UINT8 *RefPtr2       : Pointer to second 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference blocks and of the output block.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block by adding a prediction
+ *                  error to the average of the two specified reference blocks,
+ *                  (RefPtr1 + RefPtr2) >> 1 per sample. The two reference blocks
+ *                  are those that bracket the 1/2-pixel accuracy motion vector.
+ *
+ *  SPECIAL NOTES : TmpDataBuffer receives the unsaturated sums.
+ *
+ ****************************************************************************/
+void ScalarReconInterHalfPixel2
+(
+    INT16 *TmpDataBuffer,
+    UINT8 *ReconPtr,
+    UINT8 *RefPtr1,
+    UINT8 *RefPtr2,
+    INT16 *ChangePtr,
+    UINT32 LineStep
+)
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *RowOut = TmpDataBuffer;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Eight samples per row (literal bound, as in the expanded form)
+        for ( Col=0; Col<8; Col++ )
+        {
+            // Sum of the two reference samples; halved with a truncating shift
+            INT32 RefSum = (INT32)RefPtr1[Col] + (INT32)RefPtr2[Col];
+
+            RowOut[Col] = (INT16)( (RefSum >> 1) + ChangePtr[Col] );
+        }
+
+        // Next row of Block
+        ChangePtr += BLOCK_HEIGHT_WIDTH;
+        RowOut    += BLOCK_HEIGHT_WIDTH;
+        RefPtr1   += LineStep;
+        RefPtr2   += LineStep;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ReconBlock_C
+ *  
+ *  INPUTS        : INT16 *SrcBlock    : Pointer to 8x8 prediction error.
+ *                  INT16 *ReconRefPtr : Pointer to 8x8 block prediction.
+ *                  UINT32 LineStep    : Stride of output block.
+ *
+ *  OUTPUTS       : UINT8 *DestBlock   : Pointer to 8x8 reconstructed block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reconstruct a block by adding the block prediction to the
+ *                  prediction error block and clipping values.
+ *
+ *  SPECIAL NOTES : SrcBlock is overwritten in place with the unclipped sums.
+ *
+ ****************************************************************************/
+void ReconBlock_C ( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep )
+{
+    UINT32 Row;
+    UINT32 Col;
+    INT16 *RowPtr = SrcBlock;
+
+    // For each block row
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Accumulate the prediction into the error block in place; the bound
+        // is the literal 8 because exactly eight samples per row are summed.
+        for ( Col=0; Col<8; Col++ )
+        {
+            RowPtr[Col] = (INT16)(RowPtr[Col] + ReconRefPtr[Col]);
+        }
+
+        // Next row...
+        RowPtr      += BLOCK_HEIGHT_WIDTH;
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer,
+    // reading the sums back from the start of SrcBlock
+    SatUnsigned8( DestBlock, SrcBlock, LineStep, BLOCK_HEIGHT_WIDTH );
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c
new file mode 100644
index 00000000..aa745ff9
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/uoptsystemdependant.c
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "vputil_if.h"
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+// Scalar (no mmx) reconstruction functions
+extern void ClearSysState_C ( void );
+extern void IDctSlow ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2 ( INT16 *TmpDataBuffer, UINT8 *ReconPtr,UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
+extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
+extern void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride );
+extern void Copy12x12_C ( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
+extern void fdct_short_C ( INT16 *InputData, INT16 *OutputData );
+extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     fillidctconstants
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     STUB FUNCTION. The body is empty: calling it does nothing.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void fillidctconstants ( void )
+{
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UtilMachineSpecificConfig
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Binds the utility function pointers and both IDCT
+ *                      dispatch tables to the scalar C implementations.
+ *
+ *  SPECIAL NOTES :     No processor feature detection is performed here.
+ *
+ ****************************************************************************/
+void UtilMachineSpecificConfig ( void )
+{
+    int n;
+
+    // Entries 0..1 -> IDct1, 2..10 -> IDct10, 11..64 -> IDctSlow, in both tables.
+    for ( n = 0; n <= 64; n++ )
+    {
+        void (*Selected)( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+
+        Selected = ( n <= 1 ) ? IDct1 : ( ( n <= 10 ) ? IDct10 : IDctSlow );
+        idctc[n] = Selected;
+        idct[n]  = Selected;
+    }
+
+    // Remaining hooks are bound to their C implementations.
+    fdct_short           = fdct_short_C;
+    ClearSysState        = ClearSysState_C;
+    ReconIntra           = ScalarReconIntra;
+    ReconInter           = ScalarReconInter;
+    ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
+    AverageBlock         = AverageBlock_C;
+    UnpackBlock          = UnpackBlock_C;
+    ReconBlock           = ReconBlock_C;
+    SubtractBlock        = SubtractBlock_C;
+    CopyBlock            = CopyBlock_C;
+    Copy12x12            = Copy12x12_C;
+    FilterBlockBil_8     = FilterBlockBil_8_C;
+    FilterBlock          = FilterBlock_C;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c b/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
new file mode 100644
index 00000000..1705dc39
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
@@ -0,0 +1,1285 @@
+/**************************************************************************** 
+ *
+ *   Module Title :     vputil.c 
+ *
+ *   Description  :     Codec utility functions.
+ *
+ ***************************************************************************/
+#define STRICT              /* Strict type checking */
+
+/****************************************************************************
+ *  Header Files
+ ***************************************************************************/ 
+#include <math.h>
+#include "codec_common.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/        
+#define FILTER_WEIGHT 128	// equals 1 << FILTER_SHIFT
+#define FILTER_SHIFT  7
+#define MIN(a, b) ( ( (a) < (b) ) ? (a) : (b) )	// arguments parenthesised; each may still be evaluated twice
+
+/****************************************************************************
+ *  Imports
+ ***************************************************************************/ 
+extern void UtilMachineSpecificConfig ( void );
+extern void fillidctconstants ( void );
+
+/****************************************************************************
+ *  Module Statics
+ ****************************************************************************/
+
+INT32 BilinearFilters[8][2] =	// eight 2-tap rows; each row sums to 128 (FILTER_WEIGHT)
+{	// NOTE(review): row index is presumably the 1/8 pel offset -- confirm against the users of this table
+    { 128,   0 },
+    { 112,  16 },
+    {  96,  32 },
+    {  80,  48 },
+    {  64,  64 },
+    {  48,  80 },
+    {  32,  96 },
+    {  16, 112 }
+};
+
+// VP6.2 Bicubic filter taps calculated for 16 values of 'A' from -0.25 to -1.00 in steps of -0.05
+// For each 'A' there are 8 sets of data corresponding to 1/8 pel offsets 0 to 7/8.
+// These are only used in VP6.2 and upwards. Every 4-tap row sums to 128 (FILTER_WEIGHT).
+// The last entry is a dummy entry used for backwards compatibility with VP61
+static INT32 BicubicFilterSet[17][8][4] = 
+{
+    {   { 0, 128, 0, 0 },			// A=-0.25
+        { -3, 122, 9, 0 },
+        { -4, 109, 24, -1 },
+        { -5, 91, 45, -3 },
+        { -4, 68, 68, -4 },
+        { -3, 45, 91, -5 },
+        { -1, 24, 109, -4 },
+        { 0, 9, 122, -3 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.30
+        { -4, 124, 9, -1 },
+        { -5, 110, 25, -2 },
+        { -6, 91, 46, -3 },
+        { -5, 69, 69, -5 },
+        { -3, 46, 91, -6 },
+        { -2, 25, 110, -5 },
+        { -1, 9, 124, -4 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.35
+        { -4, 123, 10, -1 },
+        { -6, 110, 26, -2 },
+        { -7, 92, 47, -4 },
+        { -6, 70, 70, -6 },
+        { -4, 47, 92, -7 },
+        { -2, 26, 110, -6 },
+        { -1, 10, 123, -4 },
+    },
+    {   { 0, 128, 0, 0 },			// Approx A=-0.4
+        { -5, 124, 10, -1 },
+        { -7, 110, 27, -2 },
+        { -7, 91, 48, -4 },
+        { -6, 70, 70, -6 },
+        { -4, 48, 92, -8 },			// NOTE(review): not the mirror of the 3/8 row above -- do not "fix" without confirming
+        { -2, 27, 110, -7 },
+        { -1, 10, 124, -5 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.45
+        { -6, 124, 11, -1 },
+        { -8, 111, 28, -3 },
+        { -8, 92, 49, -5 },
+        { -7, 71, 71, -7 },
+        { -5, 49, 92, -8 },
+        { -3, 28, 111, -8 },
+        { -1, 11, 124, -6 },
+    },
+    {   { 0, 128, 0, 0 },			// Corresponds approximately to VDub bicubic A=-0.50
+        { -6, 123, 12, -1 },
+        { -9, 111, 29, -3 },
+        { -9, 93, 50, -6 },
+        { -8, 72, 72, -8 },
+        { -6, 50, 93, -9 },
+        { -3, 29, 111, -9 },
+        { -1, 12, 123, -6 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.55
+        { -7, 124, 12, -1 },
+        { -10, 111, 30, -3 },
+        { -10, 93, 51, -6 },
+        { -9, 73, 73, -9 },
+        { -6, 51, 93, -10 },
+        { -3, 30, 111, -10 },
+        { -1, 12, 124, -7 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.60
+        { -7, 123, 13, -1 },
+        { -11, 112, 31, -4 },
+        { -11, 94, 52, -7 },
+        { -10, 74, 74, -10 },
+        { -7, 52, 94, -11 },
+        { -4, 31, 112, -11 },
+        { -1, 13, 123, -7 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.65
+        { -8, 124, 13, -1 },
+        { -12, 112, 32, -4 },
+        { -12, 94, 53, -7 },
+        { -10, 74, 74, -10 },
+        { -7, 53, 94, -12 },
+        { -4, 32, 112, -12 },
+        { -1, 13, 124, -8 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.70
+        { -9, 124, 14, -1 },
+        { -13, 112, 33, -4 },
+        { -13, 95, 54, -8 },
+        { -11, 75, 75, -11 },
+        { -8, 54, 95, -13 },
+        { -4, 33, 112, -13 },
+        { -1, 14, 124, -9 },
+    },
+    {   { 0, 128, 0, 0 },			// Corresponds approximately to VDub bicubic A=-0.75
+        { -9, 123, 15, -1 },
+        { -14, 113, 34, -5 },
+        { -14, 95, 55, -8 },
+        { -12, 76, 76, -12 },
+        { -8, 55, 95, -14 },
+        { -5, 34, 112, -13 },			// NOTE(review): not the mirror of the 2/8 row above -- do not "fix" without confirming
+        { -1, 15, 123, -9 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.80
+        { -10, 124, 15, -1 },
+        { -14, 113, 34, -5 },
+        { -15, 96, 56, -9 },
+        { -13, 77, 77, -13 },
+        { -9, 56, 96, -15 },
+        { -5, 34, 113, -14 },
+        { -1, 15, 124, -10 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.85
+        { -10, 123, 16, -1 },
+        { -15, 113, 35, -5 },
+        { -16, 98, 56, -10 },
+        { -14, 78, 78, -14 },
+        { -10, 56, 98, -16 },
+        { -5, 35, 113, -15 },
+        { -1, 16, 123, -10 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.90
+        { -11, 124, 17, -2 },
+        { -16, 113, 36, -5 },
+        { -17, 98, 57, -10 },
+        { -14, 78, 78, -14 },
+        { -10, 57, 98, -17 },
+        { -5, 36, 113, -16 },
+        { -2, 17, 124, -11 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-0.95
+        { -12, 125, 17, -2 },
+        { -17, 114, 37, -6 },
+        { -18, 99, 58, -11 },
+        { -15, 79, 79, -15 },
+        { -11, 58, 99, -18 },
+        { -6, 37, 114, -17 },
+        { -2, 17, 125, -12 },
+    },
+    {   { 0, 128, 0, 0 },			// A=-1.00
+        { -12, 124, 18, -2 },
+        { -18, 114, 38, -6 },
+        { -19, 99, 59, -11 },
+        { -16, 80, 80, -16 },
+        { -11, 59, 99, -19 },
+        { -6, 38, 114, -18 },
+        { -2, 18, 124, -12 },
+    },
+
+	// Dummy entry for backwards VP61 compatibility
+	{
+		{  0, 128,   0,  0 },
+		{ -4, 118,  16, -2 },
+		{ -7, 106,  34, -5 },
+		{ -8,  90,  53, -7 },
+		{ -8,  72,  72, -8 },
+		{ -7,  53,  90, -8 },
+		{ -5,  34, 106, -7 },
+		{ -2,  16, 118, -4 }
+	}
+};
+
+
+//static INT32 FData[BLOCK_HEIGHT_WIDTH*11];	// Temp data bufffer used in filtering
+
+/****************************************************************************
+ *  Exports
+ ****************************************************************************/
+// Function pointers to platform-specific routines.
+// Each pointer is defined here without an initialiser; no assignment to any of
+// them appears in this part of the file.
+// NOTE(review): presumably UtilMachineSpecificConfig() (called from InitVPUtil)
+// binds them to C or CPU-specific implementations -- confirm against that routine.
+// Reference versions with matching signatures in this file: ClearSysState_C,
+// AverageBlock_C, UnpackBlock_C, SubtractBlock_C, CopyBlock_C, Copy12x12_C
+// and FilterBlock_C.
+
+// Block reconstruction entry points
+void (*ReconIntra)( INT16 *tmpBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
+void (*ReconInter)( INT16 *tmpBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
+void (*ReconInterHalfPixel2)(  INT16 * tmpBuffer,  UINT8  * ReconPtr, UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
+// Forward DCT, and a table of 65 inverse DCT entry points.
+// NOTE(review): the meaning of the idct[] index is not visible here -- confirm.
+void (*fdct_short)( INT16 *InputData,  INT16 *OutputData );
+void (*idct[65])( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+// Clears system state after MMX/XMM use (see ClearSysState_C for the C stub)
+void (*ClearSysState)( void );
+// 8x8 block helpers
+void (*ReconBlock)( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
+void (*SubtractBlock)( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+void (*UnpackBlock)( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+void (*AverageBlock)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+void (*CopyBlock)( unsigned char *src,  unsigned char *dest, unsigned int srcstride );
+void (*Copy12x12)( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
+// Second table of 65 inverse DCT entry points with the same signature as idct[].
+// NOTE(review): how idctc[] differs from idct[] is not visible here -- confirm.
+void (*idctc[65])( INT16 *InputData,  INT16 *QuantMatrix,  INT16 *OutputData );
+// Fractional-pel prediction filters (8-bit output and 16-bit output variants)
+void (*FilterBlockBil_8)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine,  INT32 ModX,  INT32 ModY );
+void (*FilterBlock)( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ClearSysState_C
+ *
+ *  INPUTS        : None.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Null placeholder function; it executes no statements.
+ *
+ *  SPECIAL NOTES : Stub in the C-code for a function required when using
+ *                  MMX, XMM, etc. to clear system state. Its signature
+ *                  matches the ClearSysState function pointer.
+ *
+ ****************************************************************************/
+void ClearSysState_C ( void )
+{
+    // Intentionally empty (see SPECIAL NOTES above).
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : AverageBlock_C
+ *  
+ *  INPUTS        : UINT8 *ReconPtr1          : Pointer to first reference block.
+ *                  UINT8 *ReconPtr2          : Pointer to second reference block.
+ *                  UINT32 ReconPixelsPerLine : Stride shared by both reference blocks.
+ *					
+ *  OUTPUTS       : UINT16 *ReconRefPtr       : Pointer to output block, written
+ *                                              with a row pitch of BLOCK_HEIGHT_WIDTH.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Builds the output block as the per-pixel mean of the two
+ *                  input blocks.
+ *
+ *  SPECIAL NOTES : The mean is (a + b) >> 1, i.e. the half is truncated,
+ *                  not rounded. Eight pixels are processed per row.
+ *
+ ****************************************************************************/
+void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine )
+{
+    UINT32 Row, Col;
+    UINT8  *RefA = ReconPtr1;
+    UINT8  *RefB = ReconPtr2;
+    UINT16 *Out  = ReconRefPtr;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Average the eight pixels of this row
+        for ( Col=0; Col<8; Col++ )
+            Out[Col] = (UINT16)( ((INT32)RefA[Col] + (INT32)RefB[Col]) >> 1 );
+
+        // Step both references by the frame stride, the output by the block width
+        RefA += ReconPixelsPerLine;
+        RefB += ReconPixelsPerLine;
+        Out  += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : UnpackBlock_C
+ *  
+ *  INPUTS        : UINT8 *ReconPtr           : Pointer to reference block.
+ *                  UINT32 ReconPixelsPerLine : Stride of reference block.
+ *					
+ *  OUTPUTS       : INT16 *ReconRefPtr        : Pointer to output block, written
+ *                                              with a row pitch of BLOCK_HEIGHT_WIDTH.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Widens a block of unsigned 8-bit pixels into a block of
+ *                  signed 16-bit values.
+ *
+ *  SPECIAL NOTES : Eight pixels are converted per row.
+ *
+ ****************************************************************************/
+void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine )
+{
+    UINT32 Row, Col;
+    UINT8  *In  = ReconPtr;
+    INT16  *Out = ReconRefPtr;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // Widen the eight pixels of this row
+        for ( Col=0; Col<8; Col++ )
+            Out[Col] = (INT16)In[Col];
+
+        In  += ReconPixelsPerLine;
+        Out += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SubtractBlock_C
+ *  
+ *  INPUTS        : UINT8 *SrcBlock : Pointer to 8x8 source block.
+ *                  UINT32 LineStep : Stride of source block.
+ *                  INT16 *DestPtr  : On entry holds the block to subtract.
+ *
+ *  OUTPUTS       : INT16 *DestPtr  : On exit holds the 8x8 difference block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Computes DestPtr = SrcBlock - DestPtr element by element;
+ *                  the result overwrites the values read from DestPtr.
+ *
+ *  SPECIAL NOTES : DestPtr is read and written with a row pitch of
+ *                  BLOCK_HEIGHT_WIDTH. Eight values are processed per row.
+ *
+ ****************************************************************************/
+void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep )
+{
+    UINT32 Row, Col;
+    UINT8  *Src  = SrcBlock;
+    INT16  *Diff = DestPtr;
+
+    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ )
+    {
+        // In-place difference for the eight values of this row
+        for ( Col=0; Col<8; Col++ )
+            Diff[Col] = (INT16)( (INT32)Src[Col] - (INT32)Diff[Col] );
+
+        Src  += LineStep;
+        Diff += BLOCK_HEIGHT_WIDTH;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : CopyBlock_C
+ *
+ *  INPUTS        : unsigned char *src     : Pointer to 8x8 source block.
+ *                  unsigned int srcstride : Stride of blocks.
+ *
+ *  OUTPUTS       : unsigned char *dest    : Pointer to 8x8 destination block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies an 8x8 block from source to destination.
+ *
+ *  SPECIAL NOTES : Both src and dest are advanced by srcstride after each
+ *                  row, so the two buffers must share that stride.
+ *                  The copy is done byte by byte: reading and writing the
+ *                  unsigned char buffers through UINT32 pointers relied on
+ *                  both pointers being 32-bit aligned and broke the C
+ *                  aliasing rules, so neither pointer needs any particular
+ *                  alignment here.
+ *
+ ****************************************************************************/
+void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride )
+{
+    unsigned int row, col;
+
+    for ( row=0; row<8; row++ )
+    {
+        // Eight bytes per row
+        for ( col=0; col<8; col++ )
+            dest[col] = src[col];
+
+        // Same stride for source and destination
+        src  += srcstride;
+        dest += srcstride;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : Copy12x12_C
+ *
+ *  INPUTS        : const unsigned char *src : Pointer to source block.
+ *                  unsigned int srcstride   : Stride of the source block.
+ *                  unsigned int deststride  : Stride of the destination block.
+ *
+ *  OUTPUTS       : unsigned char *dest      : Pointer to destination block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Copies 12 rows of 12 bytes from source to destination.
+ *
+ *  SPECIAL NOTES : Source and destination may use different strides. 
+ *
+ ****************************************************************************/
+void Copy12x12_C
+(
+    const unsigned char *src, 
+    unsigned char *dest, 
+    unsigned int srcstride,
+    unsigned int deststride
+)
+{
+    unsigned int row, col;
+
+    for ( row=0; row<12; row++ )
+    {
+        // Twelve bytes per row
+        for ( col=0; col<12; col++ )
+            dest[col] = src[col];
+
+        // Each buffer advances by its own stride
+        src  += srcstride;
+        dest += deststride;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : InitVPUtil
+ *
+ *  INPUTS        : None.
+ *                           
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Setup static initialized variables for Util by calling
+ *                  fillidctconstants() and then UtilMachineSpecificConfig().
+ *
+ *  SPECIAL NOTES : Both callees are defined elsewhere.
+ *                  NOTE(review): UtilMachineSpecificConfig() presumably
+ *                  binds the exported function pointers -- confirm.
+ *
+ ****************************************************************************/
+void InitVPUtil ( void )
+{
+	// NOTE(review): presumably fills the constant tables used by the IDCT -- confirm
+	fillidctconstants ();
+	// Machine-specific configuration runs second; keep this call order
+	UtilMachineSpecificConfig ();
+}
+
+/****************************************************************************
+ * Fractional pixel prediction filtering...
+ ****************************************************************************/
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock1d
+ *  
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of source block.
+ *                  UINT32 PixelStep        : 1 for horizontal filtering,
+ *                                            SrcPixelsPerLine for vertical filtering.
+ *                  UINT32 OutputHeight     : Height of the output block.
+ *                  UINT32 OutputWidth      : Width of the output block.
+ *                  INT32 *Filter           : Array of 4 filter taps.
+ *
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to output block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void.
+ *
+ *  FUNCTION      : Runs a 1-D 4-tap filter over the source block, either
+ *                  along rows or along columns, and stores the rounded,
+ *                  clamped (0-255) result in the output block.
+ *
+ *  SPECIAL NOTES : Four filter taps should sum to FILTER_WEIGHT.
+ *                  For each output sample the taps are applied to the source
+ *                  samples at offsets -PixelStep, 0, +PixelStep and
+ *                  +2*PixelStep, so the caller must provide one sample before
+ *                  and two samples after the block in the filter direction.
+ *
+ ****************************************************************************/
+void FilterBlock1d 
+( 
+    UINT8 *SrcPtr,
+    UINT16 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    UINT32 PixelStep,
+    UINT32 OutputHeight,
+    UINT32 OutputWidth,
+    INT32 *Filter 
+)
+{
+    UINT32 Row, Col;
+    UINT8  *RowPtr = SrcPtr;
+    UINT16 *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            UINT8 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Weighted sum of the four neighbouring samples
+            Sum  = (INT32)Pix[-(INT32)PixelStep] * Filter[0];
+            Sum += (INT32)Pix[0]                 * Filter[1];
+            Sum += (INT32)Pix[PixelStep]         * Filter[2];
+            Sum += (INT32)Pix[2*PixelStep]       * Filter[3];
+            Sum += (FILTER_WEIGHT >> 1);        // Rounding
+
+            // Scale back and saturate to the 8-bit pixel range
+            Sum = Sum >> FILTER_SHIFT;
+            Sum = ( Sum < 0 ) ? 0 : ( ( Sum > 255 ) ? 255 : Sum );
+
+            Out[Col] = (INT16)Sum;
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dFirstPass
+ *  
+ *  INPUTS        : UINT8 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of source block.
+ *                  UINT32 PixelStep        : 1 for horizontal filtering,
+ *                                            SrcPixelsPerLine for vertical filtering.
+ *                  UINT32 OutputHeight     : Height of the output block.
+ *                  UINT32 OutputWidth      : Width of the output block.
+ *                  INT32 *Filter           : Array of 4 filter taps.
+ *
+ *  OUTPUTS       : INT32 *OutputPtr        : Pointer to output block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void.
+ *
+ *  FUNCTION      : Runs a 1-D 4-tap filter over the source block, either
+ *                  along rows or along columns. Used as the first pass of
+ *                  the 2-D separable filter.
+ *
+ *  SPECIAL NOTES : The output is stored as INT32, but every value has
+ *                  already been rounded, shifted by FILTER_SHIFT and
+ *                  clamped to 0-255 before it is written.
+ *                  Four filter taps should sum to FILTER_WEIGHT.
+ *                  For each output sample the taps are applied to the source
+ *                  samples at offsets -PixelStep, 0, +PixelStep and
+ *                  +2*PixelStep.
+ *
+ ****************************************************************************/
+void FilterBlock2dFirstPass
+( 
+    UINT8 *SrcPtr,
+    INT32 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    UINT32 PixelStep,
+    UINT32 OutputHeight,
+    UINT32 OutputWidth,
+    INT32 *Filter 
+)
+{
+    UINT32 Row, Col;
+    UINT8  *RowPtr = SrcPtr;
+    INT32  *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            UINT8 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Weighted sum of the four neighbouring samples
+            Sum  = (INT32)Pix[-(INT32)PixelStep] * Filter[0];
+            Sum += (INT32)Pix[0]                 * Filter[1];
+            Sum += (INT32)Pix[PixelStep]         * Filter[2];
+            Sum += (INT32)Pix[2*PixelStep]       * Filter[3];
+            Sum += (FILTER_WEIGHT >> 1);        // Rounding
+
+            // Scale back and saturate to the 8-bit pixel range
+            Sum = Sum >> FILTER_SHIFT;
+            Sum = ( Sum < 0 ) ? 0 : ( ( Sum > 255 ) ? 255 : Sum );
+
+            Out[Col] = Sum;
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dSecondPass
+ *  
+ *  INPUTS        : INT32 *SrcPtr           : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of source block.
+ *                  UINT32 PixelStep        : 1 for horizontal filtering,
+ *                                            SrcPixelsPerLine for vertical filtering.
+ *                  UINT32 OutputHeight     : Height of the output block.
+ *                  UINT32 OutputWidth      : Width of the output block.
+ *                  INT32 *Filter           : Array of 4 filter taps.
+ *
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to output block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void.
+ *
+ *  FUNCTION      : Runs a 1-D 4-tap filter over a block of 32-bit samples,
+ *                  either along rows or along columns, and stores the
+ *                  rounded, clamped (0-255) result. Used as the second pass
+ *                  of the 2-D separable filter.
+ *
+ *  SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dFirstPass.
+ *                  Four filter taps should sum to FILTER_WEIGHT.
+ *                  For each output sample the taps are applied to the source
+ *                  samples at offsets -PixelStep, 0, +PixelStep and
+ *                  +2*PixelStep.
+ *
+ ****************************************************************************/
+void FilterBlock2dSecondPass 
+(
+    INT32 *SrcPtr,
+    UINT16 *OutputPtr,
+    UINT32 SrcPixelsPerLine,
+    UINT32 PixelStep,
+    UINT32 OutputHeight,
+    UINT32 OutputWidth,
+    INT32 *Filter 
+)
+{
+    UINT32 Row, Col;
+    INT32  *RowPtr = SrcPtr;
+    UINT16 *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            INT32 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Weighted sum of the four neighbouring samples
+            Sum  = (INT32)Pix[-(INT32)PixelStep] * Filter[0];
+            Sum += (INT32)Pix[0]                 * Filter[1];
+            Sum += (INT32)Pix[PixelStep]         * Filter[2];
+            Sum += (INT32)Pix[2*PixelStep]       * Filter[3];
+            Sum += (FILTER_WEIGHT >> 1);        // Rounding
+
+            // Scale back and saturate to the 8-bit pixel range
+            Sum = Sum >> FILTER_SHIFT;
+            Sum = ( Sum < 0 ) ? 0 : ( ( Sum > 255 ) ? 255 : Sum );
+
+            Out[Col] = (UINT16)Sum;
+        }
+
+        // Start next row
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2d
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  INT32  *HFilter         : Array of 4 horizontal filter taps.
+ *                  INT32  *VFilter         : Array of 4 vertical filter taps.
+ *					
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 2-D filters an 8x8 input block: a 4-tap horizontal pass
+ *                  into a temporary buffer, then a 4-tap vertical pass over
+ *                  that buffer into the output.
+ *
+ *  SPECIAL NOTES : The horizontal pass produces 11 rows of 8 samples,
+ *                  starting one source row above the block. This gives the
+ *                  vertical 4-tap filter one extra row above and two extra
+ *                  rows below the 8 output rows, so its taps never read
+ *                  outside the temporary buffer.
+ *
+ ****************************************************************************/
+void FilterBlock2d 
+( 
+    UINT8  *SrcPtr, 
+    UINT16 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    INT32  *HFilter, 
+    INT32  *VFilter 
+)
+{
+    // Temp data buffer: 11 rows of horizontally filtered samples
+    INT32 FData[BLOCK_HEIGHT_WIDTH*11];
+    UINT8 *FirstRow = SrcPtr-SrcPixelsPerLine;      // One source row above the block
+    INT32 *BlockTop = FData+BLOCK_HEIGHT_WIDTH;     // Skips that extra top row
+
+    // Horizontal pass over 11 rows, 8 samples each...
+    FilterBlock2dFirstPass ( FirstRow, FData, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+    // ...then vertical pass producing the 8x8 result
+    FilterBlock2dSecondPass ( BlockTop, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock1dBil
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Output block height.
+ *                  UINT32 OutputWidth      : Output block width.
+ *                  INT32  *Filter          : Array of 2 bi-linear filter taps.
+ *					
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to filtered block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Runs a 2-tap 1-D bi-linear filter over the input block,
+ *                  either along rows or along columns.
+ *
+ *  SPECIAL NOTES : PixelStep selects the direction: 1 filters horizontally,
+ *                  the stride filters vertically. Each output sample combines
+ *                  the source samples at offsets 0 and +PixelStep.
+ *                  The result is not clamped.
+ *
+ ****************************************************************************/
+void FilterBlock1dBil
+( 
+    UINT8  *SrcPtr, 
+    UINT16 *OutputPtr, 
+    UINT32  SrcPixelsPerLine, 
+    UINT32  PixelStep, 
+    UINT32  OutputHeight, 
+    UINT32  OutputWidth, 
+    INT32  *Filter 
+)
+{
+    UINT32 Row, Col;
+    UINT8  *RowPtr = SrcPtr;
+    UINT16 *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            UINT8 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Two-tap weighted sum
+            // NOTE: Rounding doesn't improve accuracy but is 
+            //       easier to implement on certain platforms.
+            Sum  = (INT32)Pix[0]         * Filter[0];
+            Sum += (INT32)Pix[PixelStep] * Filter[1];
+            Sum += (FILTER_WEIGHT/2);
+
+            Out[Col] = (INT16)( Sum >> FILTER_SHIFT );
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dBil_FirstPass
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Output block height.
+ *                  UINT32 OutputWidth      : Output block width.
+ *                  INT32  *Filter          : Array of 2 bi-linear filter taps.
+ *					
+ *  OUTPUTS       : INT32 *OutputPtr        : Pointer to filtered block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Runs a 1-D 2-tap bi-linear filter over the source block,
+ *                  either along rows or along columns. Used as the first
+ *                  pass of the 2-D separable bi-linear filter.
+ *
+ *  SPECIAL NOTES : The output is stored as INT32, but every value has
+ *                  already been rounded and shifted by FILTER_SHIFT.
+ *                  Two filter taps should sum to FILTER_WEIGHT.
+ *                  PixelStep selects the direction: 1 filters horizontally,
+ *                  the stride filters vertically. Each output sample combines
+ *                  the source samples at offsets 0 and +PixelStep.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_FirstPass
+( 
+    UINT8 *SrcPtr,  
+    INT32 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep, 
+    UINT32 OutputHeight, 
+    UINT32 OutputWidth, 
+    INT32 *Filter 
+)
+{
+    UINT32 Row, Col;
+    UINT8  *RowPtr = SrcPtr;
+    INT32  *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            UINT8 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Two-tap weighted sum plus rounding term
+            Sum  = (INT32)Pix[0]         * Filter[0];
+            Sum += (INT32)Pix[PixelStep] * Filter[1];
+            Sum += (FILTER_WEIGHT/2);
+
+            Out[Col] = Sum >> FILTER_SHIFT;
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dBil_SecondPass
+ *  
+ *  INPUTS        : INT32  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Output block height.
+ *                  UINT32 OutputWidth      : Output block width.
+ *                  INT32  *Filter          : Array of 2 bi-linear filter taps.
+ *					
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to filtered block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Runs a 1-D 2-tap bi-linear filter over a block of 32-bit
+ *                  samples, either along rows or along columns. Used as the
+ *                  second pass of the 2-D separable bi-linear filter.
+ *
+ *  SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dBil_FirstPass.
+ *                  Two filter taps should sum to FILTER_WEIGHT.
+ *                  PixelStep selects the direction: 1 filters horizontally,
+ *                  the stride filters vertically. Each output sample combines
+ *                  the source samples at offsets 0 and +PixelStep.
+ *                  The result is not clamped.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_SecondPass
+(
+    INT32 *SrcPtr, 
+    UINT16 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep, 
+    UINT32 OutputHeight, 
+    UINT32 OutputWidth, 
+    INT32 *Filter 
+)
+{
+    UINT32 Row, Col;
+    INT32  *RowPtr = SrcPtr;
+    UINT16 *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            INT32 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Two-tap weighted sum plus rounding term
+            Sum  = (INT32)Pix[0]         * Filter[0];
+            Sum += (INT32)Pix[PixelStep] * Filter[1];
+            Sum += (FILTER_WEIGHT/2);
+
+            Out[Col] = (UINT16)( Sum >> FILTER_SHIFT );
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dBil
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
+ *                  INT32  *VFilter         : Array of 2 vertical filter taps.
+ *					
+ *  OUTPUTS       : UINT16 *OutputPtr       : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 2-D filters an 8x8 input block: a 2-tap bi-linear
+ *                  horizontal pass into a temporary buffer, then a 2-tap
+ *                  bi-linear vertical pass over that buffer into the output.
+ *
+ *  SPECIAL NOTES : The horizontal pass produces 9 rows of 8 samples,
+ *                  starting at the first row of the block. The ninth row
+ *                  gives the vertical 2-tap filter the sample below the
+ *                  last output row, so its taps never read past the data
+ *                  written by the first pass. The temporary buffer is
+ *                  declared with room for 11 rows, of which 9 are used.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil 
+(
+    UINT8  *SrcPtr, 
+    UINT16 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    INT32  *HFilter, 
+    INT32  *VFilter 
+)
+{
+    // Temp data buffer holding the horizontally filtered rows
+    INT32 FData[BLOCK_HEIGHT_WIDTH*11];
+    INT32 *Intermediate = FData;
+
+    // Horizontal pass over 9 rows, 8 samples each...
+    FilterBlock2dBil_FirstPass ( SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+
+    // ...then vertical pass producing the 8x8 result
+    FilterBlock2dBil_SecondPass ( Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock_C
+ *  
+ *  INPUTS        : UINT8 *ReconPtr1     : Pointer to first 8x8 input block.
+ *                  UINT8 *ReconPtr2     : Pointer to second 8x8 input block.
+ *                  UINT32 PixelsPerLine : Stride for ReconPtr1 & ReconPtr2.
+ *                  INT32 ModX           : Fractional part of x-component of motion vector.
+ *                  INT32 ModY           : Fractional part of y-component of motion vector.
+ *                  BOOL UseBicubic      : TRUE=Bicubic, FALSE=Bi-Linear filter.
+ *                  UINT8 BicubicAlpha   : Defines which set of bicubic taps to use.
+ *				
+ *  OUTPUTS       : UINT16 *ReconRefPtr  : Pointer to 8x8 filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Produces a filtered fractional pel prediction block
+ *                  using bilinear or bicubic filters.
+ *                  This is used by baseline VP6.2 and upwards.
+ *
+ *  SPECIAL NOTES : ReconPtr1 & ReconPtr2 point to blocks that bracket the
+ *                  position of the fractional pixel motion vector. The
+ *                  distance between the two pointers selects the filter:
+ *                    1                : horizontal 1-D filter (taps from ModX)
+ *                    PixelsPerLine    : vertical 1-D filter (taps from ModY)
+ *                    PixelsPerLine-1  : 2-D filter anchored at the lower pointer - 1
+ *                    PixelsPerLine+1  : 2-D filter anchored at the lower pointer
+ *                  The tests are made in that order. For any other distance
+ *                  nothing is written to ReconRefPtr.
+ *                  ModX, ModY and BicubicAlpha index the tap tables directly
+ *                  and are not range checked here.
+ *
+ ****************************************************************************/
+void FilterBlock_C
+( 
+    UINT8 *ReconPtr1,
+    UINT8 *ReconPtr2,
+    UINT16 *ReconRefPtr,
+    UINT32 PixelsPerLine,
+    INT32 ModX,
+    INT32 ModY,
+    BOOL  UseBicubic,
+	UINT8 BicubicAlpha
+)
+{
+    // ModX and ModY are the bottom three bits of the signed motion vector
+    // components (in 1/8th pel units). This works out to be what we want
+    // --despite the pointer swapping that goes on below.
+    // For example...
+    // if MV x-component is +ve then ModX = x%8.
+    // if MV x-component is -ve then ModX = 8+(x%8), where x%8 is in the range -7 to -1.
+    int Offset = (int)(ReconPtr2 - ReconPtr1);
+
+    // Order the pointers so that ReconPtr1 is "smaller than",
+    // i.e. above, left, above-right or above-left of, ReconPtr2
+    if ( Offset < 0 )
+    {
+        UINT8 *Swap = ReconPtr2;
+        ReconPtr2 = ReconPtr1;
+        ReconPtr1 = Swap;
+        Offset = (int)(ReconPtr2 - ReconPtr1);
+    }
+
+    if ( UseBicubic )
+    {
+        if ( Offset == 1 )
+        {
+            // Fractional pixel in horizontal only...
+            FilterBlock1d ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BicubicFilterSet[BicubicAlpha][ModX] );
+        }
+        else if ( Offset == (int)(PixelsPerLine) )
+        {
+            // Fractional pixel in vertical only...
+            FilterBlock1d ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilterSet[BicubicAlpha][ModY] );
+        }
+        else if ( Offset == (int)(PixelsPerLine - 1) )
+        {
+            // ReconPtr1 is Top right...
+            FilterBlock2d ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BicubicFilterSet[BicubicAlpha][ModX], BicubicFilterSet[BicubicAlpha][ModY] );
+        }
+        else if ( Offset == (int)(PixelsPerLine + 1) )
+        {
+            // ReconPtr1 is Top left...
+            FilterBlock2d ( ReconPtr1, ReconRefPtr, PixelsPerLine, BicubicFilterSet[BicubicAlpha][ModX], BicubicFilterSet[BicubicAlpha][ModY] );
+        }
+    }
+    else
+    {
+        if ( Offset == 1 )
+        {
+            // Fractional pixel in horizontal only...
+            FilterBlock1dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters[ModX] );
+        }
+        else if ( Offset == (int)(PixelsPerLine) )
+        {
+            // Fractional pixel in vertical only...
+            FilterBlock1dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters[ModY] );
+        }
+        else if ( Offset == (int)(PixelsPerLine - 1) )
+        {
+            // ReconPtr1 is Top right...
+            FilterBlock2dBil ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+        }
+        else if ( Offset == (int)(PixelsPerLine + 1) )
+        {
+            // ReconPtr1 is Top left...
+            FilterBlock2dBil ( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+        }
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock1dBil_8
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Output block height.
+ *                  UINT32 OutputWidth      : Output block width.
+ *                  INT32  *Filter          : Array of 2 bi-linear filter taps.
+ *					
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block, written
+ *                                            with a row pitch of OutputWidth.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Runs a 2-tap 1-D bi-linear filter over the input block,
+ *                  either along rows or along columns, producing 8-bit output.
+ *
+ *  SPECIAL NOTES : PixelStep selects the direction: 1 filters horizontally,
+ *                  the stride filters vertically. Each output sample combines
+ *                  the source samples at offsets 0 and +PixelStep.
+ *                  The result is truncated to 8 bits without clamping.
+ *
+ ****************************************************************************/
+void FilterBlock1dBil_8
+( 
+    UINT8 *SrcPtr, 
+    UINT8 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep, 
+    UINT32 OutputHeight, 
+    UINT32 OutputWidth, 
+    INT32 *Filter )
+{
+    UINT32 Row, Col;
+    UINT8  *RowPtr = SrcPtr;
+    UINT8  *Out    = OutputPtr;
+
+    for ( Row=0; Row<OutputHeight; Row++ )
+    {
+        for ( Col=0; Col<OutputWidth; Col++ )
+        {
+            UINT8 *Pix = RowPtr + Col;
+            INT32 Sum;
+
+            // Two-tap weighted sum
+            // NOTE: Rounding doesn't improve accuracy but is 
+            //       easier to implement on certain platforms.
+            Sum  = (INT32)Pix[0]         * Filter[0];
+            Sum += (INT32)Pix[PixelStep] * Filter[1];
+            Sum += (FILTER_WEIGHT/2);
+
+            Out[Col] = (UINT8)( Sum >> FILTER_SHIFT );
+        }
+
+        // Next row...
+        RowPtr += SrcPixelsPerLine;
+        Out    += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dBil_SecondPass_8
+ *  
+ *  INPUTS        : INT32  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  UINT32 PixelStep        : Offset between filter input samples (see notes).
+ *                  UINT32 OutputHeight     : Number of output rows to produce.
+ *                  UINT32 OutputWidth      : Number of output pixels per row.
+ *                  INT32  *Filter          : Array of 2 bi-linear filter taps.
+ *
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block (rows are OutputWidth apart).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in 
+ *                  either horizontal or vertical direction to produce the
+ *                  filtered output block. Used to implement second-pass
+ *                  of 2-D separable bi-linear filter.
+ *
+ *  SPECIAL NOTES : Requires 32-bit input as produced by FilterBlock2dBil_FirstPass.
+ *                  Two filter taps should sum to FILTER_WEIGHT.
+ *                  PixelStep defines whether the filter is applied 
+ *                  horizontally (PixelStep=1) or vertically (PixelStep=stride).
+ *                  It defines the offset required to move from one input 
+ *                  to the next. The rounded sum is shifted down by FILTER_SHIFT
+ *                  and cast to UINT8; no clamping is performed.
+ ****************************************************************************/
+void FilterBlock2dBil_SecondPass_8
+( 
+    INT32 *SrcPtr, 
+    UINT8 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    UINT32 PixelStep, 
+    UINT32 OutputHeight, 
+    UINT32 OutputWidth, 
+    INT32 *Filter 
+)
+{
+    UINT32 i, j;
+	INT32  Temp;
+	INT32  RoundValue = ((FILTER_WEIGHT*FILTER_WEIGHT) >> 1);	// NOTE(review): never read in this function; the sum below rounds with FILTER_WEIGHT/2
+
+    for ( i=0; i<OutputHeight; i++ )
+    {
+		for ( j=0; j<OutputWidth; j++ ) 
+		{
+			// Apply bi-linear filter to the sample at SrcPtr and the sample PixelStep further on.
+			Temp =  ((INT32)SrcPtr[0]         * Filter[0]) +
+					((INT32)SrcPtr[PixelStep] * Filter[1]) + 
+                    (FILTER_WEIGHT / 2);
+
+			OutputPtr[j] = (UINT8)(Temp >> FILTER_SHIFT);	// truncating cast, not a saturating one
+
+			SrcPtr++;	// advance to the next input sample in this row
+		}
+			
+        // Next row: the input moves on by one full stride in total, the output rows are packed.
+        SrcPtr    += SrcPixelsPerLine - OutputWidth;
+        OutputPtr += OutputWidth;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlock2dBil_8
+ *  
+ *  INPUTS        : UINT8  *SrcPtr          : Pointer to source block.
+ *                  UINT32 SrcPixelsPerLine : Stride of input block.
+ *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
+ *                  INT32  *VFilter         : Array of 2 vertical filter taps.
+ *
+ *  OUTPUTS       : UINT8 *OutputPtr        : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 2-D filters an 8x8 input block by applying a 2-tap 
+ *                  bi-linear filter horizontally followed by a 2-tap 
+ *                  bi-linear filter vertically on the result. Output
+ *                  is 8-bit unsigned.
+ *
+ *  SPECIAL NOTES : The intermediate horizontally filtered block must hold
+ *                  1 more point than the output block in each column. The
+ *                  vertical 2-tap filter reads a row and the row after it, so
+ *                  a ninth row is needed to keep the filter taps from
+ *                  extending beyond data. Thus the output of the first stage
+ *                  filter is an 8x9 (HxV) block.
+ *
+ ****************************************************************************/
+void FilterBlock2dBil_8
+( 
+    UINT8 *SrcPtr, 
+    UINT8 *OutputPtr, 
+    UINT32 SrcPixelsPerLine, 
+    INT32 *HFilter, 
+    INT32 *VFilter 
+)
+{
+	INT32 FData[BLOCK_HEIGHT_WIDTH*11];	// Temp data buffer for the first-pass output (BLOCK_HEIGHT_WIDTH*11 entries)
+
+	// First filter 1-D horizontally (step 1). NOTE(review): 9, 8 presumably mean 9 rows of 8 samples -- confirm against FilterBlock2dBil_FirstPass.
+	FilterBlock2dBil_FirstPass ( SrcPtr, FData, SrcPixelsPerLine, 1, 9, 8, HFilter );
+
+	// then filter 1-D vertically: the tap step equals the intermediate stride, giving 8 rows of 8 samples.
+	FilterBlock2dBil_SecondPass_8 ( FData, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : FilterBlockBil_8_C
+ *  
+ *  INPUTS        : UINT8 *ReconPtr1     : Pointer to first 8x8 input block.
+ *                  UINT8 *ReconPtr2     : Pointer to second 8x8 input block.
+ *                  UINT32 PixelsPerLine : Stride for ReconPtr1 & ReconPtr2.
+ *                  INT32 ModX           : Fractional part of x-component of motion vector.
+ *                  INT32 ModY           : Fractional part of y-component of motion vector.
+ *
+ *  OUTPUTS       : UINT8 *ReconRefPtr   : Pointer to 8x8 filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Produces a filtered fractional pel prediction block
+ *                  using bilinear filter.
+ *
+ *  SPECIAL NOTES : ReconPtr1 & ReconPtr2 point to blocks that bracket the
+ *                  position of the fractional pixel motion vector. These
+ *                  two blocks are combined using a bi-linear filter to
+ *                  produce the output prediction block for this motion vector.
+ *                  ModX, ModY are used for filter selection--see code
+ *                  comment for definition.
+ *                  Nothing is written unless the pointers are 1, stride, stride-1 or stride+1 apart.
+ ****************************************************************************/
+void FilterBlockBil_8_C
+( 
+    UINT8 *ReconPtr1, 
+    UINT8 *ReconPtr2, 
+    UINT8 *ReconRefPtr, 
+    UINT32 PixelsPerLine, 
+    INT32  ModX, 
+    INT32  ModY
+)
+{
+	int diff;
+
+    // ModX and ModY are the bottom three bits of the signed motion vector
+    // components (in 1/8th pel units). This works out to be what we want
+    // --despite the pointer swapping that goes on below.
+	// For example...
+    // if MV x-component is +ve then ModX = x%8.
+	// if MV x-component is -ve then ModX = 8+(x%8), where x%8 is in the range -7 to -1.
+
+    // Swap pointers to ensure that ReconPtr1 is "smaller than",
+    // i.e. above, left, above-right or above-left of, ReconPtr2
+	diff = ReconPtr2 - ReconPtr1;	// NOTE(review): pointer difference is implicitly narrowed to int here
+
+    if ( diff<0 )
+	{
+        // ReconPtr1>ReconPtr2, so swap and recompute the (now positive) offset...
+		UINT8 *temp = ReconPtr1;
+		ReconPtr1 = ReconPtr2;
+		ReconPtr2 = temp;
+		diff = (int)(ReconPtr2-ReconPtr1);
+	}
+
+	if ( diff==1 )
+	{											        
+        // Fractional pixel in horizontal only: 1-D filter with a step of one pixel, taps chosen by ModX.
+		FilterBlock1dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters[ModX] );
+	}
+	else if ( diff == (int)(PixelsPerLine) )				
+	{
+        // Fractional pixel in vertical only: 1-D filter with a step of one line, taps chosen by ModY.
+		FilterBlock1dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters[ModY] );
+	}
+	else if ( diff == (int)(PixelsPerLine - 1))
+	{	
+        // ReconPtr1 is Top right, so start the 2-D filter one pixel to its left.
+		FilterBlock2dBil_8 ( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+	}
+	else if ( diff == (int)(PixelsPerLine + 1) )
+	{	
+        // ReconPtr1 is Top left, so the 2-D filter starts at ReconPtr1 itself.
+		FilterBlock2dBil_8 ( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters[ModX], BilinearFilters[ModY] );
+	}	// no final else: any other offset leaves ReconRefPtr unwritten
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h b/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
new file mode 100644
index 00000000..40ae448a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
@@ -0,0 +1,74 @@
+/****************************************************************************
+*
+*   Module Title :     dct.h
+*
+*   Description  :     DCT header file.
+*
+****************************************************************************/						
+
+#ifndef __INC_DCT_H
+#define __INC_DCT_H
+
+/****************************************************************************
+*  Header files
+****************************************************************************/
+#include "type_aliases.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define COEFF_MAX   32768   // Max magnitude of DCT coefficient
+// Extra bits of precision added to the fdct that have to be stripped off during the quantize
+#define FDCT_PRECISION_BITS			1
+#define FDCT_PRECISION_NEG_ADJ      ((INT16) (1<<FDCT_PRECISION_BITS)-1)	// (1<<BITS)-1; NOTE(review): the INT16 cast binds only to the shift, not to the "-1"
+
+
+
+
+#if 0   // AWG not required any more!!!
+/*	Cos and Sin constant multipliers used during DCT and IDCT */
+extern const double C1S7;
+extern const double C2S6;
+extern const double C3S5;
+extern const double C4S4;
+extern const double C5S3;
+extern const double C6S2;
+extern const double C7S1;
+
+// DCT lookup tables and pointers
+extern INT32 * C4S4_TablePtr;
+extern INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
+
+extern INT32 * C6S2_TablePtr;
+extern INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C2S6_TablePtr;
+extern INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C1S7_TablePtr;
+extern INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C7S1_TablePtr;
+extern INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C3S5_TablePtr;
+extern INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
+
+extern INT32 * C5S3_TablePtr;
+extern INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
+#endif
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+#ifdef COMPDLL
+// Forward Transform
+extern void fdct_slow ( INT32 *InputData, double *OutputData );
+#endif
+
+// Reverse Transform
+extern void IDctSlow(  INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct10  (  INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct1   (  INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h b/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
new file mode 100644
index 00000000..c218ac52
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
@@ -0,0 +1,11 @@
+#if !defined(_mac_specs_h)	/* include guard */
+#define _mac_specs_h
+#if defined(__cplusplus)
+extern "C" {	/* give the prototypes below C linkage when included from C++ */
+#endif
+int vputil_hasAltivec(void);	/* NOTE(review): presumably non-zero when AltiVec is available -- confirm against the implementation */
+int vputil_cpuMhz(void);	/* NOTE(review): presumably the CPU clock in MHz -- confirm against the implementation */
+#if defined(__cplusplus)
+}
+#endif	/* __cplusplus */
+#endif	/* _mac_specs_h */
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h b/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
new file mode 100644
index 00000000..f87983d9
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
@@ -0,0 +1,60 @@
+/****************************************************************************
+*
+*   Module Title :     Reconstruct.h
+*
+*   Description  :     Block Reconstruction module header
+*
+*   AUTHOR       :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.00 PGW 14/10/99  Created
+*
+*****************************************************************************
+*/
+
+#define STRICT              /* Strict type checking. */
+
+#ifndef RECONSTRUCT_H
+#define RECONSTRUCT_H
+
+#include "type_aliases.h"
+
+/****************************************************************************
+*  Constants
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Types
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*   Data structures
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Functions
+*****************************************************************************
+*/
+
+// Scalar (no mmx) reconstruction functions. NOTE(review): the *Intra prototypes declare ChangePtr as UINT16 * while the *Inter ones use INT16 * -- confirm this is intended.
+extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+// MMX versions (same parameter lists as the scalar functions above)
+extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+// WMT versions (same parameter lists as the scalar functions above)
+extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+
+
+#endif
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
new file mode 100644
index 00000000..ca32aed6
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
@@ -0,0 +1,388 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{F93716CE-8F89-4334-BE64-43705EF3FB70}</ProjectGuid>
+    <RootNamespace>vputil</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader />
+      <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation />
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>
+      </AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation />
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>
+      </AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\fdct.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\idctpart.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\reconstruct.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\uoptsystemdependant.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="generic\vputil.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\fdctmmx.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\fdctwmt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\filtmmx.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\filtwmt.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\mmxidct.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\mmxrecon.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\uoptsystemdependant.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\vputilasm.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\wmtidct.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="win32\wmtrecon.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
new file mode 100644
index 00000000..204b2144
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="generic">
+      <UniqueIdentifier>{f7966dc8-1d55-46a4-b0e6-8584774d721d}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="win32">
+      <UniqueIdentifier>{ad0ce32e-d033-416c-813e-7a7f913ac3fa}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="generic\fdct.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\idctpart.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\reconstruct.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\uoptsystemdependant.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="generic\vputil.c">
+      <Filter>generic</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\fdctmmx.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\fdctwmt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\filtmmx.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\filtwmt.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\mmxidct.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\mmxrecon.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\uoptsystemdependant.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\vputilasm.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\wmtidct.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+    <ClCompile Include="win32\wmtrecon.c">
+      <Filter>win32</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..df47f476
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/vputil.xcodeproj/project.pbxproj
@@ -0,0 +1,213 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0CAF34950BB78E9F000FB06C /* vputil.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34940BB78E9F000FB06C /* vputil.c */; };
+		0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A80BB78EDF000FB06C /* idctpart.c */; };
+		0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A90BB78EDF000FB06C /* fdct.c */; };
+		0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */; };
+		0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AB0BB78EDF000FB06C /* reconstruct.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0CAF34940BB78E9F000FB06C /* vputil.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vputil.c; path = generic/vputil.c; sourceTree = "<group>"; };
+		0CAF34A80BB78EDF000FB06C /* idctpart.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = idctpart.c; path = generic/idctpart.c; sourceTree = "<group>"; };
+		0CAF34A90BB78EDF000FB06C /* fdct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = fdct.c; path = generic/fdct.c; sourceTree = "<group>"; };
+		0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = uoptsystemdependant.c; path = generic/uoptsystemdependant.c; sourceTree = "<group>"; };
+		0CAF34AB0BB78EDF000FB06C /* reconstruct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = reconstruct.c; path = generic/reconstruct.c; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libvputil.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvputil.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* vputil */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = vputil;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				0CAF34940BB78E9F000FB06C /* vputil.c */,
+				0CAF34A80BB78EDF000FB06C /* idctpart.c */,
+				0CAF34A90BB78EDF000FB06C /* fdct.c */,
+				0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */,
+				0CAF34AB0BB78EDF000FB06C /* reconstruct.c */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libvputil.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		D2AAC045055464E500DB518D /* vputil */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = vputil;
+			productName = vputil;
+			productReference = D2AAC046055464E500DB518D /* libvputil.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* vputil */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* vputil */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0CAF34950BB78E9F000FB06C /* vputil.c in Sources */,
+				0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */,
+				0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */,
+				0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */,
+				0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = YES;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vputil;
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = (
+					ppc,
+					i386,
+				);
+				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+				GCC_MODEL_TUNING = G5;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = vputil;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
new file mode 100644
index 00000000..affb8497
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
@@ -0,0 +1,1002 @@
+;***********************************************************************
+;	File:			fdct_m.asm
+;
+;	Description:
+;					This function perform 2-D Forward DCT on a 8x8 block
+;					
+;
+;	Input:			Pointers to input source data buffer and destination 
+;					buffer.
+;
+;	Note:			none
+;
+;	Special Notes:	We try to do the truncation right to match the result 
+;					of the c version. 
+;
+;************************************************************************
+;	Revision History:
+;   
+;	1.00 YWX 08/05/00  Configuration Baseline 
+;
+
+ 
+        .586
+        .387
+        .MODEL  flat, SYSCALL, os_dos
+        .MMX
+;
+; macro functions
+;
+Fdct MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+    ; forward DCT pass: reads ip0..ip7 (packed 16-bit words), then overwrites them with the results; uses mm0-mm7 and scratch TID07/TID34/TIRX/TIRY/TIC1/TIC2
+    
+	
+	movq        mm0,ip0             ; mm0 = ip0
+    movq        mm1,ip1             ; mm1 = ip1
+    movq        mm2,ip3             ; mm2 = ip3
+    movq        mm3,ip5             ; mm3 = ip5
+    movq        mm4,mm1             ; mm4 = ip1
+    movq        mm5,mm3             ; mm5 = ip5
+    movq        mm6,mm0             ; mm6 = ip0
+    movq        mm7,mm2             ; mm7 = ip3
+
+    paddsw      mm0,ip7             ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,ip2             ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,ip4             ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,ip6             ; mm3 = ip5 + ip6 = is56
+    psubsw      mm6,ip7             ; mm6 = ip0 - ip7 = id07
+    psubsw      mm7,ip4             ; mm7 = ip3 - ip4 = id34
+    psubsw      mm4,ip2             ; mm4 = ip1 - ip2 = id12
+    psubsw      mm5,ip6             ; mm5 = ip5 - ip6 = id56
+
+    movq        TID07,mm6           ; save id07
+    movq        TID34,mm7           ; save id34
+
+    ; free = mm6, mm7
+
+    movq        mm6,mm4             ; mm6 = id12
+    psubsw      mm4,mm5             ; mm4 = id12 - id56 = irot_input_x
+
+	movq        TIRX,mm4            ; save irot_input_x    
+    paddsw      mm6,mm5             ; mm6 = id12 + id56
+	movq		mm5,mm6				; mm5 = copy of (id12 + id56)
+
+    pmulhw      mm6,xC4S4           ; (xC4S4 * (id12 + id56)) - (id12 + id56) 
+	paddw		mm6,mm5				; (xC4S4 * (id12 + id56))
+	psrlw		mm5,15				; mm5 = sign bit of (id12 + id56) in each word
+
+	paddw		mm6,mm5;			; add sign bit (truncation fix-up, see Special Notes in file header)
+	
+
+    ; free = mm4 ,mm5, mm7
+
+    movq        mm4,mm0             ; mm4 = is07
+    psubsw      mm0,mm2             ; mm0 = is07 - is34 = irot_input_y
+
+    movq        TIRY,mm0            ; save irot_input_y
+
+    ; free = mm0, mm5, mm7
+
+    movq        mm0,mm1             ; mm0 = is12
+    psubsw      mm1,mm3             ; mm1 = is12 - is56
+
+    movq        TIC2,mm6            ; save icommon_product2
+	movq		mm7, mm1
+
+    pmulhw      mm1,xC4S4           ; mm1 = (xC4S4 * (is12 - is56)) - (is12 - is56)
+	paddw		mm1, mm7			; mm1 = (xC4S4 * (is12 - is56))	
+	psrlw		mm7, 15				; mm7 = sign bit of (is12 - is56) in each word
+	
+	paddw		mm1, mm7
+    movq        TIC1,mm1            ; save icommon_product1
+
+    ; free = mm1, mm5, mm6, mm7
+
+    paddsw      mm4,mm2             ; mm4 = is07 + is34 = is0734
+    paddsw      mm0,mm3             ; mm0 = is12 + is56 = is1256
+    movq        mm1,mm4             ; mm1 = is07 + is34 = is0734
+
+    paddsw      mm4,mm0             ; mm4 = is0734 + is1256
+    psubsw      mm1,mm0             ; mm1 = is0734 - is1256
+
+	movq		mm7,mm4
+	movq		mm6,mm1
+
+    pmulhw      mm4,xC4S4           ; mm4 = (xC4S4 * (is0734 + is1256)) - (is0734 + is1256)
+    pmulhw      mm1,xC4S4           ; mm1 = (xC4S4 * (is0734 - is1256)) - (is0734 - is1256)
+	paddw		mm4,mm7				; mm4 = (xC4S4 * (is0734 + is1256))
+	paddw       mm1,mm6				; mm1 = (xC4S4 * (is0734 - is1256))
+
+	psrlw		mm7, 15
+	psrlw		mm6, 15
+
+	paddw		mm4, mm7
+    movq        ip0,mm4             ; write out ip0
+
+	paddw		mm1, mm6
+    movq        ip4,mm1             ; write out ip4
+
+    ; free = mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
+
+    movq        mm0,TIRY            ; mm0 = irot_input_y
+    movq        mm1,TIRX            ; mm1 = irot_input_x
+
+    movq        mm2,mm0             ; mm2 = irot_input_y
+	movq		mm3,mm1				; mm3 = irot_input_x
+
+	movq		mm4,mm0				; mm4 = irot_input_y (shifted to its sign bit below)
+	movq		mm5,mm1				; mm5 = irot_input_x (shifted to its sign bit below)
+	
+	movq		mm6,xC2S6			; mm6 = xC2S6
+	movq		mm7,xC6s2			; mm7 = xC6S2 -- NOTE(review): spelled xC6s2 here, relies on case-insensitive symbols; confirm
+
+    pmulhw      mm0,mm6             ; mm0 = xC2S6*irot_input_y - irot_input_y
+    pmulhw      mm3,mm6             ; mm3 = xC2S6*irot_input_x - irot_input_x
+	psrlw		mm4, 15
+	psrlw		mm5, 15
+	paddw		mm0,mm2				; mm0 = xC2S6*irot_input_y
+	paddw		mm3,mm1				; mm3 = xC2S6*irot_input_x
+	paddw		mm0,mm4
+	paddw		mm3,mm5;
+
+	pmulhw      mm1,mm7             ; mm1 = xC6S2*irot_input_x
+    pmulhw      mm2,mm7             ; mm2 = xC6S2*irot_input_y 
+
+	paddw		mm1,mm5				; add sign bit of irot_input_x
+	paddw		mm2,mm4				; add sign bit of irot_input_y
+    
+    paddsw      mm0,mm1             ; mm0 = xC2S6(irot_input_y * 2) + xC6S2(irot_input_x * 2) = ip2
+    psubsw      mm2,mm3             ; mm2 = xC6S2(irot_input_y * 2) - xC2S6(irot_input_x * 2) = ip6
+
+    movq        ip2,mm0             ; write out ip2
+    movq        ip6,mm2             ; write out ip6
+
+    ;
+
+    movq        mm6,TIC1            ; mm6 = icommon_product1
+    movq        mm4,TID07           ; mm4 = id07
+
+    movq        mm5,TID34           ; mm5 = id34
+    movq        mm7,TIC2            ; mm7 = icommon_product2
+
+    movq        mm1,mm6             ; mm1 = icommon_product1
+    movq        mm3,mm7             ; mm3 = icommon_product2
+
+    pxor        mm0,mm0             ; clear mm0
+    paddsw      mm7,mm5             ; mm7 = icommon_product2 + id34
+    
+	paddsw      mm6,mm4             ; mm6 = icommon_product1 + id07 = irot_input_x
+    psubsw      mm0,mm7             ; mm0 = -(icommon_product2 + id34) = irot_input_y
+
+
+    ; free = mm2, mm7, mm4, mm5;
+
+    movq        mm2,mm6             ; mm2 = irot_input_x 
+    movq        mm7,mm0             ; mm7 = irot_input_y 
+
+	movq		mm4,mm6;
+	movq		mm5,mm0;			
+
+    pmulhw      mm6,xC1S7           ; mm6 = xC1S7*irot_input_x -irot_input_x
+	psrlw		mm4,15;
+
+	psrlw		mm5,15;			
+   	pmulhw      mm7,xC1S7           ; mm7 = xC1S7*irot_input_y -irot_input_y
+
+	paddw		mm6,mm2				; mm6 = xC1S7*irot_input_x 
+	paddw		mm7,mm0				; mm7 = xC1S7*irot_input_y 
+
+    pmulhw      mm0,xC7S1           ; mm0 = xC7S1*irot_input_y 
+	paddw		mm6,mm4				; add sign bit of irot_input_x
+
+	paddw		mm7,mm5				; add sign bit of irot_input_y
+    pmulhw      mm2,xC7S1           ; mm2 = xC7S1*irot_input_x 
+
+	paddw		mm0,mm5				; add sign bit of irot_input_y
+	paddw		mm2,mm4				; add sign bit of irot_input_x
+
+    psubsw      mm6,mm0             ; mm6 = xC1S7*irot_input_x - xC7S1*irot_input_y = ip1
+    paddsw      mm2,mm7             ; mm2 = xC7S1*irot_input_x + xC1S7*irot_input_y = ip7
+
+    movq        ip1,mm6             ; write out ip1
+
+    movq        mm4,TID07           ; mm4 = id07
+    movq        mm5,TID34           ; mm5 = id34
+
+    movq        ip7,mm2             ; write out ip7
+
+
+    psubsw      mm4,mm1             ; mm4 = id07 - icommon_product1 = irot_input_x
+    psubsw      mm5,mm3             ; mm5 = id34 - icommon_product2 = irot_input_y
+
+    movq        mm6,mm4             ; mm6 = irot_input_x 
+	movq		mm0,mm4				; mm0 = irot_input_x 
+
+    movq        mm7,mm5             ; mm7 = irot_input_y 
+	movq		mm2,mm5				; mm2 = irot_input_y 
+
+	movq		mm1,xC3S5
+	movq		mm3,xC5S3
+
+    pmulhw      mm4,mm1             ; mm4 = xC3S5*irot_input_x - irot_input_x
+    pmulhw      mm6,mm3             ; mm6 = xC5S3*irot_input_x - irot_input_x
+	pmulhw      mm5,mm3             ; mm5 = xC5S3*irot_input_y - irot_input_y
+    pmulhw      mm7,mm1             ; mm7 = xC3S5*irot_input_y - irot_input_y
+
+	paddw		mm4, mm0			; mm4 = xC3S5*irot_input_x
+	paddw       mm6, mm0			; mm6 = xC5S3*irot_input_x
+	paddw		mm5, mm2			; mm5 = xC5S3*irot_input_y
+	paddw		mm7, mm2			; mm7 = xC3S5*irot_input_y
+
+	
+	psrlw		mm0, 15				; mm0 = sign bit of irot_input_x in each word
+	psrlw		mm2, 15				; mm2 = sign bit of irot_input_y in each word
+	
+	paddw		mm4, mm0			; add sign bit of irot_input_x
+	paddw		mm6, mm0			; add sign bit of irot_input_x
+	paddw		mm5, mm2			; add sign bit of irot_input_y
+	paddw		mm7, mm2			; add sign bit of irot_input_y
+
+    psubsw      mm4,mm5             ; mm4 = xC3S5*irot_input_x - xC5S3*irot_input_y  = ip3
+    paddsw      mm6,mm7             ; mm6 = xC5S3*irot_input_x + xC3S5*irot_input_y  = ip5
+
+    movq        ip3,mm4             ; write out ip3
+    movq        ip5,mm6             ; write out ip5
+
+
+ENDM
+
+Fdct_new MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7
+    ; forward DCT pass: reads ip0..ip7 (packed 16-bit words), then overwrites them with the results; uses mm0-mm7 and scratch TIRY
+    
+	
+	movq        mm0,ip0             ; mm0 = ip0
+    movq        mm1,ip1             ; mm1 = ip1
+    movq        mm2,ip3             ; mm2 = ip3
+    movq        mm3,ip5             ; mm3 = ip5
+	movq        mm4,ip0             ; mm4 = ip0
+    movq        mm5,ip1             ; mm5 = ip1
+    movq        mm6,ip3             ; mm6 = ip3
+    movq        mm7,ip5             ; mm7 = ip5
+
+
+    paddsw      mm0,ip7             ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,ip2             ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,ip4             ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,ip6             ; mm3 = ip5 + ip6 = is56
+    psubsw      mm4,ip7             ; mm4 = ip0 - ip7 = id07
+    psubsw      mm5,ip2             ; mm5 = ip1 - ip2 = id12
+
+	 psubsw		mm0,mm2				; mm0 = is07 - is34
+
+	 paddsw		mm2,mm2				
+
+    psubsw      mm6,ip4             ; mm6 = ip3 - ip4 = id34
+	 
+     paddsw		mm2,mm0				; mm2 = is07 + is34 = is0734
+	 psubsw		mm1,mm3				; mm1 = is12 - is56
+	 movq		TIRY,mm0			; Save is07 - is34 to free mm0;
+	 paddsw		mm3,mm3				
+     paddsw		mm3,mm1				; mm3 = is12 + is56	= is1256
+
+    psubsw      mm7,ip6             ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+;
+
+	psubsw		mm5,mm7				; mm5 = id12 - id56
+	paddsw		mm7,mm7				
+	paddsw		mm7,mm5				; mm7 = id12 + id56
+
+									; mm4 = id07
+									
+									; mm6 = id34	
+;---------------------------------------------------------------------
+; ip[0], ip[4]
+;	mm0			Free
+;	mm2			is0734
+;	mm3			is1256
+	
+
+	psubsw		mm2,mm3				; mm2 = is0734 - is1256
+	paddsw		mm3,mm3				
+
+	movq		mm0,mm2				; make a copy 
+	paddsw		mm3,mm2				; mm3 = is0734 + is1256
+
+	pmulhw		mm0,xC4S4			; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+	paddw		mm0,mm2				; mm0 = xC4S4 * ( is0734 - is1256 )
+	psrlw		mm2,15				; mm2 = sign bit of ( is0734 - is1256 ) in each word
+	paddw		mm0,mm2				; Truncate mm0, now it is op[4]
+
+	movq		mm2,mm3				; mm2 = is0734 + is1256 (shifted to its sign bit below)
+	movq		ip4,mm0				; save ip4, now mm0,mm2 are free
+
+	movq		mm0,mm3				; mm0 = is0734 + is1256
+	pmulhw		mm3,xC4S4			; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+	psrlw		mm2,15				; mm2 = sign bit of ( is0734 + is1256 ) in each word
+	paddw		mm3,mm0				; mm3 = xC4S4 * ( is0734 +is1256 )	
+	paddw		mm3,mm2				; Truncate mm3, now it is op[0]
+
+	movq		ip0,mm3				; save ip0
+
+;----------------------------------------------------------------------
+; ip[2], ip[6]
+;	mm0			Free
+;	mm2			Free
+;	mm3			Free
+;	mm5			id12 - id56			irot_input_x
+;	TIRY		is07 - is34			irot_input_y
+
+	movq		mm3,TIRY			; mm3 = irot_input_y
+	pmulhw		mm3,xC2S6			; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+	movq		mm2,TIRY			; mm2 = irot_input_y
+	movq		mm0,mm2				; mm0 = irot_input_y
+	
+	psrlw		mm2,15				; mm2 = sign bit of irot_input_y in each word
+	paddw		mm3,mm0				; mm3 = xC2S6 * irot_input_y
+	
+	paddw       mm3,mm2				; Truncated
+	movq		mm0, mm5;			; mm0 = irot_input_x
+
+
+	movq		mm2, mm5;
+	pmulhw		mm0, xC6S2			; mm0 = xC6S2 * irot_input_x
+
+	psrlw		mm2, 15			
+	paddw		mm0, mm2			; Truncated
+
+	paddsw		mm3, mm0			; ip[2]
+	movq		ip2, mm3			; Save ip2
+
+
+	movq		mm0, mm5			; mm0 = irot_input_x
+	movq		mm2, mm5			; mm2 = irot_input_x (shifted to its sign bit below)
+	
+	pmulhw		mm5, xC2S6			; mm5 = xC2S6 * irot_input_x - irot_input_x
+	psrlw		mm2, 15				; mm2 = sign bit of irot_input_x in each word
+
+	movq		mm3, TIRY			; mm3 = irot_input_y
+	paddw		mm5, mm0		    ; mm5 = xC2S6 * irot_input_x
+
+	paddw		mm5, mm2			; Truncated
+	movq		mm2, mm3			
+	
+	pmulhw		mm3, xC6S2			; mm3 = xC6S2 * irot_input_y
+	psrlw		mm2, 15
+
+	paddw		mm3, mm2			; Truncated
+	psubsw		mm3, mm5			; mm3 = xC6S2 * irot_input_y - xC2S6 * irot_input_x = ip[6]
+
+	movq		ip6, mm3			; Save ip6
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			is12 - is56
+;	mm7			id12 + id56
+
+	movq		mm0, xC4S4
+	movq		mm2, mm1
+	movq		mm3, mm1
+
+	pmulhw		mm1, mm0			; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+	psrlw		mm2, 15				
+
+	paddw		mm1, mm3			; mm1 = xC4S4 * ( is12 - is56 )
+	paddw		mm1, mm2			; Truncate mm1, now it is icommon_product1
+
+	movq		mm2, mm7
+	movq		mm3, mm7			
+
+	pmulhw		mm7, mm0			; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+	psrlw		mm2, 15			
+
+	paddw		mm7, mm3			; mm7 = xC4S4 * ( id12 + id56 )
+	paddw		mm7, mm2			; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			icommon_product1
+;	mm7			icommon_product2
+;   mm4			id07
+;	mm6			id34
+
+	
+	pxor		mm0, mm0			; Clear mm0
+	psubsw		mm0, mm6			; mm0 = - id34
+
+	psubsw		mm0, mm7			; mm0 = - ( id34 + icommon_product2 )
+	paddsw		mm6, mm6			; mm6 = 2 * id34 (saturating)
+	paddsw		mm6, mm0			; mm6 = id34 - icommon_product2
+
+	psubsw		mm4, mm1			; mm4 = id07 - icommon_product1
+	paddsw		mm1, mm1			; mm1 = 2 * icommon_product1 (saturating)
+	paddsw		mm1, mm4			; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; ip1, ip7
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			irot_input_x
+;	mm0			irot_input_y
+
+	movq		mm7, xC1S7
+	movq		mm2, mm1
+
+	movq		mm3, mm1;
+	pmulhw		mm1, mm7			; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+	movq		mm7, xC7S1			; mm7 = xC7S1
+	psrlw		mm2, 15				
+	
+	paddw		mm1, mm3			; mm1 = xC1S7 * irot_input_x
+	paddw		mm1, mm2			; Truncated
+
+	pmulhw		mm3, mm7			; mm3 = xC7S1 * irot_input_x
+	paddw		mm3, mm2			; Truncated
+
+	movq		mm5, mm0			
+	movq	    mm2, mm0
+
+	movq		mm7, xC1S7			
+	pmulhw		mm0, mm7			; mm0 = xC1S7 * irot_input_y - irot_input_y
+	
+	movq		mm7, xC7S1			
+	psrlw		mm2, 15			
+	
+	paddw		mm0, mm5			; mm0 = xC1S7 * irot_input_y
+	paddw		mm0, mm2			; Truncated
+
+	pmulhw		mm5, mm7			; mm5 = xC7S1 * irot_input_y
+	paddw		mm5, mm2			; Truncated
+
+	psubsw		mm1, mm5			; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1
+	paddsw		mm3, mm0			; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7
+	
+	movq		ip1, mm1
+	movq		ip7, mm3
+;-----------------------------------------------------------------------------
+; ip3, ip5
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			Free
+;	mm0			Free
+;   mm4         id07 - icommon_product1 = irot_input_x
+;   mm6			id34 - icommon_product2 = irot_input_y
+
+	movq		mm0, xC3S5
+	movq		mm1, xC5S3
+
+	movq		mm5, mm6
+	movq		mm7, mm6
+
+	movq		mm2, mm4
+	movq		mm3, mm4
+
+	pmulhw		mm4, mm0			; mm4 = xC3S5 * irot_input_x - irot_input_x
+	pmulhw		mm6, mm1			; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+	psrlw		mm2, 15
+	psrlw		mm5, 15
+
+	paddw		mm4, mm3			; mm4 = xC3S5 * irot_input_x
+	paddw		mm6, mm7			; mm6 = xC5S3 * irot_input_y
+
+	paddw		mm4, mm2			; Truncated
+	paddw		mm6, mm5			; Truncated
+
+	psubsw		mm4, mm6			; ip3
+	movq		ip3, mm4			; save ip3
+
+	movq		mm4, mm3			; mm4 = irot_input_x
+	movq		mm6, mm7			; mm6 = irot_input_y
+
+	pmulhw		mm3, mm1			; mm3 = xC5S3 * irot_input_x - irot_input_x
+	pmulhw		mm7, mm0			; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+	paddw		mm4, mm2
+	paddw		mm6, mm5
+
+	paddw		mm3, mm4			; mm3 = xC5S3 * irot_input_x
+	paddw		mm7, mm6			; mm7 = xC3S5 * irot_input_y
+
+	paddw		mm3, mm7			; ip5 -- NOTE(review): wrapping paddw here, Fdct uses saturating paddsw for ip5; confirm intended
+	movq		ip5, mm3			; save ip5
+
+ENDM
+
+Transpose MACRO ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7,
+                op0, op1, op2, op3, op4, op5, op6, op7
+    movq        mm0,ip0             ; mm0 = a3 a2 a1 a0
+    movq        mm4,ip4             ; mm4 = e3 e2 e1 e0
+    movq        mm1,ip1             ; mm1 = b3 b2 b1 b0
+    movq        mm5,ip5             ; mm5 = f3 f2 f1 f0
+    movq        mm2,ip2             ; mm2 = c3 c2 c1 c0
+    movq        mm6,ip6             ; mm6 = g3 g2 g1 g0
+    movq        mm3,ip3             ; mm3 = d3 d2 d1 d0
+    movq        op1,mm1             ; park b in op1 to free mm1; stored before ip7 is read, so op1 must not alias ip7
+    movq        mm7,ip7             ; mm7 = h3 h2 h1 h0
+    ; Two independent 4x4 word transposes: (ip4..ip7) -> (op4..op7) first, then (ip0..ip3) -> (op0..op3).
+    ; Letters a..h name the input qwords ip0..ip7; digits are word lanes, lane 0 being the least significant.
+    movq		mm1, mm4            ; mm1 = e3 e2 e1 e0
+	 punpcklwd	mm4, mm5            ; mm4 = f1 e1 f0 e0
+	movq		op0, mm0            ; park a3 a2 a1 a0 in op0 to free mm0
+	 punpckhwd	mm1, mm5            ; mm1 = f3 e3 f2 e2
+	movq		mm0, mm6            ; mm0 = g3 g2 g1 g0
+	 punpcklwd	mm6, mm7            ; mm6 = h1 g1 h0 g0
+	movq		mm5, mm4            ; mm5 = f1 e1 f0 e0
+	 punpckldq	mm4, mm6            ; mm4 = h0 g0 f0 e0 -> op4
+	punpckhdq	mm5, mm6            ; mm5 = h1 g1 f1 e1 -> op5
+	 movq		mm6, mm1            ; mm6 = f3 e3 f2 e2
+	movq		op4, mm4            ; store h0 g0 f0 e0
+	 punpckhwd	mm0, mm7            ; mm0 = h3 g3 h2 g2
+	movq		op5, mm5            ; store h1 g1 f1 e1
+	 punpckhdq	mm6, mm0            ; mm6 = h3 g3 f3 e3 -> op7
+	movq		mm4, op0            ; reload a3 a2 a1 a0 parked in op0
+	 punpckldq	mm1, mm0            ; mm1 = h2 g2 f2 e2 -> op6
+	movq		mm5, op1            ; reload b3 b2 b1 b0 parked in op1
+	 movq		mm0, mm4            ; mm0 = a3 a2 a1 a0
+	movq		op7, mm6            ; store h3 g3 f3 e3
+	 punpcklwd	mm0, mm5            ; mm0 = b1 a1 b0 a0
+	movq		op6, mm1            ; store h2 g2 f2 e2
+	 punpckhwd	mm4, mm5            ; mm4 = b3 a3 b2 a2
+	movq		mm5, mm2            ; mm5 = c3 c2 c1 c0
+	 punpcklwd	mm2, mm3            ; mm2 = d1 c1 d0 c0
+	movq		mm1, mm0            ; mm1 = b1 a1 b0 a0
+	 punpckldq	mm0, mm2            ; mm0 = d0 c0 b0 a0 -> op0
+	punpckhdq	mm1, mm2            ; mm1 = d1 c1 b1 a1 -> op1
+	 movq		mm2, mm4            ; mm2 = b3 a3 b2 a2
+	movq		op0, mm0            ; store d0 c0 b0 a0 (overwrites the parked copy of a)
+	 punpckhwd	mm5, mm3            ; mm5 = d3 c3 d2 c2
+	movq		op1, mm1            ; store d1 c1 b1 a1 (overwrites the parked copy of b)
+	 punpckhdq	mm4, mm5            ; mm4 = d3 c3 b3 a3 -> op3
+	punpckldq	mm2, mm5            ; mm2 = d2 c2 b2 a2 -> op2
+	movq		op3, mm4            ; store d3 c3 b3 a3
+	movq		op2, mm2	 ; store d2 c2 b2 a2
+ENDM
+
+;------------------------------------------------
+fdctParams  STRUC                   ;overlay applied at [esp] once the prologue has run
+                    dd  6 dup (?)   ;6 pushed regs (esi, edi, ebp, ebx, ecx, edx); must match the prologue's push count
+                    dd  ?           ;return address
+    InputPtr        dd  ?           ;first stack argument  (INT16 * InputData)
+    OutputPtr       dd  ?           ;second stack argument (INT16 * OutputData)
+fdctParams  ENDS
+;------------------------------------------------
+
+
+
+        .DATA
+TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA' 
+; Each xC* constant below is round(65536 * cos(k*pi/16)) replicated into all four 16-bit lanes of a qword.
+        ALIGN 32
+; pmulhw is a signed multiply, so a constant >= 8000h acts as (c - 65536); the code adds the multiplicand back afterwards.
+xC1S7  QWORD   0fb15fb15fb15fb15h   ; 64277, k = 1 (>= 8000h: add-back needed)
+xC2S6  QWORD   0ec83ec83ec83ec83h   ; 60547, k = 2 (>= 8000h: add-back needed)
+xC3S5  QWORD   0d4dbd4dbd4dbd4dbh   ; 54491, k = 3 (>= 8000h: add-back needed)
+xC4S4  QWORD   0b505b505b505b505h   ; 46341, k = 4 (>= 8000h: add-back needed)
+xC5S3  QWORD   08e3a8e3a8e3a8e3ah   ; 36410, k = 5 (>= 8000h: add-back needed)
+xC6S2  QWORD   061f861f861f861f8h   ; 25080, k = 6 (<  8000h: used as-is)
+xC7S1  QWORD   031f131f131f131f1h   ; 12785, k = 7 (<  8000h: used as-is)
+TIRX   QWORD   00000000000000000h   ; scratch qword; not referenced by FDct1D4Mmx - NOTE(review): check the Fdct_new macro
+TIRY   QWORD   00000000000000000h   ; scratch qword; FDct1D4Mmx parks is07 - is34 here (shared by every call)
+TIC1   QWORD   00000000000000000h   ; scratch qword; not referenced by FDct1D4Mmx - NOTE(review): check the Fdct_new macro
+TIC2   QWORD   00000000000000000h   ; scratch qword; not referenced by FDct1D4Mmx - NOTE(review): check the Fdct_new macro
+TID07  QWORD   00000000000000000h   ; scratch qword; not referenced by FDct1D4Mmx - NOTE(review): check the Fdct_new macro
+TID34  QWORD   00000000000000000h   ; scratch qword; not referenced by FDct1D4Mmx - NOTE(review): check the Fdct_new macro
+
+; data goes here
+
+        .CODE
+
+NAME fdct
+
+PUBLIC fdct_MMX_
+PUBLIC _fdct_MMX
+ 
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+
+;------------------------------------------------
+; void fdct_MMX ( INT16 * InputData, INT16 * OutputData )
+; Forward DCT of an 8x8 block of INT16: four Transpose + Fdct_new macro passes, reading InputData and writing OutputData.
+fdct_MMX_:
+_fdct_MMX:
+    push    esi                     ; six saves: the count must match the 6-dword pad in fdctParams
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;
+; ESP = Stack Pointer                      MM0 = Free
+; ESI = Free                               MM1 = Free
+; EDI = Free                               MM2 = Free
+; EBP = Free                               MM3 = Free
+; EBX = Free                               MM4 = Free
+; ECX = Free                               MM5 = Free
+; EDX = Free                               MM6 = Free
+; EAX = Free                               MM7 = Free
+;
+
+    mov         eax,(fdctParams PTR [esp]).InputPtr             ; load pointer to input data
+    mov         edx,(fdctParams PTR [esp]).OutputPtr            ; load pointer to output data
+    
+    ;
+    ; Input data is an 8x8 block of 16-bit words (128 bytes, 16 bytes per row).  Each Transpose
+    ; call transposes two 4x4 sub-blocks so Fdct_new can then work on four lanes per MMX register.
+    ;
+    ; Pass 1a: input rows 0-3 (low qwords at +0/16/32/48, high qwords at +8/24/40/56) -> output, then transformed in place.
+    Transpose [eax], [eax+16], [eax+32], [eax+48], [eax+8], [eax+24], [eax+40], [eax+56], [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]
+	Fdct_new [edx], [edx+16], [edx+32], [edx+48], [edx+8], [edx+24], [edx+40], [edx+56]	
+    ; Pass 1b: the same for input rows 4-7 (byte offsets 64-127).
+    Transpose [eax+64], [eax+80], [eax+96], [eax+112], [eax+72], [eax+88], [eax+104], [eax+120], [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+    Fdct_new [edx+64], [edx+80], [edx+96], [edx+112], [edx+72], [edx+88], [edx+104], [edx+120]
+    ; Pass 2a: in place on the output, low qword of every row (offsets 0, 16, ..., 112).
+    Transpose [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112], [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112] 
+    Fdct_new [edx+0], [edx+16], [edx+32], [edx+48], [edx+64], [edx+80], [edx+96], [edx+112]
+    ; Pass 2b: in place on the output, high qword of every row (offsets 8, 24, ..., 120).
+    Transpose [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120], [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+    Fdct_new [edx+8], [edx+24], [edx+40], [edx+56], [edx+72], [edx+88], [edx+104], [edx+120]
+    
+    
+theExit:                            ; nothing between the entry labels and ret jumps here
+
+    emms                            ; clear the MMX state before returning to the caller
+
+    pop     edx                     ; restore in reverse push order
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+
+    ret
+
+
+NAME FDct1D4Mmx
+
+PUBLIC FDct1D4Mmx_
+PUBLIC _FDct1D4Mmx
+ 
+; includes go here
+
+
+;------------------------------------------------
+; local vars
+LOCAL_SPACE     EQU 0
+
+;------------------------------------------------
+; void FDct1D4Mmx ( INT16 * InputData, INT16 * OutputData )
+; One 1-D forward DCT over 8 consecutive qwords (ip[k] at InputData + 8*k bytes, 4 word lanes each); op[k] goes to OutputData + 8*k.
+FDct1D4Mmx_:
+_FDct1D4Mmx:
+    push    esi                     ; six saves: the count must match the 6-dword pad in fdctParams
+    push    edi
+    push    ebp
+    push    ebx 
+    push    ecx
+    push    edx
+
+;
+; ESP = Stack Pointer                      MM0 = Free
+; ESI = Free                               MM1 = Free
+; EDI = Free                               MM2 = Free
+; EBP = Free                               MM3 = Free
+; EBX = Free                               MM4 = Free
+; ECX = Free                               MM5 = Free
+; EDX = Free                               MM6 = Free
+; EAX = Free                               MM7 = Free
+;
+
+    mov         eax,(fdctParams PTR [esp]).InputPtr             ; load pointer to input data
+    mov         edx,(fdctParams PTR [esp]).OutputPtr            ; load pointer to output data
+
+
+	movq        mm0,[eax]           ; mm0 = ip0
+    movq        mm1,[eax + 8]       ; mm1 = ip1
+    movq        mm2,[eax + 24]      ; mm2 = ip3
+    movq        mm3,[eax + 40]      ; mm3 = ip5
+	movq        mm4,[eax]           ; mm4 = ip0 (second copy, for the differences)
+    movq        mm5,[eax + 8]       ; mm5 = ip1 (second copy, for the differences)
+    movq        mm6,[eax + 24]      ; mm6 = ip3 (second copy, for the differences)
+    movq        mm7,[eax + 40]      ; mm7 = ip5 (second copy, for the differences)
+
+
+    paddsw      mm0,[eax + 56]      ; mm0 = ip0 + ip7 = is07
+    paddsw      mm1,[eax + 16]      ; mm1 = ip1 + ip2 = is12
+    paddsw      mm2,[eax + 32]      ; mm2 = ip3 + ip4 = is34
+    paddsw      mm3,[eax + 48]      ; mm3 = ip5 + ip6 = is56
+    psubsw      mm4,[eax + 56]      ; mm4 = ip0 - ip7 = id07
+    psubsw      mm5,[eax + 16]      ; mm5 = ip1 - ip2 = id12
+
+	 psubsw		mm0,mm2				; mm0 = is07 - is34
+
+	 paddsw		mm2,mm2				; mm2 = is34 * 2
+
+     psubsw      mm6,[eax + 32]     ; mm6 = ip3 - ip4 = id34
+	 
+     paddsw		mm2,mm0				; mm2 = is07 + is34 = is0734
+	 psubsw		mm1,mm3				; mm1 = is12 - is56
+	 movq		TIRY,mm0			; Save is07 - is34 to free mm0 (TIRY is a global, so it is shared by every call)
+	 paddsw		mm3,mm3				; mm3 = is56 * 2
+     paddsw		mm3,mm1				; mm3 = is12 + is56	= is1256
+
+	 psubsw      mm7,[eax + 48]     ; mm7 = ip5 - ip6 = id56
+
+;--------------------------------------------------------------------
+; Combine the differences: id12 - id56 and id12 + id56
+
+	psubsw		mm5,mm7				; mm5 = id12 - id56
+	paddsw		mm7,mm7				; mm7 = id56 * 2
+	paddsw		mm7,mm5				; mm7 = id12 + id56
+
+									; mm4 = id07
+									
+									; mm6 = id34	
+;---------------------------------------------------------------------
+; op[0], op[4]
+;	mm0			Free
+;	mm2			is0734
+;	mm3			is1256
+	
+
+	psubsw		mm2,mm3				; mm2 = is0734 - is1256
+	paddsw		mm3,mm3				; mm3 = is1256 * 2
+
+	movq		mm0,mm2				; make a copy 
+	paddsw		mm3,mm2				; mm3 = is0734 + is1256
+
+	pmulhw		mm0,xC4S4			; mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 )
+	paddw		mm0,mm2				; mm0 = xC4S4 * ( is0734 - is1256 )
+	psrlw		mm2,15				; mm2 = 1 in each lane whose input word was negative, else 0
+	paddw		mm0,mm2				; Truncate mm0, now it is op[4]
+
+	movq		mm2,mm3				; mm2 = is0734 + is1256 (becomes its sign bit below)
+	movq		[edx + 32],mm0		; save op4, now mm0,mm2 are free
+
+	movq		mm0,mm3				; mm0 = is0734 + is1256 (kept for the add-back)
+	pmulhw		mm3,xC4S4			; mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 )
+
+	psrlw		mm2,15				; mm2 = 1 in each lane whose input word was negative, else 0
+	paddw		mm3,mm0				; mm3 = xC4S4 * ( is0734 +is1256 )	
+	paddw		mm3,mm2				; Truncate mm3, now it is op[0]
+
+	movq		[edx],mm3				; save op[0]
+
+;----------------------------------------------------------------------
+; op[2], op[6]
+;	mm0			Free
+;	mm2			Free
+;	mm3			Free
+;	mm5			id12 - id56			irot_input_x
+;	TIRY		is07 - is34			irot_input_y
+
+	movq		mm3,TIRY			; mm3 = irot_input_y
+	pmulhw		mm3,xC2S6			; mm3 = xC2S6 * irot_input_y - irot_input_y
+
+	movq		mm2,TIRY			; mm2 = irot_input_y
+	movq		mm0,mm2				; mm0 = irot_input_y (kept for the add-back)
+	
+	psrlw		mm2,15				; mm2 = 1 in each lane where irot_input_y is negative, else 0
+	paddw		mm3,mm0				; mm3 = xC2S6 * irot_input_y
+	
+	paddw       mm3,mm2				; Truncated
+	movq		mm0, mm5;			; mm0 = irot_input_x
+
+
+	movq		mm2, mm5; mm2 = irot_input_x (becomes its sign bit below)
+	pmulhw		mm0, xC6S2			; mm0 = xC6S2 * irot_input_x
+
+	psrlw		mm2, 15			; mm2 = 1 in each lane where irot_input_x is negative, else 0
+	paddw		mm0, mm2			; Truncated
+
+	paddsw		mm3, mm0			; op[2] = xC2S6 * irot_input_y + xC6S2 * irot_input_x
+	movq		[edx + 16], mm3			; Save op[2]
+
+
+	movq		mm0, mm5			; mm0 = irot_input_x (kept for the add-back)
+	movq		mm2, mm5			; mm2 = irot_input_x (becomes its sign bit below)
+	
+	pmulhw		mm5, xC2S6			; mm5 = xC2S6 * irot_input_x - irot_input_x
+	psrlw		mm2, 15				; mm2 = 1 in each lane where irot_input_x is negative, else 0
+
+	movq		mm3, TIRY			; mm3 = irot_input_y
+	paddw		mm5, mm0		    ; mm5 = xC2S6 * irot_input_x
+
+	paddw		mm5, mm2			; Truncated
+	movq		mm2, mm3			; mm2 = irot_input_y (becomes its sign bit below)
+	
+	pmulhw		mm3, xC6S2			; mm3 = xC6S2 * irot_input_y
+	psrlw		mm2, 15
+
+	paddw		mm3, mm2			; Truncated
+	psubsw		mm3, mm5			; op[6] = xC6S2 * irot_input_y - xC2S6 * irot_input_x
+
+	movq		[edx + 48], mm3			; Save op[6]
+
+
+
+;-----------------------------------------------------------------------
+; icommon_product1, icommon_product2
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			is12 - is56
+;	mm7			id12 + id56
+
+	movq		mm0, xC4S4
+	movq		mm2, mm1
+	movq		mm3, mm1
+
+	pmulhw		mm1, mm0			; mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 )
+	psrlw		mm2, 15				; mm2 = 1 in each lane whose input word was negative, else 0
+
+	paddw		mm1, mm3			; mm1 = xC4S4 * ( is12 - is56 )
+	paddw		mm1, mm2			; Truncate mm1, now it is icommon_product1
+
+	movq		mm2, mm7
+	movq		mm3, mm7			
+
+	pmulhw		mm7, mm0			; mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 )
+	psrlw		mm2, 15			
+
+	paddw		mm7, mm3			; mm7 = xC4S4 * ( id12 + id56 )
+	paddw		mm7, mm2			; Truncate mm7, now it is icommon_product2
+
+;------------------------------------------------------------------------
+;	mm0			Free	
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm1			icommon_product1
+;	mm7			icommon_product2
+;   mm4			id07
+;	mm6			id34
+
+	
+	pxor		mm0, mm0			; Clear mm0
+	psubsw		mm0, mm6			; mm0 = - id34
+
+	psubsw		mm0, mm7			; mm0 = - ( id34 + icommon_product2 )
+	paddsw		mm6, mm6			; mm6 = id34 * 2
+	paddsw		mm6, mm0			; mm6 = id34 - icommon_product2
+
+	psubsw		mm4, mm1			; mm4 = id07 - icommon_product1
+	paddsw		mm1, mm1			; mm1 = icommon_product1 * 2
+	paddsw		mm1, mm4			; mm1 = id07 + icommon_product1
+
+
+;-------------------------------------------------------------------------
+; op[1], op[7]
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			irot_input_x
+;	mm0			irot_input_y
+
+	movq		mm7, xC1S7
+	movq		mm2, mm1
+
+	movq		mm3, mm1; mm3 = irot_input_x (kept for the add-back and the xC7S1 product)
+	pmulhw		mm1, mm7			; mm1 = xC1S7 * irot_input_x - irot_input_x
+
+	movq		mm7, xC7S1			;
+	psrlw		mm2, 15				; mm2 = 1 in each lane where irot_input_x is negative, else 0
+	
+	paddw		mm1, mm3			; mm1 = xC1S7 * irot_input_x
+	paddw		mm1, mm2			; Truncated
+
+	pmulhw		mm3, mm7			; mm3 = xC7S1 * irot_input_x
+	paddw		mm3, mm2			; Truncated
+
+	movq		mm5, mm0			
+	movq	    mm2, mm0
+
+	movq		mm7, xC1S7			
+	pmulhw		mm0, mm7			; mm0 = xC1S7 * irot_input_y - irot_input_y
+	
+	movq		mm7, xC7S1			
+	psrlw		mm2, 15			
+	
+	paddw		mm0, mm5			; mm0 = xC1S7 * irot_input_y
+	paddw		mm0, mm2			; Truncated
+
+	pmulhw		mm5, mm7			; mm5 = xC7S1 * irot_input_y
+	paddw		mm5, mm2			; Truncated
+
+	psubsw		mm1, mm5			; mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1]
+	paddsw		mm3, mm0			; mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = op[7]
+	
+	movq		[edx + 8], mm1
+	movq		[edx + 56], mm3
+;-----------------------------------------------------------------------------
+; op[3], op[5]
+;	mm2			Free
+;	mm3			Free
+;	mm5			Free
+;	mm7			Free	
+;	mm1			Free
+;	mm0			Free
+;   mm4         id07 - icommon_product1 = irot_input_x
+;   mm6			id34 - icommon_product2 = irot_input_y
+
+	movq		mm0, xC3S5
+	movq		mm1, xC5S3
+
+	movq		mm5, mm6
+	movq		mm7, mm6
+
+	movq		mm2, mm4
+	movq		mm3, mm4
+
+	pmulhw		mm4, mm0			; mm4 = xC3S5 * irot_input_x - irot_input_x
+	pmulhw		mm6, mm1			; mm6 = xC5S3 * irot_input_y - irot_input_y
+
+	psrlw		mm2, 15
+	psrlw		mm5, 15
+
+	paddw		mm4, mm3			; mm4 = xC3S5 * irot_input_x
+	paddw		mm6, mm7			; mm6 = xC5S3 * irot_input_y
+
+	paddw		mm4, mm2			; Truncated
+	paddw		mm6, mm5			; Truncated
+
+	psubsw		mm4, mm6			; op[3] = xC3S5 * irot_input_x - xC5S3 * irot_input_y
+	movq		[edx + 24], mm4			; Save op[3]
+
+	movq		mm4, mm3			; mm4 = irot_input_x
+	movq		mm6, mm7			; mm6 = irot_input_y
+
+	pmulhw		mm3, mm1			; mm3 = xC5S3 * irot_input_x - irot_input_x
+	pmulhw		mm7, mm0			; mm7 = xC3S5 * irot_input_y - irot_input_y
+
+	paddw		mm4, mm2
+	paddw		mm6, mm5
+
+	paddw		mm3, mm4			; mm3 = xC5S3 * irot_input_x
+	paddw		mm7, mm6			; mm7 = xC3S5 * irot_input_y
+
+	paddw		mm3, mm7			; op[5]; NOTE(review): wrapping paddw here, while the other outputs use saturating adds
+	movq		[edx + 40], mm3			; Save op[5]
+
+    
+    emms
+
+    pop     edx
+    pop     ecx
+    pop     ebx
+    pop     ebp
+    pop     edi
+    pop     esi
+
+    ret
+
+
+;************************************************
+        END
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
new file mode 100644
index 00000000..dceb1982
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
@@ -0,0 +1,1398 @@
+/****************************************************************************
+ *
+ *   Module Title :     fdctmmx.c
+ *
+ *   Description  :     Forward DCT optimized specifically for mmx or compatible
+ *						processor
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ ***************************************************************************** 
+ *   Revision History
+ *	
+ *   1.00 YWX  07/11/11 Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+	
+
+__declspec(align(16)) static unsigned short TIRY[8];   /* scratch: fdct_MMX spills (is07 - is34) here; file-static, so shared by every call - NOTE(review): not safe for concurrent callers */
+/* Despite its name, this table feeds the forward DCT below: row i is addressed as C(i) = [edx + 8 * i]. */
+__declspec(align(16)) static unsigned short MmxIdctConst[8 * 4] =
+{
+    0,    0,    0,    0,    /* C(0): not referenced in the code visible here; keeps row i at C(i) */
+	64277,64277,64277,64277, /* C(1) = xC1S7: 65536 * cos(1*pi/16), rounded */
+	60547,60547,60547,60547, /* C(2) = xC2S6: 65536 * cos(2*pi/16), rounded */
+	54491,54491,54491,54491, /* C(3) = xC3S5: 65536 * cos(3*pi/16), rounded */
+	46341,46341,46341,46341, /* C(4) = xC4S4: 65536 * cos(4*pi/16), rounded */
+	36410,36410,36410,36410, /* C(5) = xC5S3: 65536 * cos(5*pi/16), rounded */
+	25080,25080,25080,25080, /* C(6) = xC6S2: 65536 * cos(6*pi/16), rounded */
+	12785,12785,12785,12785  /* C(7) = xC7S1: 65536 * cos(7*pi/16), rounded */
+};
+
+ 
+/**************************************************************************************
+ *
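+ *		Function:		fdct_MMX
+ *		
+ *		Description:	Forward DCT using MMX: 4x8 sub-blocks are transposed, then run through a 1-D FDCT.
+ *
+ *		Input:			InputData  - source block; also used as scratch, so its contents are overwritten
+ *
+ *		Output:			OutputData - receives the transformed coefficients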
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	The inputdata is limited to 9 bits [-256, 255]
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+void  fdct_MMX(short *InputData, short *OutputData)
+{
+
+	__asm 
+	{
+		mov		eax, InputData
+		mov		ebx, OutputData
+        lea     ecx, [eax+8]
+        lea     edi, [ebx+8]
+
+		lea		edx, MmxIdctConst
+
+#define IL(i)   [eax + 16 * i]
+#define IH(i)   [ecx + 16 * i]
+#define OL(i)   [ebx + 16 * i]
+#define OH(i)   [edi + 16 * i]
+#define C(i)    [edx + 8  * i]
+
+/******************************************************/
+/* Do 4x8 Transpose  is done through 2 4x4 Transpose  */
+/******************************************************/
+
+    	movq		mm4, IH(0)		/* mm4=e3e2e1e0 */	
+        movq		mm0, IH(1)		/* mm4=f3f2f1f0 */	
+        
+        psllw       mm4, 1          /* up precision */
+        psllw       mm0, 1          /* up precision */
+
+        movq		mm5, mm4		/* make a copy  */			
+        punpcklwd	mm4, mm0		/* mm4=f1e1f0e0 */	
+        
+        punpckhwd	mm5, mm0		/* mm5=f3e3f2e2 */        
+        movq		mm6, IH(2)		/* mm6=g3g2g1g0 */         
+
+        movq		mm0, IH(3)		/* mm0=h3h2h1h0 */ 
+        psllw       mm6, 1          /* up precision */
+
+        psllw       mm0, 1          /* up precision */
+        movq		mm7, mm6		/* mm7=g3g2g1g0 */         
+        
+        punpcklwd	mm6, mm0		/* mm6=h1g1h0g0 */ 
+        punpckhwd	mm7, mm0		/* mm7=h3g3h2g2 */ 
+        
+        movq		mm3, mm4		/* mm4=f1e1f0e0 */	
+        punpckldq	mm4, mm6		/* mm4=h0g0f0e0 */	
+        
+        punpckhdq	mm3, mm6		/* mm3=h1g1f1e1 */	
+        movq		mm6, mm5		/* mm5=f3e3f2e2 */
+
+        punpckldq	mm5, mm7		/* mm5=h2g2f2e2 */ 
+        movq        IH(0), mm4      /* saveh0g0f0e0 */       
+        
+        punpckhdq	mm6, mm7		/* mm6=h3g3f3e3 */        
+        movq        IH(2), mm5      /* saveh2g2f2e2 */
+
+        movq        IH(3), mm6      /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/        
+/*    mm3 in use for IH(1)                            */
+/*----------------------------------------------------*/
+
+        movq		mm4, IL(0)		/* mm4=a3a2a1a0 */	
+        movq		mm0, IL(1)		/* mm0=b3b2b1b0 */	
+
+        psllw       mm4, 1          /* up precision */
+        psllw       mm0, 1          /* up precision */
+        
+        movq        mm5, mm4        /* mm5=a3a2a1a0 */
+        punpcklwd   mm4, mm0        /* mm4=b1a1b0a0 */
+        
+        punpckhwd	mm5, mm0		/* mm5=b3a3b2a2 */	                
+        movq		mm6, IL(2)		/* mm6=c3c2c1c0 */ 
+        
+        
+        movq		mm0, IL(3)	    /* mm0=d3d2d1d0 */         
+        psllw       mm6, 1          /* up precision */
+
+        psllw       mm0, 1          /* up precision */
+        movq        mm7, mm6        /* mm7=c3c2c1c0 */
+
+        punpcklwd	mm6, mm0		/* mm6=d1c1d0c0 */ 
+        punpckhwd	mm7, mm0		/* mm7=c3c3d2c2 */ 
+        
+        movq		mm1, mm4		/* mm4=b1a1b0a0 */	
+        punpckldq	mm4, mm6		/* mm4=d0c0b0a0 */	
+        
+        punpckhdq	mm1, mm6		/* mm1=d1c1b1a1 */	
+        movq		mm2, mm5		/* mm5=b3a3b2a2 */
+
+        punpckldq	mm5, mm7		/* mm5=d2c2b2a2 */ 
+        punpckhdq	mm2, mm7		/* mm6=d3c3b3a3 */
+        
+        movq        IL(2), mm5       /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/        
+/*    mm1 in use for IL(1)                            */
+/*    mm2 in use for IL(3)                            */
+/*    mm3 in use for IH(1)                            */
+/*    mm4 in use for IH(0)                            */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT                       */
+/******************************************************/
+        movq        mm0, mm4        /* mm4 = ip0 */
+        movq        mm5, mm1        /* mm5 = ip1 */      
+        
+        movq        mm6, mm2        /* mm6 = ip3 */      
+        movq        mm7, mm3        /* mm7 = ip5 */      	
+
+        paddsw      mm0, IH(3)      /* mm0 = ip0 + ip7 */
+        paddsw      mm1, IL(2)      /* mm1 = ip1 + ip2 */
+
+        paddsw      mm2, IH(0)      /* mm2 = ip3 + ip4 */
+        paddsw      mm3, IH(2)      /* mm3 = ip5 + ip6 */
+
+        psubsw      mm4, IH(3)      /* mm4 = ip0 - ip7 */
+        psubsw      mm5, IL(2)      /* mm5 = ip1 - ip2 */       
+
+        psubsw		mm0, mm2        /* mm0 = is07 - is34 */			
+        paddsw		mm2, mm2		/* mm2 = is34 * 2    */	
+        
+        psubsw      mm6, IH(0)      /* mm6 = ip3 - ip4 */               
+        paddsw		mm2, mm0		/* mm2 = is07 + is34 */	
+
+        psubsw		mm1, mm3		/* mm1 = is12 - is56 */	
+        movq		TIRY, mm0		/* save is07-is34 */	
+
+        paddsw		mm3, mm3		/* mm3 = is56 * 2 */	
+        paddsw		mm3, mm1	    /* mm3 = is12 + is56 */
+        
+        psubsw      mm7, IH(2)      /* mm7 = ip5 -ip6 */
+        psubsw		mm5, mm7		/* mm5 = id12 - id56 */
+	    
+        paddsw		mm7, mm7		/* mm7 = id56 * 2 */		
+	    paddsw		mm7, mm5	    /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 
+/*---------------------------------------------------------*/
+        psubsw		mm2, mm3		/* mm2 = is0734 - is1256 */
+        paddsw		mm3, mm3		/* mm3 = is1256 * 2 */		
+
+        movq		mm0, mm2	    /* mm0 = is0734 - is1256 */
+        paddsw		mm3, mm2		/* mm3 = is0734 + is1256 */
+
+        pmulhw		mm0, C(4)	    /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		mm0, mm2		/* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncate mm0, now it is op[4] */
+            
+        movq		mm2, mm3		/* mm2 = is0734 + is1256 */
+        movq		OH(0), mm0		/*	op4, now mm0,mm2 are free */
+            
+        movq		mm0, mm3		/* mm0 = is0734 + is1256 */
+        pmulhw		mm3, C(4)		/* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		mm2, 15			
+        paddw		mm3, mm0		/* mm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		mm3, mm2		/* Truncate mm3, now it is op[0] */     
+        movq		OL(0), mm3		/* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 
+/*---------------------------------------------------------*/
+ 	    movq		mm3, TIRY		/* mm3 = irot_input_y */
+        pmulhw		mm3, C(2)		/* mm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movq		mm2, TIRY		/* mm2 = irot_input_y */
+        movq		mm0, mm2		/* mm0 = irot_input_y */
+        
+        psrlw		mm2, 15		
+        paddw		mm3, mm0        /* mm3 = xC2S6 * irot_input_y */
+            
+        paddw       mm3, mm2		/* Truncated */
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        
+        
+        movq		mm2, mm5        /* mm2 = id12 - id56 */
+        pmulhw		mm0, C(6)		/* mm0 = xC6S2 * irot_input_x */
+            
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncated */
+        
+        paddsw		mm3, mm0		/* op[2] */
+        movq		OL(2), mm3		/* save op[2] */
+        
+        
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        movq		mm2, mm5		/* mm0 = id12 - id56 */
+        
+        pmulhw		mm5, C(2)		/* mm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		mm2, 15		
+        
+        movq		mm3, TIRY		/* mm3 = irot_input_y */
+        paddw		mm5, mm0		/* mm5 = xC2S6 * irot_input_x */
+            
+        paddw		mm5, mm2		/* Truncated */
+        movq		mm2, mm3		/* mm2 = irot_input_y */	
+        
+        pmulhw		mm3, C(6)	    /* mm3 = xC6S2 * irot_input_y */
+        psrlw		mm2, 15        
+        
+        paddw		mm3, mm2		/* Truncated */
+        psubsw		mm3, mm5		/* mm3 = op[6] */
+        
+        movq		OH(2), mm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movq		mm0, C(4)       /* mm0 = xC4s4 */
+	    movq		mm2, mm1        /* mm2 = is12 - is56 */	
+	
+        movq		mm3, mm1        /* mm3 = is12 - is56 */	
+	    pmulhw		mm1, mm0		/* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		mm2, 15				
+	    paddw		mm1, mm3	    /* mm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		mm1, mm2        /* Truncate mm1, now it is icommon_product1 */
+	    movq		mm2, mm7        /* mm2 = id12 + id56 */
+	    
+        movq		mm3, mm7		/* mm3 = id12 + id56 */
+        pmulhw		mm7, mm0		/* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		mm2, 15		    /* For trucation */	
+	    paddw		mm7, mm3		/* mm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		mm7, mm2		/* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		mm0, mm0		/* Clear mm0 */
+	    psubsw		mm0, mm6		/* mm0 = - id34 */
+
+	    psubsw		mm0, mm7	    /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+	    paddsw		mm6, mm6	    /* mm6 = id34 * 2 */
+
+	    paddsw		mm6, mm0		/* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+	    psubsw		mm4, mm1		/* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		mm1, mm1		/* mm1 = icommon_product1 * 2 */	    
+        paddsw		mm1, mm4		/* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7              
+/*---------------------------------------------------------*/
+
+	    movq		mm7, C(1)       /* xC1S7 */
+        movq		mm2, mm1        /* mm2 = irot_input_x */
+        
+        movq		mm3, mm1;       /* mm3 = irot_input_x */
+        pmulhw		mm1, mm7		/* mm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movq		mm7, C(7)		/* xC7S1 */
+        psrlw		mm2, 15		    /* for trucation */		
+            
+        paddw		mm1, mm3		/* mm1 = xC1S7 * irot_input_x */
+        paddw		mm1, mm2		/* Trucated */
+            
+        pmulhw		mm3, mm7		/* mm3 = xC7S1 * irot_input_x */
+        paddw		mm3, mm2		/* Truncated */
+            
+        movq		mm5, mm0		/* mm5 = irot_input_y */	
+        movq	    mm2, mm0        /* mm2 = irot_input_y */	
+            
+        movq		mm7, C(1)       /* xC1S7 */			
+        pmulhw		mm0, mm7	    /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movq		mm7, C(7)		/* xC7S1 */	
+        psrlw		mm2, 15		    /* for trucation */	
+        
+        paddw		mm0, mm5		/* mm0 = xC1S7 * irot_input_y */
+        paddw		mm0, mm2		/* Truncated */
+        
+        pmulhw		mm5, mm7		/* mm5 = xC7S1 * irot_input_y */
+        paddw		mm5, mm2		/* Truncated */
+        
+        psubsw		mm1, mm5		/* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		mm3, mm0		/* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+        
+        movq		OL(1), mm1
+        movq		OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 
+/*---------------------------------------------------------*/
+	    movq		mm0, C(3)       /* xC3S5 */
+	    movq		mm1, C(5)       /* xC5S3 */
+
+	    movq		mm5,mm6         /* irot_input_x */
+	    movq		mm7,mm6         /* irot_input_x */
+
+	    movq		mm2,mm4         /* irot_input_y */
+	    movq		mm3,mm4         /* irot_input_y */
+
+	    pmulhw		mm4,mm0         /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		mm6,mm1		    /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		mm2,15          /* for trucation */
+	    psrlw		mm5,15          /* for trucation */
+
+	    paddw		mm4,mm3		    /* mm4 = xC3S5 * irot_input_x */
+	    paddw		mm6,mm7		    /* mm6 = xC5S3 * irot_input_y */
+
+	    paddw		mm4,mm2		    /* Truncated */
+	    paddw		mm6,mm5		    /* Truncated */
+
+	    psubsw		mm4,mm6		    /* op [3] */
+	    movq		OL(3),mm4		/* Save Op[3] */
+
+	    movq		mm4,mm3		    /* irot_input_y */
+	    movq		mm6,mm7		    /* irot_input_x */
+
+	    pmulhw		mm3,mm1		    /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		mm7,mm0		    /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		mm4,mm2         /* Trucated */
+	    paddw		mm6,mm5         /* Trucated */
+
+	    paddw		mm3,mm4		    /* mm3 = xC5S3 * irot_input_x */
+	    paddw		mm7,mm6		    /*  mm7 = xC3S5 * irot_input_y */
+
+	    paddw		mm3,mm7		    /* Op[5] */
+	    movq		OH(1),mm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT                                     */        
+/*---------------------------------------------------------*/
+
+/******************************************************/
+/* Do 4x8 Transpose  is done through 2 4x4 Transpose  */
+/******************************************************/
+
+        lea         eax, [eax+64]
+        lea         ecx, [ecx+64]
+        lea         ebx, [ebx+64]
+        lea         edi, [edi+64]
+
+    	movq		mm4, IH(0)		/* mm4=e3e2e1e0 */	
+        movq		mm0, IH(1)		/* mm4=f3f2f1f0 */	
+        
+        psllw       mm4, 1          /* up precision */
+        psllw       mm0, 1          /* up precision */
+
+        movq		mm5, mm4		/* make a copy  */			
+        punpcklwd	mm4, mm0		/* mm4=f1e1f0e0 */	
+        
+        punpckhwd	mm5, mm0		/* mm5=f3e3f2e2 */        
+        movq		mm6, IH(2)		/* mm6=g3g2g1g0 */         
+
+        movq		mm0, IH(3)		/* mm0=h3h2h1h0 */ 
+        psllw       mm6, 1          /* up precision */
+
+        psllw       mm0, 1          /* up precision */
+        movq		mm7, mm6		/* mm7=g3g2g1g0 */         
+        
+        punpcklwd	mm6, mm0		/* mm6=h1g1h0g0 */ 
+        punpckhwd	mm7, mm0		/* mm7=h3g3h2g2 */ 
+        
+        movq		mm3, mm4		/* mm4=f1e1f0e0 */	
+        punpckldq	mm4, mm6		/* mm4=h0g0f0e0 */	
+        
+        punpckhdq	mm3, mm6		/* mm3=h1g1f1e1 */	
+        movq		mm6, mm5		/* mm5=f3e3f2e2 */
+
+        punpckldq	mm5, mm7		/* mm5=h2g2f2e2 */ 
+        movq        IH(0), mm4      /* saveh0g0f0e0 */       
+        
+        punpckhdq	mm6, mm7		/* mm6=h3g3f3e3 */        
+        movq        IH(2), mm5      /* saveh2g2f2e2 */
+
+        movq        IH(3), mm6      /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/        
+/*    mm3 in use for IH(1)                            */
+/*----------------------------------------------------*/
+
+        movq		mm4, IL(0)		/* mm4=a3a2a1a0 */	
+        movq		mm0, IL(1)		/* mm0=b3b2b1b0 */	
+
+        psllw       mm4, 1          /* up precision */
+        psllw       mm0, 1          /* up precision */
+        
+        movq        mm5, mm4        /* mm5=a3a2a1a0 */
+        punpcklwd   mm4, mm0        /* mm4=b1a1b0a0 */
+        
+        punpckhwd	mm5, mm0		/* mm5=b3a3b2a2 */	                
+        movq		mm6, IL(2)		/* mm6=c3c2c1c0 */ 
+        
+        
+        movq		mm0, IL(3)	    /* mm0=d3d2d1d0 */         
+        psllw       mm6, 1          /* up precision */
+
+        psllw       mm0, 1          /* up precision */
+        movq        mm7, mm6        /* mm7=c3c2c1c0 */
+
+        punpcklwd	mm6, mm0		/* mm6=d1c1d0c0 */ 
+        punpckhwd	mm7, mm0		/* mm7=c3c3d2c2 */ 
+        
+        movq		mm1, mm4		/* mm4=b1a1b0a0 */	
+        punpckldq	mm4, mm6		/* mm4=d0c0b0a0 */	
+        
+        punpckhdq	mm1, mm6		/* mm1=d1c1b1a1 */	
+        movq		mm2, mm5		/* mm5=b3a3b2a2 */
+
+        punpckldq	mm5, mm7		/* mm5=d2c2b2a2 */ 
+        punpckhdq	mm2, mm7		/* mm6=d3c3b3a3 */
+        
+        movq        IL(2), mm5       /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/        
+/*    mm1 in use for IL(1)                            */
+/*    mm2 in use for IL(3)                            */
+/*    mm3 in use for IH(1)                            */
+/*    mm4 in use for IL(0)                            */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT                       */
+/******************************************************/
+        movq        mm0, mm4        /* mm4 = ip0 */
+        movq        mm5, mm1        /* mm5 = ip1 */      
+        
+        movq        mm6, mm2        /* mm6 = ip3 */      
+        movq        mm7, mm3        /* mm7 = ip5 */      	
+
+        paddsw      mm0, IH(3)      /* mm0 = ip0 + ip7 */
+        paddsw      mm1, IL(2)      /* mm1 = ip1 + ip2 */
+
+        paddsw      mm2, IH(0)      /* mm2 = ip3 + ip4 */
+        paddsw      mm3, IH(2)      /* mm3 = ip5 + ip6 */
+
+        psubsw      mm4, IH(3)      /* mm4 = ip0 - ip7 */
+        psubsw      mm5, IL(2)      /* mm5 = ip1 - ip2 */       
+
+        psubsw		mm0, mm2        /* mm0 = is07 - is34 */			
+        paddsw		mm2, mm2		/* mm2 = is34 * 2    */	
+        
+        psubsw      mm6, IH(0)      /* mm6 = ip3 - ip4 */               
+        paddsw		mm2, mm0		/* mm2 = is07 + is34 */	
+
+        psubsw		mm1, mm3		/* mm1 = is12 - is56 */	
+        movq		TIRY, mm0		/* save is07-is34 */	
+
+        paddsw		mm3, mm3		/* mm3 = is56 * 2 */	
+        paddsw		mm3, mm1	    /* mm3 = is12 + is56 */
+        
+        psubsw      mm7, IH(2)      /* mm7 = ip5 -ip6 */
+        psubsw		mm5, mm7		/* mm5 = id12 - id56 */
+	    
+        paddsw		mm7, mm7		/* mm7 = id56 * 2 */		
+	    paddsw		mm7, mm5	    /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+        psubsw		mm2, mm3		/* mm2 = is0734 - is1256 */
+        paddsw		mm3, mm3		/* mm3 = is1256 * 2 */		
+
+        movq		mm0, mm2	    /* mm0 = is0734 - is1256 */
+        paddsw		mm3, mm2		/* mm3 = is0734 + is1256 */
+
+        pmulhw		mm0, C(4)	    /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		mm0, mm2		/* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncate mm0, now it is op[4] */
+            
+        movq		mm2, mm3		/* mm2 = is0734 + is1256 */
+        movq		OH(0), mm0		/*	op4, now mm0,mm2 are free */
+            
+        movq		mm0, mm3		/* mm0 = is0734 + is1256 */
+        pmulhw		mm3, C(4)		/* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		mm2, 15			
+        paddw		mm3, mm0		/* mm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		mm3, mm2		/* Truncate mm3, now it is op[0] */     
+        movq		OL(0), mm3		/* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ 	    movq		mm3, TIRY		/* mm3 = irot_input_y */
+        pmulhw		mm3, C(2)		/* mm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movq		mm2, TIRY		/* mm2 = irot_input_y */
+        movq		mm0, mm2		/* mm0 = irot_input_y */
+        
+        psrlw		mm2, 15		
+        paddw		mm3, mm0        /* mm3 = xC2S6 * irot_input_y */
+            
+        paddw       mm3, mm2		/* Truncated */
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        
+        
+        movq		mm2, mm5        /* mm2 = id12 - id56 */
+        pmulhw		mm0, C(6)		/* mm0 = xC6S2 * irot_input_x */
+            
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncated */
+        
+        paddsw		mm3, mm0		/* op[2] */
+        movq		OL(2), mm3		/* save op[2] */
+        
+        
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        movq		mm2, mm5		/* mm0 = id12 - id56 */
+        
+        pmulhw		mm5, C(2)		/* mm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		mm2, 15		
+        
+        movq		mm3, TIRY		/* mm3 = irot_input_y */
+        paddw		mm5, mm0		/* mm5 = xC2S6 * irot_input_x */
+            
+        paddw		mm5, mm2		/* Truncated */
+        movq		mm2, mm3		/* mm2 = irot_input_y */	
+        
+        pmulhw		mm3, C(6)	    /* mm3 = xC6S2 * irot_input_y */
+        psrlw		mm2, 15        
+        
+        paddw		mm3, mm2		/* Truncated */
+        psubsw		mm3, mm5		/* mm3 = op[6] */
+        
+        movq		OH(2), mm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movq		mm0, C(4)       /* mm0 = xC4s4 */
+	    movq		mm2, mm1        /* mm2 = is12 - is56 */	
+	
+        movq		mm3, mm1        /* mm3 = is12 - is56 */	
+	    pmulhw		mm1, mm0		/* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		mm2, 15				
+	    paddw		mm1, mm3	    /* mm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		mm1, mm2        /* Truncate mm1, now it is icommon_product1 */
+	    movq		mm2, mm7        /* mm2 = id12 + id56 */
+	    
+        movq		mm3, mm7		/* mm3 = id12 + id56 */
+        pmulhw		mm7, mm0		/* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		mm2, 15		    /* For trucation */	
+	    paddw		mm7, mm3		/* mm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		mm7, mm2		/* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		mm0, mm0		/* Clear mm0 */
+	    psubsw		mm0, mm6		/* mm0 = - id34 */
+
+	    psubsw		mm0, mm7	    /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+	    paddsw		mm6, mm6	    /* mm6 = id34 * 2 */
+
+	    paddsw		mm6, mm0		/* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+	    psubsw		mm4, mm1		/* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		mm1, mm1		/* mm1 = icommon_product1 * 2 */	    
+        paddsw		mm1, mm4		/* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7 */
+/*---------------------------------------------------------*/
+
+	    movq		mm7, C(1)       /* xC1S7 */
+        movq		mm2, mm1        /* mm2 = irot_input_x */
+        
+        movq		mm3, mm1;       /* mm3 = irot_input_x */
+        pmulhw		mm1, mm7		/* mm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movq		mm7, C(7)		/* xC7S1 */
+        psrlw		mm2, 15		    /* for trucation */		
+            
+        paddw		mm1, mm3		/* mm1 = xC1S7 * irot_input_x */
+        paddw		mm1, mm2		/* Trucated */
+            
+        pmulhw		mm3, mm7		/* mm3 = xC7S1 * irot_input_x */
+        paddw		mm3, mm2		/* Truncated */
+            
+        movq		mm5, mm0		/* mm5 = irot_input_y */	
+        movq	    mm2, mm0        /* mm2 = irot_input_y */	
+            
+        movq		mm7, C(1)       /* xC1S7 */			
+        pmulhw		mm0, mm7	    /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movq		mm7, C(7)		/* xC7S1 */	
+        psrlw		mm2, 15		    /* for trucation */	
+        
+        paddw		mm0, mm5		/* mm0 = xC1S7 * irot_input_y */
+        paddw		mm0, mm2		/* Truncated */
+        
+        pmulhw		mm5, mm7		/* mm5 = xC7S1 * irot_input_y */
+        paddw		mm5, mm2		/* Truncated */
+        
+        psubsw		mm1, mm5		/* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		mm3, mm0		/* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+        
+        movq		OL(1), mm1
+        movq		OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 */
+/*---------------------------------------------------------*/
+	    movq		mm0, C(3)       /* xC3S5 */
+	    movq		mm1, C(5)       /* xC5S3 */
+
+	    movq		mm5,mm6         /* irot_input_x */
+	    movq		mm7,mm6         /* irot_input_x */
+
+	    movq		mm2,mm4         /* irot_input_y */
+	    movq		mm3,mm4         /* irot_input_y */
+
+	    pmulhw		mm4,mm0         /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		mm6,mm1		    /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		mm2,15          /* for trucation */
+	    psrlw		mm5,15          /* for trucation */
+
+	    paddw		mm4,mm3		    /* mm4 = xC3S5 * irot_input_x */
+	    paddw		mm6,mm7		    /* mm6 = xC5S3 * irot_input_y */
+
+	    paddw		mm4,mm2		    /* Truncated */
+	    paddw		mm6,mm5		    /* Truncated */
+
+	    psubsw		mm4,mm6		    /* op [3] */
+	    movq		OL(3),mm4		/* Save Op[3] */
+
+	    movq		mm4,mm3		    /* irot_input_y */
+	    movq		mm6,mm7		    /* irot_input_x */
+
+	    pmulhw		mm3,mm1		    /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		mm7,mm0		    /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		mm4,mm2         /* Trucated */
+	    paddw		mm6,mm5         /* Trucated */
+
+	    paddw		mm3,mm4		    /* mm3 = xC5S3 * irot_input_x */
+	    paddw		mm7,mm6		    /*  mm7 = xC3S5 * irot_input_y */
+
+	    paddw		mm3,mm7		    /* Op[5] */
+	    movq		OH(1),mm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of Horizontal FDCT                                  */        
+/*---------------------------------------------------------*/
+        lea         eax, [ebx-64]
+        lea         esi, [edi-64]
+
+#undef  IL
+#undef  IH
+#undef  OL
+#undef  OH
+#define IL(i)   [eax + 16 * i]
+#define IH(i)   [ebx + 16 * i]
+#define OL(i)   [eax + 16 * i]
+#define OH(i)   [ebx + 16 * i]
+
+/******************************************************/
+/* The 4x8 transpose is done as two 4x4 transposes    */
+/******************************************************/
+    	movq		mm4, IH(0)		/* mm4=e3e2e1e0 */	
+        movq		mm0, IH(1)		/* mm4=f3f2f1f0 */	
+        
+        movq		mm5, mm4		/* make a copy  */			
+        punpcklwd	mm4, mm0		/* mm4=f1e1f0e0 */	
+        
+        punpckhwd	mm5, mm0		/* mm5=f3e3f2e2 */        
+        movq		mm6, IH(2)		/* mm6=g3g2g1g0 */         
+
+        movq		mm0, IH(3)		/* mm0=h3h2h1h0 */ 
+        movq		mm7, mm6		/* mm7=g3g2g1g0 */         
+        
+        punpcklwd	mm6, mm0		/* mm6=h1g1h0g0 */ 
+        punpckhwd	mm7, mm0		/* mm7=h3g3h2g2 */ 
+        
+        movq		mm3, mm4		/* mm4=f1e1f0e0 */	
+        punpckldq	mm4, mm6		/* mm4=h0g0f0e0 */	
+        
+        punpckhdq	mm3, mm6		/* mm3=h1g1f1e1 */	
+        movq		mm6, mm5		/* mm5=f3e3f2e2 */
+
+        punpckldq	mm5, mm7		/* mm5=h2g2f2e2 */ 
+        movq        IH(0), mm4      /* saveh0g0f0e0 */       
+        
+        punpckhdq	mm6, mm7		/* mm6=h3g3f3e3 */        
+        movq        IH(2), mm5      /* saveh2g2f2e2 */
+
+        movq        IH(3), mm6      /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/        
+/*    mm3 in use for IH(1)                            */
+/*----------------------------------------------------*/
+
+        movq		mm4, IL(0)		/* mm4=a3a2a1a0 */	
+        movq		mm0, IL(1)		/* mm0=b3b2b1b0 */	
+        
+        movq        mm5, mm4        /* mm5=a3a2a1a0 */
+        punpcklwd   mm4, mm0        /* mm4=b1a1b0a0 */
+        
+        punpckhwd	mm5, mm0		/* mm5=b3a3b2a2 */	                
+        movq		mm6, IL(2)		/* mm6=c3c2c1c0 */ 
+                
+        movq		mm0, IL(3)	    /* mm0=d3d2d1d0 */         
+        movq        mm7, mm6        /* mm7=c3c2c1c0 */
+
+        punpcklwd	mm6, mm0		/* mm6=d1c1d0c0 */ 
+        punpckhwd	mm7, mm0		/* mm7=c3c3d2c2 */ 
+        
+        movq		mm1, mm4		/* mm4=b1a1b0a0 */	
+        punpckldq	mm4, mm6		/* mm4=d0c0b0a0 */	
+        
+        punpckhdq	mm1, mm6		/* mm1=d1c1b1a1 */	
+        movq		mm2, mm5		/* mm5=b3a3b2a2 */
+
+        punpckldq	mm5, mm7		/* mm5=d2c2b2a2 */ 
+        punpckhdq	mm2, mm7		/* mm6=d3c3b3a3 */
+    
+        movq        IL(2), mm5       /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/        
+/*    mm1 in use for IL(1)                            */
+/*    mm2 in use for IL(3)                            */
+/*    mm3 in use for IH(1)                            */
+/*    mm4 in use for IL(0)                            */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT                       */
+/******************************************************/
+        movq        mm0, mm4        /* mm4 = ip0 */
+        movq        mm5, mm1        /* mm5 = ip1 */      
+        
+        movq        mm6, mm2        /* mm6 = ip3 */      
+        movq        mm7, mm3        /* mm7 = ip5 */      	
+
+        paddsw      mm0, IH(3)      /* mm0 = ip0 + ip7 */
+        paddsw      mm1, IL(2)      /* mm1 = ip1 + ip2 */
+
+        paddsw      mm2, IH(0)      /* mm2 = ip3 + ip4 */
+        paddsw      mm3, IH(2)      /* mm3 = ip5 + ip6 */
+
+        psubsw      mm4, IH(3)      /* mm4 = ip0 - ip7 */
+        psubsw      mm5, IL(2)      /* mm5 = ip1 - ip2 */       
+
+        psubsw		mm0, mm2        /* mm0 = is07 - is34 */			
+        paddsw		mm2, mm2		/* mm2 = is34 * 2    */	
+        
+        psubsw      mm6, IH(0)      /* mm6 = ip3 - ip4 */               
+        paddsw		mm2, mm0		/* mm2 = is07 + is34 */	
+
+        psubsw		mm1, mm3		/* mm1 = is12 - is56 */	
+        movq		TIRY, mm0		/* save is07-is34 */	
+
+        paddsw		mm3, mm3		/* mm3 = is56 * 2 */	
+        paddsw		mm3, mm1	    /* mm3 = is12 + is56 */
+        
+        psubsw      mm7, IH(2)      /* mm7 = ip5 -ip6 */
+        psubsw		mm5, mm7		/* mm5 = id12 - id56 */
+	    
+        paddsw		mm7, mm7		/* mm7 = id56 * 2 */		
+	    paddsw		mm7, mm5	    /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+        psubsw		mm2, mm3		/* mm2 = is0734 - is1256 */
+        paddsw		mm3, mm3		/* mm3 = is1256 * 2 */		
+
+        movq		mm0, mm2	    /* mm0 = is0734 - is1256 */
+        paddsw		mm3, mm2		/* mm3 = is0734 + is1256 */
+
+        pmulhw		mm0, C(4)	    /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		mm0, mm2		/* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncate mm0, now it is op[4] */
+       
+        movq        mm2, mm0      
+        psrlw       mm0, 15
+        
+        paddw       mm0, mm2
+        psraw       mm0, 1        
+
+        movq		OH(0), mm0		/*	op4, now mm0,mm2 are free */
+        movq		mm2, mm3		/* mm2 = is0734 + is1256 */
+            
+
+        movq		mm0, mm3		/* mm0 = is0734 + is1256 */
+        pmulhw		mm3, C(4)		/* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		mm2, 15			
+        paddw		mm3, mm0		/* mm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		mm3, mm2		/* Truncate mm3, now it is op[0] */     
+        movq        mm2, mm3
+
+        psrlw       mm3, 15
+        paddw       mm3, mm2
+        
+        psraw       mm3, 1
+        movq		OL(0), mm3		/* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ 	    movq		mm3, TIRY		/* mm3 = irot_input_y */
+        pmulhw		mm3, C(2)		/* mm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movq		mm2, TIRY		/* mm2 = irot_input_y */
+        movq		mm0, mm2		/* mm0 = irot_input_y */
+        
+        psrlw		mm2, 15		
+        paddw		mm3, mm0        /* mm3 = xC2S6 * irot_input_y */
+            
+        paddw       mm3, mm2		/* Truncated */
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        
+        
+        movq		mm2, mm5        /* mm2 = id12 - id56 */
+        pmulhw		mm0, C(6)		/* mm0 = xC6S2 * irot_input_x */
+            
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncated */
+        
+        paddsw		mm3, mm0		/* op[2] */
+        movq        mm0, mm3
+
+        psrlw       mm3, 15
+        paddw       mm3, mm0
+        
+        psraw       mm3, 1                
+        movq		OL(2), mm3		/* save op[2] */        
+        
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        movq		mm2, mm5		/* mm0 = id12 - id56 */
+        
+        pmulhw		mm5, C(2)		/* mm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		mm2, 15		
+        
+        movq		mm3, TIRY		/* mm3 = irot_input_y */
+        paddw		mm5, mm0		/* mm5 = xC2S6 * irot_input_x */
+            
+        paddw		mm5, mm2		/* Truncated */
+        movq		mm2, mm3		/* mm2 = irot_input_y */	
+        
+        pmulhw		mm3, C(6)	    /* mm3 = xC6S2 * irot_input_y */
+        psrlw		mm2, 15        
+        
+        paddw		mm3, mm2		/* Truncated */
+        psubsw		mm3, mm5		/* mm3 = op[6] */
+
+        movq        mm5, mm3
+        psrlw       mm3,  15
+        
+        paddw       mm3, mm5
+        psraw       mm3, 1
+
+        movq		OH(2), mm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movq		mm0, C(4)       /* mm0 = xC4s4 */
+	    movq		mm2, mm1        /* mm2 = is12 - is56 */	
+	
+        movq		mm3, mm1        /* mm3 = is12 - is56 */	
+	    pmulhw		mm1, mm0		/* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		mm2, 15				
+	    paddw		mm1, mm3	    /* mm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		mm1, mm2        /* Truncate mm1, now it is icommon_product1 */
+	    movq		mm2, mm7        /* mm2 = id12 + id56 */
+	    
+        movq		mm3, mm7		/* mm3 = id12 + id56 */
+        pmulhw		mm7, mm0		/* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		mm2, 15		    /* For trucation */	
+	    paddw		mm7, mm3		/* mm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		mm7, mm2		/* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		mm0, mm0		/* Clear mm0 */
+	    psubsw		mm0, mm6		/* mm0 = - id34 */
+
+	    psubsw		mm0, mm7	    /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+	    paddsw		mm6, mm6	    /* mm6 = id34 * 2 */
+
+	    paddsw		mm6, mm0		/* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+	    psubsw		mm4, mm1		/* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		mm1, mm1		/* mm1 = icommon_product1 * 2 */	    
+        paddsw		mm1, mm4		/* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7 */
+/*---------------------------------------------------------*/
+	    movq		mm7, C(1)       /* xC1S7 */
+        movq		mm2, mm1        /* mm2 = irot_input_x */
+        
+        movq		mm3, mm1;       /* mm3 = irot_input_x */
+        pmulhw		mm1, mm7		/* mm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movq		mm7, C(7)		/* xC7S1 */
+        psrlw		mm2, 15		    /* for trucation */		
+            
+        paddw		mm1, mm3		/* mm1 = xC1S7 * irot_input_x */
+        paddw		mm1, mm2		/* Trucated */
+            
+        pmulhw		mm3, mm7		/* mm3 = xC7S1 * irot_input_x */
+        paddw		mm3, mm2		/* Truncated */
+            
+        movq		mm5, mm0		/* mm5 = irot_input_y */	
+        movq	    mm2, mm0        /* mm2 = irot_input_y */	
+            
+        movq		mm7, C(1)       /* xC1S7 */			
+        pmulhw		mm0, mm7	    /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movq		mm7, C(7)		/* xC7S1 */	
+        psrlw		mm2, 15		    /* for trucation */	
+        
+        paddw		mm0, mm5		/* mm0 = xC1S7 * irot_input_y */
+        paddw		mm0, mm2		/* Truncated */
+        
+        pmulhw		mm5, mm7		/* mm5 = xC7S1 * irot_input_y */
+        paddw		mm5, mm2		/* Truncated */
+        
+        psubsw		mm1, mm5		/* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		mm3, mm0		/* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+        movq        mm5, mm1
+        movq        mm0, mm3        
+
+        psrlw       mm1, 15
+        psrlw       mm3, 15
+
+        paddw       mm1, mm5
+        paddw       mm3, mm0
+
+        psraw       mm1, 1
+        psraw       mm3, 1
+            
+        movq		OL(1), mm1
+        movq		OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 */
+/*---------------------------------------------------------*/
+	    movq		mm0, C(3)       /* xC3S5 */
+	    movq		mm1, C(5)       /* xC5S3 */
+
+	    movq		mm5,mm6         /* irot_input_x */
+	    movq		mm7,mm6         /* irot_input_x */
+
+	    movq		mm2,mm4         /* irot_input_y */
+	    movq		mm3,mm4         /* irot_input_y */
+
+	    pmulhw		mm4,mm0         /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		mm6,mm1		    /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		mm2,15          /* for trucation */
+	    psrlw		mm5,15          /* for trucation */
+
+	    paddw		mm4,mm3		    /* mm4 = xC3S5 * irot_input_x */
+	    paddw		mm6,mm7		    /* mm6 = xC5S3 * irot_input_y */
+
+	    paddw		mm4,mm2		    /* Truncated */
+	    paddw		mm6,mm5		    /* Truncated */
+
+	    psubsw		mm4,mm6		    /* op [3] */
+        movq        mm6,mm4
+
+        psrlw       mm4,15        
+        paddw       mm4,mm6
+
+        psraw       mm4,1
+        movq		OL(3),mm4		/* Save Op[3] */
+
+	    movq		mm4,mm3		    /* irot_input_y */
+	    movq		mm6,mm7		    /* irot_input_x */
+
+	    pmulhw		mm3,mm1		    /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		mm7,mm0		    /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		mm4,mm2         /* Trucated */
+	    paddw		mm6,mm5         /* Trucated */
+
+	    paddw		mm3,mm4		    /* mm3 = xC5S3 * irot_input_x */
+	    paddw		mm7,mm6		    /*  mm7 = xC3S5 * irot_input_y */
+
+	    paddw		mm3,mm7		    /* Op[5] */
+        movq        mm7,mm3
+
+        psrlw       mm3,15        
+        paddw       mm3,mm7
+
+        psraw       mm3,1
+        movq		OH(1),mm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT                                     */        
+/*---------------------------------------------------------*/
+        lea         eax, [eax+8]
+        lea         ebx, [ebx+8]
+
+/******************************************************/
+/* The 4x8 transpose is done as two 4x4 transposes    */
+/******************************************************/
+    	movq		mm4, IH(0)		/* mm4=e3e2e1e0 */	
+        movq		mm0, IH(1)		/* mm4=f3f2f1f0 */	
+        
+        movq		mm5, mm4		/* make a copy  */			
+        punpcklwd	mm4, mm0		/* mm4=f1e1f0e0 */	
+        
+        punpckhwd	mm5, mm0		/* mm5=f3e3f2e2 */        
+        movq		mm6, IH(2)		/* mm6=g3g2g1g0 */         
+
+        movq		mm0, IH(3)		/* mm0=h3h2h1h0 */ 
+        movq		mm7, mm6		/* mm7=g3g2g1g0 */         
+        
+        punpcklwd	mm6, mm0		/* mm6=h1g1h0g0 */ 
+        punpckhwd	mm7, mm0		/* mm7=h3g3h2g2 */ 
+        
+        movq		mm3, mm4		/* mm4=f1e1f0e0 */	
+        punpckldq	mm4, mm6		/* mm4=h0g0f0e0 */	
+        
+        punpckhdq	mm3, mm6		/* mm3=h1g1f1e1 */	
+        movq		mm6, mm5		/* mm5=f3e3f2e2 */
+
+        punpckldq	mm5, mm7		/* mm5=h2g2f2e2 */ 
+        movq        IH(0), mm4      /* saveh0g0f0e0 */       
+        
+        punpckhdq	mm6, mm7		/* mm6=h3g3f3e3 */        
+        movq        IH(2), mm5      /* saveh2g2f2e2 */
+
+        movq        IH(3), mm6      /* saveh3g3f3e3 */
+
+/*----------------------------------------------------*/        
+/*    mm3 in use for IH(1)                            */
+/*----------------------------------------------------*/
+
+        movq		mm4, IL(0)		/* mm4=a3a2a1a0 */	
+        movq		mm0, IL(1)		/* mm0=b3b2b1b0 */	
+        
+        movq        mm5, mm4        /* mm5=a3a2a1a0 */
+        punpcklwd   mm4, mm0        /* mm4=b1a1b0a0 */
+        
+        punpckhwd	mm5, mm0		/* mm5=b3a3b2a2 */	                
+        movq		mm6, IL(2)		/* mm6=c3c2c1c0 */ 
+                
+        movq		mm0, IL(3)	    /* mm0=d3d2d1d0 */         
+        movq        mm7, mm6        /* mm7=c3c2c1c0 */
+
+        punpcklwd	mm6, mm0		/* mm6=d1c1d0c0 */ 
+        punpckhwd	mm7, mm0		/* mm7=c3c3d2c2 */ 
+        
+        movq		mm1, mm4		/* mm4=b1a1b0a0 */	
+        punpckldq	mm4, mm6		/* mm4=d0c0b0a0 */	
+        
+        punpckhdq	mm1, mm6		/* mm1=d1c1b1a1 */	
+        movq		mm2, mm5		/* mm5=b3a3b2a2 */
+
+        punpckldq	mm5, mm7		/* mm5=d2c2b2a2 */ 
+        punpckhdq	mm2, mm7		/* mm6=d3c3b3a3 */
+    
+        movq        IL(2), mm5       /* saved2c2b2a2 */
+
+/*----------------------------------------------------*/        
+/*    mm1 in use for IL(1)                            */
+/*    mm2 in use for IL(3)                            */
+/*    mm3 in use for IH(1)                            */
+/*    mm4 in use for IL(0)                            */
+/*----------------------------------------------------*/
+
+/******************************************************/
+/* Let's do the 4x8 forward DCT                       */
+/******************************************************/
+        movq        mm0, mm4        /* mm4 = ip0 */
+        movq        mm5, mm1        /* mm5 = ip1 */      
+        
+        movq        mm6, mm2        /* mm6 = ip3 */      
+        movq        mm7, mm3        /* mm7 = ip5 */      	
+
+        paddsw      mm0, IH(3)      /* mm0 = ip0 + ip7 */
+        paddsw      mm1, IL(2)      /* mm1 = ip1 + ip2 */
+
+        paddsw      mm2, IH(0)      /* mm2 = ip3 + ip4 */
+        paddsw      mm3, IH(2)      /* mm3 = ip5 + ip6 */
+
+        psubsw      mm4, IH(3)      /* mm4 = ip0 - ip7 */
+        psubsw      mm5, IL(2)      /* mm5 = ip1 - ip2 */       
+
+        psubsw		mm0, mm2        /* mm0 = is07 - is34 */			
+        paddsw		mm2, mm2		/* mm2 = is34 * 2    */	
+        
+        psubsw      mm6, IH(0)      /* mm6 = ip3 - ip4 */               
+        paddsw		mm2, mm0		/* mm2 = is07 + is34 */	
+
+        psubsw		mm1, mm3		/* mm1 = is12 - is56 */	
+        movq		TIRY, mm0		/* save is07-is34 */	
+
+        paddsw		mm3, mm3		/* mm3 = is56 * 2 */	
+        paddsw		mm3, mm1	    /* mm3 = is12 + is56 */
+        
+        psubsw      mm7, IH(2)      /* mm7 = ip5 -ip6 */
+        psubsw		mm5, mm7		/* mm5 = id12 - id56 */
+	    
+        paddsw		mm7, mm7		/* mm7 = id56 * 2 */		
+	    paddsw		mm7, mm5	    /* mm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 */
+/*---------------------------------------------------------*/
+        psubsw		mm2, mm3		/* mm2 = is0734 - is1256 */
+        paddsw		mm3, mm3		/* mm3 = is1256 * 2 */		
+
+        movq		mm0, mm2	    /* mm0 = is0734 - is1256 */
+        paddsw		mm3, mm2		/* mm3 = is0734 + is1256 */
+
+        pmulhw		mm0, C(4)	    /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		mm0, mm2		/* mm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncate mm0, now it is op[4] */
+       
+        movq        mm2, mm0      
+        psrlw       mm0, 15
+        
+        paddw       mm0, mm2
+        psraw       mm0, 1        
+
+        movq		OH(0), mm0		/*	op4, now mm0,mm2 are free */
+        movq		mm2, mm3		/* mm2 = is0734 + is1256 */
+            
+
+        movq		mm0, mm3		/* mm0 = is0734 + is1256 */
+        pmulhw		mm3, C(4)		/* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		mm2, 15			
+        paddw		mm3, mm0		/* mm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		mm3, mm2		/* Truncate mm3, now it is op[0] */     
+        movq        mm2, mm3
+
+        psrlw       mm3, 15
+        paddw       mm3, mm2
+        
+        psraw       mm3, 1
+        movq		OL(0), mm3		/* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 */
+/*---------------------------------------------------------*/
+ 	    movq		mm3, TIRY		/* mm3 = irot_input_y */
+        pmulhw		mm3, C(2)		/* mm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movq		mm2, TIRY		/* mm2 = irot_input_y */
+        movq		mm0, mm2		/* mm0 = irot_input_y */
+        
+        psrlw		mm2, 15		
+        paddw		mm3, mm0        /* mm3 = xC2S6 * irot_input_y */
+            
+        paddw       mm3, mm2		/* Truncated */
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        
+        
+        movq		mm2, mm5        /* mm2 = id12 - id56 */
+        pmulhw		mm0, C(6)		/* mm0 = xC6S2 * irot_input_x */
+            
+        psrlw		mm2, 15			
+        paddw		mm0, mm2		/* Truncated */
+        
+        paddsw		mm3, mm0		/* op[2] */
+        movq        mm0, mm3
+
+        psrlw       mm3, 15
+        paddw       mm3, mm0
+        
+        psraw       mm3, 1                
+        movq		OL(2), mm3		/* save op[2] */        
+        
+        movq		mm0, mm5		/* mm0 = id12 - id56 */
+        movq		mm2, mm5		/* mm0 = id12 - id56 */
+        
+        pmulhw		mm5, C(2)		/* mm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		mm2, 15		
+        
+        movq		mm3, TIRY		/* mm3 = irot_input_y */
+        paddw		mm5, mm0		/* mm5 = xC2S6 * irot_input_x */
+            
+        paddw		mm5, mm2		/* Truncated */
+        movq		mm2, mm3		/* mm2 = irot_input_y */	
+        
+        pmulhw		mm3, C(6)	    /* mm3 = xC6S2 * irot_input_y */
+        psrlw		mm2, 15        
+        
+        paddw		mm3, mm2		/* Truncated */
+        psubsw		mm3, mm5		/* mm3 = op[6] */
+
+        movq        mm5, mm3
+        psrlw       mm3,  15
+        
+        paddw       mm3, mm5
+        psraw       mm3, 1
+
+        movq		OH(2), mm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movq		mm0, C(4)       /* mm0 = xC4s4 */
+	    movq		mm2, mm1        /* mm2 = is12 - is56 */	
+	
+        movq		mm3, mm1        /* mm3 = is12 - is56 */	
+	    pmulhw		mm1, mm0		/* mm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		mm2, 15				
+	    paddw		mm1, mm3	    /* mm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		mm1, mm2        /* Truncate mm1, now it is icommon_product1 */
+	    movq		mm2, mm7        /* mm2 = id12 + id56 */
+	    
+        movq		mm3, mm7		/* mm3 = id12 + id56 */
+        pmulhw		mm7, mm0		/* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		mm2, 15		    /* For trucation */	
+	    paddw		mm7, mm3		/* mm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		mm7, mm2		/* Truncate mm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		mm0, mm0		/* Clear mm0 */
+	    psubsw		mm0, mm6		/* mm0 = - id34 */
+
+	    psubsw		mm0, mm7	    /* mm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
+	    paddsw		mm6, mm6	    /* mm6 = id34 * 2 */
+
+	    paddsw		mm6, mm0		/* mm6 = id34 - icommon_product2 = irot_input_x for 35 */
+	    psubsw		mm4, mm1		/* mm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		mm1, mm1		/* mm1 = icommon_product1 * 2 */	    
+        paddsw		mm1, mm4		/* mm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7              
+/*---------------------------------------------------------*/
+	    movq		mm7, C(1)       /* xC1S7 */
+        movq		mm2, mm1        /* mm2 = irot_input_x */
+        
+        movq		mm3, mm1;       /* mm3 = irot_input_x */
+        pmulhw		mm1, mm7		/* mm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movq		mm7, C(7)		/* xC7S1 */
+        psrlw		mm2, 15		    /* for trucation */		
+            
+        paddw		mm1, mm3		/* mm1 = xC1S7 * irot_input_x */
+        paddw		mm1, mm2		/* Trucated */
+            
+        pmulhw		mm3, mm7		/* mm3 = xC7S1 * irot_input_x */
+        paddw		mm3, mm2		/* Truncated */
+            
+        movq		mm5, mm0		/* mm5 = irot_input_y */	
+        movq	    mm2, mm0        /* mm2 = irot_input_y */	
+            
+        movq		mm7, C(1)       /* xC1S7 */			
+        pmulhw		mm0, mm7	    /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movq		mm7, C(7)		/* xC7S1 */	
+        psrlw		mm2, 15		    /* for trucation */	
+        
+        paddw		mm0, mm5		/* mm0 = xC1S7 * irot_input_y */
+        paddw		mm0, mm2		/* Truncated */
+        
+        pmulhw		mm5, mm7		/* mm5 = xC7S1 * irot_input_y */
+        paddw		mm5, mm2		/* Truncated */
+        
+        psubsw		mm1, mm5		/* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		mm3, mm0		/* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
+
+        movq        mm5, mm1
+        movq        mm0, mm3        
+
+        psrlw       mm1, 15
+        psrlw       mm3, 15
+
+        paddw       mm1, mm5
+        paddw       mm3, mm0
+
+        psraw       mm1, 1
+        psraw       mm3, 1
+            
+        movq		OL(1), mm1
+        movq		OH(3), mm3
+/*---------------------------------------------------------*/
+/* op3 and op5 
+/*---------------------------------------------------------*/
+	    movq		mm0, C(3)       /* xC3S5 */
+	    movq		mm1, C(5)       /* xC5S3 */
+
+	    movq		mm5,mm6         /* irot_input_x */
+	    movq		mm7,mm6         /* irot_input_x */
+
+	    movq		mm2,mm4         /* irot_input_y */
+	    movq		mm3,mm4         /* irot_input_y */
+
+	    pmulhw		mm4,mm0         /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		mm6,mm1		    /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		mm2,15          /* for trucation */
+	    psrlw		mm5,15          /* for trucation */
+
+	    paddw		mm4,mm3		    /* mm4 = xC3S5 * irot_input_x */
+	    paddw		mm6,mm7		    /* mm6 = xC5S3 * irot_input_y */
+
+	    paddw		mm4,mm2		    /* Truncated */
+	    paddw		mm6,mm5		    /* Truncated */
+
+	    psubsw		mm4,mm6		    /* op [3] */
+        movq        mm6,mm4
+
+        psrlw       mm4,15        
+        paddw       mm4,mm6
+
+        psraw       mm4,1
+        movq		OL(3),mm4		/* Save Op[3] */
+
+	    movq		mm4,mm3		    /* irot_input_y */
+	    movq		mm6,mm7		    /* irot_input_x */
+
+	    pmulhw		mm3,mm1		    /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		mm7,mm0		    /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		mm4,mm2         /* Trucated */
+	    paddw		mm6,mm5         /* Trucated */
+
+	    paddw		mm3,mm4		    /* mm3 = xC5S3 * irot_input_x */
+	    paddw		mm7,mm6		    /*  mm7 = xC3S5 * irot_input_y */
+
+	    paddw		mm3,mm7		    /* Op[5] */
+        movq        mm7,mm3
+
+        psrlw       mm3,15        
+        paddw       mm3,mm7
+
+        psraw       mm3,1
+        movq		OH(1),mm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 4x8 1-D FDCT                                     */        
+/*---------------------------------------------------------*/
+
+
+    }/* end of _asm code section */
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
new file mode 100644
index 00000000..13a67fa7
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
@@ -0,0 +1,810 @@
+/****************************************************************************
+ *
+ *   Module Title :     Fdctwmt.c
+ *
+ *   Description  :     Forward DCT optimized specifically for Intel  P4
+ *						processor
+ *
+ *   AUTHOR       :     YaoWu Xu
+ *
+ ***************************************************************************** 
+ *   Revision History
+ *	
+ *   1.00 YWX  03/11/02  Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+	
+
+__declspec(align(16)) static unsigned short TIRY[8];	/* fdct_WMT scratch row: holds is07 - is34 while op0/op4 are computed, reloaded for op2/op6; one copy shared by every call, so concurrent calls would race on it */
+
+__declspec(align(16)) static unsigned short WmtIdctConst[8 * 8] =	/* row k = round(65536 * cos(k*pi/16)) repeated in all 8 lanes; despite the "Idct" name it is the table fdct_WMT reads through C(k) */
+{
+    0,    0,    0,    0,    0,    0,    0,    0, 	/* row 0: never referenced by fdct_WMT */
+	64277,64277,64277,64277,64277,64277,64277,64277, 	/* C(1) = xC1S7; > 32767, so pmulhw sees it as negative and yields c*x - x */
+	60547,60547,60547,60547,60547,60547,60547,60547, 	/* C(2) = xC2S6; same c*x - x behaviour */
+	54491,54491,54491,54491,54491,54491,54491,54491, 	/* C(3) = xC3S5; same c*x - x behaviour */
+	46341,46341,46341,46341,46341,46341,46341,46341, 	/* C(4) = xC4S4; same c*x - x behaviour */
+	36410,36410,36410,36410,36410,36410,36410,36410, 	/* C(5) = xC5S3; same c*x - x behaviour */
+	25080,25080,25080,25080,25080,25080,25080,25080, 	/* C(6) = xC6S2; < 32768, pmulhw yields c*x directly */
+	12785,12785,12785,12785,12785,12785,12785,12785	/* C(7) = xC7S1; < 32768, pmulhw yields c*x directly */
+};
+
+ 
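+/**************************************************************************************
+ *
+ *		Function:		fdct_WMT
+ *		
+ *		Description:	2-D forward DCT of an 8x8 block of 16-bit values, done as two
+ *						passes of an 8x8 transpose followed by a 1-D forward DCT. 
+ *
+ *		Input:			InputData  - 8x8 block, read with aligned 16-byte loads
+ *
+ *		Output:			OutputData - 8x8 block of results, written with aligned 16-byte stores
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	InputData is overwritten with a doubled, transposed copy of itself.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */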
+void  fdct_WMT(short *InputData, short *OutputData)
+{
+	/* 2-D 8x8 forward DCT in two passes (8x8 word transpose + 1-D FDCT each); both buffers are accessed with movdqa, which needs 16-byte alignment. */
+	__asm 
+	{
+		mov		eax, InputData		/* eax -> input block; overwritten in place by the pass-1 transpose */
+		mov		ebx, OutputData		/* ebx -> output block; pass 2 transforms it in place */
+		lea		edx, WmtIdctConst	/* edx -> cosine table, one 16-byte row per constant */
+
+#define I(i) [eax + 16 * i ]	/* pass 1: i-th 16-byte row of InputData */
+#define O(i) [ebx + 16 * i ]	/* i-th 16-byte row of OutputData */
+#define C(i) [edx + 16 * i ]	/* i-th 16-byte row of WmtIdctConst */
+
+/******************************************************/
+/* Pass 1: double the input and do 8x8 Transpose      */
+/******************************************************/
+
+    	movdqa		xmm4, I(4)		/* xmm4=e7e6e5e4e3e2e1e0 */	
+        movdqa		xmm0, I(5)		/* xmm0=f7f6f5f4f3f2f1f0 */	
+        
+        psllw       xmm4, 1         /* every input row is doubled here; pass 2 halves every result */
+        psllw       xmm0, 1
+
+        movdqa		xmm5, xmm4		/* make a copy */			
+        punpcklwd	xmm4, xmm0		/* xmm4=f3e3f2e2f1e1f0e0 */	
+        
+        punpckhwd	xmm5, xmm0		/* xmm5=f7e7f6e6f5e5f4e4 */	
+        movdqa		xmm6, I(6)		/* xmm6=g7g6g5g4g3g2g1g0 */ 
+        
+        movdqa		xmm0, I(7)		/* xmm0=h7h6h5h4h3h2h1h0 */ 
+
+        psllw       xmm6, 1
+        psllw       xmm0, 1
+
+        movdqa		xmm7, xmm6		/* make a copy */			
+        
+        punpcklwd	xmm6, xmm0		/* xmm6=h3g3h2g2h1g1h0g0 */ 
+        punpckhwd	xmm7, xmm0		/* xmm7=h7g7h6g6h5g5h4g4 */ 
+        
+        movdqa		xmm3, xmm4		/* make a copy */			
+        punpckldq	xmm4, xmm6		/* xmm4=h1g1f1e1h0g0f0e0 */	
+        
+        punpckhdq	xmm3, xmm6		/* xmm3=h3g3f3e3h2g2f2e2 */	
+        movdqa		I(6), xmm3		/* spill h3g3f3e3h2g2f2e2 into the input buffer (row 6 was already loaded) */	
+        /* Free xmm6 */ 
+        movdqa		xmm6, xmm5		/* make a copy */			
+        punpckldq	xmm5, xmm7		/* xmm5=h5g5f5e5h4g4f4e4 */ 
+        
+        punpckhdq	xmm6, xmm7		/* xmm6=h7g7f7e7h6g6f6e6 */ 
+        movdqa		xmm0, I(0)		/* xmm0=a7a6a5a4a3a2a1a0 */	
+        /* Free xmm7 */ 
+        movdqa		xmm1, I(1)		/* xmm1=b7b6b5b4b3b2b1b0 */	
+
+        psllw       xmm0, 1
+        psllw       xmm1, 1
+        
+        movdqa		xmm7, xmm0		/* make a copy */			
+        
+        punpcklwd	xmm0, xmm1		/* xmm0=b3a3b2a2b1a1b0a0 */	
+        punpckhwd	xmm7, xmm1		/* xmm7=b7a7b6a6b5a5b4a4 */ 
+        /* Free xmm1 */ 
+        movdqa		xmm2, I(2)		/* xmm2=c7c6c5c4c3c2c1c0 */ 
+        movdqa		xmm3, I(3)	    /* xmm3=d7d6d5d4d3d2d1d0 */ 
+        
+        psllw       xmm2, 1
+        psllw       xmm3, 1
+
+        movdqa		xmm1, xmm2		/* make a copy */			
+        punpcklwd	xmm2, xmm3		/* xmm2=d3c3d2c2d1c1d0c0 */ 
+        
+        punpckhwd	xmm1, xmm3		/* xmm1=d7c7d6c6d5c5d4c4 */ 
+        movdqa		xmm3, xmm0		/* make a copy	*/			
+        
+        punpckldq	xmm0, xmm2		/* xmm0=d1c1b1a1d0c0b0a0 */ 
+        punpckhdq	xmm3, xmm2		/* xmm3=d3c3b3a3d2c2b2a2 */ 
+        /* Free xmm2 */ 
+        movdqa		xmm2, xmm7		/* make a copy */			
+        punpckldq	xmm2, xmm1		/* xmm2=d5c5b5a5d4c4b4a4 */	
+        
+        punpckhdq	xmm7, xmm1		/* xmm7=d7c7b7a7d6c6b6a6 */ 
+        movdqa		xmm1, xmm0		/* make a copy */			
+        
+        punpcklqdq	xmm0, xmm4		/* xmm0=h0g0f0e0d0c0b0a0 */	
+        punpckhqdq	xmm1, xmm4		/* xmm1=h1g1f1e1d1c1b1a1 */ 
+        
+        movdqa		I(0), xmm0		/* save I(0) */				
+        movdqa		I(1), xmm1		/* save I(1) */				
+        
+        movdqa		xmm0, I(6)		/* reload the spilled h3g3f3e3h2g2f2e2 */ 
+        movdqa		xmm1, xmm3		/* make a copy */			
+        
+        punpcklqdq	xmm1, xmm0		/* xmm1=h2g2f2e2d2c2b2a2 */ 
+        punpckhqdq	xmm3, xmm0		/* xmm3=h3g3f3e3d3c3b3a3 */	
+        
+        movdqa		xmm4, xmm2		/* make a copy */			
+        punpcklqdq	xmm4, xmm5		/* xmm4=h4g4f4e4d4c4b4a4 */	
+        
+        punpckhqdq	xmm2, xmm5		/* xmm2=h5g5f5e5d5c5b5a5 */	
+        movdqa		I(2), xmm1		/* save I(2) */				
+        
+        movdqa		I(3), xmm3		/* save I(3) */				
+        movdqa		I(4), xmm4		/* save I(4) */				
+        
+        movdqa		I(5), xmm2		/* save I(5) */				
+        movdqa		xmm5, xmm7		/* make a copy */			
+        
+        punpcklqdq	xmm5, xmm6		/* xmm5=h6g6f6e6d6c6b6a6 */	
+        punpckhqdq	xmm7, xmm6		/* xmm7=h7g7f7e7d7c7b7a7 */	
+        
+        movdqa		I(6), xmm5		/* save I(6) */				
+        movdqa		I(7), xmm7		/* save I(7) */				
+
+/******************************************************/
+/* Done with transpose - Let's do the forward DCT     */
+/******************************************************/
+
+        movdqa		xmm0, I(0)      /* xmm0 = ip0 */
+        movdqa      xmm1, I(1)      /* xmm1 = ip1 */
+
+        movdqa      xmm2, I(3)      /* xmm2 = ip3 */
+        movdqa      xmm3, I(5)      /* xmm3 = ip5 */
+
+        movdqa      xmm4, xmm0      /* xmm4 = ip0 */
+        movdqa      xmm5, xmm1      /* xmm5 = ip1 */      
+        
+        movdqa      xmm6, xmm2      /* xmm6 = ip3 */      
+        movdqa      xmm7, xmm3      /* xmm7 = ip5 */      	
+
+        paddsw      xmm0, I(7)      /* xmm0 = ip0 + ip7 */
+        paddsw      xmm1, I(2)      /* xmm1 = ip1 + ip2 */
+
+        paddsw      xmm2, I(4)      /* xmm2 = ip3 + ip4 */
+        paddsw      xmm3, I(6)      /* xmm3 = ip5 + ip6 */
+
+        psubsw      xmm4, I(7)      /* xmm4 = ip0 - ip7 */
+        psubsw      xmm5, I(2)      /* xmm5 = ip1 - ip2 */       
+
+        psubsw		xmm0, xmm2      /* xmm0 = is07 - is34 */			
+        paddsw		xmm2, xmm2		/* xmm2 = is34 * 2    */	
+        
+        psubsw      xmm6, I(4)      /* xmm6 = ip3 - ip4 */               
+        paddsw		xmm2, xmm0		/* xmm2 = is07 + is34 */	
+
+        psubsw		xmm1, xmm3		/* xmm1 = is12 - is56 */	
+        movdqa		TIRY, xmm0		/* save is07-is34 in the static scratch row */	
+
+        paddsw		xmm3, xmm3		/* xmm3 = is56 * 2 */	
+        paddsw		xmm3, xmm1	    /* xmm3 = is12 + is56 */
+        
+        psubsw      xmm7, I(6)      /* xmm7 = ip5 -ip6 */
+        psubsw		xmm5, xmm7		/* xmm5 = id12 - id56 */
+	    
+        paddsw		xmm7, xmm7		/* xmm7 = id56 * 2 */		
+	    paddsw		xmm7, xmm5	    /* xmm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 
+/*---------------------------------------------------------*/
+        psubsw		xmm2, xmm3		/* xmm2 = is0734 - is1256 */
+        paddsw		xmm3, xmm3		/* xmm3 = is1256 * 2 */		
+
+        movdqa		xmm0, xmm2	    /* xmm0 = is0734 - is1256 */
+        paddsw		xmm3, xmm2		/* xmm3 = is0734 + is1256 */
+
+        pmulhw		xmm0, C(4)	    /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		xmm0, xmm2		/* xmm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		xmm2, 15		/* xmm2 = sign bit of ( is0734 - is1256 ) */	
+        paddw		xmm0, xmm2		/* Truncate xmm0, now it is op[4] */
+            
+        movdqa		xmm2, xmm3		/* xmm2 = is0734 + is1256 */
+        movdqa		O(4), xmm0		/*	op4, now xmm0,xmm2 are free */
+            
+        movdqa		xmm0, xmm3		/* xmm0 = is0734 + is1256 */
+        pmulhw		xmm3, C(4)		/* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		xmm2, 15			
+        paddw		xmm3, xmm0		/* xmm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		xmm3, xmm2		/* Truncate xmm3, now it is op[0] */     
+        movdqa		O(0), xmm3		/* save op0 */
+/*---------------------------------------------------------*/
+/* op2 and op6 
+/*---------------------------------------------------------*/
+ 	    movdqa		xmm3, TIRY		/* xmm3 = irot_input_y */
+        pmulhw		xmm3, C(2)		/* xmm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movdqa		xmm2, TIRY		/* xmm2 = irot_input_y */
+        movdqa		xmm0, xmm2		/* xmm0 = irot_input_y */
+        
+        psrlw		xmm2, 15		
+        paddw		xmm3, xmm0      /* xmm3 = xC2S6 * irot_input_y */
+            
+        paddw       xmm3, xmm2		/* Truncated */
+        movdqa		xmm0, xmm5		/* xmm0 = id12 - id56 */
+        
+        
+        movdqa		xmm2, xmm5      /* xmm2 = id12 - id56 */
+        pmulhw		xmm0, C(6)		/* xmm0 = xC6S2 * irot_input_x */
+            
+        psrlw		xmm2, 15			
+        paddw		xmm0, xmm2		/* Truncated */
+        
+        paddsw		xmm3, xmm0		/* op[2] */
+        movdqa		O(2), xmm3		/* save op[2] */
+        
+        
+        movdqa		xmm0, xmm5		/* xmm0 = id12 - id56 */
+        movdqa		xmm2, xmm5		/* xmm2 = id12 - id56 */
+        
+        pmulhw		xmm5, C(2)		/* xmm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		xmm2, 15		
+        
+        movdqa		xmm3, TIRY		/* xmm3 = irot_input_y */
+        paddw		xmm5, xmm0		/* xmm5 = xC2S6 * irot_input_x */
+            
+        paddw		xmm5, xmm2		/* Truncated */
+        movdqa		xmm2, xmm3		/* xmm2 = irot_input_y */	
+        
+        pmulhw		xmm3, C(6)	    /* xmm3 = xC6S2 * irot_input_y */
+        psrlw		xmm2, 15        
+        
+        paddw		xmm3, xmm2		/* Truncated */
+        psubsw		xmm3, xmm5		/* xmm3 = op[6] */
+        
+        movdqa		O(6), xmm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movdqa		xmm0, C(4)      /* xmm0 = xC4s4 */
+	    movdqa		xmm2, xmm1      /* xmm2 = is12 - is56 */	
+	
+        movdqa		xmm3, xmm1      /* xmm3 = is12 - is56 */	
+	    pmulhw		xmm1, xmm0		/* xmm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		xmm2, 15				
+	    paddw		xmm1, xmm3	    /* xmm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		xmm1, xmm2      /* Truncate xmm1, now it is icommon_product1 */
+	    movdqa		xmm2, xmm7      /* xmm2 = id12 + id56 */
+	    
+        movdqa		xmm3, xmm7		/* xmm3 = id12 + id56 */
+        pmulhw		xmm7, xmm0		/* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		xmm2, 15		/* For truncation */	
+	    paddw		xmm7, xmm3		/* xmm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		xmm7, xmm2		/* Truncate xmm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		xmm0, xmm0		/* Clear xmm0 */
+	    psubsw		xmm0, xmm6		/* xmm0 = - id34 */
+
+	    psubsw		xmm0, xmm7	    /* xmm0 = - ( id34 + icommon_product2 ) = irot_input_y for 17 (already negated) */
+	    paddsw		xmm6, xmm6	    /* xmm6 = id34 * 2 */
+
+	    paddsw		xmm6, xmm0		/* xmm6 = id34 - icommon_product2 = irot_input_y for 35 */
+	    psubsw		xmm4, xmm1		/* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		xmm1, xmm1		/* xmm1 = icommon_product1 * 2 */	    
+        paddsw		xmm1, xmm4		/* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7              
+/*---------------------------------------------------------*/
+
+	    movdqa		xmm7, C(1)     /* xC1S7 */
+        movdqa		xmm2, xmm1      /* xmm2 = irot_input_x */
+        
+        movdqa		xmm3, xmm1;     /* xmm3 = irot_input_x */
+        pmulhw		xmm1, xmm7		/* xmm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movdqa		xmm7, C(7)		/* xC7S1 */
+        psrlw		xmm2, 15		/* for truncation */		
+            
+        paddw		xmm1, xmm3		/* xmm1 = xC1S7 * irot_input_x */
+        paddw		xmm1, xmm2		/* Truncated */
+            
+        pmulhw		xmm3, xmm7		/* xmm3 = xC7S1 * irot_input_x */
+        paddw		xmm3, xmm2		/* Truncated */
+            
+        movdqa		xmm5, xmm0		/* xmm5 = irot_input_y */	
+        movdqa	    xmm2, xmm0      /* xmm2 = irot_input_y */	
+            
+        movdqa		xmm7, C(1)      /* xC1S7 */			
+        pmulhw		xmm0, xmm7	    /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movdqa		xmm7, C(7)		/* xC7S1 */	
+        psrlw		xmm2, 15		/* for truncation */	
+        
+        paddw		xmm0, xmm5		/* xmm0 = xC1S7 * irot_input_y */
+        paddw		xmm0, xmm2		/* Truncated */
+        
+        pmulhw		xmm5, xmm7		/* xmm5 = xC7S1 * irot_input_y */
+        paddw		xmm5, xmm2		/* Truncated */
+        
+        psubsw		xmm1, xmm5		/* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		xmm3, xmm0		/* xmm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = op[7] (irot_input_y was negated above) */
+        
+        movdqa		O(1), xmm1
+        movdqa		O(7), xmm3
+/*---------------------------------------------------------*/
+/* op3 and op5 
+/*---------------------------------------------------------*/
+	    movdqa		xmm0, C(3)      /* xC3S5 */
+	    movdqa		xmm1, C(5)      /* xC5S3 */
+
+	    movdqa		xmm5,xmm6       /* irot_input_y */
+	    movdqa		xmm7,xmm6       /* irot_input_y */
+
+	    movdqa		xmm2,xmm4       /* irot_input_x */
+	    movdqa		xmm3,xmm4       /* irot_input_x */
+
+	    pmulhw		xmm4,xmm0       /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		xmm6,xmm1		/* xmm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		xmm2,15         /* for truncation */
+	    psrlw		xmm5,15         /* for truncation */
+
+	    paddw		xmm4,xmm3		/* xmm4 = xC3S5 * irot_input_x */
+	    paddw		xmm6,xmm7		/* xmm6 = xC5S3 * irot_input_y */
+
+	    paddw		xmm4,xmm2		/* Truncated */
+	    paddw		xmm6,xmm5		/* Truncated */
+
+	    psubsw		xmm4,xmm6		/* op [3] */
+	    movdqa		O(3),xmm4		/* Save Op[3] */
+
+	    movdqa		xmm4,xmm3		/* irot_input_x */
+	    movdqa		xmm6,xmm7		/* irot_input_y */
+
+	    pmulhw		xmm3,xmm1		/* xmm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		xmm7,xmm0		/* xmm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		xmm4,xmm2       /* Truncated */
+	    paddw		xmm6,xmm5       /* Truncated */
+
+	    paddw		xmm3,xmm4		/* xmm3 = xC5S3 * irot_input_x */
+	    paddw		xmm7,xmm6		/* xmm7 = xC3S5 * irot_input_y */
+
+	    paddw		xmm3,xmm7		/* Op[5] */
+	    movdqa		O(5),xmm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 8 1-D FDCT                                       */        
+/*---------------------------------------------------------*/
+#undef I
+#undef O
+#define I(i) [ebx + 16 * i ]	/* pass 2: I and O both address OutputData, so it is transformed in place */
+#define O(i) [ebx + 16 * i ]
+
+/******************************************************/
+/* Pass 2: Do 8x8 Transpose (no input scaling here)   */
+/******************************************************/
+
+    	movdqa		xmm4, I(4)		/* xmm4=e7e6e5e4e3e2e1e0 */	
+        movdqa		xmm0, I(5)		/* xmm0=f7f6f5f4f3f2f1f0 */	
+        
+        movdqa		xmm5, xmm4		/* make a copy */			
+        punpcklwd	xmm4, xmm0		/* xmm4=f3e3f2e2f1e1f0e0 */	
+        
+        punpckhwd	xmm5, xmm0		/* xmm5=f7e7f6e6f5e5f4e4 */	
+        movdqa		xmm6, I(6)		/* xmm6=g7g6g5g4g3g2g1g0 */ 
+        
+        movdqa		xmm0, I(7)		/* xmm0=h7h6h5h4h3h2h1h0 */ 
+        movdqa		xmm7, xmm6		/* make a copy */			
+        
+        punpcklwd	xmm6, xmm0		/* xmm6=h3g3h2g2h1g1h0g0 */ 
+        punpckhwd	xmm7, xmm0		/* xmm7=h7g7h6g6h5g5h4g4 */ 
+        
+        movdqa		xmm3, xmm4		/* make a copy */			
+        punpckldq	xmm4, xmm6		/* xmm4=h1g1f1e1h0g0f0e0 */	
+        
+        punpckhdq	xmm3, xmm6		/* xmm3=h3g3f3e3h2g2f2e2 */	
+        movdqa		I(6), xmm3		/* spill h3g3f3e3h2g2f2e2 (row 6 was already loaded) */	
+        /* Free xmm6 */ 
+        movdqa		xmm6, xmm5		/* make a copy */			
+        punpckldq	xmm5, xmm7		/* xmm5=h5g5f5e5h4g4f4e4 */ 
+        
+        punpckhdq	xmm6, xmm7		/* xmm6=h7g7f7e7h6g6f6e6 */ 
+        movdqa		xmm0, I(0)		/* xmm0=a7a6a5a4a3a2a1a0 */	
+        /* Free xmm7 */ 
+        movdqa		xmm1, I(1)		/* xmm1=b7b6b5b4b3b2b1b0 */	
+        movdqa		xmm7, xmm0		/* make a copy */			
+        
+        punpcklwd	xmm0, xmm1		/* xmm0=b3a3b2a2b1a1b0a0 */	
+        punpckhwd	xmm7, xmm1		/* xmm7=b7a7b6a6b5a5b4a4 */ 
+        /* Free xmm1 */ 
+        movdqa		xmm2, I(2)		/* xmm2=c7c6c5c4c3c2c1c0 */ 
+        movdqa		xmm3, I(3)	    /* xmm3=d7d6d5d4d3d2d1d0 */ 
+        
+        movdqa		xmm1, xmm2		/* make a copy */			
+        punpcklwd	xmm2, xmm3		/* xmm2=d3c3d2c2d1c1d0c0 */ 
+        
+        punpckhwd	xmm1, xmm3		/* xmm1=d7c7d6c6d5c5d4c4 */ 
+        movdqa		xmm3, xmm0		/* make a copy	*/			
+        
+        punpckldq	xmm0, xmm2		/* xmm0=d1c1b1a1d0c0b0a0 */ 
+        punpckhdq	xmm3, xmm2		/* xmm3=d3c3b3a3d2c2b2a2 */ 
+        /* Free xmm2 */ 
+        movdqa		xmm2, xmm7		/* make a copy */			
+        punpckldq	xmm2, xmm1		/* xmm2=d5c5b5a5d4c4b4a4 */	
+        
+        punpckhdq	xmm7, xmm1		/* xmm7=d7c7b7a7d6c6b6a6 */ 
+        movdqa		xmm1, xmm0		/* make a copy */			
+        
+        punpcklqdq	xmm0, xmm4		/* xmm0=h0g0f0e0d0c0b0a0 */	
+        punpckhqdq	xmm1, xmm4		/* xmm1=h1g1f1e1d1c1b1a1 */ 
+        
+        movdqa		I(0), xmm0		/* save I(0) */				
+        movdqa		I(1), xmm1		/* save I(1) */				
+        
+        movdqa		xmm0, I(6)		/* reload the spilled h3g3f3e3h2g2f2e2 */ 
+        movdqa		xmm1, xmm3		/* make a copy */			
+        
+        punpcklqdq	xmm1, xmm0		/* xmm1=h2g2f2e2d2c2b2a2 */ 
+        punpckhqdq	xmm3, xmm0		/* xmm3=h3g3f3e3d3c3b3a3 */	
+        
+        movdqa		xmm4, xmm2		/* make a copy */			
+        punpcklqdq	xmm4, xmm5		/* xmm4=h4g4f4e4d4c4b4a4 */	
+        
+        punpckhqdq	xmm2, xmm5		/* xmm2=h5g5f5e5d5c5b5a5 */	
+        movdqa		I(2), xmm1		/* save I(2) */				
+        
+        movdqa		I(3), xmm3		/* save I(3) */				
+        movdqa		I(4), xmm4		/* save I(4) */				
+        
+        movdqa		I(5), xmm2		/* save I(5) */				
+        movdqa		xmm5, xmm7		/* make a copy */			
+        
+        punpcklqdq	xmm5, xmm6		/* xmm5=h6g6f6e6d6c6b6a6 */	
+        punpckhqdq	xmm7, xmm6		/* xmm7=h7g7f7e7d7c7b7a7 */	
+        
+        movdqa		I(6), xmm5		/* save I(6) */				
+        movdqa		I(7), xmm7		/* save I(7) */				
+
+/******************************************************/
+/* Done with transpose - Let's do the forward DCT     */
+/******************************************************/
+
+        movdqa		xmm0, I(0)      /* xmm0 = ip0 */
+        movdqa      xmm1, I(1)      /* xmm1 = ip1 */
+
+        movdqa      xmm2, I(3)      /* xmm2 = ip3 */
+        movdqa      xmm3, I(5)      /* xmm3 = ip5 */
+
+        movdqa      xmm4, xmm0      /* xmm4 = ip0 */
+        movdqa      xmm5, xmm1      /* xmm5 = ip1 */      
+        
+        movdqa      xmm6, xmm2      /* xmm6 = ip3 */      
+        movdqa      xmm7, xmm3      /* xmm7 = ip5 */      	
+
+        paddsw      xmm0, I(7)      /* xmm0 = ip0 + ip7 */
+        paddsw      xmm1, I(2)      /* xmm1 = ip1 + ip2 */
+
+        paddsw      xmm2, I(4)      /* xmm2 = ip3 + ip4 */
+        paddsw      xmm3, I(6)      /* xmm3 = ip5 + ip6 */
+
+        psubsw      xmm4, I(7)      /* xmm4 = ip0 - ip7 */
+        psubsw      xmm5, I(2)      /* xmm5 = ip1 - ip2 */       
+
+        psubsw		xmm0, xmm2      /* xmm0 = is07 - is34 */			
+        paddsw		xmm2, xmm2		/* xmm2 = is34 * 2    */	
+        
+        psubsw      xmm6, I(4)      /* xmm6 = ip3 - ip4 */               
+        paddsw		xmm2, xmm0		/* xmm2 = is07 + is34 */	
+
+        psubsw		xmm1, xmm3		/* xmm1 = is12 - is56 */	
+        movdqa		TIRY, xmm0		/* save is07-is34 in the static scratch row */	
+
+        paddsw		xmm3, xmm3		/* xmm3 = is56 * 2 */	
+        paddsw		xmm3, xmm1	    /* xmm3 = is12 + is56 */
+        
+        psubsw      xmm7, I(6)      /* xmm7 = ip5 -ip6 */
+        psubsw		xmm5, xmm7		/* xmm5 = id12 - id56 */
+	    
+        paddsw		xmm7, xmm7		/* xmm7 = id56 * 2 */		
+	    paddsw		xmm7, xmm5	    /* xmm7 = id12 + id56 */
+/*---------------------------------------------------------*/
+/* op0 and op4 
+/*---------------------------------------------------------*/
+#if 0        /* disabled alternative: multiplies is0734 and is1256 by xC4S4 before the add/subtract, and does not halve the results */
+        movdqa      xmm0, xmm2      /* xmm0 =xmm2= is0734  */
+        pmulhw      xmm2, C(4)      /* xC4S4 * is0734 - is0734 */
+    
+        paddw       xmm2, xmm0      /* XC4S4 * is0734  */
+        movdqa      xmm0, xmm3      /* xmm0 =xmm3= is1256 */
+
+        pmulhw      xmm3, C(4)      /* xC4S4 * is1256 - is1256 */
+        paddw       xmm3, xmm0      /* xC4S4 * is1256 */
+
+
+        movdqa      xmm0, xmm2      
+        paddsw      xmm2, xmm3      /* xC4S4 * ( is0734 +is1256 ) */
+
+        psubsw      xmm0, xmm3      /* xC4S4 * ( is0734 -is1256 ) */
+        movdqa      xmm3, xmm2      
+        
+        psrlw       xmm2, 15        
+        paddsw      xmm3, xmm2      
+
+        movdqa      xmm2, xmm0
+        movdqa      O(0), xmm3
+        
+        psrlw       xmm0, 15
+        paddsw      xmm2, xmm0
+
+        movdqa      O(4), xmm2
+
+
+#else
+
+
+        psubsw		xmm2, xmm3		/* xmm2 = is0734 - is1256 */
+        paddsw		xmm3, xmm3		/* xmm3 = is1256 * 2 */		
+
+        movdqa		xmm0, xmm2	    /* xmm0 = is0734 - is1256 */
+        paddsw		xmm3, xmm2		/* xmm3 = is0734 + is1256 */
+
+        pmulhw		xmm0, C(4)	    /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		xmm0, xmm2		/* xmm0 = xC4S4 * ( is0734 - is1256 ) */
+
+        psrlw		xmm2, 15			
+        paddw		xmm0, xmm2		/* Truncate xmm0, now it is op[4] */
+        
+        movdqa      xmm2, xmm0      /* keep op[4] while its sign bit is extracted */
+        psrlw       xmm0, 15
+        
+        paddw       xmm0, xmm2
+        psraw       xmm0, 1        /* ( op[4] + sign bit ) >> 1: halve, rounding toward zero */
+        
+        movdqa		O(4), xmm0		/*	op4, now xmm0,xmm2 are free */        
+        movdqa		xmm2, xmm3		/* xmm2 = is0734 + is1256 */
+        
+            
+        movdqa		xmm0, xmm3		/* xmm0 = is0734 + is1256 */
+        pmulhw		xmm3, C(4)		/* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */            
+        
+        psrlw		xmm2, 15			
+        paddw		xmm3, xmm0		/* xmm3 = xC4S4 * ( is0734 +is1256 ) */
+        
+        paddw		xmm3, xmm2		/* Truncate xmm3, now it is op[0] */     
+        movdqa      xmm2, xmm3
+
+        psrlw       xmm3, 15
+        paddw       xmm3, xmm2
+        
+        psraw       xmm3, 1         /* ( op[0] + sign bit ) >> 1: halve, rounding toward zero */
+        movdqa		O(0), xmm3		/* save op0 */
+#endif
+/*---------------------------------------------------------*/
+/* op2 and op6 
+/*---------------------------------------------------------*/
+ 	    movdqa		xmm3, TIRY		/* xmm3 = irot_input_y */
+        pmulhw		xmm3, C(2)		/* xmm3 = xC2S6 * irot_input_y - irot_input_y */
+        
+        movdqa		xmm2, TIRY		/* xmm2 = irot_input_y */
+        movdqa		xmm0, xmm2		/* xmm0 = irot_input_y */
+        
+        psrlw		xmm2, 15		
+        paddw		xmm3, xmm0      /* xmm3 = xC2S6 * irot_input_y */
+            
+        paddw       xmm3, xmm2		/* Truncated */
+        movdqa		xmm0, xmm5		/* xmm0 = id12 - id56 */
+        
+        
+        movdqa		xmm2, xmm5      /* xmm2 = id12 - id56 */
+        pmulhw		xmm0, C(6)		/* xmm0 = xC6S2 * irot_input_x */
+            
+        psrlw		xmm2, 15			
+        paddw		xmm0, xmm2		/* Truncated */
+        
+        paddsw		xmm3, xmm0		/* op[2] */
+        movdqa      xmm0, xmm3
+
+        psrlw       xmm3, 15
+        paddw       xmm3, xmm0
+
+        psraw       xmm3, 1         /* ( op[2] + sign bit ) >> 1: halve, rounding toward zero */
+        movdqa		O(2), xmm3		/* save op[2] */
+        
+        
+        movdqa		xmm0, xmm5		/* xmm0 = id12 - id56 */
+        movdqa		xmm2, xmm5		/* xmm2 = id12 - id56 */
+        
+        pmulhw		xmm5, C(2)		/* xmm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		xmm2, 15		
+        
+        movdqa		xmm3, TIRY		/* xmm3 = irot_input_y */
+        paddw		xmm5, xmm0		/* xmm5 = xC2S6 * irot_input_x */
+            
+        paddw		xmm5, xmm2		/* Truncated */
+        movdqa		xmm2, xmm3		/* xmm2 = irot_input_y */	
+        
+        pmulhw		xmm3, C(6)	    /* xmm3 = xC6S2 * irot_input_y */
+        psrlw		xmm2, 15        
+        
+        paddw		xmm3, xmm2		/* Truncated */
+        psubsw		xmm3, xmm5		/* xmm3 = op[6] */
+        
+        movdqa      xmm5, xmm3
+        psrlw       xmm3,  15
+        
+        paddw       xmm3, xmm5
+        psraw       xmm3, 1         /* ( op[6] + sign bit ) >> 1: halve, rounding toward zero */
+        
+        movdqa		O(6), xmm3		
+/*-----------------------------------------------------------------------*/
+/* icommon_product1, icommon_product2                                    */
+/*-----------------------------------------------------------------------*/
+	    movdqa		xmm0, C(4)      /* xmm0 = xC4s4 */
+	    movdqa		xmm2, xmm1      /* xmm2 = is12 - is56 */	
+	
+        movdqa		xmm3, xmm1      /* xmm3 = is12 - is56 */	
+	    pmulhw		xmm1, xmm0		/* xmm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+	
+        psrlw		xmm2, 15				
+	    paddw		xmm1, xmm3	    /* xmm1 = xC4S4 * ( is12 - is56 ) */
+	    
+        paddw		xmm1, xmm2      /* Truncate xmm1, now it is icommon_product1 */
+	    movdqa		xmm2, xmm7      /* xmm2 = id12 + id56 */
+	    
+        movdqa		xmm3, xmm7		/* xmm3 = id12 + id56 */
+        pmulhw		xmm7, xmm0		/* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+	
+        psrlw		xmm2, 15		/* For truncation */	
+	    paddw		xmm7, xmm3		/* xmm7 = xC4S4 * ( id12 + id56 ) */
+
+	    paddw		xmm7, xmm2		/* Truncate xmm7, now it is icommon_product2 */
+/*---------------------------------------------------------*/
+	    pxor		xmm0, xmm0		/* Clear xmm0 */
+	    psubsw		xmm0, xmm6		/* xmm0 = - id34 */
+
+	    psubsw		xmm0, xmm7	    /* xmm0 = - ( id34 + icommon_product2 ) = irot_input_y for 17 (already negated) */
+	    paddsw		xmm6, xmm6	    /* xmm6 = id34 * 2 */
+
+	    paddsw		xmm6, xmm0		/* xmm6 = id34 - icommon_product2 = irot_input_y for 35 */
+	    psubsw		xmm4, xmm1		/* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
+
+	    paddsw		xmm1, xmm1		/* xmm1 = icommon_product1 * 2 */	    
+        paddsw		xmm1, xmm4		/* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
+
+/*---------------------------------------------------------*/
+/* op1 and op7              
+/*---------------------------------------------------------*/
+
+	    movdqa		xmm7, C(1)     /* xC1S7 */
+        movdqa		xmm2, xmm1      /* xmm2 = irot_input_x */
+        
+        movdqa		xmm3, xmm1;     /* xmm3 = irot_input_x */
+        pmulhw		xmm1, xmm7		/* xmm1 = xC1S7 * irot_input_x - irot_input_x */
+            
+        movdqa		xmm7, C(7)		/* xC7S1 */
+        psrlw		xmm2, 15		/* for truncation */		
+            
+        paddw		xmm1, xmm3		/* xmm1 = xC1S7 * irot_input_x */
+        paddw		xmm1, xmm2		/* Truncated */
+            
+        pmulhw		xmm3, xmm7		/* xmm3 = xC7S1 * irot_input_x */
+        paddw		xmm3, xmm2		/* Truncated */
+            
+        movdqa		xmm5, xmm0		/* xmm5 = irot_input_y */	
+        movdqa	    xmm2, xmm0      /* xmm2 = irot_input_y */	
+            
+        movdqa		xmm7, C(1)      /* xC1S7 */			
+        pmulhw		xmm0, xmm7	    /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
+        
+        movdqa		xmm7, C(7)		/* xC7S1 */	
+        psrlw		xmm2, 15		/* for truncation */	
+        
+        paddw		xmm0, xmm5		/* xmm0 = xC1S7 * irot_input_y */
+        paddw		xmm0, xmm2		/* Truncated */
+        
+        pmulhw		xmm5, xmm7		/* xmm5 = xC7S1 * irot_input_y */
+        paddw		xmm5, xmm2		/* Truncated */
+        
+        psubsw		xmm1, xmm5		/* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
+        paddsw		xmm3, xmm0		/* xmm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = op[7] (irot_input_y was negated above) */
+
+        movdqa      xmm5, xmm1
+        movdqa      xmm0, xmm3
+
+        psrlw       xmm1, 15
+        psrlw       xmm3, 15
+
+        paddw       xmm1, xmm5
+        paddw       xmm3, xmm0
+
+        psraw       xmm1, 1         /* ( op[1] + sign bit ) >> 1: halve, rounding toward zero */
+        psraw       xmm3, 1         /* ( op[7] + sign bit ) >> 1: halve, rounding toward zero */
+
+        
+        movdqa		O(1), xmm1
+        movdqa		O(7), xmm3
+/*---------------------------------------------------------*/
+/* op3 and op5 
+/*---------------------------------------------------------*/
+	    movdqa		xmm0, C(3)      /* xC3S5 */
+	    movdqa		xmm1, C(5)      /* xC5S3 */
+
+	    movdqa		xmm5,xmm6       /* irot_input_y */
+	    movdqa		xmm7,xmm6       /* irot_input_y */
+
+	    movdqa		xmm2,xmm4       /* irot_input_x */
+	    movdqa		xmm3,xmm4       /* irot_input_x */
+
+	    pmulhw		xmm4,xmm0       /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
+	    pmulhw		xmm6,xmm1		/* xmm6 = xC5S3 * irot_input_y - irot_input_y */
+
+	    psrlw		xmm2,15         /* for truncation */
+	    psrlw		xmm5,15         /* for truncation */
+
+	    paddw		xmm4,xmm3		/* xmm4 = xC3S5 * irot_input_x */
+	    paddw		xmm6,xmm7		/* xmm6 = xC5S3 * irot_input_y */
+
+	    paddw		xmm4,xmm2		/* Truncated */
+	    paddw		xmm6,xmm5		/* Truncated */
+
+	    psubsw		xmm4,xmm6		/* op [3] */
+        movdqa      xmm6,xmm4
+
+        psrlw       xmm4,15        
+        paddw       xmm4,xmm6
+
+        psraw       xmm4,1          /* ( op[3] + sign bit ) >> 1: halve, rounding toward zero */
+	    movdqa		O(3),xmm4		/* Save Op[3] */
+
+	    movdqa		xmm4,xmm3		/* irot_input_x */
+	    movdqa		xmm6,xmm7		/* irot_input_y */
+
+	    pmulhw		xmm3,xmm1		/* xmm3 = xC5S3 * irot_input_x - irot_input_x */
+	    pmulhw		xmm7,xmm0		/* xmm7 = xC3S5 * irot_input_y - irot_input_y */
+
+	    paddw		xmm4,xmm2       /* Truncated */
+	    paddw		xmm6,xmm5       /* Truncated */
+
+	    paddw		xmm3,xmm4		/* xmm3 = xC5S3 * irot_input_x */
+	    paddw		xmm7,xmm6		/* xmm7 = xC3S5 * irot_input_y */
+
+	    paddw		xmm3,xmm7		/* Op[5] */        
+        movdqa      xmm7,xmm3
+
+        psrlw       xmm3,15        
+        paddw       xmm3,xmm7
+
+        psraw       xmm3,1          /* ( op[5] + sign bit ) >> 1: halve, rounding toward zero */
+	    movdqa		O(5),xmm3		/* Save Op[5] */
+/*---------------------------------------------------------*/
+/* End of 8 1-D FDCT                                       */        
+/*---------------------------------------------------------*/
+
+    }/* end of _asm code section */
+}
+
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
new file mode 100644
index 00000000..f424c13e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
@@ -0,0 +1,1053 @@
+/****************************************************************************
+ *
+ *   Module Title :     newLoopTest_asm.c 
+ *
+ *   Description  :     Codec specific functions
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *   1.02 YWX 03-Nov-00 Changed confusing variable name
+ *   1.01 YWX 02-Nov-00 Added the set of functions
+ *   1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */ 
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+
+#define STRICT              /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))   /* NOTE: the selected argument is evaluated twice; do not pass expressions with side effects */
+#define FILTER_WEIGHT 128                      /* fixed-point unity: equals 1 << FILTER_SHIFT */
+#define FILTER_SHIFT  7                        /* right shift applied to the filter accumulators (divide by 128) */
+
+extern void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+
+static __declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64};   /* rounding term (half of 128) added to every 16-bit lane before the >> FILTER_SHIFT */
+
+
+__declspec(align(16)) INT16  BilinearFilters_mmx[8][16] = 
+{   /* row i holds eight copies of (128 - 16*i) followed by eight copies of 16*i, so the two weights of a row sum to 128 */
+{ 128,128,128,128,128,128,128,128,    0,  0, 0,   0,  0,  0,  0,  0 },
+{ 112,112,112,112,112,112,112,112,   16, 16, 16, 16, 16, 16, 16, 16 },
+{  96, 96, 96, 96, 96, 96, 96, 96,   32, 32, 32, 32, 32, 32, 32, 32 },
+{  80, 80, 80, 80, 80, 80, 80, 80,   48, 48, 48, 48, 48, 48, 48, 48 },
+{  64, 64, 64, 64, 64, 64, 64, 64,   64, 64, 64, 64, 64, 64, 64, 64 },
+{  48, 48, 48, 48, 48, 48, 48, 48,   80, 80, 80, 80, 80, 80, 80, 80 },
+{  32, 32, 32, 32, 32, 32, 32, 32,   96, 96, 96, 96, 96, 96, 96, 96 },
+{  16, 16, 16, 16, 16, 16, 16, 16,  112,112,112,112,112,112,112,112 }
+};
+
+__declspec(align(16)) INT16  BicubicFilters_mmx[17][8][32] = 
+{
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,  122,122,122,122,122,122,122,122,    9,  9,  9,  9,  9,  9,  9,  9,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,  109,109,109,109,109,109,109,109,   24, 24, 24, 24, 24, 24, 24, 24,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   91, 91, 91, 91, 91, 91, 91, 91,   45, 45, 45, 45, 45, 45, 45, 45,   -3, -3, -3, -3, -3, -3, -3, -3, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   68, 68, 68, 68, 68, 68, 68, 68,   68, 68, 68, 68, 68, 68, 68, 68,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,   45, 45, 45, 45, 45, 45, 45, 45,   91, 91, 91, 91, 91, 91, 91, 91,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   24, 24, 24, 24, 24, 24, 24, 24,  109,109,109,109,109,109,109,109,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        {   0,  0,  0,  0,  0,  0,  0,  0,    9,  9,  9,  9,  9,  9,  9,  9,  122,122,122,122,122,122,122,122,   -3, -3, -3, -3, -3, -3, -3, -3, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,  124,124,124,124,124,124,124,124,    9,  9,  9,  9,  9,  9,  9,  9,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,  110,110,110,110,110,110,110,110,   25, 25, 25, 25, 25, 25, 25, 25,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   91, 91, 91, 91, 91, 91, 91, 91,   46, 46, 46, 46, 46, 46, 46, 46,   -3, -3, -3, -3, -3, -3, -3, -3, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   69, 69, 69, 69, 69, 69, 69, 69,   69, 69, 69, 69, 69, 69, 69, 69,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,   46, 46, 46, 46, 46, 46, 46, 46,   91, 91, 91, 91, 91, 91, 91, 91,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   25, 25, 25, 25, 25, 25, 25, 25,  110,110,110,110,110,110,110,110,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,    9,  9,  9,  9,  9,  9,  9,  9,  124,124,124,124,124,124,124,124,   -4, -4, -4, -4, -4, -4, -4, -4, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,  123,123,123,123,123,123,123,123,   10, 10, 10, 10, 10, 10, 10, 10,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,  110,110,110,110,110,110,110,110,   26, 26, 26, 26, 26, 26, 26, 26,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,   92, 92, 92, 92, 92, 92, 92, 92,   47, 47, 47, 47, 47, 47, 47, 47,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   70, 70, 70, 70, 70, 70, 70, 70,   70, 70, 70, 70, 70, 70, 70, 70,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   47, 47, 47, 47, 47, 47, 47, 47,   92, 92, 92, 92, 92, 92, 92, 92,   -7, -7, -7, -7, -7, -7, -7, -7, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   26, 26, 26, 26, 26, 26, 26, 26,  110,110,110,110,110,110,110,110,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   10, 10, 10, 10, 10, 10, 10, 10,  123,123,123,123,123,123,123,123,   -4, -4, -4, -4, -4, -4, -4, -4, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,  124,124,124,124,124,124,124,124,   10, 10, 10, 10, 10, 10, 10, 10,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,  110,110,110,110,110,110,110,110,   27, 27, 27, 27, 27, 27, 27, 27,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,   91, 91, 91, 91, 91, 91, 91, 91,   48, 48, 48, 48, 48, 48, 48, 48,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   70, 70, 70, 70, 70, 70, 70, 70,   70, 70, 70, 70, 70, 70, 70, 70,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   48, 48, 48, 48, 48, 48, 48, 48,   92, 92, 92, 92, 92, 92, 92, 92,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   27, 27, 27, 27, 27, 27, 27, 27,  110,110,110,110,110,110,110,110,   -7, -7, -7, -7, -7, -7, -7, -7, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   10, 10, 10, 10, 10, 10, 10, 10,  124,124,124,124,124,124,124,124,   -5, -5, -5, -5, -5, -5, -5, -5, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,  124,124,124,124,124,124,124,124,   11, 11, 11, 11, 11, 11, 11, 11,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,  111,111,111,111,111,111,111,111,   28, 28, 28, 28, 28, 28, 28, 28,   -3, -3, -3, -3, -3, -3, -3, -3, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,   92, 92, 92, 92, 92, 92, 92, 92,   49, 49, 49, 49, 49, 49, 49, 49,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,   71, 71, 71, 71, 71, 71, 71, 71,   71, 71, 71, 71, 71, 71, 71, 71,   -7, -7, -7, -7, -7, -7, -7, -7, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   49, 49, 49, 49, 49, 49, 49, 49,   92, 92, 92, 92, 92, 92, 92, 92,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,   28, 28, 28, 28, 28, 28, 28, 28,  111,111,111,111,111,111,111,111,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   11, 11, 11, 11, 11, 11, 11, 11,  124,124,124,124,124,124,124,124,   -6, -6, -6, -6, -6, -6, -6, -6, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,  123,123,123,123,123,123,123,123,   12, 12, 12, 12, 12, 12, 12, 12,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,  111,111,111,111,111,111,111,111,   29, 29, 29, 29, 29, 29, 29, 29,   -3, -3, -3, -3, -3, -3, -3, -3, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,   93, 93, 93, 93, 93, 93, 93, 93,   50, 50, 50, 50, 50, 50, 50, 50,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,   72, 72, 72, 72, 72, 72, 72, 72,   72, 72, 72, 72, 72, 72, 72, 72,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   50, 50, 50, 50, 50, 50, 50, 50,   93, 93, 93, 93, 93, 93, 93, 93,   -9, -9, -9, -9, -9, -9, -9, -9, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,   29, 29, 29, 29, 29, 29, 29, 29,  111,111,111,111,111,111,111,111,   -9, -9, -9, -9, -9, -9, -9, -9, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   12, 12, 12, 12, 12, 12, 12, 12,  123,123,123,123,123,123,123,123,   -6, -6, -6, -6, -6, -6, -6, -6, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,  124,124,124,124,124,124,124,124,   12, 12, 12, 12, 12, 12, 12, 12,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,  111,111,111,111,111,111,111,111,   30, 30, 30, 30, 30, 30, 30, 30,   -3, -3, -3, -3, -3, -3, -3, -3, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,   93, 93, 93, 93, 93, 93, 93, 93,   51, 51, 51, 51, 51, 51, 51, 51,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,   73, 73, 73, 73, 73, 73, 73, 73,   73, 73, 73, 73, 73, 73, 73, 73,   -9, -9, -9, -9, -9, -9, -9, -9, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   51, 51, 51, 51, 51, 51, 51, 51,   93, 93, 93, 93, 93, 93, 93, 93,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        {  -3, -3, -3, -3, -3, -3, -3, -3,   30, 30, 30, 30, 30, 30, 30, 30,  111,111,111,111,111,111,111,111,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   12, 12, 12, 12, 12, 12, 12, 12,  124,124,124,124,124,124,124,124,   -7, -7, -7, -7, -7, -7, -7, -7, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,  123,123,123,123,123,123,123,123,   13, 13, 13, 13, 13, 13, 13, 13,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,  112,112,112,112,112,112,112,112,   31, 31, 31, 31, 31, 31, 31, 31,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,   94, 94, 94, 94, 94, 94, 94, 94,   52, 52, 52, 52, 52, 52, 52, 52,   -7, -7, -7, -7, -7, -7, -7, -7, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,   74, 74, 74, 74, 74, 74, 74, 74,   74, 74, 74, 74, 74, 74, 74, 74,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,   52, 52, 52, 52, 52, 52, 52, 52,   94, 94, 94, 94, 94, 94, 94, 94,  -11,-11,-11,-11,-11,-11,-11,-11, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   31, 31, 31, 31, 31, 31, 31, 31,  112,112,112,112,112,112,112,112,  -11,-11,-11,-11,-11,-11,-11,-11, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   13, 13, 13, 13, 13, 13, 13, 13,  123,123,123,123,123,123,123,123,   -7, -7, -7, -7, -7, -7, -7, -7, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,  124,124,124,124,124,124,124,124,   13, 13, 13, 13, 13, 13, 13, 13,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -12,-12,-12,-12,-12,-12,-12,-12,  112,112,112,112,112,112,112,112,   32, 32, 32, 32, 32, 32, 32, 32,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        { -12,-12,-12,-12,-12,-12,-12,-12,   94, 94, 94, 94, 94, 94, 94, 94,   53, 53, 53, 53, 53, 53, 53, 53,   -7, -7, -7, -7, -7, -7, -7, -7, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,   74, 74, 74, 74, 74, 74, 74, 74,   74, 74, 74, 74, 74, 74, 74, 74,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        {  -7, -7, -7, -7, -7, -7, -7, -7,   53, 53, 53, 53, 53, 53, 53, 53,   94, 94, 94, 94, 94, 94, 94, 94,  -12,-12,-12,-12,-12,-12,-12,-12, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   32, 32, 32, 32, 32, 32, 32, 32,  112,112,112,112,112,112,112,112,  -12,-12,-12,-12,-12,-12,-12,-12, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   13, 13, 13, 13, 13, 13, 13, 13,  124,124,124,124,124,124,124,124,   -8, -8, -8, -8, -8, -8, -8, -8, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,  124,124,124,124,124,124,124,124,   14, 14, 14, 14, 14, 14, 14, 14,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -13,-13,-13,-13,-13,-13,-13,-13,  112,112,112,112,112,112,112,112,   33, 33, 33, 33, 33, 33, 33, 33,   -4, -4, -4, -4, -4, -4, -4, -4, },
+        { -13,-13,-13,-13,-13,-13,-13,-13,   95, 95, 95, 95, 95, 95, 95, 95,   54, 54, 54, 54, 54, 54, 54, 54,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,   75, 75, 75, 75, 75, 75, 75, 75,   75, 75, 75, 75, 75, 75, 75, 75,  -11,-11,-11,-11,-11,-11,-11,-11, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,   54, 54, 54, 54, 54, 54, 54, 54,   95, 95, 95, 95, 95, 95, 95, 95,  -13,-13,-13,-13,-13,-13,-13,-13, },
+        {  -4, -4, -4, -4, -4, -4, -4, -4,   33, 33, 33, 33, 33, 33, 33, 33,  112,112,112,112,112,112,112,112,  -13,-13,-13,-13,-13,-13,-13,-13, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   14, 14, 14, 14, 14, 14, 14, 14,  124,124,124,124,124,124,124,124,   -9, -9, -9, -9, -9, -9, -9, -9, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,  123,123,123,123,123,123,123,123,   15, 15, 15, 15, 15, 15, 15, 15,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -14,-14,-14,-14,-14,-14,-14,-14,  113,113,113,113,113,113,113,113,   34, 34, 34, 34, 34, 34, 34, 34,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        { -14,-14,-14,-14,-14,-14,-14,-14,   95, 95, 95, 95, 95, 95, 95, 95,   55, 55, 55, 55, 55, 55, 55, 55,   -8, -8, -8, -8, -8, -8, -8, -8, },
+        { -12,-12,-12,-12,-12,-12,-12,-12,   76, 76, 76, 76, 76, 76, 76, 76,   76, 76, 76, 76, 76, 76, 76, 76,  -12,-12,-12,-12,-12,-12,-12,-12, },
+        {  -8, -8, -8, -8, -8, -8, -8, -8,   55, 55, 55, 55, 55, 55, 55, 55,   95, 95, 95, 95, 95, 95, 95, 95,  -14,-14,-14,-14,-14,-14,-14,-14, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   34, 34, 34, 34, 34, 34, 34, 34,  112,112,112,112,112,112,112,112,  -13,-13,-13,-13,-13,-13,-13,-13, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   15, 15, 15, 15, 15, 15, 15, 15,  123,123,123,123,123,123,123,123,   -9, -9, -9, -9, -9, -9, -9, -9, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,  124,124,124,124,124,124,124,124,   15, 15, 15, 15, 15, 15, 15, 15,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -14,-14,-14,-14,-14,-14,-14,-14,  113,113,113,113,113,113,113,113,   34, 34, 34, 34, 34, 34, 34, 34,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        { -15,-15,-15,-15,-15,-15,-15,-15,   96, 96, 96, 96, 96, 96, 96, 96,   56, 56, 56, 56, 56, 56, 56, 56,   -9, -9, -9, -9, -9, -9, -9, -9, },
+        { -13,-13,-13,-13,-13,-13,-13,-13,   77, 77, 77, 77, 77, 77, 77, 77,   77, 77, 77, 77, 77, 77, 77, 77,  -13,-13,-13,-13,-13,-13,-13,-13, },
+        {  -9, -9, -9, -9, -9, -9, -9, -9,   56, 56, 56, 56, 56, 56, 56, 56,   96, 96, 96, 96, 96, 96, 96, 96,  -15,-15,-15,-15,-15,-15,-15,-15, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   34, 34, 34, 34, 34, 34, 34, 34,  113,113,113,113,113,113,113,113,  -14,-14,-14,-14,-14,-14,-14,-14, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   15, 15, 15, 15, 15, 15, 15, 15,  124,124,124,124,124,124,124,124,  -10,-10,-10,-10,-10,-10,-10,-10, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,  123,123,123,123,123,123,123,123,   16, 16, 16, 16, 16, 16, 16, 16,   -1, -1, -1, -1, -1, -1, -1, -1, },
+        { -15,-15,-15,-15,-15,-15,-15,-15,  113,113,113,113,113,113,113,113,   35, 35, 35, 35, 35, 35, 35, 35,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        { -16,-16,-16,-16,-16,-16,-16,-16,   98, 98, 98, 98, 98, 98, 98, 98,   56, 56, 56, 56, 56, 56, 56, 56,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        { -14,-14,-14,-14,-14,-14,-14,-14,   78, 78, 78, 78, 78, 78, 78, 78,   78, 78, 78, 78, 78, 78, 78, 78,  -14,-14,-14,-14,-14,-14,-14,-14, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,   56, 56, 56, 56, 56, 56, 56, 56,   98, 98, 98, 98, 98, 98, 98, 98,  -16,-16,-16,-16,-16,-16,-16,-16, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   35, 35, 35, 35, 35, 35, 35, 35,  113,113,113,113,113,113,113,113,  -15,-15,-15,-15,-15,-15,-15,-15, },
+        {  -1, -1, -1, -1, -1, -1, -1, -1,   16, 16, 16, 16, 16, 16, 16, 16,  123,123,123,123,123,123,123,123,  -10,-10,-10,-10,-10,-10,-10,-10, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,  124,124,124,124,124,124,124,124,   17, 17, 17, 17, 17, 17, 17, 17,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        { -16,-16,-16,-16,-16,-16,-16,-16,  113,113,113,113,113,113,113,113,   36, 36, 36, 36, 36, 36, 36, 36,   -5, -5, -5, -5, -5, -5, -5, -5, },
+        { -17,-17,-17,-17,-17,-17,-17,-17,   98, 98, 98, 98, 98, 98, 98, 98,   57, 57, 57, 57, 57, 57, 57, 57,  -10,-10,-10,-10,-10,-10,-10,-10, },
+        { -14,-14,-14,-14,-14,-14,-14,-14,   78, 78, 78, 78, 78, 78, 78, 78,   78, 78, 78, 78, 78, 78, 78, 78,  -14,-14,-14,-14,-14,-14,-14,-14, },
+        { -10,-10,-10,-10,-10,-10,-10,-10,   57, 57, 57, 57, 57, 57, 57, 57,   98, 98, 98, 98, 98, 98, 98, 98,  -17,-17,-17,-17,-17,-17,-17,-17, },
+        {  -5, -5, -5, -5, -5, -5, -5, -5,   36, 36, 36, 36, 36, 36, 36, 36,  113,113,113,113,113,113,113,113,  -16,-16,-16,-16,-16,-16,-16,-16, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   17, 17, 17, 17, 17, 17, 17, 17,  124,124,124,124,124,124,124,124,  -11,-11,-11,-11,-11,-11,-11,-11, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        { -12,-12,-12,-12,-12,-12,-12,-12,  125,125,125,125,125,125,125,125,   17, 17, 17, 17, 17, 17, 17, 17,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        { -17,-17,-17,-17,-17,-17,-17,-17,  114,114,114,114,114,114,114,114,   37, 37, 37, 37, 37, 37, 37, 37,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        { -18,-18,-18,-18,-18,-18,-18,-18,   99, 99, 99, 99, 99, 99, 99, 99,   58, 58, 58, 58, 58, 58, 58, 58,  -11,-11,-11,-11,-11,-11,-11,-11, },
+        { -15,-15,-15,-15,-15,-15,-15,-15,   79, 79, 79, 79, 79, 79, 79, 79,   79, 79, 79, 79, 79, 79, 79, 79,  -15,-15,-15,-15,-15,-15,-15,-15, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,   58, 58, 58, 58, 58, 58, 58, 58,   99, 99, 99, 99, 99, 99, 99, 99,  -18,-18,-18,-18,-18,-18,-18,-18, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   37, 37, 37, 37, 37, 37, 37, 37,  114,114,114,114,114,114,114,114,  -17,-17,-17,-17,-17,-17,-17,-17, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   17, 17, 17, 17, 17, 17, 17, 17,  125,125,125,125,125,125,125,125,  -12,-12,-12,-12,-12,-12,-12,-12, },
+    },
+
+    {
+        {   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, },
+        { -12,-12,-12,-12,-12,-12,-12,-12,  124,124,124,124,124,124,124,124,   18, 18, 18, 18, 18, 18, 18, 18,   -2, -2, -2, -2, -2, -2, -2, -2, },
+        { -18,-18,-18,-18,-18,-18,-18,-18,  114,114,114,114,114,114,114,114,   38, 38, 38, 38, 38, 38, 38, 38,   -6, -6, -6, -6, -6, -6, -6, -6, },
+        { -19,-19,-19,-19,-19,-19,-19,-19,   99, 99, 99, 99, 99, 99, 99, 99,   59, 59, 59, 59, 59, 59, 59, 59,  -11,-11,-11,-11,-11,-11,-11,-11, },
+        { -16,-16,-16,-16,-16,-16,-16,-16,   80, 80, 80, 80, 80, 80, 80, 80,   80, 80, 80, 80, 80, 80, 80, 80,  -16,-16,-16,-16,-16,-16,-16,-16, },
+        { -11,-11,-11,-11,-11,-11,-11,-11,   59, 59, 59, 59, 59, 59, 59, 59,   99, 99, 99, 99, 99, 99, 99, 99,  -19,-19,-19,-19,-19,-19,-19,-19, },
+        {  -6, -6, -6, -6, -6, -6, -6, -6,   38, 38, 38, 38, 38, 38, 38, 38,  114,114,114,114,114,114,114,114,  -18,-18,-18,-18,-18,-18,-18,-18, },
+        {  -2, -2, -2, -2, -2, -2, -2, -2,   18, 18, 18, 18, 18, 18, 18, 18,  124,124,124,124,124,124,124,124,  -12,-12,-12,-12,-12,-12,-12,-12, },
+    },
+
+	// Dummy entry for VP61 support
+	{
+		{   0,  0,  0,  0,  0,  0,  0,  0,  128,128,128,128,128,128,128,128,    0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0 },
+		{  -4, -4, -4, -4, -4, -4, -4, -4,  118,118,118,118,118,118,118,118,   16, 16, 16, 16, 16, 16, 16, 16,   -2, -2, -2, -2, -2, -2, -2, -2 },
+		{  -7, -7, -7, -7, -7, -7, -7, -7,  106,106,106,106,106,106,106,106,   34, 34, 34, 34, 34, 34, 34, 34,   -5, -5, -5, -5, -5, -5, -5, -5 },
+		{  -8, -8, -8, -8, -8, -8, -8, -8,   90, 90, 90, 90, 90, 90, 90, 90,   53, 53, 53, 53, 53, 53, 53, 53,   -7, -7, -7, -7, -7, -7, -7, -7 },
+		{  -8, -8, -8, -8, -8, -8, -8, -8,   72, 72, 72, 72, 72, 72, 72, 72,   72, 72, 72, 72, 72, 72, 72, 72,   -8, -8, -8, -8, -8, -8, -8, -8 },
+		{  -7, -7, -7, -7, -7, -7, -7, -7,   53, 53, 53, 53, 53, 53, 53, 53,   90, 90, 90, 90, 90, 90, 90, 90,   -8, -8, -8, -8, -8, -8, -8, -8 },
+		{  -5, -5, -5, -5, -5, -5, -5, -5,   34, 34, 34, 34, 34, 34, 34, 34,  106,106,106,106,106,106,106,106,   -7, -7, -7, -7, -7, -7, -7, -7 },
+		{  -2, -2, -2, -2, -2, -2, -2, -2,   16, 16, 16, 16, 16, 16, 16, 16,  118,118,118,118,118,118,118,118,   -4, -4, -4, -4, -4, -4, -4, -4 }
+	}
+
+};
+
+
+
+void FilterBlock1d_h_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{   /* 4-tap horizontal filter: each pass of the loop produces 8 output bytes for one row. */
+    __asm
+    {
+        // Loads the first four INT16 at byte offsets 0, 16, 32 and 48 of Filter: mm1, mm2, mm6, mm7 = taps 0..3 (the trailing remarks on these four loads are stale). PixelStep is not referenced.
+        mov         edi, Filter
+        movq      mm1, [edi]             ; mm3 *= kernel 0 modifiers.
+        movq      mm2, [edi+ 16]         ; mm3 *= kernel 0 modifiers.
+        movq      mm6, [edi + 32]        ; mm3 *= kernel 0 modifiers.
+        movq      mm7, [edi + 48]        ; mm3 *= kernel 0 modifiers.
+        // The source pointer is moved one byte left so that tap 0 lines up with p-1. OutputWidth is used as the destination row stride.
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        dec         esi
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth      ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+        // The count in ecx is tested only after the body has run, so OutputHeight must be at least 1.
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p-1..p6    
+        movq        mm4, mm3              ; mm4 = p-1..p6
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p2
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 24               ; mm4 = p2..p6
+        movq        mm5, mm4              ; mm5 = p2..p6
+        punpcklbw   mm5, mm0              ; mm5 = p2..p5
+        pmullw      mm5, mm7              ; mm5 *= kernel 3 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+
+        movq        mm4, [esi+1]          ; mm4 = p0..p6
+        movq        mm5, mm4              ; mm5 = p0..p6
+        punpcklbw   mm5, mm0              ; mm5 = p0..p3
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p1..p6
+        movq        mm5, mm4              ; mm5 = p1..p6
+        punpcklbw   mm5, mm0              ; mm5 = p1..p4
+        pmullw      mm5, mm6              ; mm5 *= kernel 2 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+        // Add 64 per lane, shift right by 7, then packuswb clamps each word to 0..255. The low four bytes are outputs 0..3.
+
+        paddsw      mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi],mm3             ; store the results in the destination
+        // Outputs 4..7: same sequence with every load 4 bytes further right. The pN labels below were copied from the
+        // first half and are 4 too low (the load at esi+4 is p3..p10, the load at esi+5 is p4..p11).
+        movq		mm3, [esi+4]           ; mm3 = p-1..p6    
+        movq        mm4, mm3              ; mm4 = p-1..p6
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p2
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 24               ; mm4 = p2..p6
+        movq        mm5, mm4              ; mm5 = p2..p6
+        punpcklbw   mm5, mm0              ; mm5 = p2..p5
+        pmullw      mm5, mm7              ; mm5 *= kernel 3 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+
+        movq        mm4, [esi+5]          ; mm4 = p0..p6
+        movq        mm5, mm4              ; mm5 = p0..p6
+        punpcklbw   mm5, mm0              ; mm5 = p0..p3
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p1..p6
+        movq        mm5, mm4              ; mm5 = p1..p6
+        punpcklbw   mm5, mm0              ; mm5 = p1..p4
+        pmullw      mm5, mm6              ; mm5 *= kernel 2 modifiers
+        paddsw      mm3, mm5              ; mm3 += mm5
+
+
+        paddsw      mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd       [edi+4],mm3               ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }   /* no emms is issued in this function */
+}
+
+
+void FilterBlock1d_v_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+
+        mov         edi, Filter
+        movq      mm1, [edi]          ; mm3 *= kernel 0 modifiers.
+        movq      mm2, [edi + 16]     ; mm3 *= kernel 0 modifiers.
+        movq      mm6, [edi + 32]     ; mm3 *= kernel 0 modifiers.
+        movq      mm7, [edi + 48]     ; mm3 *= kernel 0 modifiers.
+
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        sub         esi, PixelsPerLine
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth      ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+
+
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p0..p8
+        punpcklbw   mm3, mm0              ; mm3 = p0..p3
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        add         esi, edx              ; move source forward 1 line to avoid 3 * pitch
+
+        movq		mm4, [esi+2*edx]      ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm7              ; mm4 *= kernel 3 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi ]           ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi +edx]       ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm6              ; mm4 *= kernel 2 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+
+        paddsw      mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and saturate
+
+        movd        [edi],mm3             ; store the results in the destination
+        
+        sub         esi, edx              ;  subtract edx to get back to -1 column
+
+        movq		mm3, [esi+4]          ; mm3 = p4..p12
+        punpcklbw   mm3, mm0              ; mm3 = p4..p7
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        add         esi, edx              ; move source forward 1 line to avoid 3 * pitch
+
+        movq		mm4, [esi+2*edx+4]    ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm7              ; mm4 *= kernel 3 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi +4]         ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi +edx+4]     ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm6              ; mm4 *= kernel 2 modifiers.
+        paddsw      mm3, mm4              ; mm3 += mm4
+
+
+        paddsw      mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and saturate
+
+        movd        [edi+4],mm3           ; store the results in the destination
+
+
+
+        // the subsequent iterations repeat 3 out of 4 of these reads.  Since the 
+        // recon block should be in cache this shouldn't cost much.  Its obviously 
+        // avoidable!!!. 
+        add         edi,eax; 
+
+        dec         ecx                   ; decrement count
+        jnz         nextrow               ; next row
+
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_h_mmxa
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Every row is read starting one byte
+ *                          before SrcPtr (two 8-byte loads, at offsets -1 and +3).
+ *                      SrcPixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Four tap vectors of four 16-bit lanes each, read at
+ *                          byte offsets 0, 16, 32 and 48.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Horizontal 4 tap filter.  For x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x-1]*t0 + src[x]*t1 + src[x+1]*t2
+ *                                         + src[x+2]*t3 + rd ) >> FILTER_SHIFT )
+ *                      Products and sums are 16-bit and wrap (pmullw / paddw),
+ *                      the shift is arithmetic (psraw) and sat8 is the unsigned
+ *                      saturation done by packuswb.
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this routine.
+ *                      No emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_h_mmxa( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // mm1, mm2, mm6, mm7 <- tap vectors 0..3 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movq      mm1, [edi]             ; mm3 *= kernel 0 modifiers.
+        movq      mm2, [edi+ 16]         ; mm3 *= kernel 0 modifiers.
+        movq      mm6, [edi + 32]        ; mm3 *= kernel 0 modifiers.
+        movq      mm7, [edi + 48]        ; mm3 *= kernel 0 modifiers.
+        // edi = destination, esi = one byte left of SrcPtr, ecx = rows left, eax = output row step.
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        dec         esi
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth      ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p-1..p6    
+        movq        mm4, mm3              ; mm4 = p-1..p6
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p2
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 8                ; mm4 = p0..p6
+        movq        mm5, mm4              ; mm5 = p0..p6
+        punpcklbw   mm5, mm0              ; mm5 = p0..p3
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p1..p6
+        movq        mm5, mm4              ; mm5 = p1..p6
+        punpcklbw   mm5, mm0              ; mm5 = p1..p4
+        pmullw      mm5, mm6              ; mm5 *= kernel 2 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p2..p6
+        movq        mm5, mm4              ; mm5 = p2..p6
+        punpcklbw   mm5, mm0              ; mm5 = p2..p5
+        pmullw      mm5, mm7              ; mm5 *= kernel 3 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi],mm3             ; store the results in the destination
+        // Second group of four pixels: the same sequence on the 8 bytes at esi+4,
+        // i.e. source bytes p3..p10 (the lane comments below repeat those of the first group).
+        movq		mm3, [esi+4]           ; mm3 = p-1..p6    
+        movq        mm4, mm3              ; mm4 = p-1..p6
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p2
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 8                ; mm4 = p0..p6
+        movq        mm5, mm4              ; mm5 = p0..p6
+        punpcklbw   mm5, mm0              ; mm5 = p0..p3
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p1..p6
+        movq        mm5, mm4              ; mm5 = p1..p6
+        punpcklbw   mm5, mm0              ; mm5 = p1..p4
+        pmullw      mm5, mm6              ; mm5 *= kernel 2 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        psrlq       mm4, 8                ; mm4 = p2..p6
+        movq        mm5, mm4              ; mm5 = p2..p6
+        punpcklbw   mm5, mm0              ; mm5 = p2..p5
+        pmullw      mm5, mm7              ; mm5 *= kernel 3 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd       [edi+4],mm3               ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_v_mmxa
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Reading starts one row above SrcPtr
+ *                          (SrcPtr - PixelsPerLine).
+ *                      PixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Four tap vectors of four 16-bit lanes each, read at
+ *                          byte offsets 0, 16, 32 and 48.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Vertical 4 tap filter.  With P = PixelsPerLine, for
+ *                      x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x-P]*t0 + src[x]*t1 + src[x+P]*t2
+ *                                         + src[x+2P]*t3 + rd ) >> FILTER_SHIFT )
+ *                      Products and sums are 16-bit and wrap (pmullw / paddw),
+ *                      the shift is arithmetic (psraw) and sat8 is the unsigned
+ *                      saturation done by packuswb.
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this routine.
+ *                      No emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_v_mmxa( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // mm1, mm2, mm6, mm7 <- tap vectors 0..3 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movq      mm1, [edi]          ; mm3 *= kernel 0 modifiers.
+        movq      mm2, [edi + 16]     ; mm3 *= kernel 0 modifiers.
+        movq      mm6, [edi + 32]     ; mm3 *= kernel 0 modifiers.
+        movq      mm7, [edi + 48]     ; mm3 *= kernel 0 modifiers.
+        // edx = source row step; esi starts one row above SrcPtr, i.e. on the row used with tap 0.
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        sub         esi, PixelsPerLine
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth      ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+
+        // Columns 0..3: rows esi, esi+edx, esi+2*edx and esi+3*edx weighted by taps 0..3.
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p0..p8
+        punpcklbw   mm3, mm0              ; mm3 = p0..p3
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        movq		mm4, [esi +edx ]      ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi +2*edx]     ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm6              ; mm4 *= kernel 2 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        add         esi, edx              ; move source forward 1 line to avoid 3 * pitch
+
+        movq		mm4, [esi+2*edx]      ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm7              ; mm4 *= kernel 3 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and saturate
+
+        movd        [edi],mm3             ; store the results in the destination
+        
+        sub         esi, edx              ;  subtract edx to get back to -1 column
+        // esi is back on the tap 0 row (a row, not a column).  Columns 4..7: same sequence at byte offset +4.
+        movq		mm3, [esi+4]          ; mm3 = p4..p12
+        punpcklbw   mm3, mm0              ; mm3 = p4..p7
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        movq		mm4, [esi +edx +4]      ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        movq		mm4, [esi +2*edx+4]   ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm6              ; mm4 *= kernel 2 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        add         esi, edx              ; move source forward 1 line to avoid 3 * pitch
+        // This second add is not undone: it leaves esi one row further down for the next pass.
+        movq		mm4, [esi+2*edx+4]    ; mm4 = p0..p8
+        punpcklbw   mm4, mm0              ; mm4 = p0..p3
+        pmullw      mm4, mm7              ; mm4 *= kernel 3 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and saturate
+
+        movd        [edi+4],mm3           ; store the results in the destination
+
+
+
+        // the subsequent iterations repeat 3 out of 4 of these reads.  Since the 
+        // recon block should be in cache this shouldn't cost much.  Its obviously 
+        // avoidable!!!. 
+        add         edi,eax; 
+
+        dec         ecx                   ; decrement count
+        jnz         nextrow               ; next row
+
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_hb8_mmx
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Every row is read from SrcPtr itself
+ *                          (two 8-byte loads, at offsets 0 and +4).
+ *                      SrcPixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Two tap vectors of four 16-bit lanes each, read at
+ *                          byte offsets 0 and 16.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Horizontal 2 tap filter.  For x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x]*t0 + src[x+1]*t1 + rd )
+ *                                       >> FILTER_SHIFT )
+ *                      Products and sums are 16-bit and wrap (pmullw / paddw).
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this routine.
+ *                      No emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_hb8_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // mm1, mm2 <- tap vectors 0 and 1 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movq        mm1, [edi]            ; mm3 *= kernel 0 modifiers.
+        movq        mm2, [edi + 16]       ; mm3 *= kernel 0 modifiers.
+
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth      ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+        // NOTE(review): esi is SrcPtr itself (no dec), so the 8-byte loads below start at p0 and p4, not at the p-1 the lane comments mention.
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p-1..p14    
+        movq        mm4, mm3                ; mm4 = p-1..p14
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p6
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 8                 ; mm4 = p0..p13
+        movq        mm5, mm4              ; mm5 = p0..p13
+        punpcklbw   mm5, mm0              ; mm5 = p0..p7
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        paddw       mm3, rd                ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT      ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi],mm3               ; store the results in the destination
+        // Second group of four pixels: same sequence on the 8 bytes at esi+4.
+        movq		mm3, [esi+4]            ; mm3 = p-1..p14    
+        movq        mm4, mm3                ; mm4 = p-1..p14
+        punpcklbw   mm3, mm0              ; mm3 = p-1..p6
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        psrlq       mm4, 8                 ; mm4 = p0..p13
+        movq        mm5, mm4              ; mm5 = p0..p13
+        punpcklbw   mm5, mm0              ; mm5 = p0..p7
+        pmullw      mm5, mm2              ; mm5 *= kernel 1 modifiers
+        paddw       mm3, mm5              ; mm3 += mm5
+
+        paddw       mm3, rd                ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT      ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi+4],mm3               ; store the results in the destination
+
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_vb8_mmx
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Reading starts on the row at SrcPtr.
+ *                      PixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Two tap vectors of four 16-bit lanes each, read at
+ *                          byte offsets 0 and 16.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Vertical 2 tap filter.  With P = PixelsPerLine, for
+ *                      x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x]*t0 + src[x+P]*t1 + rd )
+ *                                       >> FILTER_SHIFT )
+ *                      Products and sums are 16-bit and wrap (pmullw / paddw).
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this routine.
+ *                      No emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock1d_vb8_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // mm1, mm2 <- tap vectors 0 and 1 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movq      mm1, [edi]          ; mm3 *= kernel 0 modifiers.
+        movq      mm2, [edi + 16]     ; mm3 *= kernel 0 modifiers.
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		mm0, mm0              ; mm0 = 00000000
+
+        // Columns 0..3: row esi weighted by tap 0 plus row esi+edx weighted by tap 1.
+nextrow:
+        movq		mm3, [esi]            ; mm3 = p0..p16
+        punpcklbw   mm3, mm0              ; mm3 = p0..p8
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        movq		mm4, [esi +edx ]      ; mm4 = p0..p16
+        punpcklbw   mm4, mm0              ; mm4 = p0..p8
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi],mm3             ; store the results in the destination
+        // Columns 4..7: same sequence at byte offset +4.
+        movq		mm3, [esi+4]          ; mm3 = p0..p16
+        punpcklbw   mm3, mm0              ; mm3 = p0..p8
+        pmullw      mm3, mm1              ; mm3 *= kernel 0 modifiers.
+
+        movq		mm4, [esi +edx +4]    ; mm4 = p0..p16
+        punpcklbw   mm4, mm0              ; mm4 = p0..p8
+        pmullw      mm4, mm2              ; mm4 *= kernel 1 modifiers.
+        paddw       mm3, mm4              ; mm3 += mm4
+
+        paddw       mm3, rd               ; mm3 += round value
+        psraw       mm3, FILTER_SHIFT     ; mm3 /= 128
+        packuswb    mm3, mm0              ; pack and unpack to saturate
+
+        movd        [edi+4],mm3           ; store the results in the destination
+
+        // The next iteration reloads the row that was read at esi+edx in this one
+        // (one of its two row reads).  Since the recon block should be in cache
+        // this shouldn't cost much, but it is avoidable.
+        add         esi,edx
+        add         edi,eax 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+
+    }
+}
+ 
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock2dBil_mmx
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Top-left source pixel.  Nine rows are read (one before
+ *                          the loop plus one per output row), each through two
+ *                          8-byte loads at offsets 0 and +1.
+ *                      SrcPixelsPerLine
+ *                          Byte step between source rows.
+ *                      HFilter, VFilter
+ *                          Horizontal and vertical 2 tap filters; each tap is a
+ *                          vector of four 16-bit lanes read at byte offsets 0
+ *                          and 16.
+ *						
+ *  OUTPUTS       :     OutputPtr
+ *                          64 bytes: 8 rows of 8 filtered UINT8 pixels stored
+ *                          back to back (the loop ends when edi reaches
+ *                          OutputPtr + 64).
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Applies a bilinear filter on the input data to produce
+ *						an 8x8 predictor block.  Each source row is filtered
+ *						horizontally, rounded and shifted; each output row then
+ *						combines the previous row's horizontal result (tap 0)
+ *						with the current row's (tap 1), rounded and shifted again.
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this routine.
+ *                      The "xmm3" wording in the trailing comments refers to mm3.
+ *                      No emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline
+void FilterBlock2dBil_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+
+    __asm
+    {
+        mov         eax,        HFilter             ; 
+        mov         edi,        OutputPtr           ; 
+        mov         esi,        SrcPtr              ;
+        lea         ecx,        [edi+64]            ;
+        mov         edx,        SrcPixelsPerLine    ;
+        // mm1 / mm2 = horizontal taps 0 / 1
+        movq        mm1,        [eax]               ;
+        movq        mm2,        [eax+16]            ;
+        // eax is re-used for the vertical taps: [eax] = tap 0, [eax+16] = tap 1
+        mov         eax,        VFilter             ;       
+        pxor        mm0,        mm0                 ;
+
+        // Horizontal pass for the first source row; its packed result is parked in mm7.
+        movq        mm3,        [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movq        mm4,        mm3                 ; make a copy of current line
+        
+        punpcklbw   mm3,        mm0                 ; xx 00 01 02 03 04 05 06
+        punpckhbw   mm4,        mm0                 ;
+
+        pmullw      mm3,        mm1                 ;
+        pmullw      mm4,        mm1                 ;
+
+        movq        mm5,        [esi+1]             ;
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0                 ;
+
+        pmullw      mm5,        mm2                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+        
+        movq        mm7,        mm3                 ;
+        packuswb    mm7,        mm4                 ;
+
+        add         esi,        edx                 ; next line
+NextRow:
+        movq        mm3,        [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movq        mm4,        mm3                 ; make a copy of current line
+        
+        punpcklbw   mm3,        mm0                 ; xx 00 01 02 03 04 05 06
+        punpckhbw   mm4,        mm0                 ;
+
+        pmullw      mm3,        mm1                 ;
+        pmullw      mm4,        mm1                 ;
+
+        movq        mm5,        [esi+1]             ;
+        movq        mm6,        mm5                 ;
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0                 ;
+
+        pmullw      mm5,        mm2                 ;
+        pmullw      mm6,        mm2                 ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        // mm5:mm6 = previous row (packed bytes in mm7) widened back to words, times vertical tap 0
+        movq        mm5,        mm7                 ;
+        movq        mm6,        mm7                 ;                
+
+        punpcklbw   mm5,        mm0                 ;
+        punpckhbw   mm6,        mm0
+
+        pmullw      mm5,        [eax]               ;
+        pmullw      mm6,        [eax]               ;
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+        // keep a packed copy of this row's horizontal result in mm7 for the next pass
+        movq        mm7,        mm3                 ;
+        packuswb    mm7,        mm4                 ;    
+        // vertical pass: this row * tap 1, plus previous row * tap 0 (already in mm5:mm6)
+
+        pmullw      mm3,        [eax+16]            ;
+        pmullw      mm4,        [eax+16]            ;
+
+        paddw       mm3,        mm5                 ;
+        paddw       mm4,        mm6                 ;
+        
+        
+        paddw       mm3,        rd                  ; xmm3 += round value
+        psraw       mm3,        FILTER_SHIFT        ; xmm3 /= 128
+
+        paddw       mm4,        rd                  ;
+        psraw       mm4,        FILTER_SHIFT        ;
+               
+        packuswb    mm3,        mm4                                         
+
+        movq        [edi],      mm3                 ; store the results in the destination
+
+        add         esi,        edx                 ; next line
+        add         edi,        8                   ; 
+        // ecx = OutputPtr + 64: stop after 8 rows of 8 bytes
+        cmp         edi,        ecx                 ;
+        jne         NextRow                         
+
+    }
+
+    // First filter 1d Horizontal
+	//FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+	// Now filter Verticaly
+	//FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+
+
+}
+
+ 
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlockBil_8_mmx
+ *  
+ *  INPUTS        :     ReconPtr1, ReconPtr2
+ *							The two pixel positions that bound the fractional pel
+ *							position.  They may be passed in either order.
+ *						PixelsPerLine
+ *							Pixels per line in the buffer both pointers refer to.
+ *						ModX, ModY
+ *							The fractional pel bits; used as row indices into
+ *							BilinearFilters_mmx.
+ *
+ *  OUTPUTS       :     ReconRefPtr
+ *							Destination for the UINT8 filtered data (the 1d
+ *							filters are asked for 8 rows with an output step of 8).
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Picks the bilinear filter that matches the distance
+ *						between the two pointers:
+ *						    1                   horizontal only
+ *						    PixelsPerLine       vertical only
+ *						    PixelsPerLine - 1   2d, block starts one pixel left
+ *						    PixelsPerLine + 1   2d
+ *						Any other distance leaves ReconRefPtr untouched.
+ *
+ *  SPECIAL NOTES :     The tests are made in the order listed above.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlockBil_8_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
+{
+	UINT8 *Lower  = ReconPtr1;
+	UINT8 *Higher = ReconPtr2;
+	int    Offset = (int)(Higher - Lower);
+
+	// ModX and ModY are the bottom three bits of the signed motion vector components
+	// (1/8th pel precision), so they stay valid whichever way round the pointers arrive:
+	//   +ve X component : ModX = X%8
+	//   -ve X component : ModX = 8+(X%8), X%8 being in the range -7 to -1.
+
+	if ( Offset < 0 )
+	{
+		// make Lower the smaller address (above, left, above-right or above-left)
+		Lower  = ReconPtr2;
+		Higher = ReconPtr1;
+		Offset = (int)(Higher - Lower);
+	}
+
+	// Fractional pixel in horizontal only
+	if ( Offset == 1 )
+	{
+		FilterBlock1d_hb8_mmx( Lower, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_mmx[ModX] );
+		return;
+	}
+
+	// Fractional pixel in vertical only
+	if ( Offset == (int)(PixelsPerLine) )
+	{
+		FilterBlock1d_vb8_mmx( Lower, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_mmx[ModY] );
+		return;
+	}
+
+	// Lower is the top right pixel: the 2d filter starts one pixel to its left
+	if ( Offset == (int)(PixelsPerLine - 1) )
+	{
+		FilterBlock2dBil_mmx( Lower-1, ReconRefPtr, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+		return;
+	}
+
+	// Lower is the top left pixel
+	if ( Offset == (int)(PixelsPerLine + 1) )
+	{
+		FilterBlock2dBil_mmx( Lower, ReconRefPtr, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock2d_mmx
+ *  
+ *  INPUTS        :     SrcPtr
+ *							Top-left pixel of the block in the source buffer.
+ *						SrcPixelsPerLine
+ *							Pixels per line in the source buffer.
+ *						HFilter, VFilter
+ *							Tap tables handed to the horizontal and vertical
+ *							1d filters respectively.
+ *						
+ *  OUTPUTS       :     OutputPtr
+ *							Receives the output of the vertical pass (UINT8).
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Applies a 2d 4 tap filter on the input data as two 1d
+ *						passes: horizontal into a local buffer, then vertical
+ *						from that buffer into OutputPtr.
+ *
+ *  SPECIAL NOTES :     The horizontal pass covers 11 rows starting one row above
+ *						SrcPtr, written 8 bytes apart; the vertical pass is
+ *						pointed BLOCK_HEIGHT_WIDTH bytes into the buffer
+ *						(presumably its second row, i.e. BLOCK_HEIGHT_WIDTH == 8
+ *						-- TODO confirm).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock2d_mmx( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+    UINT8  HorizPass[256];
+    UINT8 *FirstSrcRow  = SrcPtr - SrcPixelsPerLine;
+    UINT8 *VertPassBase = HorizPass + BLOCK_HEIGHT_WIDTH;
+
+    // pass 1: horizontal filter, 11 rows, output rows 8 bytes apart
+    FilterBlock1d_h_mmx( FirstSrcRow, HorizPass, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+    // pass 2: vertical filter over the intermediate rows, 8 rows out
+    FilterBlock1d_v_mmx( VertPassBase, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter );
+}
+ 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock_mmx
+ *  
+ *  INPUTS        :     ReconPtr1, ReconPtr2
+ *							Two pointers into the block of data to be filtered.
+ *							These pointers bound the fractional pel position and
+ *							may be passed in either order.
+ *						PixelsPerLine
+ *							Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ *						ModX, ModY
+ *							The fractional pel bits used to select a filter.
+ *						UseBicubic
+ *							Whether to use the bicubic filter set or the bilinear set
+ *						BicubicAlpha
+ *							First index into BicubicFilters_mmx; only used when
+ *							UseBicubic is set.
+ *
+ *				
+ *  OUTPUTS       :     ReconRefPtr
+ *							Receives the filtered block via UnpackBlock_MMX
+ *							(presumably the 8x8 bytes widened to UINT16 -- TODO
+ *							confirm against UnpackBlock_MMX).
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Produces a filtered fractional pel prediction block
+ *						using bilinear or bicubic filters.  The filter is chosen
+ *						from the distance between the two pointers:
+ *						    1                   horizontal only
+ *						    PixelsPerLine       vertical only
+ *						    PixelsPerLine - 1   2d, block starts one pixel left
+ *						    PixelsPerLine + 1   2d
+ *
+ *  SPECIAL NOTES :     ReconRefPtr is left untouched when the two pointers are
+ *						equal or when their distance is none of the four above.
+ *						(Previously the second case unpacked an uninitialised
+ *						stack buffer into ReconRefPtr.)
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
+{
+	int diff;
+    UINT8 Intermediate[256];
+
+	// signed distance between the two pointers; same narrowing as in the swap branch below
+	diff=(int)(ReconPtr2-ReconPtr1);
+
+	// The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
+	// This works out to be what we want... despite the pointer swapping that goes on below.
+	// For example... if the X component of the vector is a +ve ModX = X%8.
+	//                if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
+
+	if(diff<0) 
+	{											// swap pointers so ReconPtr1 smaller (above, left, above-right or above-left)
+		UINT8 *temp=ReconPtr1;
+		ReconPtr1=ReconPtr2;
+		ReconPtr2=temp;
+		diff= (int)(ReconPtr2-ReconPtr1);
+	}
+
+    if(!diff)
+    {
+        // identical pointers: nothing to filter
+        return;
+    }
+	if( diff==1 )
+	{											        // Fractional pixel in horizontal only
+		if ( UseBicubic )
+			FilterBlock1d_h_mmx(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] );
+		else
+			FilterBlock1d_hb8_mmx(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BilinearFilters_mmx[ModX] );
+	}
+	else if (diff == (int)(PixelsPerLine) )				// Fractional pixel in vertical only
+	{
+		if ( UseBicubic )
+			FilterBlock1d_v_mmx(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]);
+		else
+			FilterBlock1d_vb8_mmx(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_mmx[ModY]);
+	}
+	else if(diff == (int)(PixelsPerLine - 1))			// ReconPtr1 is Top right
+	{										
+		if ( UseBicubic )
+			FilterBlock2d_mmx( ReconPtr1-1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+		else
+			FilterBlock2dBil_mmx( ReconPtr1-1, Intermediate, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+	}
+	else if(diff == (int)(PixelsPerLine + 1) )			// ReconPtr1 is Top left
+	{	
+		if ( UseBicubic )
+			FilterBlock2d_mmx( ReconPtr1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+		else
+			FilterBlock2dBil_mmx( ReconPtr1, Intermediate, PixelsPerLine, BilinearFilters_mmx[ModX], BilinearFilters_mmx[ModY] );
+	}
+	else
+	{
+		// The pointers are not neighbours in any supported layout, so no filter
+		// ran and Intermediate was never written.  Do not unpack stack garbage.
+		return;
+	}
+    UnpackBlock_MMX( Intermediate, ReconRefPtr, 8 );
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
new file mode 100644
index 00000000..7b5f0486
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
@@ -0,0 +1,790 @@
+/****************************************************************************
+ *
+ *   Module Title :     filtwmt.c
+ *
+ *   Description  :     Codec specific functions
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *   1.02 YWX 03-Nov-00 Changed confusing variable name
+ *   1.01 YWX 02-Nov-00 Added the set of functions
+ *   1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */ 
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+
+#define STRICT              /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ *  Module constants.
+ *
+ *  FILTER_WEIGHT / FILTER_SHIFT
+ *      Tap weights are scaled by 128 (1 << 7); the routines in this file shift
+ *      their filtered sums right by FILTER_SHIFT.
+ *  rd
+ *      Eight 16-bit lanes of 64 (FILTER_WEIGHT / 2), added before the shift so
+ *      that the result is rounded.  Declared 16-byte aligned; it is used
+ *      directly as the memory operand of paddw / paddsw.
+ *****************************************************************************
+ */        
+/* NOTE: MIN expands the selected argument twice; avoid arguments with side effects. */
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
+#define FILTER_WEIGHT 128
+#define FILTER_SHIFT  7
+__declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64};
+/* BilinearFilters_wmt[k]: tap 0 (128 - 16*k) replicated in lanes 0..7, followed
+ * by tap 1 (16*k) replicated in lanes 8..15.  Every pair sums to FILTER_WEIGHT. */
+
+__declspec(align(16)) INT16  BilinearFilters_wmt[8][16] = 
+{
+{ 128,128,128,128,128,128,128,128,    0,  0, 0,   0,  0,  0,  0,  0 },
+{ 112,112,112,112,112,112,112,112,   16, 16, 16, 16, 16, 16, 16, 16 },
+{  96, 96, 96, 96, 96, 96, 96, 96,   32, 32, 32, 32, 32, 32, 32, 32 },
+{  80, 80, 80, 80, 80, 80, 80, 80,   48, 48, 48, 48, 48, 48, 48, 48 },
+{  64, 64, 64, 64, 64, 64, 64, 64,   64, 64, 64, 64, 64, 64, 64, 64 },
+{  48, 48, 48, 48, 48, 48, 48, 48,   80, 80, 80, 80, 80, 80, 80, 80 },
+{  32, 32, 32, 32, 32, 32, 32, 32,   96, 96, 96, 96, 96, 96, 96, 96 },
+{  16, 16, 16, 16, 16, 16, 16, 16,  112,112,112,112,112,112,112,112 }
+};
+/* Bicubic tap tables: declared here, defined in another translation unit. */
+extern __declspec(align(16)) INT16  BicubicFilters_mmx[17][8][32];
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_h_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Every row is read starting one byte
+ *                          before SrcPtr (16-byte unaligned loads at offsets -1
+ *                          and 0).
+ *                      SrcPixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Four tap vectors of eight 16-bit lanes each at byte
+ *                          offsets 0, 16, 32 and 48.  Loaded with movdqa, so the
+ *                          table must be 16-byte aligned.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Horizontal 4 tap filter.  For x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x-1]*t0 + src[x+2]*t3 + src[x]*t1
+ *                                         + src[x+1]*t2 + rd ) >> FILTER_SHIFT )
+ *                      The terms are accumulated in that order with signed
+ *                      saturating adds (paddsw); sat8 is the unsigned
+ *                      saturation done by packuswb.
+ *
+ *  SPECIAL NOTES :     The result is stored through mm0 (movdq2q), so MMX state
+ *                      is touched; no emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterBlock1d_h_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // xmm1, xmm2, xmm6, xmm7 <- tap vectors 0..3 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movdqa      xmm1, [edi]             ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi+ 16]         ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm6, [edi + 32]        ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm7, [edi + 48]        ; xmm3 *= kernel 0 modifiers.
+        // edi = destination, esi = one byte left of SrcPtr, ecx = rows left, eax = output row step.
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        dec         esi
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+nextrow:
+
+        // kernel 0 and 3 are potentially negative taps.  These negative tap filters 
+        // must be done first or we could have problems saturating our high value 
+        // tap filters
+        movdqu		xmm3, [esi]             ; xmm3 = p-1..p14    
+        movdqu      xmm4, xmm3              ; xmm4 = p-1..p14
+        punpcklbw   xmm3, xmm0              ; xmm3 = p-1..p6
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        psrldq      xmm4, 3                 ; xmm4 = p2..p13
+        movdqa      xmm5, xmm4              ; xmm5 = p2..p13
+        punpcklbw   xmm5, xmm0              ; xmm5 = p2..p7
+        pmullw      xmm5, xmm7              ; xmm5 *= kernel 3 modifiers
+        paddsw      xmm3, xmm5              ; xmm3 += xmm5
+
+        movdqu      xmm4, [esi+1]           ; xmm4 = p0..p13
+        movdqa      xmm5, xmm4              ; xmm5 = p0..p13
+        punpcklbw   xmm5, xmm0              ; xmm5 = p0..p7
+        pmullw      xmm5, xmm2              ; xmm5 *= kernel 1 modifiers
+        paddsw      xmm3, xmm5              ; xmm3 += xmm5
+
+        psrldq      xmm4, 1                 ; xmm4 = p1..p13
+        movdqa      xmm5, xmm4              ; xmm5 = p1..p13
+        punpcklbw   xmm5, xmm0              ; xmm5 = p1..p7
+        pmullw      xmm5, xmm6              ; xmm5 *= kernel 2 modifiers
+        paddsw      xmm3, xmm5              ; xmm3 += xmm5
+
+        paddsw      xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+        packuswb    xmm3, xmm0              ; pack and saturate
+        // only the low 8 bytes of xmm3 are stored; they travel through the MMX register mm0
+        movdq2q     mm0, xmm3
+        movq        [edi],mm0               ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_v_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Reading starts one row above SrcPtr
+ *                          (SrcPtr - PixelsPerLine); each row is fetched with a
+ *                          16-byte unaligned load of which the low 8 bytes are
+ *                          used.
+ *                      PixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Four tap vectors of eight 16-bit lanes each at byte
+ *                          offsets 0, 16, 32 and 48.  Loaded with movdqa, so the
+ *                          table must be 16-byte aligned.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Vertical 4 tap filter.  With P = PixelsPerLine, for
+ *                      x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x-P]*t0 + src[x+2P]*t3 + src[x]*t1
+ *                                         + src[x+P]*t2 + rd ) >> FILTER_SHIFT )
+ *                      The terms are accumulated in that order with signed
+ *                      saturating adds (paddsw); sat8 is the unsigned
+ *                      saturation done by packuswb.
+ *
+ *  SPECIAL NOTES :     The result is stored through mm0 (movdq2q), so MMX state
+ *                      is touched; no emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterBlock1d_v_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // xmm1, xmm2, xmm6, xmm7 <- tap vectors 0..3 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movdqa      xmm1, [edi]          ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi + 16]     ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm6, [edi + 32]     ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm7, [edi + 48]     ; xmm3 *= kernel 0 modifiers.
+        // edx = source row step; esi starts one row above SrcPtr, i.e. on the row used with tap 0.
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        sub         esi, PixelsPerLine
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+        // Taps are applied in the order 0, 3, 1, 2.
+nextrow:
+        movdqu		xmm3, [esi]             ; xmm3 = p0..p16
+        punpcklbw   xmm3, xmm0              ; xmm3 = p0..p8
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        add         esi, edx                ; move source forward 1 line to avoid 3 * pitch
+        // esi is now on the tap 1 row; this add is never undone, so it is also the per-row advance.
+        movdqu		xmm4, [esi+2*edx]       ; xmm4 = p0..p16
+        punpcklbw   xmm4, xmm0              ; xmm4 = p0..p8
+        pmullw      xmm4, xmm7              ; xmm4 *= kernel 3 modifiers.
+        paddsw      xmm3, xmm4              ; xmm3 += xmm4
+
+        movdqu		xmm4, [esi ]            ; xmm4 = p0..p16
+        punpcklbw   xmm4, xmm0              ; xmm4 = p0..p8
+        pmullw      xmm4, xmm2              ; xmm4 *= kernel 1 modifiers.
+        paddsw      xmm3, xmm4              ; xmm3 += xmm4
+
+        movdqu		xmm4, [esi +edx]        ; xmm4 = p0..p16
+        punpcklbw   xmm4, xmm0              ; xmm4 = p0..p8
+        pmullw      xmm4, xmm6              ; xmm4 *= kernel 2 modifiers.
+        paddsw      xmm3, xmm4              ; xmm3 += xmm4
+
+
+
+        paddsw      xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+        packuswb    xmm3, xmm0              ; pack and unpack to saturate
+        // only the low 8 bytes of xmm3 are stored; they travel through the MMX register mm0
+        movdq2q     mm0, xmm3
+        movq        [edi],mm0               ; store the results in the destination
+
+        // the subsequent iterations repeat 3 out of 4 of these reads.  Since the 
+        // recon block should be in cache this shouldn't cost much.  Its obviously 
+        // avoidable!!!. 
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_hb8_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Source pixels.  Every row is read from SrcPtr itself
+ *                          with one 16-byte unaligned load.
+ *                      SrcPixelsPerLine
+ *                          Byte step between source rows.
+ *                      PixelStep
+ *                          Not referenced.
+ *                      OutputHeight
+ *                          Number of rows to produce.  Must be non-zero: the row
+ *                          counter is decremented before it is tested.
+ *                      OutputWidth
+ *                          Used only as the byte step between output rows.
+ *                      Filter
+ *                          Two tap vectors of eight 16-bit lanes each at byte
+ *                          offsets 0 and 16.  Loaded with movdqa, so the table
+ *                          must be 16-byte aligned.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Eight filtered bytes are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Horizontal 2 tap filter.  For x = 0..7 on each row:
+ *                        out[x] = sat8( ( src[x]*t0 + src[x+1]*t1 + rd )
+ *                                       >> FILTER_SHIFT )
+ *                      Products and sums are 16-bit and wrap (pmullw / paddw).
+ *
+ *  SPECIAL NOTES :     The result is stored through mm0 (movdq2q), so MMX state
+ *                      is touched; no emms is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterBlock1d_hb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+        // xmm1, xmm2 <- tap vectors 0 and 1 (plain loads; the trailing comments on them are stale).
+        mov         edi, Filter
+        movdqa      xmm1, [edi]          ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi + 16]     ; xmm3 *= kernel 0 modifiers.
+
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+        // NOTE(review): esi is SrcPtr itself (no dec), so the load below starts at p0, not at the p-1 the lane comments mention; the copy lives in xmm5, not xmm4.
+nextrow:
+        movdqu		xmm3, [esi]             ; xmm3 = p-1..p14    
+        movdqu      xmm5, xmm3              ; xmm4 = p-1..p14
+        punpcklbw   xmm3, xmm0              ; xmm3 = p-1..p6
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        psrldq      xmm5, 1                 ; xmm4 = p0..p13
+        punpcklbw   xmm5, xmm0              ; xmm5 = p0..p7
+        pmullw      xmm5, xmm2              ; xmm5 *= kernel 1 modifiers
+        paddw       xmm3, xmm5              ; xmm3 += xmm5
+
+        paddw       xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+        packuswb    xmm3, xmm0              ; pack and unpack to saturate
+        // only the low 8 bytes of xmm3 are stored; they travel through the MMX register mm0
+        movdq2q     mm0, xmm3
+        movq        [edi],mm0               ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock1d_vb8_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First source pixel of the first row.
+ *                      PixelsPerLine
+ *                          Source pitch in bytes; also the distance between the two
+ *                          rows that are blended.
+ *                      PixelStep
+ *                          Not referenced by this routine.
+ *                      OutputHeight
+ *                          Number of rows to produce. The count is tested only after a
+ *                          row has been processed, so it must be non-zero.
+ *                      OutputWidth
+ *                          Used as the destination pitch in bytes, not as a width.
+ *                      Filter
+ *                          16 INT16 values: Filter[0..7] weight the current row,
+ *                          Filter[8..15] weight the row below. Read with movdqa,
+ *                          which requires a 16-byte aligned address.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          8 UINT8 results are written per row.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Vertical 2-tap filter, 8 pixels per row, in 16-bit lanes:
+ *                        out[x] = sat_u8( ( src[x]*Filter[x]
+ *                                           + src[x+PixelsPerLine]*Filter[8+x]
+ *                                           + rd ) >> FILTER_SHIFT ),  x = 0..7
+ *
+ *  SPECIAL NOTES :     16 bytes are loaded from each of the two rows although only the
+ *                      first 8 of each take part in the result.
+ *                      rd and FILTER_SHIFT are defined outside this function.
+ *                      The result is staged through MMX register mm0 and no emms
+ *                      is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterBlock1d_vb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+
+        // These two instructions only LOAD the taps: xmm1 = Filter[0..7], xmm2 = Filter[8..15].
+        // The trailing remarks on them are stale.
+        mov         edi, Filter
+        movdqa      xmm1, [edi]          ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi + 16]     ; xmm3 *= kernel 0 modifiers.
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+
+nextrow:
+        // Current row: low 8 bytes widened to words and weighted by tap 0.
+        movdqu		xmm3, [esi]             ; xmm3 = p0..p16
+        punpcklbw   xmm3, xmm0              ; xmm3 = p0..p8
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        // Row below (one pitch further on): widened, weighted by tap 1, and summed.
+        movdqu		xmm4, [esi +edx ]       ; xmm4 = p0..p16
+        punpcklbw   xmm4, xmm0              ; xmm4 = p0..p8
+        pmullw      xmm4, xmm2              ; xmm4 *= kernel 1 modifiers.
+        paddw       xmm3, xmm4              ; xmm3 += xmm4
+
+        // Round, arithmetic shift, then pack the 8 words to bytes with unsigned saturation.
+        paddw       xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+        packuswb    xmm3, xmm0              ; pack and unpack to saturate
+
+        // Low 64 bits (the 8 packed bytes) go out through mm0.
+        movdq2q     mm0, xmm3
+        movq        [edi],mm0               ; store the results in the destination
+
+        // The next iteration re-reads the row that was just loaded from [esi+edx].
+        // Since the recon block should be in cache this shouldn't cost much, but it
+        // is avoidable.
+        add         esi,edx
+        add         edi,eax 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock2dBil_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First source pixel. Nine rows are read, 16 bytes from each.
+ *                      SrcPixelsPerLine
+ *                          Source pitch in bytes.
+ *                      HFilter, VFilter
+ *                          16 INT16 values each: entries [0..7] are tap 0, entries
+ *                          [8..15] are tap 1. Both are accessed with instructions that
+ *                          require 16-byte alignment (movdqa, and pmullw with a memory
+ *                          operand).
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          64 bytes: 8 rows of 8 UINT8 results with a fixed pitch of 8.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Applies a bilinear filter on the input data to produce
+ *                      a predictor block (UINT8). Per row r, in 16-bit lanes:
+ *                        h[r][x]   = ( src[r][x]*HFilter[x] + src[r][x+1]*HFilter[8+x]
+ *                                      + rd ) >> FILTER_SHIFT
+ *                        out[r][x] = sat_u8( ( h[r][x]*VFilter[x] + h[r+1][x]*VFilter[8+x]
+ *                                              + rd ) >> FILTER_SHIFT )
+ *
+ *  SPECIAL NOTES :     rd and FILTER_SHIFT are defined outside this function.
+ *                      The result is staged through MMX register mm0 and no emms
+ *                      is issued here.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterBlock2dBil_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+
+    __asm
+    {
+        mov         eax,        HFilter             ; 
+        mov         edi,        OutputPtr           ; 
+        mov         esi,        SrcPtr              ;
+        // ecx = loop sentinel: OutputPtr + 8 rows * 8 bytes
+        lea         ecx,        [edi+64]            ;
+        mov         edx,        SrcPixelsPerLine     ;
+               
+        // xmm1 / xmm2 = horizontal tap 0 / tap 1
+        movdqa      xmm1,       [eax]               ;
+        movdqa      xmm2,       [eax+16]            ;
+        
+        // eax is reused for the vertical taps from here on
+        mov         eax,        VFilter             ;       
+        pxor        xmm0,       xmm0                ;
+
+        // get the first horizontal line done       ;
+        // Note: the byte shown as xx in the remarks is SrcPtr[0]; it is weighted by tap 0.
+        movdqu      xmm3,       [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movdqa      xmm4,       xmm3                ; make a copy of current line
+        
+        punpcklbw   xmm3,       xmm0                ; xx 00 01 02 03 04 05 06
+        psrldq      xmm4,       1                   ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx        
+        
+        pmullw      xmm3,       xmm1                ;        
+        punpcklbw   xmm4,       xmm0                ; 00 01 02 03 04 05 06 07
+
+        pmullw      xmm4,       xmm2                ;
+        paddw       xmm3,       xmm4                ;   
+
+        paddw       xmm3,       rd                  ; 
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        
+        // xmm5 carries the horizontally filtered previous row into the loop
+        movdqa      xmm5,       xmm3                ;
+
+        add         esi,        edx                 ; next line
+NextRow:
+        // previous row * vertical tap 0
+        pmullw      xmm5,       [eax]               ; 
+        movdqu      xmm3,       [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+
+        movdqa      xmm4,       xmm3                ; make a copy of current line        
+        punpcklbw   xmm3,       xmm0                ; xx 00 01 02 03 04 05 06
+
+        psrldq      xmm4,       1                   ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx                
+        pmullw      xmm3,       xmm1                ;        
+        punpcklbw   xmm4,       xmm0                ; 00 01 02 03 04 05 06 07
+
+        // xmm6 = weighted previous row; xmm5 is free to take the new row below
+        movdqa      xmm6,       xmm5                ; 
+        pmullw      xmm4,       xmm2                ;
+
+        paddw       xmm3,       xmm4                ;   
+        paddw       xmm3,       rd                  ; 
+
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        movdqa      xmm5,       xmm3                ; make a copy for the next row
+        
+        // current row * vertical tap 1, added to the weighted previous row
+        pmullw      xmm3,       [eax+16]            ; 
+        paddw       xmm6,       xmm3                ;
+        
+
+        paddw       xmm6,       rd                  ; xmm6 += round value
+        psraw       xmm6,       FILTER_SHIFT        ; xmm6 /= 128
+
+        packuswb    xmm6,       xmm0                ; pack and unpack to saturate
+        movdq2q     mm0,        xmm6
+
+        movq        [edi],      mm0                 ; store the results in the destination
+        add         esi,        edx                 ; next line
+        add         edi,        8                   ; 
+
+        // stop once all 64 output bytes have been written
+        cmp         edi,        ecx                 ;
+        jne         NextRow                         
+
+    }
+
+    // Disabled two-pass formulation, kept for reference:
+    // First filter 1d Horizontal
+	//FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+	// Now filter Vertically
+	//FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterUnpackBlock2dBil_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First source pixel. Nine rows are read, 16 bytes from each.
+ *                      SrcPixelsPerLine
+ *                          Source pitch in bytes.
+ *                      HFilter, VFilter
+ *                          16 INT16 values each: entries [0..7] are tap 0, entries
+ *                          [8..15] are tap 1. Both are accessed with instructions that
+ *                          require 16-byte alignment (movdqa, and pmullw with a memory
+ *                          operand).
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          128 bytes: 8 rows of 8 INT16 results with a fixed pitch of
+ *                          16 bytes. Stored with movdqu, so no alignment is required.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Bilinear filter in both directions with 16-bit output.
+ *                      Per row r, in 16-bit lanes:
+ *                        h[r][x]   = ( src[r][x]*HFilter[x] + src[r][x+1]*HFilter[8+x]
+ *                                      + rd ) >> FILTER_SHIFT
+ *                        out[r][x] = ( h[r][x]*VFilter[x] + h[r+1][x]*VFilter[8+x]
+ *                                      + rd ) >> FILTER_SHIFT
+ *
+ *  SPECIAL NOTES :     The words are stored as computed; the pack-with-saturation
+ *                      step is commented out in the loop body.
+ *                      rd and FILTER_SHIFT are defined outside this function.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterUnpackBlock2dBil_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+
+    __asm
+    {
+        mov         eax,        HFilter             ; 
+        mov         edi,        OutputPtr           ; 
+        mov         esi,        SrcPtr              ;
+        // ecx = loop sentinel: OutputPtr + 8 rows * 16 bytes
+        lea         ecx,        [edi+128]            ;
+        mov         edx,        SrcPixelsPerLine     ;
+               
+        // xmm1 / xmm2 = horizontal tap 0 / tap 1
+        movdqa      xmm1,       [eax]               ;
+        movdqa      xmm2,       [eax+16]            ;
+        
+        // eax is reused for the vertical taps from here on
+        mov         eax,        VFilter             ;       
+        pxor        xmm0,       xmm0                ;
+
+        // get the first horizontal line done       ;
+        // Note: the byte shown as xx in the remarks is SrcPtr[0]; it is weighted by tap 0.
+        movdqu      xmm3,       [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+        movdqa      xmm4,       xmm3                ; make a copy of current line
+        
+        punpcklbw   xmm3,       xmm0                ; xx 00 01 02 03 04 05 06
+        psrldq      xmm4,       1                   ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx        
+        
+        pmullw      xmm3,       xmm1                ;        
+        punpcklbw   xmm4,       xmm0                ; 00 01 02 03 04 05 06 07
+
+        pmullw      xmm4,       xmm2                ;
+        paddw       xmm3,       xmm4                ;   
+
+        paddw       xmm3,       rd                  ; 
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        
+        // xmm5 carries the horizontally filtered previous row into the loop
+        movdqa      xmm5,       xmm3                ;
+
+        add         esi,        edx                 ; next line
+NextRow:
+        // previous row * vertical tap 0
+        pmullw      xmm5,       [eax]               ; 
+        movdqu      xmm3,       [esi]               ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
+
+        movdqa      xmm4,       xmm3                ; make a copy of current line        
+        punpcklbw   xmm3,       xmm0                ; xx 00 01 02 03 04 05 06
+
+        psrldq      xmm4,       1                   ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx                
+        pmullw      xmm3,       xmm1                ;        
+        punpcklbw   xmm4,       xmm0                ; 00 01 02 03 04 05 06 07
+
+        // xmm6 = weighted previous row; xmm5 is free to take the new row below
+        movdqa      xmm6,       xmm5                ; 
+        pmullw      xmm4,       xmm2                ;
+
+        paddw       xmm3,       xmm4                ;   
+        paddw       xmm3,       rd                  ; 
+
+        psraw       xmm3,       FILTER_SHIFT        ; ready for output
+        movdqa      xmm5,       xmm3                ; make a copy for the next row
+        
+        // current row * vertical tap 1, added to the weighted previous row
+        pmullw      xmm3,       [eax+16]            ; 
+        paddw       xmm6,       xmm3                ;
+        
+
+        paddw       xmm6,       rd                  ; xmm6 += round value
+        psraw       xmm6,       FILTER_SHIFT        ; xmm6 /= 128
+
+        // store all 8 words of the row (16 bytes), unaligned
+        movdqu      [edi],      xmm6;
+        
+        /*
+        Disabled byte-output path (pack to UINT8 and store 8 bytes):
+        packuswb    xmm6,       xmm0                ; pack and unpack to saturate
+        movdq2q     mm0,        xmm6
+
+        movq        [edi],      mm0                 ; store the results in the destination
+        */
+        add         esi,        edx                 ; next line
+        add         edi,        16                   ; 
+
+        // stop once all 128 output bytes have been written
+        cmp         edi,        ecx                 ;
+        jne         NextRow                         
+
+    }
+
+    // Disabled two-pass formulation, kept for reference:
+    // First filter 1d Horizontal
+	//FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
+	// Now filter Vertically
+	//FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+
+
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterUnpackBlock1d_hb8_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First source pixel of the first row.
+ *                      SrcPixelsPerLine
+ *                          Source pitch in bytes (added to the source pointer per row).
+ *                      PixelStep
+ *                          Not referenced by this routine.
+ *                      OutputHeight
+ *                          Number of rows to produce. The count is tested only after a
+ *                          row has been processed, so it must be non-zero.
+ *                      OutputWidth
+ *                          Used as the destination pitch in BYTES, not as a width.
+ *                      Filter
+ *                          16 INT16 values: Filter[0..7] weight pixel x, Filter[8..15]
+ *                          weight pixel x+1. Read with movdqa, which requires a
+ *                          16-byte aligned address.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          8 INT16 results (16 bytes) are written per row with movdqu.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Horizontal 2-tap filter with 16-bit output, in 16-bit lanes:
+ *                        out[x] = ( src[x]*Filter[x] + src[x+1]*Filter[8+x]
+ *                                   + rd ) >> FILTER_SHIFT,  x = 0..7
+ *
+ *  SPECIAL NOTES :     The words are stored as computed; the pack-with-saturation
+ *                      step is commented out in the loop body.
+ *                      16 source bytes are loaded per row although only the first 9
+ *                      take part in the result.
+ *                      rd and FILTER_SHIFT are defined outside this function.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterUnpackBlock1d_hb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+
+        // These two instructions only LOAD the taps: xmm1 = Filter[0..7], xmm2 = Filter[8..15].
+        // The trailing remarks on them are stale.
+        mov         edi, Filter
+        movdqa      xmm1, [edi]          ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi + 16]     ; xmm3 *= kernel 0 modifiers.
+
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+nextrow:
+        // Load 16 source bytes and keep a copy in xmm5 (the remarks below call it xmm4).
+        // Low 8 bytes of xmm3 are widened to words and weighted by tap 0.
+        movdqu		xmm3, [esi]             ; xmm3 = p-1..p14    
+        movdqu      xmm5, xmm3              ; xmm4 = p-1..p14
+        punpcklbw   xmm3, xmm0              ; xmm3 = p-1..p6
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        // Shift the copy down one byte so lane x holds src[x+1], widen, weight by tap 1, and sum.
+        psrldq      xmm5, 1                 ; xmm4 = p0..p13
+        punpcklbw   xmm5, xmm0              ; xmm5 = p0..p7
+        pmullw      xmm5, xmm2              ; xmm5 *= kernel 1 modifiers
+        paddw       xmm3, xmm5              ; xmm3 += xmm5
+
+        paddw       xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+        
+        /*
+        Disabled byte-output path:
+        packuswb    xmm3, xmm0              ; pack and unpack to saturate
+        movdq2q     mm0, xmm3
+        */
+
+        // store all 8 words of the row (16 bytes), unaligned
+        movdqu      [edi],xmm3               ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterUnpackBlock1d_vb8_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First source pixel of the first row.
+ *                      PixelsPerLine
+ *                          Source pitch in bytes; also the distance between the two
+ *                          rows that are blended.
+ *                      PixelStep
+ *                          Not referenced by this routine.
+ *                      OutputHeight
+ *                          Number of rows to produce. The count is tested only after a
+ *                          row has been processed, so it must be non-zero.
+ *                      OutputWidth
+ *                          Used as the destination pitch in BYTES, not as a width.
+ *                      Filter
+ *                          16 INT16 values: Filter[0..7] weight the current row,
+ *                          Filter[8..15] weight the row below. Read with movdqa,
+ *                          which requires a 16-byte aligned address.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          8 INT16 results (16 bytes) are written per row with movdqu.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Vertical 2-tap filter with 16-bit output, in 16-bit lanes:
+ *                        out[x] = ( src[x]*Filter[x] + src[x+PixelsPerLine]*Filter[8+x]
+ *                                   + rd ) >> FILTER_SHIFT,  x = 0..7
+ *
+ *  SPECIAL NOTES :     The words are stored as computed; the pack-with-saturation
+ *                      step is commented out in the loop body.
+ *                      rd and FILTER_SHIFT are defined outside this function.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline 
+void FilterUnpackBlock1d_vb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
+{
+    __asm
+    {
+
+        // These two instructions only LOAD the taps: xmm1 = Filter[0..7], xmm2 = Filter[8..15].
+        // The trailing remarks on them are stale.
+        mov         edi, Filter
+        movdqa      xmm1, [edi]          ; xmm3 *= kernel 0 modifiers.
+        movdqa      xmm2, [edi + 16]     ; xmm3 *= kernel 0 modifiers.
+        mov         edx, PixelsPerLine
+        mov         edi, OutputPtr
+		mov			esi, SrcPtr
+        mov         ecx, DWORD PTR OutputHeight
+        mov         eax, OutputWidth        ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+
+nextrow:
+        // Current row: low 8 bytes widened to words and weighted by tap 0.
+        movdqu		xmm3, [esi]             ; xmm3 = p0..p16
+        punpcklbw   xmm3, xmm0              ; xmm3 = p0..p8
+        pmullw      xmm3, xmm1              ; xmm3 *= kernel 0 modifiers.
+
+        // Row below (one pitch further on): widened, weighted by tap 1, and summed.
+        movdqu		xmm4, [esi +edx ]       ; xmm4 = p0..p16
+        punpcklbw   xmm4, xmm0              ; xmm4 = p0..p8
+        pmullw      xmm4, xmm2              ; xmm4 *= kernel 1 modifiers.
+        paddw       xmm3, xmm4              ; xmm3 += xmm4
+
+        paddw       xmm3, rd                ; xmm3 += round value
+        psraw       xmm3, FILTER_SHIFT      ; xmm3 /= 128
+       
+        /* Disabled byte-output path:
+        packuswb    xmm3, xmm0              ; pack and unpack to saturate
+
+        movdq2q     mm0, xmm3
+        */
+        // store all 8 words of the row (16 bytes), unaligned
+        movdqu      [edi],xmm3               ; store the results in the destination
+
+        // The next iteration re-reads the row that was just loaded from [esi+edx].
+        // Since the recon block should be in cache this shouldn't cost much, but it
+        // is avoidable.
+        add         esi,edx
+        add         edi,eax 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+
+    }
+}
+ 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlockBil_8_wmt
+ *  
+ *  INPUTS        :     ReconPtr1, ReconPtr2
+ *                          Two pointers into the block of data to be filtered.
+ *                          These pointers bound the fractional pel position; they may
+ *                          be passed in either order.
+ *                      PixelsPerLine
+ *                          Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ *                      ModX, ModY
+ *                          The fractional pel bits; used to index BilinearFilters_wmt.
+ *
+ *  OUTPUTS       :     ReconRefPtr
+ *                          A pointer to an 8x8 buffer into which UINT8 filtered data is written.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Produces a bilinear filtered fractional pel prediction block
+ *                      with UINT8 output. The distance between the two pointers
+ *                      selects a horizontal, vertical or two-dimensional filter.
+ *
+ *  SPECIAL NOTES :     Nothing is written to ReconRefPtr when the distance between
+ *                      the pointers is not 1, PixelsPerLine, PixelsPerLine-1 or
+ *                      PixelsPerLine+1 (this includes equal pointers).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
+{
+	int Delta = ReconPtr2-ReconPtr1;
+
+	// The ModX and ModY arguments are the bottom three bits of the signed motion vector
+	// components (at 1/8th pel precision).  That is what is wanted here even though the
+	// pointers may be exchanged below:
+	//   +ve X component: ModX = X%8
+	//   -ve X component: ModX = 8+(X%8), where X%8 is in the range -7 to -1.
+
+	// Order the pointers so that ReconPtr1 is the lower address
+	// (above, left, above-right or above-left).
+	if( Delta<0 )
+	{
+		UINT8 *Lower = ReconPtr2;
+		ReconPtr2 = ReconPtr1;
+		ReconPtr1 = Lower;
+		Delta = (int)(ReconPtr2-ReconPtr1);
+	}
+
+	// Fractional pixel in horizontal only
+	if( Delta==1 )
+	{
+		FilterBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
+		return;
+	}
+
+	// Fractional pixel in vertical only
+	if( Delta==(int)(PixelsPerLine) )
+	{
+		FilterBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]);
+		return;
+	}
+
+	// Fractional pixel in both directions.  When ReconPtr1 is the top-right
+	// neighbour the 2-D filter starts one pixel to its left; when it is the
+	// top-left neighbour the filter starts at ReconPtr1 itself.
+	if( Delta==(int)(PixelsPerLine - 1) || Delta==(int)(PixelsPerLine + 1) )
+	{
+		UINT8 *TopLeft = ( Delta==(int)(PixelsPerLine - 1) ) ? ReconPtr1-1 : ReconPtr1;
+
+		FilterBlock2dBil_wmt( TopLeft, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UnpackBlock_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          First byte of the first source row.
+ *                      SrcPixelsPerLine
+ *                          Source pitch in bytes.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          8 rows of 8 UINT16 values, fixed pitch of 16 bytes.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Zero-extends an 8x8 block of bytes to 16-bit words.
+ *
+ *  SPECIAL NOTES :     16 source bytes are loaded per row although only the first 8
+ *                      are used, so each row read extends 8 bytes past the pixels
+ *                      that are converted.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+_inline void UnpackBlock_wmt( UINT8 *SrcPtr, UINT16 *OutputPtr, UINT32 SrcPixelsPerLine )
+{
+    __asm
+    {
+        mov         edi,OutputPtr
+		mov			esi,SrcPtr
+
+        // fixed geometry: 8 rows, 16 bytes (8 words) per output row
+        mov         ecx, 8
+        mov         eax, 16                 ; destination pitch?
+		pxor		xmm0, xmm0              ; xmm0 = 00000000
+
+nextrow:
+        // interleaving the low 8 bytes with zero widens them to 8 unsigned words
+        movdqu		xmm3, [esi]             ; xmm3 = p-1..p14    
+        punpcklbw   xmm3, xmm0              ; xmm3 = p-1..p6
+        movdqu     [edi],xmm3                ; store the results in the destination
+
+        add         esi,SrcPixelsPerLine    ; next line
+        add         edi,eax; 
+
+        dec         ecx                     ; decrement count
+        jnz         nextrow                 ; next row
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock2d_wmt
+ *  
+ *  INPUTS        :     SrcPtr
+ *                          Pointer to source data; the horizontal pass starts one
+ *                          line above it.
+ *                      SrcPixelsPerLine
+ *                          Source pitch.
+ *                      HFilter, VFilter
+ *                          Filter taps handed to the horizontal and vertical passes.
+ *
+ *  OUTPUTS       :     OutputPtr
+ *                          Filtered data (UINT8).
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Applies a 2d 4 tap filter on the input data to produce
+ *                      a predictor block (UINT8), as a horizontal pass into a
+ *                      local buffer followed by a vertical pass.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock2d_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
+{
+    UINT8  Intermediate[256];
+    UINT8 *FirstSrcRow  = SrcPtr-SrcPixelsPerLine;
+    UINT8 *FirstTempRow = Intermediate+BLOCK_HEIGHT_WIDTH;
+
+    // Horizontal pass: 11 rows, beginning one line above the block, written
+    // to Intermediate with an output pitch of 8.
+    FilterBlock1d_h_wmt(FirstSrcRow, Intermediate, SrcPixelsPerLine, 1, 11, 8, HFilter );
+
+    // Vertical pass: 8 rows, beginning at the second row of Intermediate.
+    FilterBlock1d_v_wmt(FirstTempRow, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
+}
+ 
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     FilterBlock_wmt
+ *  
+ *  INPUTS        :     ReconPtr1, ReconPtr2
+ *                          Two pointers into the block of data to be filtered.
+ *                          These pointers bound the fractional pel position; they may
+ *                          be passed in either order.
+ *                      PixelsPerLine
+ *                          Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr2
+ *                      ModX, ModY
+ *                          The fractional pel bits used to select a filter.
+ *                      UseBicubic
+ *                          Whether to use the bicubic filter set or the bilinear set
+ *                      BicubicAlpha
+ *                          Selects the bicubic filter table (first index of
+ *                          BicubicFilters_mmx); only used when UseBicubic is set.
+ *
+ *  OUTPUTS       :     ReconRefPtr
+ *                          A pointer to an 8x8 buffer of 16-bit values into which the
+ *                          filtered data is written.
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Produces a filtered fractional pel prediction block
+ *                      using bilinear or bicubic filters
+ *
+ *  SPECIAL NOTES :     Returns without writing when both pointers are equal.
+ *                      Bilinear: nothing is written when the pointer distance
+ *                      matches none of the four handled cases.
+ *                      Bicubic: in that same situation the local intermediate buffer
+ *                      is still unpacked to ReconRefPtr without having been filled.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
+{
+    UINT8 Intermediate[256];
+    int   Delta = ReconPtr2-ReconPtr1;
+
+    // The ModX and ModY arguments are the bottom three bits of the signed motion vector
+    // components (at 1/8th pel precision).  That is what is wanted here even though the
+    // pointers may be exchanged below:
+    //   +ve X component: ModX = X%8
+    //   -ve X component: ModX = 8+(X%8), where X%8 is in the range -7 to -1.
+
+    // Order the pointers so that ReconPtr1 is the lower address
+    // (above, left, above-right or above-left).
+    if( Delta<0 )
+    {
+        UINT8 *Lower = ReconPtr2;
+        ReconPtr2 = ReconPtr1;
+        ReconPtr1 = Lower;
+        Delta = (int)(ReconPtr2-ReconPtr1);
+    }
+
+    // Identical pointers: nothing to filter.
+    if( Delta==0 )
+    {
+        return;
+    }
+
+    if( !UseBicubic )
+    {
+        // Bilinear: each routine filters and writes 16-bit output directly
+        // (destination pitch 16 bytes), so no intermediate buffer is involved.
+        if( Delta==1 )
+        {
+            // Fractional pixel in horizontal only
+            FilterUnpackBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 16, BilinearFilters_wmt[ModX] );
+        }
+        else if( Delta==(int)(PixelsPerLine) )
+        {
+            // Fractional pixel in vertical only
+            FilterUnpackBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 16, BilinearFilters_wmt[ModY]);
+        }
+        else if( Delta==(int)(PixelsPerLine - 1) || Delta==(int)(PixelsPerLine + 1) )
+        {
+            // ReconPtr1 is top right (start one pixel to its left) or top left.
+            UINT8 *TopLeft = ( Delta==(int)(PixelsPerLine - 1) ) ? ReconPtr1-1 : ReconPtr1;
+
+            FilterUnpackBlock2dBil_wmt( TopLeft, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
+        }
+        return;
+    }
+
+    // Bicubic: filter to bytes in Intermediate, then widen to 16 bits.
+    if( Delta==1 )
+    {
+        // Fractional pixel in horizontal only
+        FilterBlock1d_h_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] );
+    }
+    else if( Delta==(int)(PixelsPerLine) )
+    {
+        // Fractional pixel in vertical only
+        FilterBlock1d_v_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]);
+    }
+    else if( Delta==(int)(PixelsPerLine - 1) || Delta==(int)(PixelsPerLine + 1) )
+    {
+        // ReconPtr1 is top right (start one pixel to its left) or top left.
+        UINT8 *TopLeft = ( Delta==(int)(PixelsPerLine - 1) ) ? ReconPtr1-1 : ReconPtr1;
+
+        FilterBlock2d_wmt( TopLeft, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
+    }
+
+    // Runs for every non-zero Delta, including one that matched no case above.
+    UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
new file mode 100644
index 00000000..50d8749d
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
@@ -0,0 +1,2156 @@
+/****************************************************************************
+*
+*   Module Title :     IDCTPart.c
+*
+*   Description  :     IDCT with multiple versions based on # of non 0 coeffs
+*
+*   AUTHOR       :     Scott Lavarnway, Tim Murphy
+*
+*****************************************************************************
+*   Revision History
+*	
+*   1.02 JBB 15 Nov 00 Cleaned out unused ifdefs
+*	1.01 YWX  15/05/00  Added MMX_idct3 for use in PostProcesser
+*   1.00 YWX  14/05/00  Configuration baseline from Scott Lavarnway
+*
+*****************************************************************************
+*/
+
+// Dequantization + inverse discrete cosine transform.
+// Timothy S. Murphy   14 July 1999.
+
+#pragma warning(disable:4005)
+#include "codec_common.h"
+#include <math.h>
+#include <memory.h>
+#undef PI
+#define	PI		3.14159265358979323846
+
+
+// Constants used in MMX implementation of dequantization and idct.
+// All the MMX stuff works with 4 16-bit quantities at a time and
+// we create 11 constants of size 4 x 16 bits.
+// The first 4 are used to mask the individual 16-bit words within a group
+// and are used in the address-shuffling part of the dequantization.
+// The last 7 are fixed-point approximations to the cosines of angles
+// occurring in the DCT; each of these contains 4 copies of the same value.
+
+// There is only one (statically initialized) instance of this object
+// wrapped in an allocator object that forces its starting address
+// to be evenly divisible by 32.  Hence the actual object occupies 2.75
+// cache lines on a Pentium processor.
+
+// Offsets in bytes used by the assembler code below
+// must of course agree with the idctConstants constructor.
+
+#define MaskOffset 0		// byte offset of the 4 word masks, in order low word to high
+#define CosineOffset 32		// byte offset of the 7 cosines, in order pi/16 * (1 ... 7); 4 masks * 8 bytes = 32
+#define EightOffset 88		// byte offset of the 12th group: (4 masks + 7 cosines) * 8 bytes = 88
+#define IdctAdjustBeforeShift 8		// NOTE(review): presumably the rounding bias for the final >> 4 -- confirm at the use sites
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+// Storage addressed by the offsets above: 12 groups of 4 UINT16. No initializer here; the code that fills it is not in view.
+UINT16 idctconstants[(4+7+1) * 4];
+UINT16 idctcosTbl[ 7] = 	// idctcosTbl[k-1] = round( 65536 * cos( k * pi / 16 ) ) for k = 1 ... 7
+{
+	64277, 60547, 54491, 46341, 36410, 25080, 12785
+};
+
+
+/* Dequantization + inverse DCT.
+
+   Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
+   by unsigned 16-bit quantization table entries.
+   These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
+   Result is scaled signed DCT coefficients (abs value < 2^15).
+
+   In the data stream, the coefficients are sent in order of increasing
+   total (horizontal + vertical) frequency.  The exact picture is as follows:
+
+	00 01 05 06  16 17 33 34
+	02 04 07 15  20 32 35 52
+	03 10 14 21  31 36 51 53
+	11 13 22 30  37 50 54 65
+
+	12 23 27 40  47 55 64 66
+	24 26 41 46	 56 63 67 74
+	25 42 45 57  62 70 73 75
+	43 44 60 61  71 72 76 77
+
+   Here the position in the matrix corresponds to the (horiz,vert)
+   frequency indices and the octal entry in the matrix is the position
+   of the coefficient in the data stream.  Thus the coefficients are sent
+   in sort of a diagonal "snake".
+
+   The dequantization stage "uncurls the snake" and stores the expanded
+   coefficients in more convenient positions.  These are not exactly the
+   natural positions given above but take into account our implementation
+   of the idct, which basically requires two one-dimensional idcts and
+   two transposes.
+
+   We fold the first transpose into the storage of the expanded coefficients.
+   We don't actually do a full transpose because this would require doubling
+   the size of the idct buffer; rather, we just transpose each of the 4x4
+   subblocks.  Using slightly varying addressing schemes in each of the
+   four 4x8 idcts then allows these transforms to be done in place.
+
+   Transposing the 4x4 subblocks in the matrix above gives
+
+	00 02 03 11  16 20 31 37
+	01 04 10 13  17 32 36 50
+	05 07 14 22  33 35 51 54
+	06 15 21 30  34 52 53 65
+
+	12 24 25 43  47 56 62 71
+	23 26 42 44  55 63 70 72
+	27 41 45 60  64 67 73 76
+	40 46 57 61  66 74 75 77
+
+   Finally, we reverse the words in each 4 word group to clarify
+   direction of shifts.
+
+	11 03 02 00  37 31 20 16
+	13 10 04 01  50 36 32 17
+	22 14 07 05	 54 51 35 33
+	30 21 15 06	 65 53 52 34
+
+	43 25 24 12	 71 62 56 47
+	44 42 26 23  72 70 63 55
+	60 45 41 27	 76 73 67 64
+	61 57 46 40  77 75 74 66
+
+   This matrix then shows the 16 4x16 destination words in terms of
+   the 16 4x16 input words.
+
+   We implement this algorithm by manipulation of mmx registers,
+   which seems to be the fastest way to proceed.  It is completely
+   hand-written; there does not seem to be enough recurrence to
+   reasonably compartmentalize any of it.  Hence the resulting
+   program is ugly and bloated.  Furthermore, due to the absence of
+   register pressure, it is boring and artless.	 I hate it.
+
+   The idct itself is more interesting.  Since the two-dimensional dct
+   basis functions are products of the one-dimensional dct basis functions,
+   we can compute an inverse (or forward) dct via two 1-D transforms,
+   on rows then on columns.  To exploit MMX parallelism, we actually do
+   both operations on columns, interposing a (partial) transpose between
+   the two 1-D transforms, the first transpose being done by the expansion
+   described above.
+
+   The 8-sample one-dimensional DCT is a standard orthogonal expansion using
+   the (unnormalized) basis functions
+
+	b[k]( i) = cos( pi * k * (2i + 1) / 16);
+
+   here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
+   To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
+   should be multiplied by 1/2.
+
+   The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
+   in each direction.  The (unnormalized) basis functions are
+
+	B[k,l]( i, j) = b[k]( i) * b[l]( j);
+
+   this time k and l are the horizontal and vertical frequencies,
+   i and j are the horizontal and vertical spatial coordinates;
+   all indices vary from 0 ... 7 (as above)
+   and there are now 4 cases of normalization.
+  
+   Our 1-D idct expansion uses constants C1 ... C7 given by
+
+   	(*)  Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16) 
+
+   and the following 1-D algorithm transforming I0 ... I7  to  R0 ... R7 :
+  
+   A = (C1 * I1) + (C7 * I7)		B = (C7 * I1) - (C1 * I7)
+   C = (C3 * I3) + (C5 * I5)		D = (C3 * I5) - (C5 * I3)
+   A. = C4 * (A - C)				B. = C4 * (B - D)
+   C. = A + C						D. = B + D
+   
+   E = C4 * (I0 + I4)				F = C4 * (I0 - I4)
+   G = (C2 * I2) + (C6 * I6)		H = (C6 * I2) - (C2 * I6)
+   E. = E - G
+   G. = E + G
+   
+   A.. = F + A.					B.. = B. - H
+   F.  = F - A. 				H.  = B. + H
+   
+   R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+   R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+   This algorithm was also used by Paul Wilkins in his C implementation;
+   it is due to Vetterli and Ligtenberg and may be found in the JPEG
+   reference book by Pennebaker and Mitchell.
+
+   Correctness of the algorithm follows from (*) together with the
+   addition formulas for sine and cosine:
+
+	cos( A + B) = cos( A) * cos( B)  -  sin( A) * sin( B)
+	sin( A + B) = sin( A) * cos( B)  +  cos( A) * sin( B)
+
+   Note that this implementation absorbs the difference in normalization
+   between the 0th and higher frequencies, although the results produced
+   are actually twice as big as they should be.  Since we do this for each
+   dimension, the 2-D idct results are 4x the desired results.  Finally,
+   taking into account that the dequantization multiplies by 4 as well,
+   our actual results are 16x too big.  We fix this by shifting the final
+   results right by 4 bits.
+
+   High precision version approximates C1 ... C7 to 16 bits.
+   Since MMX only provides a signed multiply, C1 ... C5 appear to be
+   negative and multiplies involving them must be adjusted to compensate
+   for this.  C6 and C7 do not require this adjustment since
+   they are < 1/2 and are correctly treated as positive numbers.
+
+   Following macro does four 8-sample one-dimensional idcts in parallel.
+   This is actually not such a difficult program to write once you
+   make a couple of observations (I of course was unable to make these
+   observations until I'd half-written a couple of other versions).
+
+	1. Everything is easy once you are done with the multiplies.
+	   This is because, given X and Y in registers, one may easily
+	   calculate X+Y and X-Y using just those 2 registers.
+
+	2. You always need at least 2 extra registers to calculate products,
+	   so storing 2 temporaries is inevitable.  C. and D. seem to be
+	   the best candidates.   
+
+	3. The products should be calculated in decreasing order of complexity
+	   (which translates into register pressure).  Since C1 ... C5 require
+	   adjustment (and C6, C7 do not), we begin by calculating C and D.
+*/
+
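+/**************************************************************************************
+ *
+ *		Routine:		BeginIDCT
+ *		
+ *		Description:	Macro: common first part of four 8-sample 1-D IDCTs done in parallel
+ *						(one IDCT per 16-bit lane of the MMX registers r0..r7).
+ *		Input:			Rows I(0)..I(3), J(4)..J(7) and constants C(1)..C(7); the macros
+ *						I, J, C and r0..r7 must be defined where this macro is expanded.
+ *		Output:			r0 = C., r1 = H., r2 = R2, r4 = E, r5 = B.., r6 = F., r7 = G,
+ *						I(2) = D.  (I(1) is also overwritten, with C.; r3 is scratch.)
+ *		Return:			None			
+ *
+ *		Special Note:	Emits instructions only; RowIDCT and ColumnIDCT finish the butterfly.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */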
+
+#define Dump	__asm  call MMX_dump /* debug aid: calls MMX_dump; the only MMX_dump definition in this section is commented out */
+
+#define BeginIDCT __asm { \
+	\
+	__asm	movq		r2, I(3)  /* r2 = i3 */ \
+	 \
+	__asm	movq		r6, C(3) /* r6 = C(3) */ \
+	__asm	 movq		r4, r2 /* r4 = i3 */ \
+	__asm	movq		r7, J(5) /* r7 = i5 */ \
+	__asm	 pmulhw		r4, r6		/* r4 = c3*i3 - i3 */ \
+	__asm	movq		r1, C(5) /* r1 = C(5) */ \
+	__asm	 pmulhw		r6, r7		/* r6 = c3*i5 - i5 */ \
+	__asm	movq		r5, r1 /* r5 = C(5) */ \
+	__asm	 pmulhw		r1, r2		/* r1 = c5*i3 - i3 */ \
+	__asm	movq		r3, I(1) /* r3 = i1 */ \
+	__asm	 pmulhw		r5, r7		/* r5 = c5*i5 - i5 */ \
+	__asm	movq		r0, C(1)	/* (all registers are in use) */ \
+	__asm	 paddw		r4, r2		/* r4 = c3*i3 */ \
+	__asm	paddw		r6, r7		/* r6 = c3*i5 */ \
+	__asm	 paddw		r2, r1		/* r2 = c5*i3 */ \
+	__asm	movq		r1, J(7) /* r1 = i7 */ \
+	__asm	 paddw		r7, r5		/* r7 = c5*i5 */ \
+	__asm	movq		r5, r0		/* r5 = c1 */ \
+	__asm	 pmulhw		r0, r3		/* r0 = c1*i1 - i1 */ \
+	__asm	paddsw		r4, r7		/* r4 = C = c3*i3 + c5*i5 */ \
+	__asm	 pmulhw		r5, r1		/* r5 = c1*i7 - i7 */ \
+	__asm	movq		r7, C(7) /* r7 = C(7) */ \
+	__asm	 psubsw		r6, r2		/* r6 = D = c3*i5 - c5*i3  (done w/r2) */ \
+	__asm	paddw		r0, r3		/* r0 = c1*i1 */ \
+	__asm	 pmulhw		r3, r7		/* r3 = c7*i1 */ \
+	__asm	movq		r2, I(2) /* r2 = i2 */ \
+	__asm	 pmulhw		r7, r1		/* r7 = c7*i7 */ \
+	__asm	paddw		r5, r1		/* r5 = c1*i7 */ \
+	__asm	 movq		r1, r2		/* r1 = i2 */ \
+	__asm	pmulhw		r2, C(2)	/* r2 = c2*i2 - i2 */ \
+	__asm	 psubsw		r3, r5		/* r3 = B = c7*i1 - c1*i7 */ \
+	__asm	movq		r5, J(6) /* r5 = i6 */ \
+	__asm	 paddsw		r0, r7		/* r0 = A = c1*i1 + c7*i7 */ \
+	__asm	movq		r7, r5		/* r7 = i6 */ \
+	__asm	 psubsw		r0, r4		/* r0 = A - C */ \
+	__asm	pmulhw		r5, C(2)	/* r5 = c2*i6 - i6 */ \
+	__asm	 paddw		r2, r1		/* r2 = c2*i2 */ \
+	__asm	pmulhw		r1, C(6)	/* r1 = c6*i2 */ \
+	__asm	 paddsw		r4, r4		/* r4 = C + C */ \
+	__asm	paddsw		r4, r0		/* r4 = C. = A + C */ \
+	__asm	 psubsw		r3, r6		/* r3 = B - D */ \
+	__asm	paddw		r5, r7		/* r5 = c2*i6 */ \
+	__asm	 paddsw		r6, r6		/* r6 = D + D */ \
+	__asm	pmulhw		r7, C(6)	/* r7 = c6*i6 */ \
+	__asm	 paddsw		r6, r3		/* r6 = D. = B + D */ \
+	__asm	movq		I(1), r4	/* save C. at I(1) */ \
+	__asm	 psubsw		r1, r5		/* r1 = H = c6*i2 - c2*i6 */ \
+	__asm	movq		r4, C(4) /* r4 = C(4) */ \
+	__asm	 movq		r5, r3		/* r5 = B - D */ \
+	__asm	pmulhw		r3, r4		/* r3 = (c4 - 1) * (B - D) */ \
+	__asm	 paddsw		r7, r2		/* r7 = G = c6*i6 + c2*i2 */ \
+	__asm	movq		I(2), r6	/* save D. at I(2) */ \
+	__asm	 movq		r2, r0		/* r2 = A - C */ \
+	__asm	movq		r6, I(0) /* r6 = i0 */ \
+	__asm	 pmulhw		r0, r4		/* r0 = (c4 - 1) * (A - C) */ \
+	__asm	paddw		r5, r3		/* r5 = B. = c4 * (B - D) */ \
+	 \
+	__asm	movq		r3, J(4) /* r3 = i4 */ \
+	__asm	 psubsw		r5, r1		/* r5 = B.. = B. - H */ \
+	__asm	paddw		r2, r0		/* r2 = A. = c4 * (A - C) */ \
+	__asm	 psubsw		r6, r3		/* r6 = i0 - i4 */ \
+	__asm	movq		r0, r6 /* r0 = i0 - i4 */ \
+	__asm	 pmulhw		r6, r4		/* r6 = (c4 - 1) * (i0 - i4) */ \
+	__asm	paddsw		r3, r3		/* r3 = i4 + i4 */ \
+	__asm	 paddsw		r1, r1		/* r1 = H + H */ \
+	__asm	paddsw		r3, r0		/* r3 = i0 + i4 */ \
+	__asm	 paddsw		r1, r5		/* r1 = H. = B. + H  (2H + B..) */ \
+	__asm	pmulhw		r4, r3		/* r4 = (c4 - 1) * (i0 + i4) */ \
+	__asm	 paddsw		r6, r0		/* r6 = F = c4 * (i0 - i4) */ \
+	__asm	psubsw		r6, r2		/* r6 = F. = F - A. */ \
+	__asm	 paddsw		r2, r2		/* r2 = A. + A. */ \
+	__asm	movq		r0, I(1)	/* r0 = C. */ \
+	__asm	 paddsw		r2, r6		/* r2 = A.. = F + A. */ \
+	__asm	paddw		r4, r3		/* r4 = E = c4 * (i0 + i4) */ \
+	__asm	 psubsw		r2, r1		/* r2 = R2 = A.. - H. */ \
+}
+// end BeginIDCT macro (38 cycles).
+
+
+// Two versions of the end of the idct depending on whether we're feeding
+// into a transpose or dividing the final results by 16 and storing them.
+
+/**************************************************************************************
+ *
+ *		Routine:		RowIDCT
+ *		
+ *		Description:	The Macro does 1-D IDct on 4 Rows: BeginIDCT followed by the final
+ *						butterflies, with no rounding or shifting of the results.
+ *		Input:			Same as BeginIDCT: I(0)..I(3), J(4)..J(7), C(1)..C(7).
+ *
+ *		Output:			r0 = R0, I(1) = R1, r2 = R2, r3 = R3, r4 = R4, r5 = R5, r6 = R6,
+ *						r7 = R7 (the entry layout the Transpose macro expects).
+ *		Return:			None			
+ *
+ *		Special Note:	r1 also still holds R1; I(2) is left holding the D. temporary.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+// RowIDCT gets ready to transpose.
+
+#define RowIDCT __asm { \
+	\
+	BeginIDCT \
+	\
+	__asm	movq		r3, I(2)	/* r3 = D. */ \
+	__asm	 psubsw		r4, r7		/* r4 = E. = E - G */ \
+	__asm	paddsw		r1, r1		/* r1 = H. + H. */ \
+	__asm	 paddsw		r7, r7		/* r7 = G + G */ \
+	__asm	paddsw		r1, r2		/* r1 = R1 = A.. + H. */ \
+	__asm	 paddsw		r7, r4		/* r7 = G. = E + G */ \
+	__asm	psubsw		r4, r3		/* r4 = R4 = E. - D. */ \
+	__asm	 paddsw		r3, r3 /* r3 = D. + D. */ \
+	__asm	psubsw		r6, r5		/* r6 = R6 = F. - B.. */ \
+	__asm	 paddsw		r5, r5 /* r5 = B.. + B.. */ \
+	__asm	paddsw		r3, r4		/* r3 = R3 = E. + D. */ \
+	__asm	 paddsw		r5, r6		/* r5 = R5 = F. + B.. */ \
+	__asm	psubsw		r7, r0		/* r7 = R7 = G. - C. */ \
+	__asm	 paddsw		r0, r0 /* r0 = C. + C. */ \
+	__asm	movq		I(1), r1	/* save R1 */ \
+	__asm	 paddsw		r0, r7		/* r0 = R0 = G. + C. */ \
+}
+// end RowIDCT macro (8 + 38 = 46 cycles)
+
+
+/**************************************************************************************
+ *
+ *		Routine:		ColumnIDCT
+ *		
+ *		Description:	The Macro does 1-D IDct on 4 columns: BeginIDCT followed by the final
+ *						butterflies, then adds Eight and shifts each result right by 4.
+ *		Input:			Same as BeginIDCT (I, J, C), plus the Eight memory operand.
+ *
+ *		Output:			NR0..NR3 stored at I(0)..I(3), NR4..NR7 stored at J(4)..J(7),
+ *						where NRk = (Rk + Eight) >> 4 (arithmetic shift, per 16-bit lane).
+ *		Return:			None			
+ *
+ *		Special Note:	Eight is added once to the "difference" result of each pair; the
+ *						matching "sum" result inherits it (see the "adjust" comments).
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+// Column IDCT normalizes and stores final results.
+
+#define ColumnIDCT __asm { \
+	\
+	BeginIDCT \
+	\
+	__asm	paddsw		r2, Eight	/* adjust R2 (and R1) for shift */ \
+	__asm	 paddsw		r1, r1		/* r1 = H. + H. */ \
+	__asm	paddsw		r1, r2		/* r1 = R1 = A.. + H. */ \
+	__asm	 psraw		r2, 4		/* r2 = NR2 */ \
+	__asm	psubsw		r4, r7		/* r4 = E. = E - G */ \
+	__asm	 psraw		r1, 4		/* r1 = NR1 */ \
+	__asm	movq		r3, I(2)	/* r3 = D. */ \
+	__asm	 paddsw		r7, r7		/* r7 = G + G */ \
+	__asm	movq		I(2), r2	/* store NR2 at I2 */ \
+	__asm	 paddsw		r7, r4		/* r7 = G. = E + G */ \
+	__asm	movq		I(1), r1	/* store NR1 at I1 */ \
+	__asm	 psubsw		r4, r3		/* r4 = R4 = E. - D. */ \
+	__asm	paddsw		r4, Eight	/* adjust R4 (and R3) for shift */ \
+	__asm	 paddsw		r3, r3		/* r3 = D. + D. */ \
+	__asm	paddsw		r3, r4		/* r3 = R3 = E. + D. */ \
+	__asm	 psraw		r4, 4		/* r4 = NR4 */ \
+	__asm	psubsw		r6, r5		/* r6 = R6 = F. - B.. */ \
+	__asm	 psraw		r3, 4		/* r3 = NR3 */ \
+	__asm	paddsw		r6, Eight	/* adjust R6 (and R5) for shift */ \
+	__asm	 paddsw		r5, r5		/* r5 = B.. + B.. */ \
+	__asm	paddsw		r5, r6		/* r5 = R5 = F. + B.. */ \
+	__asm	 psraw		r6, 4		/* r6 = NR6 */ \
+	__asm	movq		J(4), r4	/* store NR4 at J4 */ \
+	__asm	 psraw		r5, 4		/* r5 = NR5 */ \
+	__asm	movq		I(3), r3	/* store NR3 at I3 */ \
+	__asm	 psubsw		r7, r0		/* r7 = R7 = G. - C. */ \
+	__asm	paddsw		r7, Eight	/* adjust R7 (and R0) for shift */ \
+	__asm	 paddsw		r0, r0 		/* r0 = C. + C. */ \
+	__asm	paddsw		r0, r7		/* r0 = R0 = G. + C. */ \
+	__asm	 psraw		r7, 4		/* r7 = NR7 */ \
+	__asm	movq		J(6), r6	/* store NR6 at J6 */ \
+	__asm	 psraw		r0, 4		/* r0 = NR0 */ \
+	__asm	movq		J(5), r5	/* store NR5 at J5 */ \
+	 \
+	__asm	movq		J(7), r7	/* store NR7 at J7 */ \
+	 \
+	__asm	movq		I(0), r0	/* store NR0 at I0 */ \
+	 \
+}
+// end ColumnIDCT macro (38 + 19 = 57 cycles)
+
+/**************************************************************************************
+ *
+ *		Routine:		Transpose
+ *		
+ *		Description:	The Macro does two 4x4 transposes in place.
+ *
+ *		Input:			r0, I(1), r2, r3 (first 4x4) and r4, r5, r6, r7 (second 4x4);
+ *						r1 is free on entry and is used as scratch.
+ *		Output:			I(0)..I(3) = transpose of the first 4x4,
+ *						J(4)..J(7) = transpose of the second 4x4.
+ *		Return:			None			
+ *
+ *		Special Note:	I(0) is also used as a temporary save slot for r0 while the
+ *						second 4x4 is being processed; all of r0..r7 are overwritten.
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/* Following macro does two 4x4 transposes in place.
+
+  At entry (we assume):
+
+	r0 = a3 a2 a1 a0
+	I(1) = b3 b2 b1 b0
+	r2 = c3 c2 c1 c0
+	r3 = d3 d2 d1 d0
+
+	r4 = e3 e2 e1 e0
+	r5 = f3 f2 f1 f0
+	r6 = g3 g2 g1 g0
+	r7 = h3 h2 h1 h0
+
+   At exit, we have:
+
+	I(0) = d0 c0 b0 a0
+	I(1) = d1 c1 b1 a1
+	I(2) = d2 c2 b2 a2
+	I(3) = d3 c3 b3 a3
+	
+	J(4) = h0 g0 f0 e0
+	J(5) = h1 g1 f1 e1
+	J(6) = h2 g2 f2 e2
+	J(7) = h3 g3 f3 e3
+
+   I(0) I(1) I(2) I(3)  is the transpose of r0 I(1) r2 r3.
+   J(4) J(5) J(6) J(7)  is the transpose of r4 r5 r6 r7.
+
+   Since r1 is free at entry, we calculate the Js first. */
+
+
+#define Transpose __asm { \
+	\
+	__asm	movq		r1, r4			/* r1 = e3 e2 e1 e0 */ \
+	__asm	 punpcklwd	r4, r5			/* r4 = f1 e1 f0 e0 */ \
+	__asm	movq		I(0), r0		/* save a3 a2 a1 a0 */ \
+	__asm	 punpckhwd	r1, r5			/* r1 = f3 e3 f2 e2 */ \
+	__asm	movq		r0, r6			/* r0 = g3 g2 g1 g0 */ \
+	__asm	 punpcklwd	r6, r7			/* r6 = h1 g1 h0 g0 */ \
+	__asm	movq		r5, r4			/* r5 = f1 e1 f0 e0 */ \
+	__asm	 punpckldq	r4, r6			/* r4 = h0 g0 f0 e0 = R4 */ \
+	__asm	punpckhdq	r5, r6			/* r5 = h1 g1 f1 e1 = R5 */ \
+	__asm	 movq		r6, r1			/* r6 = f3 e3 f2 e2 */ \
+	__asm	movq		J(4), r4 /* store R4 */ \
+	__asm	 punpckhwd	r0, r7			/* r0 = h3 g3 h2 g2 */ \
+	__asm	movq		J(5), r5 /* store R5 */ \
+	__asm	 punpckhdq	r6, r0			/* r6 = h3 g3 f3 e3 = R7 */ \
+	__asm	movq		r4, I(0)		/* r4 = a3 a2 a1 a0 */ \
+	__asm	 punpckldq	r1, r0			/* r1 = h2 g2 f2 e2 = R6 */ \
+	__asm	movq		r5, I(1)		/* r5 = b3 b2 b1 b0 */ \
+	__asm	 movq		r0, r4			/* r0 = a3 a2 a1 a0 */ \
+	__asm	movq		J(7), r6 /* store R7 */ \
+	__asm	 punpcklwd	r0, r5			/* r0 = b1 a1 b0 a0 */ \
+	__asm	movq		J(6), r1 /* store R6 */ \
+	__asm	 punpckhwd	r4, r5			/* r4 = b3 a3 b2 a2 */ \
+	__asm	movq		r5, r2			/* r5 = c3 c2 c1 c0 */ \
+	__asm	 punpcklwd	r2, r3			/* r2 = d1 c1 d0 c0 */ \
+	__asm	movq		r1, r0			/* r1 = b1 a1 b0 a0 */ \
+	__asm	 punpckldq	r0, r2			/* r0 = d0 c0 b0 a0 = R0 */ \
+	__asm	punpckhdq	r1, r2			/* r1 = d1 c1 b1 a1 = R1 */ \
+	__asm	 movq		r2, r4			/* r2 = b3 a3 b2 a2 */ \
+	__asm	movq		I(0), r0 /* store R0 */ \
+	__asm	 punpckhwd	r5, r3			/* r5 = d3 c3 d2 c2 */ \
+	__asm	movq		I(1), r1 /* store R1 */ \
+	__asm	 punpckhdq	r4, r5			/* r4 = d3 c3 b3 a3 = R3 */ \
+	__asm	punpckldq	r2, r5			/* r2 = d2 c2 b2 a2 = R2 */ \
+	 \
+	__asm	movq		I(3), r4 /* store R3 */ \
+	 \
+	__asm	movq		I(2), r2 /* store R2 */ \
+	 \
+}
+// end Transpose macro (19 cycles).
+
+/*
+__declspec( naked) static void MMX_dump() 
+{
+	__asm 
+	{
+		movq	[edi],	mm0
+		movq	[edi+8], mm1
+		movq	[edi+16], mm2
+		movq	[edi+24], mm3
+		movq	[edi+32], mm4
+		movq	[edi+40], mm5
+		movq	[edi+48], mm6
+		movq	[edi+56], mm7
+		ret
+	}
+}
+*/
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct
+ *		
+ *		Description:	Dequantize, reorder and inverse-DCT one 8x8 block using MMX
+ *
+ *		Input:			input  - quantized coefficients (multiplied word-by-word with qtbl)
+ *						qtbl   - quantization table;  output - destination / work buffer
+ *		Output:			128 bytes (8x8 INT16) written to output, which is also used as scratch
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in ZigZag order.  Naked function: saves and
+ *						restores edx/ecx/ebx, overwrites eax and mm0-mm7, and contains no emms.
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+__declspec ( naked ) void MMX_idct (	INT16 * input, INT16 * qtbl, INT16 * output) 
+{
+
+//	uINT16 *constants = idctconstants;
+#	define M(I)		[ecx + MaskOffset + I*8]
+#	define C(I)		[ecx + CosineOffset + (I-1)*8]
+#	define Eight	[ecx + EightOffset]
+#   undef Arg
+#	define Arg(I)	[esp + 1*4 + 3*4 + I*4] // 1 return address + 3 pushes prior to args
+
+#	define r0	mm0
+#	define r1	mm1
+#	define r2	mm2
+#	define r3	mm3
+#	define r4	mm4
+#	define r5	mm5
+#	define r6	mm6
+#	define r7	mm7
+	(void) output;
+	(void) qtbl;
+	(void) input;
+
+
+	__asm {
+
+	push	edx
+	push	ecx
+	push	ebx
+
+;; Label:
+	mov		eax, Arg( 0)	; eax = quantized input
+	 mov	edx, Arg( 2)	; edx = destination (= idct buffer)
+
+	mov		ecx, [edx]		; (+1 at least) preload the cache before writing
+	 mov	ebx, [edx+28]   ; in case proc doesn't cache on writes
+	mov		ecx, [edx+56]	; gets all the cache lines
+	 mov	ebx, [edx+84]	; regardless of alignment (beyond 32-bit)
+	mov		ecx, [edx+112]	; also avoids address contention stalls
+	 mov	ebx, [edx+124]
+
+	mov		ebx, Arg( 1)	; ebx = quantization table
+	 lea    ecx, idctconstants ;;[0];
+
+	movq	r0, [eax]
+	 ;
+	pmullw	r0, [ebx]		; r0 = 03 02 01 00
+	 ;
+	movq	r1, [eax+16]
+	 ;
+	pmullw	r1, [ebx+16]	; r1 = 13 12 11 10
+	 ;
+	movq	r2, M(0)		; r2 = __ __ __ FF
+	 movq	r3, r0			; r3 = 03 02 01 00
+	movq	r4, [eax+8]
+	 psrlq	r0, 16			; r0 = __ 03 02 01
+	pmullw	r4, [ebx+8]		; r4 = 07 06 05 04
+	 pand	r3, r2			; r3 = __ __ __ 00
+	movq	r5, r0			; r5 = __ 03 02 01
+	 movq	r6, r1			; r6 = 13 12 11 10
+	pand	r5, r2			; r5 = __ __ __ 01
+	 psllq	r6, 32			; r6 = 11 10 __ __
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pxor	r0, r5			; r0 = __ 03 02 __
+	pand	r7, r6			; r7 = 11 __ __ __
+	 por	r0, r3			; r0 = __ 03 02 00
+	pxor	r6, r7			; r6 = __ 10 __ __
+	 por	r0, r7			; r0 = 11 03 02 00 = R0
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 movq	r3, r4			; r3 = 07 06 05 04
+	movq	[edx], r0		; write R0 = r0
+	 pand	r3, r2			; r3 = __ __ __ 04
+	movq	r0, [eax+32]
+	 psllq	r3, 16			; r3 = __ __ 04 __
+	pmullw	r0, [ebx+32]	; r0 = 23 22 21 20
+	 pand	r7, r1			; r7 = 13 __ __ __
+	por		r5, r3			; r5 = __ __ 04 01
+	 por	r7, r6			; r7 = 13 10 __ __
+	movq	r3, [eax+24]
+	 por	r7, r5			; r7 = 13 10 04 01 = R1
+	pmullw	r3, [ebx+24]	; r3 = 17 16 15 14
+	 psrlq	r4, 16			; r4 = __ 07 06 05
+	movq	[edx+16], r7	; write R1 = r7
+	 movq	r5, r4			; r5 = __ 07 06 05
+	movq	r7, r0			; r7 = 23 22 21 20
+	 psrlq	r4, 16			; r4 = __ __ 07 06
+	psrlq	r7, 48			; r7 = __ __ __ 23
+	 movq	r6, r2			; r6 = __ __ __ FF
+	pand	r5, r2			; r5 = __ __ __ 05
+	 pand	r6, r4			; r6 = __ __ __ 06
+	movq	[edx+80], r7	; partial R9 = __ __ __ 23
+	 pxor	r4, r6			; r4 = __ __ 07 __
+	psrlq	r1, 32			; r1 = __ __ 13 12
+	 por	r4, r5			; r4 = __ __ 07 05
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pand	r1, r2			; r1 = __ __ __ 12
+	movq	r5, [eax+48]
+	 psllq	r0, 16			; r0 = 22 21 20 __
+	pmullw	r5, [ebx+48]	; r5 = 33 32 31 30
+	 pand	r7, r0			; r7 = 22 __ __ __
+	movq	[edx+64], r1	; partial R8 = __ __ __ 12
+	 por	r7, r4			; r7 = 22 __ 07 05
+	movq	r4, r3			; r4 = 17 16 15 14
+	 pand	r3, r2			; r3 = __ __ __ 14
+	movq	r1, M(2)		; r1 = __ FF __ __
+	 psllq	r3, 32			; r3 = __ 14 __ __
+	por		r7, r3			; r7 = 22 14 07 05 = R2
+	 movq	r3, r5			; r3 = 33 32 31 30
+	psllq	r3, 48			; r3 = 30 __ __ __
+	 pand	r1, r0			; r1 = __ 21 __ __
+	movq	[edx+32], r7	; write R2 = r7
+	 por	r6, r3			; r6 = 30 __ __ 06
+	movq	r7, M(1)		; r7 = __ __ FF __
+	 por	r6, r1			; r6 = 30 21 __ 06
+	movq	r1, [eax+56]
+	 pand	r7, r4			; r7 = __ __ 15 __
+	pmullw	r1, [ebx+56]	; r1 = 37 36 35 34
+	 por	r7, r6			; r7 = 30 21 15 06 = R3
+	pand	r0, M(1)		; r0 = __ __ 20 __
+	 psrlq	r4, 32			; r4 = __ __ 17 16
+	movq	[edx+48], r7	; write R3 = r7
+	 movq	r6, r4			; r6 = __ __ 17 16
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pand	r4, r2			; r4 = __ __ __ 16
+	movq	r3, M(1)		; r3 = __ __ FF __
+	 pand	r7, r1			; r7 = 37 __ __ __
+	pand	r3, r5			; r3 = __ __ 31 __
+	 por	r0, r4			; r0 = __ __ 20 16
+	psllq	r3, 16			; r3 = __ 31 __ __
+	 por	r7, r0			; r7 = 37 __ 20 16
+	movq	r4, M(2)		; r4 = __ FF __ __
+	 por	r7, r3			; r7 = 37 31 20 16 = R4
+	movq	r0, [eax+80]
+	 movq	r3, r4			; r3 = __ __ FF __ /* correction: r3 is __ FF __ __ here (copy of M(2)); psrlq r3, 16 below moves it down */
+	pmullw	r0, [ebx+80]	; r0 = 53 52 51 50
+	 pand	r4, r5			; r4 = __ 32 __ __
+	movq	[edx+8], r7		; write R4 = r7
+	 por	r6, r4			; r6 = __ 32 17 16
+	movq	r4, r3			; r4 = __ FF __ __
+	 psrlq	r6, 16			; r6 = __ __ 32 17
+	movq	r7, r0			; r7 = 53 52 51 50
+	 pand	r4, r1			; r4 = __ 36 __ __
+	psllq	r7, 48			; r7 = 50 __ __ __
+	 por	r6, r4			; r6 = __ 36 32 17
+	movq	r4, [eax+88]
+	 por	r7, r6			; r7 = 50 36 32 17 = R5
+	pmullw	r4, [ebx+88]	; r4 = 57 56 55 54
+	 psrlq	r3, 16			; r3 = __ __ FF __
+	movq	[edx+24], r7	; write R5 = r7	 
+	 pand	r3, r1			; r3 = __ __ 35 __
+	psrlq	r5, 48			; r5 = __ __ __ 33
+	 pand	r1, r2			; r1 = __ __ __ 34
+	movq	r6, [eax+104]
+	 por	r5, r3			; r5 = __ __ 35 33
+	pmullw	r6, [ebx+104]	; r6 = 67 66 65 64
+	 psrlq	r0, 16			; r0 = __ 53 52 51
+	movq	r7, r4			; r7 = 57 56 55 54
+	 movq	r3, r2			; r3 = __ __ __ FF
+	psllq	r7, 48			; r7 = 54 __ __ __
+	 pand	r3, r0			; r3 = __ __ __ 51
+	pxor	r0, r3			; r0 = __ 53 52 __
+	 psllq	r3, 32			; r3 = __ 51 __ __
+	por		r7, r5			; r7 = 54 __ 35 33
+	 movq	r5, r6			; r5 = 67 66 65 64
+	pand	r6, M(1)		; r6 = __ __ 65 __
+	 por	r7, r3			; r7 = 54 51 35 33 = R6
+	psllq	r6, 32			; r6 = 65 __ __ __
+	 por	r0, r1			; r0 = __ 53 52 34
+	movq	[edx+40], r7	; write R6 = r7
+	 por	r0, r6			; r0 = 65 53 52 34 = R7
+	movq	r7, [eax+120]
+	 movq	r6, r5			; r6 = 67 66 65 64
+	pmullw	r7, [ebx+120]	; r7 = 77 76 75 74
+	 psrlq	r5, 32			; r5 = __ __ 67 66
+	pand	r6, r2			; r6 = __ __ __ 64
+	 movq	r1, r5			; r1 = __ __ 67 66
+	movq	[edx+56], r0	; write R7 = r0
+	 pand	r1, r2			; r1 = __ __ __ 66
+	movq	r0, [eax+112]
+	 movq	r3, r7			; r3 = 77 76 75 74
+	pmullw	r0, [ebx+112]	; r0 = 73 72 71 70
+	 psllq	r3, 16			; r3 = 76 75 74 __
+	pand	r7, M(3)		; r7 = 77 __ __ __
+	 pxor	r5, r1			; r5 = __ __ 67 __
+	por		r6, r5			; r6 = __ __ 67 64
+	 movq	r5, r3			; r5 = 76 75 74 __
+	pand	r5, M(3)		; r5 = 76 __ __ __
+	 por	r7, r1			; r7 = 77 __ __ 66
+	movq	r1, [eax+96]
+	 pxor	r3, r5			; r3 = __ 75 74 __
+	pmullw	r1, [ebx+96] 	; r1 = 63 62 61 60
+	 por	r7, r3			; r7 = 77 75 74 66 = R15
+	por		r6, r5			; r6 = 76 __ 67 64
+	 movq	r5, r0			; r5 = 73 72 71 70
+	movq	[edx+120], r7	; store R15 = r7
+	 psrlq	r5, 16			; r5 = __ 73 72 71
+	pand	r5, M(2)		; r5 = __ 73 __ __
+	 movq	r7, r0			; r7 = 73 72 71 70
+	por		r6, r5			; r6 = 76 73 67 64 = R14
+	 pand	r0, r2			; r0 = __ __ __ 70
+	pxor	r7, r0			; r7 = 73 72 71 __
+	 psllq	r0, 32			; r0 = __ 70 __ __	
+	movq	[edx+104], r6	; write R14 = r6
+	 psrlq	r4, 16			; r4 = __ 57 56 55
+	movq	r5, [eax+72]
+	 psllq	r7, 16			; r7 = 72 71 __ __
+	pmullw	r5, [ebx+72]	; r5 = 47 46 45 44
+	 movq	r6, r7			; r6 = 72 71 __ __
+	movq	r3, M(2)		; r3 = __ FF __ __
+	 psllq	r6, 16			; r6 = 71 __ __ __
+	pand	r7, M(3)		; r7 = 72 __ __ __
+	 pand	r3, r1			; r3 = __ 62 __ __
+	por		r7, r0			; r7 = 72 70 __ __
+	 movq	r0, r1			; r0 = 63 62 61 60
+	pand	r1, M(3)		; r1 = 63 __ __ __
+	 por	r6, r3			; r6 = 71 62 __ __
+	movq	r3, r4			; r3 = __ 57 56 55
+	 psrlq	r1, 32			; r1 = __ __ 63 __
+	pand	r3, r2			; r3 = __ __ __ 55
+	 por	r7, r1			; r7 = 72 70 63 __
+	por		r7, r3			; r7 = 72 70 63 55 = R13
+	 movq	r3, r4			; r3 = __ 57 56 55
+	pand	r3, M(1)		; r3 = __ __ 56 __
+	 movq	r1, r5			; r1 = 47 46 45 44
+	movq	[edx+88], r7	; write R13 = r7
+	 psrlq	r5, 48			; r5 = __ __ __ 47
+	movq	r7, [eax+64]
+	 por	r6, r3			; r6 = 71 62 56 __	 
+	pmullw	r7, [ebx+64]	; r7 = 43 42 41 40
+	 por	r6, r5			; r6 = 71 62 56 47 = R12
+	pand	r4, M(2)		; r4 = __ 57 __ __
+	 psllq	r0, 32			; r0 = 61 60 __ __
+	movq	[edx+72], r6	; write R12 = r6
+	 movq	r6, r0			; r6 = 61 60 __ __
+	pand	r0, M(3)		; r0 = 61 __ __ __
+	 psllq	r6, 16			; r6 = 60 __ __ __
+	movq	r5, [eax+40]
+	 movq	r3, r1			; r3 = 47 46 45 44
+	pmullw	r5, [ebx+40]	; r5 = 27 26 25 24
+	 psrlq	r1, 16			; r1 = __ 47 46 45
+	pand	r1, M(1)		; r1 = __ __ 46 __
+	 por	r0, r4			; r0 = 61 57 __ __
+	pand	r2, r7			; r2 = __ __ __ 40
+	 por	r0, r1			; r0 = 61 57 46 __
+	por		r0, r2			; r0 = 61 57 46 40 = R11
+	 psllq	r3, 16			; r3 = 46 45 44 __
+	movq	r4, r3			; r4 = 46 45 44 __
+	 movq	r2, r5			; r2 = 27 26 25 24
+	movq	[edx+112], r0	; write R11 = r0
+	 psrlq	r2, 48			; r2 = __ __ __ 27
+	pand	r4, M(2)		; r4 = __ 45 __ __
+	 por	r6, r2			; r6 = 60 __ __ 27
+	movq	r2, M(1)		; r2 = __ __ FF __
+	 por	r6, r4			; r6 = 60 45 __ 27
+	pand	r2, r7			; r2 = __ __ 41 __	 
+	 psllq	r3, 32			; r3 = 44 __ __ __
+	por		r3, [edx+80]	; r3 = 44 __ __ 23
+	 por	r6, r2			; r6 = 60 45 41 27 = R10
+	movq	r2, M(3)		; r2 = FF __ __ __
+	 psllq	r5, 16			; r5 = 26 25 24 __
+	movq	[edx+96], r6	; store R10 = r6
+	 pand	r2, r5			; r2 = 26 __ __ __
+	movq	r6, M(2)		; r6 = __ FF __ __
+	 pxor	r5, r2			; r5 = __ 25 24 __
+	pand	r6, r7			; r6 = __ 42 __ __
+	 psrlq	r2, 32			; r2 = __ __ 26 __
+	pand	r7, M(3)		; r7 = 43 __ __ __
+	 por	r3, r2			; r3 = 44 __ 26 23
+	por		r7, [edx+64]	; r7 = 43 __ __ 12
+	 por	r6, r3			; r6 = 44 42 26 23 = R9
+	por		r7, r5			; r7 = 43 25 24 12 = R8
+	 ;
+	movq	[edx+80], r6	; store R9 = r6
+	 ;
+	movq	[edx+64], r7	; store R8 = r7
+	 ;
+	; 123c  ( / 64 coeffs  < 2c / coeff)
+#	undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+#	define I( K)	[edx + (  K      * 16)] /* row pass 1: R0..R3 at byte offsets 0, 16, 32, 48 */
+#	define J( K)	[edx + ( (K - 4) * 16) + 8] /* row pass 1: R4..R7 at byte offsets 8, 24, 40, 56 */
+
+	RowIDCT			; 46 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (  K      * 16) + 64] /* row pass 2: R8..R11 at byte offsets 64, 80, 96, 112 */
+#	define J( K)	[edx + ( (K - 4) * 16) + 72] /* row pass 2: R12..R15 at byte offsets 72, 88, 104, 120 */
+
+	RowIDCT			; 46 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16)] /* column pass 1: first quadword of each 16-byte line */
+#	define J( K)	I( K)
+
+	ColumnIDCT		; 57 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16) + 8] /* column pass 2: second quadword of each 16-byte line */
+#	define J( K)	I( K)
+
+	ColumnIDCT		; 57 c
+
+#	undef I
+#	undef J
+	pop ebx
+	pop ecx
+	pop edx
+	ret
+	; 368 cycles  ( / 64 coeff  <  6 c / coeff)
+ }
+}
+
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct10
+ *		
+ *		Description:	Perform IDCT on a 8x8 block with at most 10 nonzero coefficients
+ *
+ *		Input:			Pointer to input and output buffer				
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in transposed ZigZag order
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+/* --------------------------------------------------------------- */
+// This macro does four 4-sample one-dimensional idcts in parallel.  Inputs
+// 4 thru 7 are assumed to be zero (J() is never read).  Exit state matches BeginIDCT: r0 = C., r1 = H., r2 = R2, r4 = E, r5 = B.., r6 = F., r7 = G, I(1) = C., I(2) = D.
+#define BeginIDCT_10 __asm { \
+\
+	__asm	movq		r2, I(3)  /* r2 = i3 */ \
+__asm   nop \
+\
+	__asm	movq		r6, C(3) /* r6 = C(3) */ \
+	__asm	movq		r4, r2 /* r4 = i3 */ \
+\
+	__asm	movq		r1, C(5) /* r1 = C(5) */ \
+	__asm	pmulhw		r4, r6		/* r4 = c3*i3 - i3 */ \
+\
+	__asm	movq		r3, I(1) /* r3 = i1 */ \
+	__asm	pmulhw		r1, r2		/* r1 = c5*i3 - i3 */ \
+\
+	__asm	movq		r0, C(1)	/* r0 = C(1) */ \
+	__asm	paddw		r4, r2		/* r4 = C = c3*i3 */ \
+\
+    __asm   pxor        r6,r6       /* used to get -(c5*i3) */ \
+	__asm	paddw		r2, r1		/* r2 = c5*i3 */ \
+\
+	__asm	movq		r5, I(2) /* r5 = i2 */ \
+	__asm	pmulhw		r0, r3		/* r0 = c1*i1 - i1 */ \
+\
+	__asm	movq		r1, r5 /* r1 = i2 */ \
+	__asm	paddw		r0, r3		/* r0 = A = c1*i1 */ \
+\
+	__asm	pmulhw		r3, C(7)	/* r3 = B = c7*i1 */ \
+	__asm	psubsw		r6, r2		/* r6 = D = -c5*i3 */ \
+\
+	__asm	pmulhw		r5, C(2)	/* r5 = c2*i2 - i2 */ \
+	__asm	psubsw		r0, r4		/* r0 = A - C */ \
+\
+    __asm   movq        r7,I(2) /* r7 = i2 */ \
+	__asm	paddsw		r4, r4		/* r4 = C + C */ \
+\
+	__asm	paddw		r7, r5		/* r7 = G = c2*i2 */ \
+	__asm	paddsw		r4, r0		/* r4 = C. = A + C */ \
+\
+	__asm	pmulhw		r1, C(6)	/* r1 = H = c6*i2 */ \
+	__asm	psubsw		r3, r6		/* r3 = B - D */ \
+\
+	__asm	movq		I(1), r4	/* save C. at I(1) */ \
+	__asm	paddsw		r6, r6		/* r6 = D + D */ \
+\
+    __asm	movq		r4, C(4) /* r4 = C(4) */ \
+	__asm	paddsw		r6, r3		/* r6 = D. = B + D */ \
+\
+	__asm	movq		r5, r3		/* r5 = B - D */ \
+	__asm	pmulhw		r3, r4		/* r3 = (c4 - 1) * (B - D) */ \
+\
+	__asm	movq		I(2), r6	/* save D. at I(2) */ \
+	__asm	movq		r2, r0		/* r2 = A - C */ \
+\
+	__asm	movq		r6, I(0) /* r6 = i0 */ \
+	__asm	pmulhw		r0, r4		/* r0 = (c4 - 1) * (A - C) */ \
+\
+	__asm	paddw		r5, r3		/* r5 = B. = c4 * (B - D) */ \
+	__asm	paddw		r2, r0		/* r2 = A. = c4 * (A - C) */ \
+\
+	__asm	psubsw		r5, r1		/* r5 = B.. = B. - H */ \
+	__asm	pmulhw		r6, r4		/* r6 = c4*i0 - i0 */ \
+\
+    __asm   paddw       r6, I(0)    /* r6 = E = c4*i0 */ \
+	__asm	paddsw		r1, r1		/* r1 = H + H */ \
+\
+	__asm	movq		r4, r6      /* r4 = E */ \
+	__asm	paddsw		r1, r5		/* r1 = H. = B. + H  (2H + B..) */ \
+\
+	__asm	psubsw		r6, r2		/* r6 = F. = E - A. */ \
+	__asm	paddsw		r2, r2		/* r2 = A. + A. */ \
+\
+	__asm	movq		r0, I(1)	/* r0 = C. */ \
+	__asm	paddsw		r2, r6		/* r2 = A.. = E + A. */ \
+\
+	__asm	psubsw		r2, r1		/* r2 = R2 = A.. - H. */ \
+__asm   nop \
+}
+// end BeginIDCT_10 macro (25 cycles).
+
+#define RowIDCT_10 __asm { \
+	\
+	BeginIDCT_10 \
+	\
+	__asm	movq		r3, I(2)	/* r3 = D. */ \
+	__asm	 psubsw		r4, r7		/* r4 = E. = E - G */ \
+	__asm	paddsw		r1, r1		/* r1 = H. + H. */ \
+	__asm	 paddsw		r7, r7		/* r7 = G + G */ \
+	__asm	paddsw		r1, r2		/* r1 = R1 = A.. + H. */ \
+	__asm	 paddsw		r7, r4		/* r7 = G. = E + G */ \
+	__asm	psubsw		r4, r3		/* r4 = R4 = E. - D. */ \
+	__asm	 paddsw		r3, r3 /* r3 = D. + D. */ \
+	__asm	psubsw		r6, r5		/* r6 = R6 = F. - B.. */ \
+	__asm	 paddsw		r5, r5 /* r5 = B.. + B.. */ \
+	__asm	paddsw		r3, r4		/* r3 = R3 = E. + D. */ \
+	__asm	 paddsw		r5, r6		/* r5 = R5 = F. + B.. */ \
+	__asm	psubsw		r7, r0		/* r7 = R7 = G. - C. */ \
+	__asm	 paddsw		r0, r0 /* r0 = C. + C. */ \
+	__asm	movq		I(1), r1	/* save R1 */ \
+	__asm	 paddsw		r0, r7		/* r0 = R0 = G. + C. */ \
+}
+// end RowIDCT_10 macro (8 + 25 = 33 cycles, using the 25-cycle figure quoted for BeginIDCT_10)
+
+// ColumnIDCT_10 normalizes (adds Eight, shifts right by 4) and stores final results at I(0)..I(3), J(4)..J(7).
+
+#define ColumnIDCT_10 __asm { \
+	\
+	BeginIDCT_10 \
+	\
+	__asm	paddsw		r2, Eight	/* adjust R2 (and R1) for shift */ \
+	__asm	 paddsw		r1, r1		/* r1 = H. + H. */ \
+	__asm	paddsw		r1, r2		/* r1 = R1 = A.. + H. */ \
+	__asm	 psraw		r2, 4		/* r2 = NR2 */ \
+	__asm	psubsw		r4, r7		/* r4 = E. = E - G */ \
+	__asm	 psraw		r1, 4		/* r1 = NR1 */ \
+	__asm	movq		r3, I(2)	/* r3 = D. */ \
+	__asm	 paddsw		r7, r7		/* r7 = G + G */ \
+	__asm	movq		I(2), r2	/* store NR2 at I2 */ \
+	__asm	 paddsw		r7, r4		/* r7 = G. = E + G */ \
+	__asm	movq		I(1), r1	/* store NR1 at I1 */ \
+	__asm	 psubsw		r4, r3		/* r4 = R4 = E. - D. */ \
+	__asm	paddsw		r4, Eight	/* adjust R4 (and R3) for shift */ \
+	__asm	 paddsw		r3, r3		/* r3 = D. + D. */ \
+	__asm	paddsw		r3, r4		/* r3 = R3 = E. + D. */ \
+	__asm	 psraw		r4, 4		/* r4 = NR4 */ \
+	__asm	psubsw		r6, r5		/* r6 = R6 = F. - B.. */ \
+	__asm	 psraw		r3, 4		/* r3 = NR3 */ \
+	__asm	paddsw		r6, Eight	/* adjust R6 (and R5) for shift */ \
+	__asm	 paddsw		r5, r5		/* r5 = B.. + B.. */ \
+	__asm	paddsw		r5, r6		/* r5 = R5 = F. + B.. */ \
+	__asm	 psraw		r6, 4		/* r6 = NR6 */ \
+	__asm	movq		J(4), r4	/* store NR4 at J4 */ \
+	__asm	 psraw		r5, 4		/* r5 = NR5 */ \
+	__asm	movq		I(3), r3	/* store NR3 at I3 */ \
+	__asm	 psubsw		r7, r0		/* r7 = R7 = G. - C. */ \
+	__asm	paddsw		r7, Eight	/* adjust R7 (and R0) for shift */ \
+	__asm	 paddsw		r0, r0 		/* r0 = C. + C. */ \
+	__asm	paddsw		r0, r7		/* r0 = R0 = G. + C. */ \
+	__asm	 psraw		r7, 4		/* r7 = NR7 */ \
+	__asm	movq		J(6), r6	/* store NR6 at J6 */ \
+	__asm	 psraw		r0, 4		/* r0 = NR0 */ \
+	__asm	movq		J(5), r5	/* store NR5 at J5 */ \
+	 \
+	__asm	movq		J(7), r7	/* store NR7 at J7 */ \
+	 \
+	__asm	movq		I(0), r0	/* store NR0 at I0 */ \
+	 \
+}
+// end ColumnIDCT_10 macro (25 + 19 = 44 cycles, using the 25-cycle figure quoted for BeginIDCT_10)
+/* --------------------------------------------------------------- */
+
+
+/* --------------------------------------------------------------- */
+/* IDCT 10 */
+__declspec ( naked ) void MMX_idct10 (	INT16 * input, INT16 * qtbl, INT16 * output) 
+{
+
+#	define M(I)		[ecx + MaskOffset + I*8]
+#	define C(I)		[ecx + CosineOffset + (I-1)*8]
+#	define Eight	[ecx + EightOffset]
+#   undef Arg
+#	define Arg(I)	[esp + 16 + I*4]
+
+#	define r0	mm0
+#	define r1	mm1
+#	define r2	mm2
+#	define r3	mm3
+#	define r4	mm4
+#	define r5	mm5
+#	define r6	mm6
+#	define r7	mm7
+	(void) output;
+	(void) qtbl;
+	(void) input;
+
+ __asm {
+	push	edx				
+	push	ecx
+	push	ebx
+
+// Label:
+	mov		eax, Arg( 0)	; eax = quantized input
+	 mov	edx, Arg( 2)	; edx = destination (= idct buffer)
+
+	mov		ecx, [edx]		; (+1 at least) preload the cache before writing
+	 mov	ebx, [edx+28]   ; in case proc doesn't cache on writes
+	mov		ecx, [edx+56]	; gets all the cache lines
+	 mov	ebx, [edx+84]	; regardless of alignment (beyond 32-bit)
+	mov		ecx, [edx+112]	; also avoids address contention stalls
+	 mov	ebx, [edx+124]
+
+	mov		ebx, Arg( 1)	; ebx = quantization table
+	lea     ecx, idctconstants ;; [0];
+
+	movq	r0, [eax]
+	 ;
+	pmullw	r0, [ebx]		; r0 = 03 02 01 00
+	 ;
+	movq	r1, [eax+16]
+	 ;
+	pmullw	r1, [ebx+16]	; r1 = 13 12 11 10
+	 ;
+	movq	r2, M(0)		; r2 = __ __ __ FF
+	 movq	r3, r0			; r3 = 03 02 01 00
+	movq	r4, [eax+8]
+	 psrlq	r0, 16			; r0 = __ 03 02 01
+	pmullw	r4, [ebx+8]		; r4 = 07 06 05 04
+	 pand	r3, r2			; r3 = __ __ __ 00
+	movq	r5, r0			; r5 = __ 03 02 01
+	 movq	r6, r1			; r6 = 13 12 11 10
+	pand	r5, r2			; r5 = __ __ __ 01
+	 psllq	r6, 32			; r6 = 11 10 __ __
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pxor	r0, r5			; r0 = __ 03 02 __
+	pand	r7, r6			; r7 = 11 __ __ __
+	 por	r0, r3			; r0 = __ 03 02 00
+	pxor	r6, r7			; r6 = __ 10 __ __
+	 por	r0, r7			; r0 = 11 03 02 00 = R0
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 movq	r3, r4			; r3 = 07 06 05 04
+	movq	[edx], r0		; write R0 = r0
+	 pand	r3, r2			; r3 = __ __ __ 04
+	movq	r0, [eax+32]
+	 psllq	r3, 16			; r3 = __ __ 04 __
+	pmullw	r0, [ebx+32]	; r0 = 23 22 21 20
+	 pand	r7, r1			; r7 = 13 __ __ __
+	por		r5, r3			; r5 = __ __ 04 01
+	 por	r7, r6			; r7 = 13 10 __ __
+	movq	r3, [eax+24]
+	 por	r7, r5			; r7 = 13 10 04 01 = R1
+	pmullw	r3, [ebx+24]	; r3 = 17 16 15 14
+	 psrlq	r4, 16			; r4 = __ 07 06 05
+	movq	[edx+16], r7	; write R1 = r7
+	 movq	r5, r4			; r5 = __ 07 06 05
+	movq	r7, r0			; r7 = 23 22 21 20
+	 psrlq	r4, 16			; r4 = __ __ 07 06
+	psrlq	r7, 48			; r7 = __ __ __ 23
+	 movq	r6, r2			; r6 = __ __ __ FF
+	pand	r5, r2			; r5 = __ __ __ 05
+	 pand	r6, r4			; r6 = __ __ __ 06
+	movq	[edx+80], r7	; partial R9 = __ __ __ 23
+	 pxor	r4, r6			; r4 = __ __ 07 __
+	psrlq	r1, 32			; r1 = __ __ 13 12
+	 por	r4, r5			; r4 = __ __ 07 05
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pand	r1, r2			; r1 = __ __ __ 12
+	movq	r5, [eax+48]
+	 psllq	r0, 16			; r0 = 22 21 20 __
+	pmullw	r5, [ebx+48]	; r5 = 33 32 31 30
+	 pand	r7, r0			; r7 = 22 __ __ __
+	movq	[edx+64], r1	; partial R8 = __ __ __ 12
+	 por	r7, r4			; r7 = 22 __ 07 05
+	movq	r4, r3			; r4 = 17 16 15 14
+	 pand	r3, r2			; r3 = __ __ __ 14
+	movq	r1, M(2)		; r1 = __ FF __ __
+	 psllq	r3, 32			; r3 = __ 14 __ __
+	por		r7, r3			; r7 = 22 14 07 05 = R2
+	 movq	r3, r5			; r3 = 33 32 31 30
+	psllq	r3, 48			; r3 = 30 __ __ __
+	 pand	r1, r0			; r1 = __ 21 __ __
+	movq	[edx+32], r7	; write R2 = r7
+	 por	r6, r3			; r6 = 30 __ __ 06
+	movq	r7, M(1)		; r7 = __ __ FF __
+	 por	r6, r1			; r6 = 30 21 __ 06
+	movq	r1, [eax+56]
+	 pand	r7, r4			; r7 = __ __ 15 __
+	pmullw	r1, [ebx+56]	; r1 = 37 36 35 34
+	 por	r7, r6			; r7 = 30 21 15 06 = R3
+	pand	r0, M(1)		; r0 = __ __ 20 __
+	 psrlq	r4, 32			; r4 = __ __ 17 16
+	movq	[edx+48], r7	; write R3 = r7
+	 movq	r6, r4			; r6 = __ __ 17 16
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pand	r4, r2			; r4 = __ __ __ 16
+	movq	r3, M(1)		; r3 = __ __ FF __
+	 pand	r7, r1			; r7 = 37 __ __ __
+	pand	r3, r5			; r3 = __ __ 31 __
+	 por	r0, r4			; r0 = __ __ 20 16
+	psllq	r3, 16			; r3 = __ 31 __ __
+	 por	r7, r0			; r7 = 37 __ 20 16
+	movq	r4, M(2)		; r4 = __ FF __ __
+	 por	r7, r3			; r7 = 37 31 20 16 = R4
+	movq	r0, [eax+80]
+	 movq	r3, r4			; r3 = __ __ FF __
+	pmullw	r0, [ebx+80]	; r0 = 53 52 51 50
+	 pand	r4, r5			; r4 = __ 32 __ __
+	movq	[edx+8], r7		; write R4 = r7
+	 por	r6, r4			; r6 = __ 32 17 16
+	movq	r4, r3			; r4 = __ FF __ __
+	 psrlq	r6, 16			; r6 = __ __ 32 17
+	movq	r7, r0			; r7 = 53 52 51 50
+	 pand	r4, r1			; r4 = __ 36 __ __
+	psllq	r7, 48			; r7 = 50 __ __ __
+	 por	r6, r4			; r6 = __ 36 32 17
+	movq	r4, [eax+88]
+	 por	r7, r6			; r7 = 50 36 32 17 = R5
+	pmullw	r4, [ebx+88]	; r4 = 57 56 55 54
+	 psrlq	r3, 16			; r3 = __ __ FF __
+	movq	[edx+24], r7	; write R5 = r7	 
+	 pand	r3, r1			; r3 = __ __ 35 __
+	psrlq	r5, 48			; r5 = __ __ __ 33
+	 pand	r1, r2			; r1 = __ __ __ 34
+	movq	r6, [eax+104]
+	 por	r5, r3			; r5 = __ __ 35 33
+	pmullw	r6, [ebx+104]	; r6 = 67 66 65 64
+	 psrlq	r0, 16			; r0 = __ 53 52 51
+	movq	r7, r4			; r7 = 57 56 55 54
+	 movq	r3, r2			; r3 = __ __ __ FF
+	psllq	r7, 48			; r7 = 54 __ __ __
+	 pand	r3, r0			; r3 = __ __ __ 51
+	pxor	r0, r3			; r0 = __ 53 52 __
+	 psllq	r3, 32			; r3 = __ 51 __ __
+	por		r7, r5			; r7 = 54 __ 35 33
+	 movq	r5, r6			; r5 = 67 66 65 64
+	pand	r6, M(1)		; r6 = __ __ 65 __
+	 por	r7, r3			; r7 = 54 51 35 33 = R6
+	psllq	r6, 32			; r6 = 65 __ __ __
+	 por	r0, r1			; r0 = __ 53 52 34
+	movq	[edx+40], r7	; write R6 = r7
+	 por	r0, r6			; r0 = 65 53 52 34 = R7
+	movq	r7, [eax+120]
+	 movq	r6, r5			; r6 = 67 66 65 64
+	pmullw	r7, [ebx+120]	; r7 = 77 76 75 74
+	 psrlq	r5, 32			; r5 = __ __ 67 66
+	pand	r6, r2			; r6 = __ __ __ 64
+	 movq	r1, r5			; r1 = __ __ 67 66
+	movq	[edx+56], r0	; write R7 = r0
+	 pand	r1, r2			; r1 = __ __ __ 66
+	movq	r0, [eax+112]
+	 movq	r3, r7			; r3 = 77 76 75 74
+	pmullw	r0, [ebx+112]	; r0 = 73 72 71 70
+	 psllq	r3, 16			; r3 = 76 75 74 __
+	pand	r7, M(3)		; r7 = 77 __ __ __
+	 pxor	r5, r1			; r5 = __ __ 67 __
+	por		r6, r5			; r6 = __ __ 67 64
+	 movq	r5, r3			; r5 = 76 75 74 __
+	pand	r5, M(3)		; r5 = 76 __ __ __
+	 por	r7, r1			; r7 = 77 __ __ 66
+	movq	r1, [eax+96]
+	 pxor	r3, r5			; r3 = __ 75 74 __
+	pmullw	r1, [ebx+96] 	; r1 = 63 62 61 60
+	 por	r7, r3			; r7 = 77 75 74 66 = R15
+	por		r6, r5			; r6 = 76 __ 67 64
+	 movq	r5, r0			; r5 = 73 72 71 70
+	movq	[edx+120], r7	; store R15 = r7
+	 psrlq	r5, 16			; r5 = __ 73 72 71
+	pand	r5, M(2)		; r5 = __ 73 __ __
+	 movq	r7, r0			; r7 = 73 72 71 70
+	por		r6, r5			; r6 = 76 73 67 64 = R14
+	 pand	r0, r2			; r0 = __ __ __ 70
+	pxor	r7, r0			; r7 = 73 72 71 __
+	 psllq	r0, 32			; r0 = __ 70 __ __	
+	movq	[edx+104], r6	; write R14 = r6
+	 psrlq	r4, 16			; r4 = __ 57 56 55
+	movq	r5, [eax+72]
+	 psllq	r7, 16			; r7 = 72 71 __ __
+	pmullw	r5, [ebx+72]	; r5 = 47 46 45 44
+	 movq	r6, r7			; r6 = 72 71 __ __
+	movq	r3, M(2)		; r3 = __ FF __ __
+	 psllq	r6, 16			; r6 = 71 __ __ __
+	pand	r7, M(3)		; r7 = 72 __ __ __
+	 pand	r3, r1			; r3 = __ 62 __ __
+	por		r7, r0			; r7 = 72 70 __ __
+	 movq	r0, r1			; r0 = 63 62 61 60
+	pand	r1, M(3)		; r1 = 63 __ __ __
+	 por	r6, r3			; r6 = 71 62 __ __
+	movq	r3, r4			; r3 = __ 57 56 55
+	 psrlq	r1, 32			; r1 = __ __ 63 __
+	pand	r3, r2			; r3 = __ __ __ 55
+	 por	r7, r1			; r7 = 72 70 63 __
+	por		r7, r3			; r7 = 72 70 63 55 = R13
+	 movq	r3, r4			; r3 = __ 57 56 55
+	pand	r3, M(1)		; r3 = __ __ 56 __
+	 movq	r1, r5			; r1 = 47 46 45 44
+	movq	[edx+88], r7	; write R13 = r7
+	 psrlq	r5, 48			; r5 = __ __ __ 47
+	movq	r7, [eax+64]
+	 por	r6, r3			; r6 = 71 62 56 __	 
+	pmullw	r7, [ebx+64]	; r7 = 43 42 41 40
+	 por	r6, r5			; r6 = 71 62 56 47 = R12
+	pand	r4, M(2)		; r4 = __ 57 __ __
+	 psllq	r0, 32			; r0 = 61 60 __ __
+	movq	[edx+72], r6	; write R12 = r6
+	 movq	r6, r0			; r6 = 61 60 __ __
+	pand	r0, M(3)		; r0 = 61 __ __ __
+	 psllq	r6, 16			; r6 = 60 __ __ __
+	movq	r5, [eax+40]
+	 movq	r3, r1			; r3 = 47 46 45 44
+	pmullw	r5, [ebx+40]	; r5 = 27 26 25 24
+	 psrlq	r1, 16			; r1 = __ 47 46 45
+	pand	r1, M(1)		; r1 = __ __ 46 __
+	 por	r0, r4			; r0 = 61 57 __ __
+	pand	r2, r7			; r2 = __ __ __ 40
+	 por	r0, r1			; r0 = 61 57 46 __
+	por		r0, r2			; r0 = 61 57 46 40 = R11
+	 psllq	r3, 16			; r3 = 46 45 44 __
+	movq	r4, r3			; r4 = 46 45 44 __
+	 movq	r2, r5			; r2 = 27 26 25 24
+	movq	[edx+112], r0	; write R11 = r0
+	 psrlq	r2, 48			; r2 = __ __ __ 27
+	pand	r4, M(2)		; r4 = __ 45 __ __
+	 por	r6, r2			; r6 = 60 __ __ 27
+	movq	r2, M(1)		; r2 = __ __ FF __
+	 por	r6, r4			; r6 = 60 45 __ 27
+	pand	r2, r7			; r2 = __ __ 41 __	 
+	 psllq	r3, 32			; r3 = 44 __ __ __
+	por		r3, [edx+80]	; r3 = 44 __ __ 23
+	 por	r6, r2			; r6 = 60 45 41 27 = R10
+	movq	r2, M(3)		; r2 = FF __ __ __
+	 psllq	r5, 16			; r5 = 26 25 24 __
+	movq	[edx+96], r6	; store R10 = r6
+	 pand	r2, r5			; r2 = 26 __ __ __
+	movq	r6, M(2)		; r6 = __ FF __ __
+	 pxor	r5, r2			; r5 = __ 25 24 __
+	pand	r6, r7			; r6 = __ 42 __ __
+	 psrlq	r2, 32			; r2 = __ __ 26 __
+	pand	r7, M(3)		; r7 = 43 __ __ __
+	 por	r3, r2			; r3 = 44 __ 26 23
+	por		r7, [edx+64]	; r7 = 43 __ __ 12
+	 por	r6, r3			; r6 = 44 42 26 23 = R9
+	por		r7, r5			; r7 = 43 25 24 12 = R8
+	 ;
+	movq	[edx+80], r6	; store R9 = r6
+	 ;
+	movq	[edx+64], r7	; store R8 = r7
+	 ;
+	; 123c  ( / 64 coeffs  < 2c / coeff)
+
+#	undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+#	define I( K)	[edx + (  K      * 16)]
+#	define J( K)	[edx + ( (K - 4) * 16) + 8]
+
+	RowIDCT_10		; 33 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (  K      * 16) + 64]
+#	define J( K)	[edx + ( (K - 4) * 16) + 72]
+
+//	RowIDCT			; 46 c
+//	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16)]
+#	define J( K)	I( K)
+
+	ColumnIDCT_10		; 44 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16) + 8]
+#	define J( K)	I( K)
+
+	ColumnIDCT_10		; 44 c
+
+#	undef I
+#	undef J
+
+
+
+	pop	ebx
+	pop ecx
+	pop	edx
+	 ret		
+ }
+}
+
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct1
+ *
+ *		Description:	IDCT of an 8x8 block whose only nonzero coefficient (if any)
+ *						is the DC term, so all 64 output samples are identical
+ *		Input:			input  - quantized coefficients; only input[0] is read
+ *						qtbl   - dequantization factors; only qtbl[0] is read
+ *		Output:			output - 64 INT16 samples (128 bytes), each set to the low
+ *						16 bits of (input[0] * qtbl[0] + 15) >> 5
+ *		Return:			None
+ *
+ *		Special Note:	output is written through an INT32 pointer, two samples per
+ *						store; the pair is packed with unsigned arithmetic
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/* --------------------------------------------------------------- */
+/* IDCT 1 */
+void MMX_idct1 (INT16 * input, INT16 * qtbl, INT16 * output) 
+{
+        if(input[0])
+        {
+            int i;
+            INT32 temp = (INT32)input[0];
+            INT32 *iBuf = (INT32 *)output;
+            unsigned long dc;
+
+            temp *= qtbl[0];
+
+            //necessary in order to match tim's
+            temp += 15;
+            temp >>= 5;
+            /* Duplicate the low 16 bits into both halves of a 32-bit word.
+               Unsigned arithmetic is used because left-shifting a signed
+               value into the sign bit (any negative DC) is undefined in C. */
+            dc = (unsigned long)temp & 0xffffUL;
+            temp = (INT32)(dc | (dc << 16));
+
+            for(i = 0; i < 32; i += 4)
+            {
+                iBuf[i] = temp;
+                iBuf[i+1] = temp;
+                iBuf[i+2] = temp;
+                iBuf[i+3] = temp;
+            }
+        }
+        else
+        {
+            /* special case where there is only a 0 dc coeff */
+            memset( output, 0, 128);
+        }
+}
+
+/* --------------------------------------------------------------- */
+/*
+    The following functions (MMX_idct_DX and MMX_idct10_DX) are only
+    used by the dxer.  The coeffs are written into a transposed order 
+    during the unpack stage.  
+*/
+/* --------------------------------------------------------------- */
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct_DX
+ *		
+ *		Description:	Dequantize and perform IDCT on a 8x8 block
+ *
+ *		Input:			input  - 64 quantized INT16 coefficients (128 bytes)
+ *						qtbl   - 64 INT16 factors, multiplied element by element
+ *		Output:			output - 128-byte idct buffer; it is also read (cache
+ *						preload) before anything is written to it
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in transposed ZigZag order.
+ *						Naked function: arguments are fetched relative to esp;
+ *						edx, ecx and ebx are saved and restored, eax is not.
+ *		Error:			None
+ ***************************************************************************************
+ */
+
+__declspec ( naked ) void MMX_idct_DX (	INT16 * input, INT16 * qtbl, INT16 * output) 
+{
+
+//	uINT16 *constants = idctconstants;
+#	define M(I)		[ecx + MaskOffset + I*8]
+#	define C(I)		[ecx + CosineOffset + (I-1)*8]
+#	define Eight	[ecx + EightOffset]
+#   undef Arg
+#	define Arg(I)	[esp + 1*4 + 3*4 + I*4] // 1 return address + 3 pushes prior to args
+
+#	define r0	mm0
+#	define r1	mm1
+#	define r2	mm2
+#	define r3	mm3
+#	define r4	mm4
+#	define r5	mm5
+#	define r6	mm6
+#	define r7	mm7
+	(void) output;
+	(void) qtbl;
+	(void) input;
+
+	__asm {
+
+	push	edx
+	push	ecx
+	push	ebx
+
+;; Label:
+	mov		eax, Arg( 0)	; eax = quantized input
+	 mov	edx, Arg( 2)	; edx = destination (= idct buffer)
+
+	mov		ecx, [edx]		; (+1 at least) preload the cache before writing
+	 mov	ebx, [edx+28]   ; in case proc doesn't cache on writes
+	mov		ecx, [edx+56]	; gets all the cache lines
+	 mov	ebx, [edx+84]	; regardless of alignment (beyond 32-bit)
+	mov		ecx, [edx+112]	; also avoids address contention stalls
+	 mov	ebx, [edx+124]
+
+	mov		ebx, Arg( 1)	; ebx = quantization table
+	 lea    ecx, idctconstants ;;[0];
+
+//dequantization    
+//try to optimize better
+	movq	r0, [eax+0]
+	 ;
+	pmullw	r0, [ebx+0]		; r0 = 03 02 01 00
+	 ;
+	movq	r1, [eax+8]
+	 ;
+	pmullw	r1, [ebx+8]
+	 ;
+	movq	r2, [eax+16]
+	 ;
+	pmullw	r2, [ebx+16]
+	 ;
+	movq	r3, [eax+24]
+	 ;
+	pmullw	r3, [ebx+24]
+	 ;
+	movq	r4, [eax+32]
+	 ;
+	pmullw	r4, [ebx+32]
+	 ;
+	movq	r5, [eax+40]
+	 ;
+	pmullw	r5, [ebx+40]
+	 ;
+	movq	r6, [eax+48]
+	 ;
+	pmullw	r6, [ebx+48]
+	 ;
+	movq	r7, [eax+56]
+	 ;
+	pmullw	r7, [ebx+56]
+	 ;
+    movq    [edx+0],r0
+     ;
+    movq    [edx+8],r1
+     ;
+    movq    [edx+16],r2
+     ;
+    movq    [edx+24],r3
+     ;
+    movq    [edx+32],r4
+     ;
+    movq    [edx+40],r5
+     ;
+    movq    [edx+48],r6
+     ;
+    movq    [edx+56],r7
+     ;
+;;;;;;;;;;;    
+	movq	r0, [eax+64]
+	 ;
+	pmullw	r0, [ebx+64]		; r0 = 03 02 01 00
+	 ;
+	movq	r1, [eax+72]
+	 ;
+	pmullw	r1, [ebx+72]
+	 ;
+	movq	r2, [eax+80]
+	 ;
+	pmullw	r2, [ebx+80]
+	 ;
+	movq	r3, [eax+88]
+	 ;
+	pmullw	r3, [ebx+88]
+	 ;
+	movq	r4, [eax+96]
+	 ;
+	pmullw	r4, [ebx+96]
+	 ;
+	movq	r5, [eax+104]
+	 ;
+	pmullw	r5, [ebx+104]
+	 ;
+	movq	r6, [eax+112]
+	 ;
+	pmullw	r6, [ebx+112]
+	 ;
+	movq	r7, [eax+120]
+	 ;
+	pmullw	r7, [ebx+120]
+	 ;
+    movq    [edx+64],r0
+     ;
+    movq    [edx+72],r1
+     ;
+    movq    [edx+80],r2
+     ;
+    movq    [edx+88],r3
+     ;
+    movq    [edx+96],r4
+     ;
+    movq    [edx+104],r5
+     ;
+    movq    [edx+112],r6
+     ;
+    movq    [edx+120],r7
+     ;
+
+#	undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+#	define I( K)	[edx + (  K      * 16)]
+#	define J( K)	[edx + ( (K - 4) * 16) + 8]
+
+	RowIDCT			; 46 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (  K      * 16) + 64]
+#	define J( K)	[edx + ( (K - 4) * 16) + 72]
+
+	RowIDCT			; 46 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16)]
+#	define J( K)	I( K)
+
+	ColumnIDCT		; 57 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16) + 8]
+#	define J( K)	I( K)
+
+	ColumnIDCT		; 57 c
+
+#	undef I
+#	undef J
+	pop ebx
+	pop ecx
+	pop edx
+	ret
+	; 368 cycles  ( / 64 coeff  <  6 c / coeff)
+ }
+}
+
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct10_DX
+ *		
+ *		Description:	Perform IDCT on a 8x8 block with at most 10 nonzero coefficients
+ *
+ *		Input:			input  - quantized INT16 coefficients; only the four words
+ *						at each of byte offsets 0, 16, 32 and 48 are read
+ *						qtbl   - INT16 factors, multiplied into those same words
+ *		Output:			output - 128-byte idct buffer; every other position is
+ *						zero-filled before the row/column passes
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in transposed ZigZag order.
+ *						Only one row pass is run; the second is commented out.
+ *		Error:			None
+ ***************************************************************************************
+ */
+/* --------------------------------------------------------------- */
+/* IDCT 10 */
+__declspec ( naked ) void MMX_idct10_DX (	INT16 * input, INT16 * qtbl, INT16 * output) 
+{
+
+#	define M(I)		[ecx + MaskOffset + I*8]
+#	define C(I)		[ecx + CosineOffset + (I-1)*8]
+#	define Eight	[ecx + EightOffset]
+#	undef Arg
+#	define Arg(I)	[esp + 16 + I*4]
+
+#	define r0	mm0
+#	define r1	mm1
+#	define r2	mm2
+#	define r3	mm3
+#	define r4	mm4
+#	define r5	mm5
+#	define r6	mm6
+#	define r7	mm7
+	(void) output;
+	(void) qtbl;
+	(void) input;
+
+ __asm {
+	push	edx				
+	push	ecx
+	push	ebx
+
+// Label:
+	mov		eax, Arg( 0)	; eax = quantized input
+	 mov	edx, Arg( 2)	; edx = destination (= idct buffer)
+
+	mov		ecx, [edx]		; (+1 at least) preload the cache before writing
+	 mov	ebx, [edx+28]   ; in case proc doesn't cache on writes
+	mov		ecx, [edx+56]	; gets all the cache lines
+	 mov	ebx, [edx+84]	; regardless of alignment (beyond 32-bit)
+	mov		ecx, [edx+112]	; also avoids address contention stalls
+	 mov	ebx, [edx+124]
+
+	mov		ebx, Arg( 1)	; ebx = quantization table
+	lea     ecx, idctconstants ;; [0];
+
+//dequantization    
+	movq	r0, [eax+0]
+	 ;
+	pmullw	r0, [ebx+0]	
+	 ;
+	movq	r1, [eax+16]
+	 ;
+	pmullw	r1, [ebx+16]
+	 ;
+	movq	r2, [eax+32]
+	 ;
+	pmullw	r2, [ebx+32]
+	 ;
+	movq	r3, [eax+48]
+	 ;
+	pmullw	r3, [ebx+48]
+	 ;
+    movq    [edx+0],r0
+    pxor    r5,r5
+
+    movq    [edx+8],r5
+     ;
+    movq    [edx+16],r1
+     ;
+    movq    [edx+24],r5
+     ;
+    movq    [edx+32],r2
+     ;
+    movq    [edx+40],r5
+     ;
+    movq    [edx+48],r3
+     ;
+    movq    [edx+56],r5
+     ;
+    movq    [edx+64],r5
+     ;
+    movq    [edx+72],r5
+     ;
+    movq    [edx+80],r5
+     ;
+    movq    [edx+88],r5
+     ;
+    movq    [edx+96],r5
+     ;
+    movq    [edx+104],r5
+     ;
+    movq    [edx+112],r5
+     ;
+    movq    [edx+120],r5
+     ;
+
+#	undef M
+
+; Done w/dequant + descramble + partial transpose; now do the idct itself.
+
+#	define I( K)	[edx + (  K      * 16)]
+#	define J( K)	[edx + ( (K - 4) * 16) + 8]
+
+	RowIDCT_10		; 33 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (  K      * 16) + 64]
+#	define J( K)	[edx + ( (K - 4) * 16) + 72]
+
+//	RowIDCT			; 46 c
+//	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16)]
+#	define J( K)	I( K)
+
+	ColumnIDCT_10		; 44 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16) + 8]
+#	define J( K)	I( K)
+
+	ColumnIDCT_10		; 44 c
+
+#	undef I
+#	undef J
+
+
+
+	pop	ebx
+	pop ecx
+	pop	edx
+	 ret		
+ }
+}
+
+
+
+/**************************************************************************************
+ *
+ *		Routine:		MMX_idct3
+ *		
+ *		Description:	Perform IDCT on a 8x8 block with at most 3 nonzero coefficients
+ *
+ *		Input:			Pointer to input and output buffer				
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	Only works for three nonzero coefficients.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+/***************************************************************************************
+	In IDCT 3, we are dealing with only three Non-Zero coefficients in the 8x8 block. 
+	In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to 
+	do 1-D row idcts on the first two rows; the remaining six rows stay zero anyway. 
+	After the row IDCTs, since every column could have nonzero coefficients, we need to do
+	eight 1-D column IDCTs. However, in each column there are at most two nonzero
+	coefficients, coefficient 0 and coefficient 1. The same holds for the coefficients of the 
+	two 1-D row idcts. For this reason, the process of a 1-D IDCT is simplified 
+	
+	from a full version:
+	
+	A = (C1 * I1) + (C7 * I7)		B = (C7 * I1) - (C1 * I7)
+	C = (C3 * I3) + (C5 * I5)		D = (C3 * I5) - (C5 * I3)
+	A. = C4 * (A - C)				B. = C4 * (B - D)
+    C. = A + C						D. = B + D
+   
+    E = C4 * (I0 + I4)				F = C4 * (I0 - I4)
+    G = (C2 * I2) + (C6 * I6)		H = (C6 * I2) - (C2 * I6)
+    E. = E - G
+    G. = E + G
+   
+    A.. = F + A.					B.. = B. - H
+    F.  = F - A. 					H.  = B. + H
+   
+    R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+    R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+	To:
+
+
+	A = (C1 * I1)					B = (C7 * I1)
+	C = 0							D = 0
+	A. = C4 * A 					B. = C4 * B 
+    C. = A							D. = B 
+   
+    E = C4 * I0 					F = E
+    G = 0							H = 0
+    E. = E 
+    G. = E 
+
+    A.. = E + A.					B.. = B. 
+    F.  = E - A. 					H.  = B. 
+   
+    R0 = E + A		R1 = E + A. + B.	R3 = E + B		R5 = E - A. + B.
+    R7 = E - A		R2 = E + A. - B.	R4 = E - B		R6 = E - A. - B.
+	
+******************************************************************************************/
+
+#define RowIDCT_3 __asm {			\
+	/* Row pass using only I(0), I(1): ends with R0,R2..R7 in r0,r2..r7 and R1 in r1 and I(1) */	\
+	__asm	movq		r7, I(1)	/* r7 = I1						*/  \
+	__asm	movq		r0, C(1)	/* r0 = C1						*/  \
+\
+	__asm	movq		r3, C(7)	/* r3 = C7						*/  \
+	__asm	pmulhw		r0, r7		/* r0 = C1 * I1 - I1			*/	\
+\
+	__asm   pmulhw		r3, r7		/* r3 = C7 * I1	= B, D.			*/  \
+	__asm	movq		r6, I(0)	/* r6 = I0						*/  \
+\
+	__asm	movq		r4, C(4)	/* r4 = C4						*/  \
+	__asm	paddw		r0, r7		/* r0 = C1 * I1 = A, C.			*/  \
+\
+	__asm	movq		r1, r6		/* make a copy of I0			*/	\
+	__asm	pmulhw		r6, r4      /* r6 = C4 * I0 - I0			*/	\
+\
+	__asm	movq		r2, r0		/* make a copy of A				*/  \
+	__asm   movq		r5, r3		/* make a copy of B				*/	\
+\
+	__asm	pmulhw		r2, r4      /* r2 = C4 * A - A				*/  \
+	__asm   pmulhw		r5, r4		/* r5 = C4 * B - B				*/  \
+\
+	__asm	paddw		r6, r1		/* r6 = C4 * I0	= E, F			*/	\
+	__asm	movq		r4, r6		/* r4 = E						*/  \
+\
+	__asm   paddw		r2, r0		/* r2 = A.   					*/  \
+	__asm	paddw		r5, r3		/* r5 = B.						*/  \
+\
+	__asm	movq		r7, r6		/* r7 = E						*/  \
+	__asm	movq		r1, r5		/* r1 = B.						*/  \
+\
+	/*  r0 = A		*/   \
+	/*	r3 = B		*/	 \
+	/*  r2 = A.		*/   \
+	/*  r5 = B.		*/   \
+	/*  r6 = E		*/   \
+	/*  r4 = E		*/   \
+	/*  r7 = E		*/   \
+	/*  r1 = B.		*/   \
+\
+	__asm	psubw		r6, r2		/* r6 = E - A.					*/  \
+	__asm   psubw		r4, r3		/* r4 = E - B ----R4			*/  \
+\
+	__asm	psubw		r7, r0		/* r7 = E - A ----R7			*/  \
+	__asm	paddw		r2, r2		/* r2 = A. + A.					*/  \
+\
+	__asm	paddw		r3, r3		/* r3 = B + B					*/  \
+	__asm	paddw		r0, r0      /* r0 = A + A					*/  \
+\
+	__asm	paddw		r2, r6		/* r2 = E + A.					*/  \
+	__asm	paddw	    r3, r4		/* r3 = E + B ----R3			*/  \
+\
+	__asm	psubw		r2, r1		/* r2 = E + A. - B.	----R2  	*/  \
+	__asm	psubw		r6, r5		/* r6 = E - A. - B.	----R6		*/  \
+\
+	__asm	paddw		r1, r1      /* r1 = B. + B.					*/  \
+	__asm	paddw		r5, r5		/* r5 = B. + B.					*/  \
+\
+	__asm	paddw	    r0, r7		/* r0 = E + A ----R0			*/  \
+	__asm	paddw		r1, r2		/* r1 = E + A. + B. -----R1     */  \
+\
+	__asm	movq		I(1), r1    /* save r1						*/	\
+	__asm	paddw		r5, r6		/* r5 = E - A. + B.	-----R5     */  \
+\
+}
+//End of RowIDCT_3
+
+#define ColumnIDCT_3 __asm {			\
+	/* Column pass using only I(0), I(1): adds Eight to E, shifts results right by 4, stores R0-R3 via I() and R4-R7 via J() */	\
+	__asm	movq		r7, I(1)	/* r7 = I1						*/  \
+	__asm	movq		r0, C(1)	/* r0 = C1						*/  \
+\
+	__asm	movq		r3, C(7)	/* r3 = C7						*/  \
+	__asm	pmulhw		r0, r7		/* r0 = C1 * I1 - I1			*/	\
+\
+	__asm   pmulhw		r3, r7		/* r3 = C7 * I1	= B, D.			*/  \
+	__asm	movq		r6, I(0)	/* r6 = I0						*/  \
+\
+	__asm	movq		r4, C(4)	/* r4 = C4						*/  \
+	__asm	paddw		r0, r7		/* r0 = C1 * I1 = A, C.			*/  \
+\
+	__asm	movq		r1, r6		/* make a copy of I0			*/	\
+	__asm	pmulhw		r6, r4      /* r6 = C4 * I0 - I0			*/	\
+\
+	__asm	movq		r2, r0		/* make a copy of A				*/  \
+	__asm   movq		r5, r3		/* make a copy of B				*/	\
+\
+	__asm	pmulhw		r2, r4      /* r2 = C4 * A - A				*/  \
+	__asm   pmulhw		r5, r4		/* r5 = C4 * B - B				*/  \
+\
+	__asm	paddw		r6, r1		/* r6 = C4 * I0	= E, F			*/	\
+	__asm	movq		r4, r6		/* r4 = E						*/  \
+\
+	__asm	paddw		r6, Eight	/* +8 for shift					*/  \
+	__asm   Paddw		r4, Eight   /* +8 for shift					*/  \
+\
+	__asm   paddw		r2, r0		/* r2 = A.   					*/  \
+	__asm	paddw		r5, r3		/* r5 = B.						*/  \
+\
+	__asm	movq		r7, r6		/* r7 = E + 8					*/  \
+	__asm	movq		r1, r5		/* r1 = B.						*/  \
+\
+/*  r0 = A		*/   \
+/*	r3 = B		*/	 \
+/*  r2 = A.		*/   \
+/*  r5 = B.		*/   \
+/*  r6 = E + 8	*/   \
+/*  r4 = E + 8	*/   \
+/*  r7 = E + 8	*/   \
+/*  r1 = B.		*/   \
+\
+	__asm	psubw		r6, r2		/* r6 = E - A.					*/  \
+	__asm   psubw		r4, r3		/* r4 = E - B ----R4			*/  \
+\
+	__asm	psubw		r7, r0		/* r7 = E - A ----R7			*/  \
+	__asm	paddw		r2, r2		/* r2 = A. + A.					*/  \
+\
+	__asm	paddw		r3, r3		/* r3 = B + B					*/  \
+	__asm	paddw		r0, r0      /* r0 = A + A					*/  \
+\
+	__asm	paddw		r2, r6		/* r2 = E + A.					*/  \
+	__asm	paddw	    r3, r4		/* r3 = E + B ----R3			*/  \
+\
+	__asm	psraw		r4, 4		/* shift						*/  \
+	__asm   movq		J(4), r4	/* store R4 at J4				*/  \
+\
+	__asm	psraw		r3, 4		/* shift						*/  \
+	__asm   movq		I(3), r3	/* store R3 at I3				*/  \
+\
+	__asm	psubw		r2, r1		/* r2 = E + A. - B.	----R2  	*/  \
+	__asm	psubw		r6, r5		/* r6 = E - A. - B.	----R6		*/  \
+\
+	__asm	paddw		r1, r1      /* r1 = B. + B.					*/  \
+	__asm	paddw		r5, r5		/* r5 = B. + B.					*/  \
+\
+	__asm	paddw	    r0, r7		/* r0 = E + A ----R0			*/  \
+	__asm	paddw		r1, r2		/* r1 = E + A. + B. -----R1     */  \
+\
+	__asm	psraw		r7, 4		/* shift						*/  \
+	__asm	psraw		r2, 4		/* shift						*/	\
+\
+	__asm	psraw		r0, 4		/* shift						*/	\
+	__asm   psraw		r1, 4		/* shift						*/  \
+\
+	__asm   movq		J(7), r7	/* store R7 to J7				*/  \
+	__asm	movq		I(0), r0	/* store R0 to I0				*/  \
+\
+	__asm   movq		I(1), r1    /* store R1 to I1				*/  \
+	__asm	movq		I(2), r2	/* store R2 to I2				*/  \
+\
+	__asm	movq		I(1), r1    /* repeats the I(1) store above	*/	\
+	__asm	paddw		r5, r6		/* r5 = E - A. + B.	-----R5     */  \
+\
+	__asm	psraw		r5, 4		/* shift						*/  \
+	__asm   movq		J(5), r5	/* store R5 at J5				*/  \
+\
+	__asm	psraw		r6, 4		/* shift						*/  \
+	__asm   movq		J(6), r6	/* store R6 at J6				*/  \
+\
+}
+//End of ColumnIDCT_3
+
+__declspec ( naked ) void MMX_idct3 (	INT16 * input, INT16 * output ) 
+{
+	// Takes no qtbl: coefficients are used as given, and only the four words at input[0..3] are read.
+#	define M(I)		[ecx + MaskOffset + I*8]
+#	define C(I)		[ecx + CosineOffset + (I-1)*8]
+#	define Eight	[ecx + EightOffset]
+#   undef Arg
+#	define Arg(I)	[esp + 16 + I*4]
+	// NOTE(review): r1 and r4 are zeroed below, so asm comments naming lanes 04-07 or 10-13, or a store source other than r4, are stale.
+#	define r0	mm0
+#	define r1	mm1
+#	define r2	mm2
+#	define r3	mm3
+#	define r4	mm4
+#	define r5	mm5
+#	define r6	mm6
+#	define r7	mm7
+	(void) output;
+	(void) input;
+
+ __asm {
+	push	edx				
+	push	ecx
+	push	ebx
+
+// Label:
+	mov		eax, Arg( 0)	; eax = quantized input
+	 mov	edx, Arg( 1)	; edx = destination (= idct buffer)
+
+	mov		ecx, [edx]		; (+1 at least) preload the cache before writing
+	 mov	ebx, [edx+28]   ; in case proc doesn't cache on writes
+	mov		ecx, [edx+56]	; gets all the cache lines
+	 mov	ebx, [edx+84]	; regardless of alignment (beyond 32-bit)
+	mov		ecx, [edx+112]	; also avoids address contention stalls
+	 mov	ebx, [edx+124]
+
+	lea     ecx, idctconstants ;; [0];
+
+	movq	r0, [eax]		; r0 = 03 02 01 00
+	 ;
+	pxor	r1, r1			; r1 = 13 12 11 10; all zero
+	 ;
+	movq	r2, M(0)		; r2 = __ __ __ FF
+	 movq	r3, r0			; r3 = 03 02 01 00
+	pxor	r4, r4
+	 psrlq	r0, 16			; r0 = __ 03 02 01
+	 pand	r3, r2			; r3 = __ __ __ 00
+	movq	r5, r0			; r5 = __ 03 02 01
+	 movq	r6, r1			; r6 = 13 12 11 10;all zero
+	pand	r5, r2			; r5 = __ __ __ 01
+	;psllq	r6, 32			; r6 = 11 10 __ __
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 pxor	r0, r5			; r0 = __ 03 02 __
+	pand	r7, r6			; r7 = 11 __ __ __
+	 por	r0, r3			; r0 = __ 03 02 00
+	pxor	r6, r7			; r6 = __ 10 __ __
+	 por	r0, r7			; r0 = 11 03 02 00 = R0
+	movq	r7, M(3)		; r7 = FF __ __ __
+	 movq	r3, r4			; r3 = 07 06 05 04
+	movq	[edx], r0		; write R0 = r0
+	 pand	r3, r2			; r3 = __ __ __ 04
+	psllq	r3, 16			; r3 = __ __ 04 __
+	 pand	r7, r1			; r7 = 13 __ __ __
+	por		r5, r3			; r5 = __ __ 04 01
+	 por	r7, r6			; r7 = 13 10 __ __
+	por	r7, r5				; r7 = 13 10 04 01 = R1
+	 psrlq	r4, 16			; r4 = __ 07 06 05
+	movq	[edx+16], r7	; write R1 = r7
+	movq	[edx+32], r4	; write R2 = r7
+	movq	[edx+48], r4	; write R3 = r7
+	movq	[edx+8], r4		; write R4 = r7
+	movq	[edx+24], r4	; write R5 = r7	 
+	movq	[edx+40], r4	; write R6 = r7
+	movq	[edx+56], r4	; write R7 = r0
+	movq	[edx+120], r4	; store R15 = r7
+	movq	[edx+104], r4	; write R14 = r6
+	movq	[edx+88], r4	; write R13 = r7
+	movq	[edx+72], r4	; write R12 = r6
+	movq	[edx+112], r4	; write R12 = r6
+	movq	[edx+96], r4	; store R10 = r6
+	movq	[edx+80], r4	; store R9 = r6
+	movq	[edx+64], r4	; store R8 = r7
+	 ;
+	; 123c  ( / 64 coeffs  < 2c / coeff)
+
+#	undef M
+
+; Donepartial transpose; now do the idct itself.
+
+#	define I( K)	[edx + (  K      * 16)]
+#	define J( K)	[edx + ( (K - 4) * 16) + 8]
+
+	RowIDCT_3		; 33 c
+	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (  K      * 16) + 64]
+#	define J( K)	[edx + ( (K - 4) * 16) + 72]
+
+//	RowIDCT			; 46 c
+//	Transpose		; 19 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16)]
+#	define J( K)	I( K)
+
+	ColumnIDCT_3		; 44 c
+
+#	undef I
+#	undef J
+#	define I( K)	[edx + (K * 16) + 8]
+#	define J( K)	I( K)
+
+	ColumnIDCT_3		; 44 c
+
+#	undef I
+#	undef J
+
+	pop	ebx
+	pop ecx
+	pop	edx
+	 ret		
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
new file mode 100644
index 00000000..7875112b
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
@@ -0,0 +1,856 @@
+/****************************************************************************
+*
+*   Module Title :     OptFunctions.c
+*
+*   Description  :     MMX or otherwise processor specific 
+*                      optimised versions of functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+*
+*   1.07 JBB 26/01/01  Removed unused function
+*	1.06 YWX 23/05/00  Remove the clamping in MmxReconPostProcess()
+*	1.05 YWX 15/05/00  Added MmxReconPostProcess()
+*   1.04 SJL 03/14/00  Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2. 
+*   1.03 PGW 12/10/99  Changes to reduce uneccessary dependancies. 
+*   1.02 PGW 30/08/99  Minor changes to MmxReconInterHalfPixel2().
+*   1.01 PGW 13/07/99  Changes to keep reconstruction data to 16 bit
+*   1.00 PGW 14/06/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/* 
+    Use Tim's optimized version.
+*/
+#define USING_TIMS 1
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+
+#define STRICT              // Strict type checking. 
+
+#include "codec_common.h"
+
+#include "reconstruct.h"
+
+/****************************************************************************
+*  Module constants.
+*****************************************************************************
+*/        
+
+/****************************************************************************
+*  Imports.
+*****************************************************************************
+*/   
+
+extern INT32 * XX_LUT;
+
+/****************************************************************************
+*  Exported Global Variables
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Exported Functions 
+*****************************************************************************
+*/              
+
+/****************************************************************************
+*  Module Statics
+*****************************************************************************
+*/  
+
+INT16 Ones[4]               = {1,1,1,1};                          // four INT16 lanes, each 1
+INT16 OneTwoEight[4]        = {128,128,128,128};                  // four INT16 lanes, each 128
+UINT8 Eight128s[8]          = {128,128,128,128,128,128,128,128};  // eight bytes of 0x80; loaded into mm0 by MMXReconIntra as its XOR mask
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MMXReconIntra
+ *
+ *  INPUTS        :     INT16 *  TmpDataBuffer
+ *                               Unused (cast to void)
+ *
+ *                      UINT16 * idct
+ *                               Pointer to the output from the idct for this block;
+ *                               128 bytes are consumed, 16 per output row, and packed
+ *                               as signed 16 bit values
+ *                      UINT32   stride
+ *                               Step in bytes between successive rows of dest
+ *
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer; 8 rows of 8 bytes are written
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - MMX version: each value is
+ *                      saturated to signed 8 bits, then biased by 128 (XOR 0x80)
+ *
+ *  SPECIAL NOTES :     Tim Murphy's optimized version; no emms is issued here
+ *
+ *  ERRORS        :     None.
+ ****************************************************************************/
+void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
+{
+	(void) TmpDataBuffer;
+    __asm
+    {
+        // u    pipe
+        //   v  pipe
+        mov         eax,[idct]              ; Signed 16 bit inputs
+          mov         edx,[dest]            ; Signed 8 bit outputs
+        movq        mm0,[Eight128s]         ; Set mm0 to 0x8080808080808080
+          ;
+        mov         ebx,[stride]            ; Line stride in output buffer
+          lea         ecx,[eax+128]         ; Endpoint in input buffer
+loop_label:                                 ;
+        movq        mm2,[eax]               ; First four input values
+          ;
+        packsswb    mm2,[eax+8]             ; pack with next(high) four values
+          por         mm0,mm0               ; stall
+        pxor        mm2,mm0                 ; Convert result to unsigned (same as add 128)
+          lea         eax,[eax + 16]        ; Step source buffer
+        cmp         eax,ecx                 ; are we done
+          ;
+        movq        [edx],mm2               ; store results
+          ;
+        lea         edx,[edx+ebx]           ; Step output buffer
+          jc          loop_label            ; Loop back if we are not done
+    }
+    // 6c/8 elts = 9c/8 = 1.125 c/pix
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MmxReconInter
+ *
+ *  INPUTS        :     INT16 *  TmpDataBuffer
+ *                               Not used by the USING_TIMS version (cast to void)
+ *
+ *                      UINT8 *  RefPtr
+ *                               The last frame reference; 8 bytes are read per row
+ *
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data; 16 bytes are read per row
+ *
+ *                      UINT32   LineStep
+ *                               Step in bytes between rows, applied to both RefPtr
+ *                               and ReconPtr
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction; 8 bytes are written per row
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change: signed-saturating
+ *                      16 bit add, then packed with unsigned saturation to 8 bits
+ *  ERRORS        :     None.
+ ****************************************************************************/
+#if USING_TIMS
+void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) TmpDataBuffer;
+
+ _asm {
+	push	edi
+;;	 mov	ebx, [ref]
+;;	mov		ecx, [diff]
+;;	 mov	eax, [dest]
+;;	mov		edx, [stride]
+	 mov	ebx, [RefPtr]
+	mov		ecx, [ChangePtr]
+	 mov	eax, [ReconPtr]
+	mov		edx, [LineStep]
+	 pxor	mm0, mm0
+	lea		edi, [ecx + 128]
+	 ;
+  L:
+	movq	mm2, [ebx]			; (+3 misaligned) 8 reference pixels
+	 ;
+	movq	mm4, [ecx]			; first 4 changes
+	 movq	mm3, mm2
+	movq	mm5, [ecx + 8]		; last 4 changes
+	 punpcklbw mm2, mm0			; turn first 4 refs into positive 16-bit #s
+	paddsw	mm2, mm4			; add in first 4 changes
+	 punpckhbw mm3, mm0			; turn last 4 refs into positive 16-bit #s
+	paddsw	mm3, mm5			; add in last 4 changes
+	 add	ebx, edx			; next row of reference pixels
+	packuswb mm2, mm3			; pack result to unsigned 8-bit values
+	 lea	ecx, [ecx + 16]		; next row of changes
+	cmp		ecx, edi			; are we done?
+	 ;
+	movq	[eax], mm2			; store result
+	 ;
+	lea		eax, [eax+edx]		; next row of output
+	 jc		L					; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
+
+	pop		edi
+ }
+}
+#else
+void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    // Reconstructs 8 rows of 8 pixels: each output row is the reference row plus the change row, saturated to unsigned bytes. TmpDataBuffer is not referenced here.
+    // NOTE(review): the change pointer is stepped by 16 bytes per row (8 * 16 bit values), not 8 * 32 bits.
+__asm
+    {
+        // Set up data pointers (eax = output, ebx = reference, ecx = change data, edx = line step)
+        mov         eax,dword ptr [ReconPtr]  
+        mov         ebx,dword ptr [RefPtr]      
+        mov         ecx,dword ptr [ChangePtr]   
+		mov         edx,dword ptr [LineStep]
+		pxor        mm6, mm6					; Blank mmx6
+
+        // Row 1 (rows 2-8 repeat the same sequence after the pointers have been stepped)
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 2
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 3
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 4
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 5
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 6
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 7
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+
+		add         ebx,edx						; Step the reference pointer.
+        add         ecx,16                      ; Step the change pointer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // Row 8 (last row, so the pointers are not stepped afterwards)
+        // Load 8 reference bytes and zero-extend them to words (the change data is already 16 bit)
+        movq        mm0,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Add the change data to the reference (signed saturating add)
+        paddsw      mm0, mm2                    ; First 4 values
+        paddsw      mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [eax],mm0         ; Write the data out to the results buffer
+   
+        // emms is left commented out, so the MMX state is not cleared here. NOTE(review): confirm the caller issues emms.
+    }
+}
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MmxReconInterHalfPixel2
+ *
+ *  INPUTS        :     UINT8 *  RefPtr1, RefPtr2
+ *                               The two last frame reference blocks that are averaged
+ *
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data
+ *
+ *                      UINT32   LineStep
+ *                               Line Length in pixels in recon and ref images
+ *                               
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from half pixel reference data and change. 
+ *                      Half pixel data interpolated from 2 references.
+ *
+ *  SPECIAL NOTES :     
+ *
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+#if USING_TIMS
+
+#define A 0
+
+void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, 
+		    	              UINT8 * RefPtr1, UINT8 * RefPtr2, 
+						      INT16 * ChangePtr, UINT32 LineStep )
+{
+#	if A
+		static culong FourOnes[2] = { 65537, 65537};	// 65537 == 0x00010001, i.e. +1 per 16-bit lane assuming culong is 32 bits; only read once
+#	endif
+	(void) TmpDataBuffer;
+	// Per row: (ref1 + ref2) >> 1 (with +1 before the shift only when A is set), add the change data, pack to 8 unsigned-saturated bytes.
+ _asm {
+	push	esi
+	 push	edi
+
+;;	mov		ecx, [diff]
+;;	 mov	esi, [ref1]
+;;	mov		edi, [ref2]
+;;	 mov	ebx, [dest]
+;;	mov		edx, [stride]
+
+	mov		ecx, [ChangePtr]
+	 mov	esi, [RefPtr1]
+	mov		edi, [RefPtr2]
+	 mov	ebx, [ReconPtr]
+	mov		edx, [LineStep]
+	// eax = ChangePtr + 128 bytes: end of the change data (8 rows of 16 bytes), used as the loop limit
+	 lea	eax, [ecx+128]
+
+#	if A
+		movq	mm1, [FourOnes]
+#	endif
+
+	 pxor	mm0, mm0
+  L:
+	movq	mm2, [esi]		; (+3 misaligned) mm2 = row from ref1
+	 ;
+	movq	mm4, [edi]		; (+3 misaligned) mm4 = row from ref2
+	 movq	mm3, mm2
+	punpcklbw mm2, mm0		; mm2 = start ref1 as positive 16-bit #s
+	 movq	mm5, mm4
+	movq	mm6, [ecx]		; mm6 = first 4 changes
+	 punpckhbw mm3, mm0		; mm3 = end ref1 as positive 16-bit #s
+	movq	mm7, [ecx+8]	; mm7 = last 4 changes
+	 punpcklbw mm4, mm0		; mm4 = start ref2 as positive 16-bit #s
+	punpckhbw mm5, mm0		; mm5 = end ref2 as positive 16-bit #s
+	 paddw	mm2, mm4		; mm2 = start (ref1 + ref2)
+	paddw	mm3, mm5		; mm3 = end (ref1 + ref2)
+
+#	if A
+		 paddw	mm2, mm1		; rounding adjustment
+		paddw	mm3, mm1
+#	endif
+
+	 psrlw	mm2, 1			; mm2 = start (ref1 + ref2)/2
+	psrlw	mm3, 1			; mm3 = end (ref1 + ref2)/2
+	 paddw	mm2, mm6		; add changes to start
+	paddw	mm3, mm7		; add changes to end
+	 lea	ecx, [ecx+16]	; next row idct
+	packuswb mm2, mm3		; pack start|end to unsigned 8-bit
+	 add	esi, edx		; next row ref1
+	add		edi, edx		; next row ref2
+	 cmp	ecx, eax
+	movq	[ebx], mm2		; store result
+	 ;
+	lea		ebx, [ebx+edx]
+	 jc		L				; 22c / 8 elts = 33c / 8 pixels = 4.125 c/pix
+	// The jc above repeats while ecx < eax (unsigned); the movq and lea after the cmp leave its carry flag untouched.
+	pop		edi
+	 pop	esi
+ }
+}
+
+#undef A
+
+#else
+void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, 
+		    	              UINT8 * RefPtr1, UINT8 * RefPtr2, 
+						      INT16 * ChangePtr, UINT32 LineStep )
+{
+    UINT8 * TmpDataPtr = (UINT8 *)TmpDataBuffer;   // TmpDataBuffer is a plain INT16 buffer, not a struct with a TmpReconBuffer member
+    // All 8 result rows (8 bytes each, 64 bytes in total) are staged in TmpDataPtr and then copied out to ReconPtr.
+    // Note that the line step for the change data is 16 bytes (8 * 16 bit values).
+    __asm
+    {
+		pxor        mm6, mm6					; Blank mmx6
+
+        // Set up data pointers (eax = reference 1, ebx = reference 2, edx = line step)
+        mov         eax,dword ptr [RefPtr1]      
+        mov         ebx,dword ptr [RefPtr2]      
+        mov         edx,dword ptr [LineStep]
+
+        // Row 1
+        // Load the change pointer (ecx is reused for the temp pointer, so it is reloaded for every row)
+        mov         ecx,dword ptr [ChangePtr]   
+
+        // Load the data values (Ref1 and Ref2) and zero-extend the bytes to 16 bit words
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+        punpcklbw   mm0, mm6					; Low bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2 (the shift truncates; no rounding term is added)
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]  ; Load the temp results pointer 
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx],mm0         ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 2
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,16                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm3, mm2                    ; Copy data
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]  ; Load the temp results pointer 
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+8],mm0       ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 3
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,32                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+16],mm0         ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 4
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,48                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+24],mm0      ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 5
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,64                 
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+32],mm0      ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 6
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,80                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+40],mm0      ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 7
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,96                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+48],mm0      ; Write the data out to the temporary results buffer
+        add         eax,edx                     ; Step the reference pointers
+        add         ebx,edx                    
+
+        // Row 8
+        // Load the change pointer
+        mov         ecx,dword ptr [ChangePtr]   
+        add         ecx,112                    
+
+        // Load the data values (Ref1 and Ref2). 
+        movq        mm0,dword ptr [eax]         ; Load 8 elements of source data
+        movq        mm2,dword ptr [ebx]         ; Load 8 elements of source data
+        movq        mm1, mm0                    ; Copy data
+        movq        mm3, mm2                    ; Copy data
+
+		punpcklbw   mm0, mm6					; Low bytes to words
+		punpckhbw   mm1, mm6					; High bytes to words
+		punpcklbw   mm2, mm6					; Low bytes to words
+		punpckhbw   mm3, mm6					; High bytes to words
+
+        // Average Ref1 and Ref2
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm3                    ; Second 4 values
+        psrlw       mm0, 1
+        psrlw       mm1, 1
+
+        // Load 8 elements of 16 bit change data
+        movq        mm2,dword ptr [ecx]         ; Load 4 elements of change data
+        movq        mm4,dword ptr [ecx+8]       ; Load next 4 elements of change data
+
+        // Sum the data reference and difference data
+        paddw       mm0, mm2                    ; First 4 values
+        paddw       mm1, mm4                    ; Second 4 values
+
+        // Pack and store
+        mov         ecx,dword ptr [TmpDataPtr]   
+        packuswb    mm0, mm1                    ; Then pack and saturate to unsigned bytes
+        movq        dword ptr [ecx+56],mm0      ; Write the data out to the temporary results buffer
+
+
+        // Now copy the results back to the reconstruction buffer.
+        mov         eax,dword ptr [ReconPtr]    ; Load the reconstruction Pointer  
+        mov         ecx,dword ptr [TmpDataPtr]  ; Load the temp results pointer 
+        // Row 1
+        movq        mm0,dword ptr [ecx]         ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 2
+        movq        mm0,dword ptr [ecx+8]       ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 3
+        movq        mm0,dword ptr [ecx+16]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 4
+        movq        mm0,dword ptr [ecx+24]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 5
+        movq        mm0,dword ptr [ecx+32]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 6
+        movq        mm0,dword ptr [ecx+40]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 7
+        movq        mm0,dword ptr [ecx+48]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+        // Row 8
+        movq        mm0,dword ptr [ecx+56]      ; Load 8 elements of results data
+        movq        dword ptr [eax],mm0         ; Write the data to the reconstruction buffer.
+        add         eax,edx                     ; Step the reconstruction pointer
+
+        // emms is left commented out, so the MMX state is not cleared here. NOTE(review): confirm the caller issues emms.
+    }
+}
+#endif
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
new file mode 100644
index 00000000..bfcc194a
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
@@ -0,0 +1,351 @@
+/****************************************************************************
+*
+*   Module Title :     uoptsystemdependant.c
+*
+*   Description  :     Miscellaneous system dependant functions
+*
+*    AUTHOR      :     Paul Wilkins
+*
+*****************************************************************************
+*   Revision History
+* 
+*   1.20 YWX 06-Nov-02 Added forward DCT function optimized for Pentium 4
+*   1.19 YWX 15-Jun-01 added function pointer setups for new deblocking filter
+*   1.18 YWX 26-Apr-01 Fixed the cpu frequency detection bug caused by Sleep()
+*   1.17 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
+*   1.16 JBB 26-Jan-01 Cleaned out unused function
+*   1.15 YWX 08-dec-00 Added WMT PostProcessor and 
+*                        moved function declarations into _head files
+*   1.14 JBB 30 NOV 00 Version number changes 
+*   1.13 YWX 03-Nov-00 Optimized postprocessor filters
+*   1.12 YWX 02-Nov-00 Added new loopfilter function pointers
+*   1.11 YWX 19-Oct-00 Added 1-2 Scaling functions pointers
+*   1.10 jbb 16 oct 00 added ifdefs to insure version code
+*   1.09 YWX 04-Oct-00 Added function pointers for scaling 
+*   1.08 YWX 06 Sep 00 Added function pointers for new deringing filter 
+*                      using frag baseed Q Value.
+*   1.07 JBB 21 Aug 00 New More Blurry in high variance area deringer
+*	1.06 YWX 2  Aug 00 Added function pointers for postprocess  
+*	1.05 YWX 15/05/00  Added functions to check processor frequency
+*					   and more function pointers for postprocessor
+*	1.04 YWX 08/05/00  Added function pointers setup for postprocess
+*   1.03 SJL 20/04/00  Added ability to enable the new dequant code.
+*   1.02 SJL 22/03/00  Function pointers for the loop filter.
+*   1.01 JBB 21/03/00  More Function Pointers for optimized playback
+*   1.00 PGW 12/10/99  Configuration baseline
+*
+*****************************************************************************
+*/
+
+/****************************************************************************
+*  Header Files
+*****************************************************************************
+*/
+#include "codec_common.h"
+#include "vputil_if.h"
+#include "cpuidlib.h"
+
+// Global debugging aids (all default to 0).
+int fastIDCTDisabled = 0;   // non-zero: UtilMachineSpecificConfig puts the full IDCT in every idct[]/idctc[] slot
+int forceCPUID = 0;         // non-zero: GetProcessorFlags uses CPUID below instead of the findCPUId() result
+int CPUID = 0;              // processor type substituted when forceCPUID is non-zero
+
+
+extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
+
+// Scalar (no mmx) reconstruction functions
+extern void ClearSysState_C(void);
+extern void IDctSlow(  INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void IDct10(  INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void IDct1(  INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
+extern void SubtractBlock_C( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_C( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void AverageBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void CopyBlock_C(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void Copy12x12_C(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
+extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
+extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+// MMX versions
+extern void fdct_MMX ( INT16 * InputData, INT16 * OutputData );
+extern void ClearMmx(void);
+extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void MMX_idct(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct10(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct1(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct_DX(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void MMX_idct10_DX(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void ReconBlock_MMX(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
+extern void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
+extern void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride);
+extern void Copy12x12_MMX(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
+extern void FilterBlockBil_8_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+// WMT versions
+extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
+extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
+extern void Wmt_idct1(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void Wmt_IDct_Dx( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void Wmt_IDct10_Dx(  Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
+extern void fdct_WMT(short *InputData, short *OutputData);
+extern void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+
+
+#define IdctAdjustBeforeShift 8
+extern UINT16 idctconstants[(4+7+1) * 4];
+extern UINT16 idctcosTbl[ 7];
+
+/* Fills idctconstants as twelve rows of four words: rows 0-3 hold a 4x4
+ * diagonal of 65535 entries, rows 4-10 each repeat one idctcosTbl entry, and
+ * row 11 repeats IdctAdjustBeforeShift. */
+void fillidctconstants(void)
+{
+	int row, col, idx;
+
+	/* Rows 0-3: clear all sixteen words, then set the diagonal. */
+	for (idx = 15; idx >= 0; idx--)
+		idctconstants[idx] = 0;
+	for (row = 0; row < 4; row++)
+		idctconstants[row * 5] = 65535;
+
+	/* Rows 4-10: idctcosTbl[0..6], one table entry per row. */
+	for (row = 0; row < 7; row++)
+		for (col = 0; col < 4; col++)
+			idctconstants[(row + 4) * 4 + col] = idctcosTbl[row];
+
+	/* Row 11: IdctAdjustBeforeShift in every word. */
+	for (col = 0; col < 4; col++)
+		idctconstants[44 + col] = IdctAdjustBeforeShift;
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     GetProcessorFlags
+ *
+ *  INPUTS        :     None (reads the forceCPUID and CPUID debug globals)
+ *
+ *  OUTPUTS       :     INT32 *MmxEnabled : TRUE if MMX code may be used
+ *                      INT32 *XmmEnabled : TRUE if XMM code may be used
+ *                      INT32 *WmtEnabled : TRUE if Willamette code may be used
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Maps the processor type from findCPUId(), or the forced
+ *                      CPUID value, onto three flags. All three are always
+ *                      written; unlisted types report no SIMD support.
+ ****************************************************************************/
+void GetProcessorFlags
+( 
+ INT32 *MmxEnabled,
+ INT32 *XmmEnabled,
+ INT32 *WmtEnabled
+)
+{
+	PROCTYPE CPUType = findCPUId();
+
+	// Debugging override: replace the detected type with the forced one.
+	if(forceCPUID)
+		CPUType = CPUID;
+
+	switch(CPUType)
+	{
+	case X86    :
+	case PPRO   :
+	case C6X86  :
+	case C6X86MX:
+	case AMDK5  :
+	case MACG3	:
+	case MAC68K	:
+	default     :
+		// Also the fallback for unknown values, so callers never read unset flags.
+		*MmxEnabled = FALSE;
+		*XmmEnabled = FALSE;
+		*WmtEnabled = FALSE;
+		break;
+	case PII	:
+	case AMDK63D:
+	case AMDK6  :
+	case PMMX	:
+		*MmxEnabled = TRUE;
+		*XmmEnabled = FALSE;
+		*WmtEnabled = FALSE;
+		break;
+	case XMM    :
+		*MmxEnabled = TRUE;
+		*XmmEnabled = TRUE;
+		*WmtEnabled = FALSE;
+		break;
+	case WMT	:
+		*MmxEnabled = TRUE;
+		*XmmEnabled = TRUE;
+		*WmtEnabled = TRUE;
+		break;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UtilMachineSpecificConfig
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Queries GetProcessorFlags() and points the idct[] and
+ *                      idctc[] tables plus the reconstruction, copy, filter
+ *                      and forward DCT function pointers at the Willamette,
+ *                      MMX or scalar C implementations.
+ *
+ *  SPECIAL NOTES :     XmmEnabled is fetched but never used to pick a path.
+ *                      Slots 0..64 of each table are written: slots 0-1 get
+ *                      the "1" IDCT, slots 2-10 the "10" IDCT and the rest
+ *                      the full IDCT. Setting the fastIDCTDisabled debugging
+ *                      global puts the full IDCT in every slot.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void UtilMachineSpecificConfig
+(
+  void
+)
+{
+	UINT32 i;
+	INT32 MmxEnabled;
+	INT32 XmmEnabled;
+	INT32 WmtEnabled;
+
+	GetProcessorFlags( &MmxEnabled,&XmmEnabled,&WmtEnabled);
+
+	if(WmtEnabled)		// Willamette
+	{
+		// idct[] gets the Dx routines, idctc[] the non-Dx ones.
+		for(i=0;i<=64;i++)
+		{
+			if(fastIDCTDisabled || i>10)
+			{
+				idct[i]=Wmt_IDct_Dx;
+				idctc[i]=MMX_idct;
+			}
+			else if(i<=1)
+			{
+				idct[i]=Wmt_idct1;
+				idctc[i]=Wmt_idct1;
+			}
+			else
+			{
+				idct[i]=Wmt_IDct10_Dx;
+				idctc[i]=MMX_idct10;
+			}
+		}
+
+		fdct_short = fdct_WMT;
+		ReconIntra = WmtReconIntra;
+		ReconInter = WmtReconInter;
+		ReconInterHalfPixel2 = WmtReconInterHalfPixel2;
+		FilterBlockBil_8 = FilterBlockBil_8_wmt;
+		FilterBlock = FilterBlock_wmt;
+
+		// No Willamette block helpers are declared in this file: reuse MMX.
+		ClearSysState = ClearMmx;
+		AverageBlock = AverageBlock_MMX;
+		UnpackBlock = UnpackBlock_MMX;
+		ReconBlock = ReconBlock_MMX;
+		SubtractBlock = SubtractBlock_MMX;
+		CopyBlock = CopyBlockMMX;
+		Copy12x12 = Copy12x12_MMX;
+	}
+	else if ( MmxEnabled )
+	{
+		// With the fast IDCTs disabled idctc[] must hold the same full IDCT
+		// (MMX_idct) that its slots above 10 normally use. The previous code
+		// installed MMX_idct_DX there, a routine otherwise used only for
+		// idct[], unlike the Willamette and C paths.
+		for(i=0;i<=64;i++)
+		{
+			if(fastIDCTDisabled || i>10)
+			{
+				idct[i]=MMX_idct_DX;
+				idctc[i]=MMX_idct;
+			}
+			else if(i<=1)
+			{
+				idct[i]=MMX_idct1;
+				idctc[i]=MMX_idct1;
+			}
+			else
+			{
+				idct[i]=MMX_idct10_DX;
+				idctc[i]=MMX_idct10;
+			}
+		}
+
+		fdct_short = fdct_MMX;
+		ReconIntra = MMXReconIntra;
+		ReconInter = MmxReconInter;
+		ReconInterHalfPixel2 = MmxReconInterHalfPixel2;
+		FilterBlockBil_8 = FilterBlockBil_8_mmx;
+		FilterBlock = FilterBlock_mmx;
+
+		ClearSysState = ClearMmx;
+		AverageBlock = AverageBlock_MMX;
+		UnpackBlock = UnpackBlock_MMX;
+		ReconBlock = ReconBlock_MMX;
+		SubtractBlock = SubtractBlock_MMX;
+		CopyBlock = CopyBlockMMX;
+		Copy12x12 = Copy12x12_MMX;
+	}
+	else
+	{
+		// Scalar C fallback: one set of IDCTs serves both tables.
+		for(i=0;i<=64;i++)
+		{
+			if(fastIDCTDisabled || i>10)
+			{
+				idct[i]=IDctSlow;
+				idctc[i]=IDctSlow;
+			}
+			else if(i<=1)
+			{
+				idct[i]=IDct1;
+				idctc[i]=IDct1;
+			}
+			else
+			{
+				idct[i]=IDct10;
+				idctc[i]=IDct10;
+			}
+		}
+
+		fdct_short = fdct_short_C;
+		ReconIntra = ScalarReconIntra;
+		ReconInter = ScalarReconInter;
+		ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
+		FilterBlockBil_8 = FilterBlockBil_8_C;
+		FilterBlock = FilterBlock_C;
+		ClearSysState = ClearSysState_C;
+		AverageBlock = AverageBlock_C;
+		UnpackBlock = UnpackBlock_C;
+		ReconBlock = ReconBlock_C;
+		SubtractBlock = SubtractBlock_C;
+		CopyBlock = CopyBlock_C;
+
+		// Copy12x12_MMX (vputilasm.c) is plain C despite its name, so it is
+		// safe here. NOTE(review): Copy12x12_C is declared but never installed.
+		Copy12x12 = Copy12x12_MMX;
+	}
+}
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
new file mode 100644
index 00000000..3d173913
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
@@ -0,0 +1,507 @@
+/****************************************************************************
+ *
+ *   Module Title :     vputilasm.c
+ *
+ *   Description  :     Codec specific functions
+ *
+ *   AUTHOR       :     Yaowu Xu
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *   1.02 YWX 03-Nov-00 Changed confusing variable name
+ *   1.01 YWX 02-Nov-00 Added the set of functions
+ *   1.00 YWX 19-Oct-00 configuration baseline
+ *****************************************************************************
+ */ 
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+
+#define STRICT              /* Strict type checking. */
+#include "codec_common.h"
+#include <math.h>
+
+ /****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
+
+
+/****************************************************************************
+ *  Explicit Imports
+ *****************************************************************************
+ */ 
+extern void SatUnsigned8( UINT8 * ResultPtr, INT16 * DataBlock, 
+                         UINT32 ResultLineStep, UINT32 DataLineStep );
+
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Forward References
+ *****************************************************************************
+ */       
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ClearMmx()
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Clears down the MMX state by executing EMMS.
+ *
+ *  SPECIAL NOTES :     None of the other routines in this file executes
+ *                      EMMS, so the MMX state they leave behind is only
+ *                      cleared when this routine is called.
+ *                      UtilMachineSpecificConfig installs it as ClearSysState
+ *                      on the MMX and Willamette paths.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void ClearMmx(void)
+{
+	// One instruction only, so the single-statement form of the MSVC
+	// inline assembler is used instead of a braced block.
+	__asm emms
+}
+       
+/****************************************************************************
+ * 
+ *  ROUTINE       :     CopyBlockMMX
+ *
+ *  INPUTS        :     src       : first byte of the source block
+ *                      srcstride : byte step between rows, used for BOTH
+ *                                  the source and the destination
+ *
+ *  OUTPUTS       :     dest      : receives 8 rows of 8 bytes
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies an 8x8 byte block with MMX 64-bit moves.
+ *
+ *  SPECIAL NOTES :     Leaves the MMX state live (no EMMS is executed) and
+ *                      uses EBX as well as EAX/ECX/EDX.
+ *
+ ****************************************************************************/
+void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
+{
+	unsigned char *s = src;
+	unsigned char *d = dest;
+	unsigned int stride = srcstride;
+	// Copy in two halves of four rows: load four rows, store four rows.
+	_asm
+	{
+			mov		ecx, [stride]			; ecx = row step
+			mov		eax, [s]				; eax = source row pointer
+			mov		ebx, [d]				; ebx = destination row pointer
+			lea		edx, [ecx + ecx * 2]	; edx = 3 * row step
+
+			movq	mm0, [eax]				; rows 0-3 of the source
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			lea		eax, [eax + ecx*4]		; advance source to row 4
+
+			movq	[ebx], mm0				; rows 0-3 of the destination
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+
+			lea		ebx, [ebx + ecx * 4]	; advance destination to row 4
+
+			movq	mm0, [eax]				; rows 4-7 of the source
+			movq	mm1, [eax + ecx]
+			movq	mm2, [eax + ecx*2]
+			movq	mm3, [eax + edx]
+
+			movq	[ebx], mm0				; rows 4-7 of the destination
+			movq	[ebx + ecx], mm1
+			movq	[ebx + ecx*2], mm2
+			movq	[ebx + edx], mm3
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     Copy12x12_MMX
+ *
+ *  INPUTS        :     src        : first byte of the source block
+ *                      srcstride  : byte step between source rows
+ *                      deststride : byte step between destination rows
+ *
+ *  OUTPUTS       :     dest       : receives 12 rows of 12 bytes
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Copies a 12x12 byte block from source to destination
+ *
+ *  SPECIAL NOTES :     Written in plain C despite the name; no MMX
+ *                      instructions are used.
+ *
+ ****************************************************************************/
+void Copy12x12_MMX(
+    const unsigned char *src, 
+    unsigned char *dest, 
+    unsigned int srcstride,
+    unsigned int deststride)
+{
+	int row;
+
+	// Each row is moved as three 32-bit words, read and written in turn.
+	for (row = 0; row < 12; row++)
+	{
+		const UINT32 *from = (const UINT32 *)src;
+		UINT32 *to = (UINT32 *)dest;
+		to[0] = from[0];
+		to[1] = from[1];
+		to[2] = from[2];
+		src += srcstride;
+		dest += deststride;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     AverageBlock_MMX
+ *  
+ *  INPUTS        :     ReconPtr1, ReconPtr2 : the two byte blocks to be
+ *                                             averaged, 8 bytes per row
+ *                      ReconPixelsPerLine   : byte step between rows of
+ *                                             both input blocks
+ *
+ *  OUTPUTS       :     ReconRefPtr : 16-bit averages, 8 per row, rows stored
+ *                                    contiguously (BLOCK_HEIGHT_WIDTH rows)
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Do pixel averages on two reference blocks; each
+ *                      result is (a + b) >> 1, i.e. rounded down.
+ *
+ *  SPECIAL NOTES :     MMX state is left live (no EMMS); uses ESI and EDI.
+ *
+ ****************************************************************************/
+void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
+{
+    
+    __asm 
+    {
+        mov         esi,    ReconPtr1 
+        mov         eax,    ReconPtr2
+
+        mov         edi,    ReconRefPtr
+        mov         ecx,    BLOCK_HEIGHT_WIDTH      ; row counter
+
+        mov         edx,    ReconPixelsPerLine
+        pxor        mm7,    mm7                     ; mm7 = 0, used to zero-extend bytes
+
+AverageBlock_Loop:
+
+        movq        mm0,    [esi]                   ; 8 bytes from block 1
+        movq        mm1,    [eax]                   ; 8 bytes from block 2
+
+        movq        mm2,    mm0
+        punpcklbw   mm0,    mm7                     ; low 4 bytes of block 1 -> words
+
+        movq        mm3,    mm1
+        punpcklbw   mm1,    mm7                     ; low 4 bytes of block 2 -> words
+
+        paddw       mm0,    mm1
+        punpckhbw   mm2,    mm7                     ; high 4 bytes of block 1 -> words
+
+        psraw       mm0,    1                       ; (a + b) >> 1 for the low 4
+        punpckhbw   mm3,    mm7                     ; high 4 bytes of block 2 -> words
+
+        paddw       mm2,    mm3
+        movq        [edi],  mm0
+
+        psraw       mm2,    1                       ; (a + b) >> 1 for the high 4
+        add         esi,    edx
+
+        add         eax,    edx
+        add         edi,    16                      ; next output row (8 words)
+
+        movq        [edi-8], mm2                    ; edi already advanced: high half of this row
+        dec         ecx
+
+        jnz         AverageBlock_Loop
+    }
+    /* C version of the same operation, kept for reference:
+    UINT32 i;
+
+    // For each block row
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        ReconRefPtr[0] = (INT16)((INT32)(ReconPtr1[0])+ ((INT32)ReconPtr2[0]))>>1;
+        ReconRefPtr[1] = (INT16)((INT32)(ReconPtr1[1])+ ((INT32)ReconPtr2[1]))>>1;
+        ReconRefPtr[2] = (INT16)((INT32)(ReconPtr1[2])+ ((INT32)ReconPtr2[2]))>>1;
+        ReconRefPtr[3] = (INT16)((INT32)(ReconPtr1[3])+ ((INT32)ReconPtr2[3]))>>1;
+        ReconRefPtr[4] = (INT16)((INT32)(ReconPtr1[4])+ ((INT32)ReconPtr2[4]))>>1;
+        ReconRefPtr[5] = (INT16)((INT32)(ReconPtr1[5])+ ((INT32)ReconPtr2[5]))>>1;
+        ReconRefPtr[6] = (INT16)((INT32)(ReconPtr1[6])+ ((INT32)ReconPtr2[6]))>>1;
+        ReconRefPtr[7] = (INT16)((INT32)(ReconPtr1[7])+ ((INT32)ReconPtr2[7]))>>1;
+        
+        // Start next row
+        ReconPtr1 += ReconPixelsPerLine;
+        ReconPtr2 += ReconPixelsPerLine;
+
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+    */
+}
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     UnpackBlock_MMX
+ *  
+ *  INPUTS        :     ReconPtr           : block of byte data, 8 per row
+ *                      ReconPixelsPerLine : byte step between input rows
+ *
+ *  OUTPUTS       :     ReconRefPtr : the bytes zero-extended to 16 bits,
+ *                                    rows stored contiguously (8 per row)
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Converts char block data to short
+ *
+ *  SPECIAL NOTES :     MMX state is left live (no EMMS); uses ESI and EDI.
+ *
+ ****************************************************************************/
+void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
+{
+    
+    __asm 
+    {
+        mov         esi,    ReconPtr 
+        mov         edi,    ReconRefPtr
+
+        mov         ecx,    BLOCK_HEIGHT_WIDTH      ; row counter
+        mov         edx,    ReconPixelsPerLine
+
+        pxor        mm7,    mm7                     ; mm7 = 0, used to zero-extend bytes
+
+UnpackBlock_Loop:
+
+        movq        mm0,    [esi]                   ; 8 input bytes
+        movq        mm2,    mm0
+
+        punpcklbw   mm0,    mm7                     ; low 4 bytes -> words
+        movq        [edi],  mm0
+
+        punpckhbw   mm2,    mm7                     ; high 4 bytes -> words
+        add         esi,    edx
+
+        movq        [edi+8], mm2
+        add         edi,    16                      ; next output row (8 words)
+
+        dec         ecx
+        jnz         UnpackBlock_Loop
+    }
+    
+    /* C version of the same operation, kept for reference:
+    UINT32 i;
+
+    // For each block row
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+
+        ReconRefPtr[0] = (INT16)(ReconPtr[0]);
+        ReconRefPtr[1] = (INT16)(ReconPtr[1]);
+        ReconRefPtr[2] = (INT16)(ReconPtr[2]);
+        ReconRefPtr[3] = (INT16)(ReconPtr[3]);
+        ReconRefPtr[4] = (INT16)(ReconPtr[4]);
+        ReconRefPtr[5] = (INT16)(ReconPtr[5]);
+        ReconRefPtr[6] = (INT16)(ReconPtr[6]);
+        ReconRefPtr[7] = (INT16)(ReconPtr[7]);
+        
+        // Start next row
+        ReconPtr += ReconPixelsPerLine;
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+    */
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     SubtractBlock_MMX
+ *  
+ *  INPUTS        :     SrcBlock : source block bytes, 8 per row
+ *                      LineStep : byte step between source rows
+ *                      DestPtr  : on entry, the 16-bit reference values
+ *
+ *  OUTPUTS       :     DestPtr  : on exit, source minus reference (in place)
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     do pixel subtraction of ref block from source block
+ *
+ *  SPECIAL NOTES :     Processes 8 rows. MMX state left live; uses ESI, EDI.
+ *
+ ****************************************************************************/
+void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep )
+{
+
+    __asm 
+    {
+
+        mov         esi,    SrcBlock
+        mov         edi,    DestPtr
+
+        mov         edx,    LineStep
+        mov         ecx,    8                       ; row counter
+
+        pxor        mm7,    mm7                     ; mm7 = 0, used to zero-extend bytes
+
+SubtractBlock_Loop:
+
+        movq        mm0,    [esi]                   ; 8 source bytes
+        movq        mm1,    [edi]                   ; reference words 0-3
+
+        movq        mm2,    mm0
+        punpcklbw   mm0,    mm7                     ; low 4 source bytes -> words
+
+        movq        mm3,    [edi+8]                 ; reference words 4-7
+        psubw       mm0,    mm1                     ; source - reference
+        
+        punpckhbw   mm2,    mm7                     ; high 4 source bytes -> words
+        movq        [edi],  mm0                     ; overwrite reference with the difference
+
+        psubw       mm2,    mm3
+        add         esi,    edx
+
+        movq        [edi+8], mm2
+        add         edi,    16                      ; next row of 8 words
+
+        dec         ecx
+        jnz         SubtractBlock_Loop
+    }
+
+    /* C version of the same operation, kept for reference:
+    UINT32 i;
+
+    // For each block row
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+
+        DestPtr[0] = (INT16)((INT32)SrcBlock[0] - (INT32)DestPtr[0]);
+        DestPtr[1] = (INT16)((INT32)SrcBlock[1] - (INT32)DestPtr[1]);
+        DestPtr[2] = (INT16)((INT32)SrcBlock[2] - (INT32)DestPtr[2]);
+        DestPtr[3] = (INT16)((INT32)SrcBlock[3] - (INT32)DestPtr[3]);
+        DestPtr[4] = (INT16)((INT32)SrcBlock[4] - (INT32)DestPtr[4]);
+        DestPtr[5] = (INT16)((INT32)SrcBlock[5] - (INT32)DestPtr[5]);
+        DestPtr[6] = (INT16)((INT32)SrcBlock[6] - (INT32)DestPtr[6]);
+        DestPtr[7] = (INT16)((INT32)SrcBlock[7] - (INT32)DestPtr[7]);
+        
+        // Start next row
+        SrcBlock += LineStep;
+        DestPtr += BLOCK_HEIGHT_WIDTH;
+    }
+    */
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     ReconBlock_MMX
+ *  
+ *  INPUTS        :     SrcBlock, ReconRefPtr : two blocks of 16-bit values,
+ *                      8 per row, rows contiguous; LineStep : output row step
+ *
+ *  OUTPUTS       :     DestBlock : 8 rows of 8 bytes
+ *
+ *  RETURNS       :     None.
+ *
+ *  FUNCTION      :     Reconstruct a block using ref block and change data:
+ *                      adds the two inputs and saturates each sum to 0..255
+ *
+ *  SPECIAL NOTES :     SrcBlock is only read here (the C reference below
+ *                      updates it in place). MMX state left live (no EMMS).
+ ****************************************************************************/
+void ReconBlock_MMX( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep)
+{
+
+    __asm 
+    {
+    
+        mov         esi,    SrcBlock
+        mov         eax,    ReconRefPtr
+
+        mov         edi,    DestBlock
+        mov         ecx,    8                       ; row counter
+
+        mov         edx,    LineStep
+        pxor        mm7,    mm7                     ; cleared but not read again in this routine
+
+ReconBlock_Loop:
+
+        movq        mm0,    [esi]                   ; SrcBlock words 0-3
+        movq        mm1,    [eax]                   ; ReconRefPtr words 0-3
+    
+        movq        mm2,    [esi+8]                 ; SrcBlock words 4-7
+        movq        mm3,    [eax+8]                 ; ReconRefPtr words 4-7
+
+        paddw       mm0,    mm1                     ; 16-bit wrapping add
+        paddw       mm2,    mm3
+
+        packuswb    mm0,    mm2                     ; signed words -> bytes saturated to 0..255
+        movq        [edi],  mm0
+        
+        add         esi,    16
+        add         eax,    16
+
+        add         edi,    edx                     ; next output row
+        dec         ecx
+
+        jnz         ReconBlock_Loop
+        
+    }
+    
+    /* C reference version (note: this one also writes the sums back to SrcBlock):
+    UINT32 i;
+    INT16 *SrcBlockPtr = SrcBlock;
+
+    // For each block row
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        SrcBlock[0] += ReconRefPtr[0];
+        SrcBlock[1] += ReconRefPtr[1];
+        SrcBlock[2] += ReconRefPtr[2];
+        SrcBlock[3] += ReconRefPtr[3];
+        SrcBlock[4] += ReconRefPtr[4];
+        SrcBlock[5] += ReconRefPtr[5];
+        SrcBlock[6] += ReconRefPtr[6];
+        SrcBlock[7] += ReconRefPtr[7];
+        
+        // Start next row
+        SrcBlock += BLOCK_HEIGHT_WIDTH;
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+    // Saturated the block and write to the output
+    SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
+    */
+
+}
+
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
new file mode 100644
index 00000000..cec0599c
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
@@ -0,0 +1,1859 @@
+/****************************************************************************
+ *
+ *   Module Title :     wmtidct.c
+ *
+ *   Description  :     IDct functions optimized specifically for willamette 
+ *						processor
+ *					
+ *	 Special Notes:		
+ *
+ *   AUTHOR       :     YaoWu Xu
+ *
+ ***************************************************************************** 
+ *   Revision History
+ *		
+ *   1.02 YWX   07-dec-00 Removed code not in use and added push pop ebx
+ *	 1.01 YWX	29/06/00  Added Wmt_IDCT_Dx and Wmt_IDCT10_Dx
+ *   1.00 YWX	31/05/00  Configuration baseline
+ *
+ *****************************************************************************
+ */
+
+
+/*******************************************************************************
+ * Module Constants
+ *******************************************************************************
+ */
+	
+
+/* Rounding constant: eight 16-bit lanes each holding 8, added by Wmt_Column_IDCT before its final arithmetic shift right by 4 */
+__declspec(align(32)) static unsigned int Eight[]=
+{ 
+	0x00080008, 
+	0x00080008,
+	0x00080008, 
+	0x00080008 
+}; 
+/* cosine constants: row k-1 holds cosine ( k * pi / 16 ) * 65536 (rounded), k = 1 ... 7, replicated across all 8 lanes */
+__declspec(align(32)) static unsigned short WmtIdctConst[7 * 8]=
+{
+	64277,64277,64277,64277,64277,64277,64277,64277, 
+	60547,60547,60547,60547,60547,60547,60547,60547, 
+	54491,54491,54491,54491,54491,54491,54491,54491, 
+	46341,46341,46341,46341,46341,46341,46341,46341, 
+	36410,36410,36410,36410,36410,36410,36410,36410, 
+	25080,25080,25080,25080,25080,25080,25080,25080, 
+	12785,12785,12785,12785,12785,12785,12785,12785
+};
+/* Lane masks for Wmt_Dequant: seven 16-byte rows addressed there as [ecx + 16 * row]; the trailing comments list lanes 7 down to 0 */
+__declspec(align(32)) static unsigned short WmtDequantConst[]=
+{
+	0,65535,65535,0,0,0,0,0,	//0x0000 0000 0000 0000 0000 FFFF FFFF 0000
+	0,0,0,0,65535,65535,0,0,	//0x0000 0000 FFFF FFFF 0000 0000 0000 0000
+	65535,65535,65535,0,0,0,0,0,//0x0000 0000 0000 0000 0000 FFFF FFFF FFFF
+	0,0,0,65535,0,0,0,0,		//0x0000 0000 0000 0000 FFFF 0000 0000 0000
+	0,0,0,65535,65535,0,0,0,	//0x0000 0000 0000 FFFF FFFF 0000 0000 0000
+	65535,0,0,0,0,65535,0,0,	//0x0000 0000 FFFF 0000 0000 0000 0000 FFFF
+	0,0,65535,65535, 0,0,0,0	//0x0000 0000 0000 0000 FFFF FFFF 0000 0000
+};
+
+
+/*******************************************************************************
+ * Forward Reference
+ *******************************************************************************
+ */
+
+/********************************************************************************
+ *	Description of Inverse DCT algorithm.
+ ********************************************************************************
+ *
+
+   Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
+   by unsigned 16-bit quantization table entries.
+   These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
+   Result is scaled signed DCT coefficients (abs value < 2^15).
+
+   In the data stream, the coefficients are sent in order of increasing
+   total (horizontal + vertical) frequency.  The exact picture is as follows:
+
+	00 01 05 06  16 17 33 34
+	02 04 07 15  20 32 35 52
+	03 10 14 21  31 36 51 53
+	11 13 22 30  37 50 54 65
+
+	12 23 27 40  47 55 64 66
+	24 26 41 46	 56 63 67 74
+	25 42 45 57  62 70 73 75
+	43 44 60 61  71 72 76 77
+
+   Here the position in the matrix corresponds to the (horiz,vert)
+   frequency indices and the octal entry in the matrix is the position
+   of the coefficient in the data stream.  Thus the coefficients are sent
+   in sort of a diagonal "snake".
+
+   The dequantization stage "uncurls the snake" and stores the expanded
+   coefficients in more convenient positions.  These are not exactly the
+   natural positions given above but take into account our implementation
+   of the idct, which basically requires two one-dimensional idcts and
+   two transposes.
+
+
+   Transposing the 8x8 matrix above gives
+
+	00 02 03 11  12 24 25 43  
+	01 04 10 13  23 26 42 44  
+	05 07 14 22  27 41 45 60  
+	06 15 21 30  40 46 57 61  
+
+	16 20 31 37  47 56 62 71
+	17 32 36 50  55 63 70 72
+	33 35 51 54  64 67 73 76
+	34 52 53 65  66 74 75 77
+
+
+   The idct itself is more interesting.  Since the two-dimensional dct
+   basis functions are products of the one-dimensional dct basis functions,
+   we can compute an inverse (or forward) dct via two 1-D transforms,
+   on rows then on columns.  To exploit MMX parallelism, we actually do
+   both operations on columns, interposing a (partial) transpose between
+   the two 1-D transforms, the first transpose being done by the expansion
+   described above.
+
+   The 8-sample one-dimensional DCT is a standard orthogonal expansion using
+   the (unnormalized) basis functions
+
+	b[k]( i) = cos( pi * k * (2i + 1) / 16);
+
+   here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
+   To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
+   should be multiplied by 1/2.
+
+   The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
+   in each direction.  The (unnormalized) basis functions are
+
+	B[k,l]( i, j) = b[k]( i) * b[l]( j);
+
+   this time k and l are the horizontal and vertical frequencies,
+   i and j are the horizontal and vertical spatial coordinates;
+   all indices vary from 0 ... 7 (as above)
+   and there are now 4 cases of normalization.
+  
+   Our 1-D idct expansion uses constants C1 ... C7 given by
+
+   	(*)  Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16) 
+
+   and the following 1-D algorithm transforming I0 ... I7  to  R0 ... R7 :
+  
+   A = (C1 * I1) + (C7 * I7)		B = (C7 * I1) - (C1 * I7)
+   C = (C3 * I3) + (C5 * I5)		D = (C3 * I5) - (C5 * I3)
+   A. = C4 * (A - C)				B. = C4 * (B - D)
+   C. = A + C						D. = B + D
+   
+   E = C4 * (I0 + I4)				F = C4 * (I0 - I4)
+   G = (C2 * I2) + (C6 * I6)		H = (C6 * I2) - (C2 * I6)
+   E. = E - G
+   G. = E + G
+   
+   A.. = F + A.					B.. = B. - H
+   F.  = F - A. 				H.  = B. + H
+   
+   R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+   R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+   This algorithm was also used by Paul Wilkins in his C implementation;
+   it is due to Vetterli and Ligtenberg and may be found in the JPEG
+   reference book by Pennebaker and Mitchell.
+
+   Correctness of the algorithm follows from (*) together with the
+   addition formulas for sine and cosine:
+
+	cos( A + B) = cos( A) * cos( B)  -  sin( A) * sin( B)
+	sin( A + B) = sin( A) * cos( B)  +  cos( A) * sin( B)
+
+   Note that this implementation absorbs the difference in normalization
+   between the 0th and higher frequencies, although the results produced
+   are actually twice as big as they should be.  Since we do this for each
+   dimension, the 2-D idct results are 4x the desired results.  Finally,
+   taking into account that the dequantization multiplies by 4 as well,
+   our actual results are 16x too big.  We fix this by shifting the final
+   results right by 4 bits.
+
+   High precision version approximates C1 ... C7 to 16 bits.
+   Since there is no multiply taking one unsigned and one signed operand,
+   we have to use the signed multiply, therefore C1 ... C5 appear to be
+   negative and multiplies involving them must be adjusted to compensate
+   for this.  C6 and C7 do not require this adjustment since
+   they are < 1/2 and are correctly treated as positive numbers.
+
+   Following macro does Eight 8-sample one-dimensional idcts in parallel.
+   This is actually not such a difficult program to write once you
+   make a couple of observations (I of course was unable to make these
+   observations until I'd half-written a couple of other versions).
+
+	1. Everything is easy once you are done with the multiplies.
+	   This is because, given X and Y in registers, one may easily
+	   calculate X+Y and X-Y using just those 2 registers.
+
+	2. You always need at least 2 extra registers to calculate products,
+	   so storing 2 temporaries is inevitable.  C. and D. seem to be
+	   the best candidates.   
+
+	3. The products should be calculated in decreasing order of complexity
+	   (which translates into register pressure).  Since C1 ... C5 require
+	   adjustment (and C6, C7 do not), we begin by calculating C and D.
+
+********************************************************************************/
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Column_IDCT
+ *		
+ *		Description:	The Macro does 1-D IDct on 8 columns. 
+ *
+ *		Input:			None
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	None
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/*	
+	The major difference between Willamette processor and other IA32 processors is that 
+	all of the simd integer instructions now support the 128 bit xmm registers instead 
+	of 64 bit mmx registers. By using these instructions, we can do 8 1-D column idcts 
+	that takes shorts as input and outputs shorts at once
+
+*/
+
+#define Wmt_Column_IDCT __asm {		\
+	/* Eight parallel 1-D IDCTs: reads I(0)..I(7) and C(1)..C(7), adds Eight and shifts right by 4, writes O(0)..O(7); I(1) and I(2) are overwritten as scratch */	\
+	__asm	movdqa	xmm2, I(3)		/* xmm2 = i3 */				\
+	__asm	movdqa	xmm6, C(3)		/* xmm6 = c3 */				\
+	\
+	__asm	movdqa	xmm4, xmm2		/* xmm4 = i3 */				\
+	__asm	movdqa	xmm7, I(5)		/* xmm7 = i5 */				\
+	\
+	__asm	pmulhw xmm4, xmm6		/* xmm4 = c3 * i3 - i3 */	\
+	__asm	movdqa  xmm1, C(5)		/* xmm1 = c5 */				\
+	\
+	__asm	pmulhw xmm6, xmm7		/* xmm6 = c3 * i5 - i5 */	\
+	__asm	movdqa	xmm5, xmm1		/* xmm5 = c5 */				\
+	\
+	__asm	pmulhw	xmm1, xmm2		/* xmm1 = c5 * i3 - i3 */	\
+	__asm	movdqa  xmm3, I(1)		/* xmm3 = i1 */				\
+	\
+	__asm	pmulhw	xmm5, xmm7		/* xmm5 = c5 * i5 - i5 */	\
+	__asm	movdqa	xmm0, C(1)		/* xmm0 = c1 */				\
+	\
+	/* all registers are in use */								\
+	/* pmulhw is a signed multiply: with c1..c5 stored as values >= 0x8000 it yields c*x - x, so paddw adds x back */	\
+	__asm	paddw   xmm4, xmm2		/* xmm4 = c3 * i3 */		\
+	__asm	paddw	xmm6, xmm7		/* xmm6 = c3 * i5 */		\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = c5 * i3 */		\
+	__asm	movdqa	xmm1, I(7)		/* xmm1 = i7 */				\
+	\
+	__asm	paddw	xmm7, xmm5		/* xmm7 = c5 * i5 */		\
+	__asm	movdqa	xmm5, xmm0		/* xmm5 = c1 */				\
+	\
+	__asm	pmulhw	xmm0, xmm3		/* xmm0 = c1 * i1 - i1 */				\
+	__asm	paddsw	xmm4, xmm7		/* xmm4 = c3 * i3 + c5 * i5 = C */		\
+	\
+	__asm	pmulhw	xmm5, xmm1		/* xmm5 = c1 * i7 - i7 */				\
+	__asm   movdqa	xmm7, C(7)		/* xmm7 = c7 */							\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = c3 * i5 - c5 * i3 = D */		\
+	__asm	paddw	xmm0, xmm3		/* xmm0 = c1 * i1 */					\
+	\
+	__asm	pmulhw	xmm3, xmm7		/* xmm3 = c7 * i1 */		\
+	__asm	movdqa	xmm2, I(2)		/* xmm2 = i2 */				\
+	\
+	__asm	pmulhw  xmm7, xmm1		/* xmm7 = c7 * i7 */		\
+	__asm	paddw	xmm5, xmm1		/* xmm5 = c1 * i7 */		\
+	\
+	__asm	movdqa	xmm1, xmm2		/* xmm1 = i2 */				\
+	__asm	pmulhw	xmm2, C(2)		/* xmm2 = i2 * c2 -i2 */	\
+	\
+	__asm	psubsw	xmm3, xmm5		/* xmm3 = c7 * i1 - c1 * i7 = B */		\
+	__asm	movdqa	xmm5, I(6)		/* xmm5 = i6 */							\
+	\
+	__asm	paddsw	xmm0, xmm7		/* xmm0 = c1 * i1 + c7 * i7	= A */		\
+	__asm	movdqa	xmm7, xmm5		/* xmm7 = i6 */							\
+	\
+	__asm	psubsw	xmm0, xmm4		/* xmm0 = A - C */			\
+	__asm	pmulhw	xmm5, C(2)		/* xmm5 = c2 * i6 - i6 */	\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = i2 * c2 */		\
+	__asm	pmulhw	xmm1, C(6)		/* xmm1 = c6 * i2 */		\
+	\
+	__asm	paddsw	xmm4, xmm4		/* xmm4 = C + C */			\
+	__asm	paddsw	xmm4, xmm0		/* xmm4 = A + C = C. */		\
+	\
+	__asm	psubsw	xmm3, xmm6		/* xmm3 = B - D */			\
+	__asm	paddw	xmm5, xmm7		/* xmm5 = c2 * i6 */		\
+	\
+	__asm	paddsw	xmm6, xmm6		/* xmm6 = D + D */			\
+	__asm	pmulhw  xmm7, C(6)		/* xmm7 = c6 * i6 */		\
+	\
+	__asm	paddsw	xmm6, xmm3		/* xmm6 = B + D = D. */		\
+	__asm	movdqa	I(1), xmm4		/* Save C. at I(1)	*/		\
+	\
+	__asm	psubsw	xmm1, xmm5		/* xmm1 = c6 * i2 - c2 * i6 = H */		\
+	__asm	movdqa	xmm4, C(4)		/* xmm4 = c4 */							\
+	\
+	__asm	movdqa  xmm5, xmm3		/* xmm5 = B - D */						\
+	__asm	pmulhw	xmm3, xmm4		/* xmm3 = ( c4 -1 ) * ( B - D ) */		\
+	\
+	__asm	paddsw	xmm7, xmm2		/* xmm7 = c2 * i2 + c6 * i6 = G */		\
+	__asm	movdqa	I(2), xmm6		/* Save D. at I(2) */		\
+	\
+	__asm	movdqa	xmm2, xmm0		/* xmm2 = A - C */			\
+	__asm	movdqa	xmm6, I(0)		/* xmm6 = i0 */				\
+	\
+	__asm	pmulhw	xmm0, xmm4		/* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */\
+	__asm	paddw	xmm5, xmm3		/* xmm5 = c4 * ( B - D ) = B. */		\
+	\
+	__asm	movdqa	xmm3, I(4)		/* xmm3 = i4 */				\
+	__asm	psubsw	xmm5, xmm1		/* xmm5 = B. - H = B.. */	\
+	\
+	__asm	paddw	xmm2, xmm0		/* xmm2 = c4 * ( A - C) = A. */			\
+	__asm	psubsw	xmm6, xmm3		/* xmm6 = i0 - i4 */					\
+	\
+	__asm	movdqa	xmm0, xmm6		/* xmm0 = i0 - i4 */					\
+	__asm	pmulhw	xmm6, xmm4		/* xmm6 = (c4 - 1) * (i0 - i4) = F */	\
+	\
+	__asm	paddsw	xmm3, xmm3		/* xmm3 = i4 + i4 */		\
+	__asm	paddsw	xmm1, xmm1		/* xmm1 = H + H */			\
+	\
+	__asm	paddsw	xmm3, xmm0		/* xmm3 = i0 + i4 */		\
+	__asm	paddsw	xmm1, xmm5		/* xmm1 = B. + H = H. */	\
+	\
+	__asm	pmulhw	xmm4, xmm3		/* xmm4 = ( c4 - 1 ) * ( i0 + i4 )  */	\
+	__asm	paddw	xmm6, xmm0		/* xmm6 = c4 * ( i0 - i4 ) */			\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = F - A. = F. */	\
+	__asm	paddsw	xmm2, xmm2		/* xmm2 = A. + A. */		\
+	\
+	__asm	movdqa	xmm0, I(1)		/* Load	C. from I(1) */		\
+	__asm	paddsw	xmm2, xmm6		/* xmm2 = F + A. = A.. */	\
+	\
+	__asm	paddw	xmm4, xmm3		/* xmm4 = c4 * ( i0 + i4 ) = E */		\
+	__asm	psubsw  xmm2, xmm1		/* xmm2 = A.. - H. = R2 */				\
+	\
+	__asm	paddsw	xmm2, Eight		/* Adjust R2 and R1 before shifting */	\
+	__asm	paddsw  xmm1, xmm1		/* xmm1 = H. + H. */					\
+	\
+	__asm	paddsw  xmm1, xmm2		/* xmm1 = A.. + H. = R1 */	\
+	__asm	psraw	xmm2, 4			/* xmm2 = op2 */			\
+	\
+	__asm	psubsw	xmm4, xmm7		/* xmm4 = E - G = E. */		\
+	__asm	psraw	xmm1, 4			/* xmm1 = op1 */			\
+	\
+	__asm   movdqa	xmm3, I(2)		/* Load D. from I(2) */		\
+	__asm	paddsw	xmm7, xmm7		/* xmm7 = G + G */			\
+	\
+	__asm	movdqa	O(2), xmm2		/* Write out op2 */			\
+	__asm	paddsw  xmm7, xmm4		/* xmm7 = E + G = G. */		\
+	\
+	__asm	movdqa	O(1), xmm1		/* Write out op1 */			\
+	__asm	psubsw  xmm4, xmm3		/* xmm4 = E. - D. = R4 */	\
+	\
+	__asm	paddsw	xmm4, Eight		/* Adjust R4 and R3 before shifting */	\
+	__asm	paddsw  xmm3, xmm3		/* xmm3 = D. + D. */					\
+	\
+	__asm	paddsw	xmm3, xmm4		/* xmm3 = E. + D. = R3 */	\
+	__asm	psraw	xmm4, 4			/* xmm4 = op4 */			\
+	\
+	__asm	psubsw	xmm6, xmm5		/* xmm6 = F. - B..= R6 */	\
+	__asm	psraw	xmm3, 4			/* xmm3 = op3 */			\
+	\
+	__asm	paddsw	xmm6, Eight		/* Adjust R6 and R5 before shifting */	\
+	__asm	paddsw	xmm5, xmm5		/* xmm5 = B.. + B.. */					\
+	\
+	__asm	paddsw	xmm5, xmm6		/* xmm5 = F. + B.. = R5 */	\
+	__asm	psraw	xmm6, 4			/* xmm6 = op6 */			\
+	\
+	__asm	movdqa	O(4), xmm4		/* Write out op4 */			\
+	__asm	psraw	xmm5, 4			/* xmm5 = op5 */			\
+	\
+	__asm 	movdqa	O(3), xmm3		/* Write out op3 */			\
+	__asm	psubsw	xmm7, xmm0		/* xmm7 = G. - C. = R7 */	\
+	\
+	__asm	paddsw  xmm7, Eight		/* Adjust R7 and R0 before shifting */	\
+	__asm	paddsw  xmm0, xmm0		/* xmm0 = C. + C. */					\
+	\
+	__asm	paddsw  xmm0, xmm7		/* xmm0 = G. + C. = R0 */		\
+	__asm	psraw	xmm7, 4			/* xmm7 = op7 */			\
+	\
+	__asm	movdqa	O(6), xmm6		/* Write out op6 */			\
+	__asm	psraw	xmm0, 4			/* xmm0 = op0 */			\
+	\
+	__asm	movdqa	O(5), xmm5		/* Write out op5 */			\
+	__asm	movdqa	O(7), xmm7		/* Write out op7 */			\
+	\
+	__asm	movdqa	O(0), xmm0		/* Write out op0 */			\
+	\
+	} /* End of Wmt_Column_IDCT macro */
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Row_IDCT
+ *		
+ *		Description:	The Macro does 1-D IDct on 8 columns in place, without the final rounding and shift. 
+ *
+ *		Input:			None
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	None
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/*	
+	The major difference between Willamette processor and other IA32 processors is that 
+	all of the simd integer instructions now support the 128 bit xmm registers instead 
+	of 64 bit mmx registers. By using these instructions, we can do 8 1-D column idcts 
+	that takes shorts as input and outputs shorts at once
+
+*/
+
+#define Wmt_Row_IDCT __asm {		\
+	/* Same arithmetic as Wmt_Column_IDCT minus the Eight rounding add and the shift right by 4; results are stored back to I(0)..I(7) */	\
+	__asm	movdqa	xmm2, I(3)		/* xmm2 = i3 */		\
+	__asm	movdqa	xmm6, C(3)		/* xmm6 = c3 */		\
+	\
+	__asm	movdqa	xmm4, xmm2		/* xmm4 = i3 */		\
+	__asm	movdqa	xmm7, I(5)		/* xmm7 = i5 */		\
+	\
+	__asm	pmulhw xmm4, xmm6		/* xmm4 = c3 * i3 - i3 */	\
+	__asm	movdqa  xmm1, C(5)		/* xmm1 = c5 */		\
+	\
+	__asm	pmulhw xmm6, xmm7		/* xmm6 = c3 * i5 - i5 */	\
+	__asm	movdqa	xmm5, xmm1		/* xmm5 = c5 */		\
+	\
+	__asm	pmulhw	xmm1, xmm2		/* xmm1 = c5 * i3 - i3 */	\
+	__asm	movdqa  xmm3, I(1)		/* xmm3 = i1 */		\
+	\
+	__asm	pmulhw	xmm5, xmm7		/* xmm5 = c5 * i5 - i5 */	\
+	__asm	movdqa	xmm0, C(1)		/* xmm0 = c1 */		\
+	\
+	/* all registers are in use */ \
+	/* pmulhw is a signed multiply: with c1..c5 stored as values >= 0x8000 it yields c*x - x, so paddw adds x back */	\
+	__asm	paddw   xmm4, xmm2		/* xmm4 = c3 * i3 */	\
+	__asm	paddw	xmm6, xmm7		/* xmm6 = c3 * i5 */	\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = c5 * i3 */	\
+	__asm	movdqa	xmm1, I(7)		/* xmm1 = i7 */		\
+	\
+	__asm	paddw	xmm7, xmm5		/* xmm7 = c5 * i5 */	\
+	__asm	movdqa	xmm5, xmm0		/* xmm5 = c1 */		\
+	\
+	__asm	pmulhw	xmm0, xmm3		/* xmm0 = c1 * i1 - i1 */	\
+	__asm	paddsw	xmm4, xmm7		/* xmm4 = c3 * i3 + c5 * i5 = C */		\
+	\
+	__asm	pmulhw	xmm5, xmm1		/* xmm5 = c1 * i7 - i7 */	\
+	__asm   movdqa	xmm7, C(7)		/* xmm7 = c7 */		\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = c3 * i5 - c5 * i3 = D */		\
+	__asm	paddw	xmm0, xmm3		/* xmm0 = c1 * i1 */	\
+	\
+	__asm	pmulhw	xmm3, xmm7		/* xmm3 = c7 * i1 */	\
+	__asm	movdqa	xmm2, I(2)		/* xmm2 = i2 */		\
+	\
+	__asm	pmulhw  xmm7, xmm1		/* xmm7 = c7 * i7 */	\
+	__asm	paddw	xmm5, xmm1		/* xmm5 = c1 * i7 */	\
+	\
+	__asm	movdqa	xmm1, xmm2		/* xmm1 = i2 */		\
+	__asm	pmulhw	xmm2, C(2)		/* xmm2 = i2 * c2 -i2 */	\
+	\
+	__asm	psubsw	xmm3, xmm5		/* xmm3 = c7 * i1 - c1 * i7 = B */		\
+	__asm	movdqa	xmm5, I(6)		/* xmm5 = i6 */		\
+	\
+	__asm	paddsw	xmm0, xmm7		/* xmm0 = c1 * i1 + c7 * i7	= A */		\
+	__asm	movdqa	xmm7, xmm5		/* xmm7 = i6 */		\
+	\
+	__asm	psubsw	xmm0, xmm4		/* xmm0 = A - C */	\
+	__asm	pmulhw	xmm5, C(2)		/* xmm5 = c2 * i6 - i6 */	\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = i2 * c2 */	\
+	__asm	pmulhw	xmm1, C(6)		/* xmm1 = c6 * i2 */	\
+	\
+	__asm	paddsw	xmm4, xmm4		/* xmm4 = C + C */		\
+	__asm	paddsw	xmm4, xmm0		/* xmm4 = A + C = C. */	\
+	\
+	__asm	psubsw	xmm3, xmm6		/* xmm3 = B - D */		\
+	__asm	paddw	xmm5, xmm7		/* xmm5 = c2 * i6 */	\
+	\
+	__asm	paddsw	xmm6, xmm6		/* xmm6 = D + D */		\
+	__asm	pmulhw  xmm7, C(6)		/* xmm7 = c6 * i6 */	\
+	\
+	__asm	paddsw	xmm6, xmm3		/* xmm6 = B + D = D. */	\
+	__asm	movdqa	I(1), xmm4		/* Save C. at I(1)	*/	\
+	\
+	__asm	psubsw	xmm1, xmm5		/* xmm1 = c6 * i2 - c2 * i6 = H */	\
+	__asm	movdqa	xmm4, C(4)		/* xmm4 = c4 */		\
+	\
+	__asm	movdqa  xmm5, xmm3		/* xmm5 = B - D */	\
+	__asm	pmulhw	xmm3, xmm4		/* xmm3 = ( c4 -1 ) * ( B - D ) */		\
+	\
+	__asm	paddsw	xmm7, xmm2		/* xmm7 = c2 * i2 + c6 * i6 = G */	\
+	__asm	movdqa	I(2), xmm6		/* Save D. at I(2) */	\
+	\
+	__asm	movdqa	xmm2, xmm0		/* xmm2 = A - C */	\
+	__asm	movdqa	xmm6, I(0)		/* xmm6 = i0 */		\
+	\
+	__asm	pmulhw	xmm0, xmm4		/* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */	\
+	__asm	paddw	xmm5, xmm3		/* xmm5 = c4 * ( B - D ) = B. */	\
+	\
+	__asm	movdqa	xmm3, I(4)		/* xmm3 = i4 */		\
+	__asm	psubsw	xmm5, xmm1		/* xmm5 = B. - H = B.. */	\
+	\
+	__asm	paddw	xmm2, xmm0		/* xmm2 = c4 * ( A - C) = A. */		\
+	__asm	psubsw	xmm6, xmm3		/* xmm6 = i0 - i4 */	\
+	\
+	__asm	movdqa	xmm0, xmm6		/* xmm0 = i0 - i4 */	\
+	__asm	pmulhw	xmm6, xmm4		/* xmm6 = ( c4 - 1 ) * ( i0 - i4 ) = F */	\
+	\
+	__asm	paddsw	xmm3, xmm3		/* xmm3 = i4 + i4 */	\
+	__asm	paddsw	xmm1, xmm1		/* xmm1 = H + H */	\
+	\
+	__asm	paddsw	xmm3, xmm0		/* xmm3 = i0 + i4 */	\
+	__asm	paddsw	xmm1, xmm5		/* xmm1 = B. + H = H. */	\
+	\
+	__asm	pmulhw	xmm4, xmm3		/* xmm4 = ( c4 - 1 ) * ( i0 + i4 )  */	\
+	__asm	paddw	xmm6, xmm0		/* xmm6 = c4 * ( i0 - i4 ) */	\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = F - A. = F. */	\
+	__asm	paddsw	xmm2, xmm2		/* xmm2 = A. + A. */	\
+	\
+	__asm	movdqa	xmm0, I(1)		/* Load	C. from I(1) */		\
+	__asm	paddsw	xmm2, xmm6		/* xmm2 = F + A. = A.. */	\
+	\
+	__asm	paddw	xmm4, xmm3		/* xmm4 = c4 * ( i0 + i4 ) = E */	\
+	__asm	psubsw  xmm2, xmm1		/* xmm2 = A.. - H. = R2 */	\
+	\
+	__asm	paddsw  xmm1, xmm1		/* xmm1 = H. + H. */	\
+	__asm	paddsw  xmm1, xmm2		/* xmm1 = A.. + H. = R1 */	\
+	\
+	__asm	psubsw	xmm4, xmm7		/* xmm4 = E - G = E. */		\
+	\
+	__asm   movdqa	xmm3, I(2)		/* Load D. from I(2) */		\
+	__asm	paddsw	xmm7, xmm7		/* xmm7 = G + G */	\
+	\
+	__asm	movdqa	I(2), xmm2		/* Write out op2 */		\
+	__asm	paddsw  xmm7, xmm4		/* xmm7 = E + G = G. */		\
+	\
+	__asm	movdqa	I(1), xmm1		/* Write out op1 */		\
+	__asm	psubsw  xmm4, xmm3		/* xmm4 = E. - D. = R4 */	\
+	\
+	__asm	paddsw  xmm3, xmm3		/* xmm3 = D. + D. */	\
+	\
+	__asm	paddsw	xmm3, xmm4		/* xmm3 = E. + D. = R3 */	\
+	\
+	__asm	psubsw	xmm6, xmm5		/* xmm6 = F. - B..= R6 */	\
+	\
+	__asm	paddsw	xmm5, xmm5		/* xmm5 = B.. + B.. */	\
+	\
+	__asm	paddsw	xmm5, xmm6		/* xmm5 = F. + B.. = R5 */	\
+	\
+	__asm	movdqa	I(4), xmm4		/* Write out op4 */		\
+	\
+	__asm 	movdqa	I(3), xmm3		/* Write out op3 */		\
+	__asm	psubsw	xmm7, xmm0		/* xmm7 = G. - C. = R7 */	\
+	\
+	__asm	paddsw  xmm0, xmm0		/* xmm0 = C. + C. */	\
+	\
+	__asm	paddsw  xmm0, xmm7		/* xmm0 = G. + C. = R0 */	\
+	\
+	__asm	movdqa	I(6), xmm6		/* Write out op6 */		\
+	\
+	__asm	movdqa	I(5), xmm5		/* Write out op5 */		\
+	__asm	movdqa	I(7), xmm7		/* Write out op7 */		\
+	\
+	__asm	movdqa	I(0), xmm0		/* Write out op0 */		\
+	\
+	} /* End of Wmt_Row_IDCT macro */
+
+/**************************************************************************************
+ *
+ *		Macro:			Transpose
+ *		
+ *		Description:	The Macro does 8x8 transpose
+ *
+ *		Input:			None
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	None
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+
+#define Transpose __asm {	\
+	/* In-place 8x8 transpose of the 16-bit elements held in I(0)..I(7); I(6) is reused as a scratch slot after its row has been loaded */	\
+	__asm	movdqa		xmm4, I(4)		/* xmm4=e7e6e5e4e3e2e1e0 */	\
+	__asm	movdqa		xmm0, I(5)		/* xmm0=f7f6f5f4f3f2f1f0 */	\
+	\
+	__asm	movdqa		xmm5, xmm4		/* make a copy */			\
+	__asm	punpcklwd	xmm4, xmm0		/* xmm4=f3e3f2e2f1e1f0e0 */	\
+	\
+	__asm	punpckhwd	xmm5, xmm0		/* xmm5=f7e7f6e6f5e5f4e4 */	\
+	__asm	movdqa		xmm6, I(6)		/* xmm6=g7g6g5g4g3g2g1g0 */ \
+	\
+	__asm	movdqa		xmm0, I(7)		/* xmm0=h7h6h5h4h3h2h1h0 */ \
+	__asm	movdqa		xmm7, xmm6		/* make a copy */			\
+	\
+	__asm	punpcklwd	xmm6, xmm0		/* xmm6=h3g3h2g2h1g1h0g0 */ \
+	__asm	punpckhwd	xmm7, xmm0		/* xmm7=h7g7h6g6h5g5h4g4 */ \
+	\
+	__asm	movdqa		xmm3, xmm4		/* make a copy */			\
+	__asm	punpckldq	xmm4, xmm6		/* xmm4=h1g1f1e1h0g0f0e0 */	\
+	\
+	__asm	punpckhdq	xmm3, xmm6		/* xmm3=h3g3f3e3h2g2f2e2 */	\
+	__asm	movdqa		I(6), xmm3		/* save h3g3f3e3h2g2f2e2 */	\
+	/* Free xmm6 */ \
+	__asm	movdqa		xmm6, xmm5		/* make a copy */			\
+	__asm	punpckldq	xmm5, xmm7		/* xmm5=h5g5f5e5h4g4f4e4 */ \
+	\
+	__asm	punpckhdq	xmm6, xmm7		/* xmm6=h7g7f7e7h6g6f6e6 */ \
+	__asm	movdqa		xmm0, I(0)		/* xmm0=a7a6a5a4a3a2a1a0 */	\
+	/* Free xmm7 */ \
+	__asm	movdqa		xmm1, I(1)		/* xmm1=b7b6b5b4b3b2b1b0 */	\
+	__asm	movdqa		xmm7, xmm0		/* make a copy */			\
+	\
+	__asm	punpcklwd	xmm0, xmm1		/* xmm0=b3a3b2a2b1a1b0a0 */	\
+	__asm	punpckhwd	xmm7, xmm1		/* xmm7=b7a7b6a6b5a5b4a4 */ \
+	/* Free xmm1 */ \
+	__asm	movdqa		xmm2, I(2)		/* xmm2=c7c6c5c4c3c2c1c0 */ \
+	__asm	movdqa		xmm3, I(3)	    /* xmm3=d7d6d5d4d3d2d1d0 */ \
+	\
+	__asm	movdqa		xmm1, xmm2		/* make a copy */			\
+	__asm	punpcklwd	xmm2, xmm3		/* xmm2=d3c3d2c2d1c1d0c0 */ \
+	\
+	__asm	punpckhwd	xmm1, xmm3		/* xmm1=d7c7d6c6d5c5d4c4 */ \
+	__asm	movdqa		xmm3, xmm0		/* make a copy	*/			\
+	\
+	__asm	punpckldq	xmm0, xmm2		/* xmm0=d1c1b1a1d0c0b0a0 */ \
+	__asm	punpckhdq	xmm3, xmm2		/* xmm3=d3c3b3a3d2c2b2a2 */ \
+	/* Free xmm2 */ \
+	__asm	movdqa		xmm2, xmm7		/* make a copy */			\
+	__asm	punpckldq	xmm2, xmm1		/* xmm2=d5c5b5a5d4c4b4a4 */	\
+	\
+	__asm	punpckhdq	xmm7, xmm1		/* xmm7=d7c7b7a7d6c6b6a6 */ \
+	__asm	movdqa		xmm1, xmm0		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm0, xmm4		/* xmm0=h0g0f0e0d0c0b0a0 */	\
+	__asm	punpckhqdq	xmm1, xmm4		/* xmm1=h1g1f1e1d1c1b1a1 */ \
+	\
+	__asm	movdqa		I(0), xmm0		/* save I(0) */				\
+	__asm	movdqa		I(1), xmm1		/* save I(1) */				\
+	\
+	__asm	movdqa		xmm0, I(6)		/* load h3g3f3e3h2g2f2e2 */ \
+	__asm	movdqa		xmm1, xmm3		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm1, xmm0		/* xmm1=h2g2f2e2d2c2b2a2 */ \
+	__asm	punpckhqdq	xmm3, xmm0		/* xmm3=h3g3f3e3d3c3b3a3 */	\
+	\
+	__asm	movdqa		xmm4, xmm2		/* make a copy */			\
+	__asm	punpcklqdq	xmm4, xmm5		/* xmm4=h4g4f4e4d4c4b4a4 */	\
+	\
+	__asm	punpckhqdq	xmm2, xmm5		/* xmm2=h5g5f5e5d5c5b5a5 */	\
+	__asm	movdqa		I(2), xmm1		/* save I(2) */				\
+	\
+	__asm	movdqa		I(3), xmm3		/* save I(3) */				\
+	__asm	movdqa		I(4), xmm4		/* save I(4) */				\
+	\
+	__asm	movdqa		I(5), xmm2		/* save I(5) */				\
+	__asm	movdqa		xmm5, xmm7		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm5, xmm6		/* xmm5=h6g6f6e6d6c6b6a6 */	\
+	__asm	punpckhqdq	xmm7, xmm6		/* xmm7=h7g7f7e7d7c7b7a7 */	\
+	\
+	__asm	movdqa		I(6), xmm5		/* save I(6) */				\
+	__asm	movdqa		I(7), xmm7		/* save I(7) */				\
+	\
+	}/* End of Transpose Macro */
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Dequant
+ *		
+ *		Description:	The Macro does dequantization and reorders the coefficients to avoid 
+ *						the first transpose before Wmt_Row_IDCT
+ *
+ *		Input:			[eax], quantized input, 
+ *						[ebx], quantization table,
+ *
+ *		Output:			[eax]
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	None
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant __asm {		\
+	__asm	lea		ecx, WmtDequantConst										\
+	__asm	movdqa	xmm0, [eax]													\
+	\
+	__asm	pmullw	xmm0, [ebx]			/* xmm0 = 07 06 05 04 03 02 01 00 */	\
+	__asm	movdqa	xmm1, [eax + 16]											\
+	\
+	__asm	pmullw	xmm1, [ebx + 16]	/* xmm1 = 17 16 15 14 13 12 11 10 */	\
+	__asm	pshuflw xmm3, xmm0,	078h		/* xmm3 = 07 06 05 04 01 03 02 00 */	\
+	\
+	__asm	movdqa	xmm2, xmm1			/* xmm2 = 17 16 15 14 13 12 11 10 */	\
+	__asm	movdqa	xmm7, [ecx]			/* xmm7 = -- -- -- -- -- FF FF -- */	\
+	\
+	__asm	movdqa	xmm4, [eax + 32]											\
+	__asm	movdqa	xmm5, [eax + 64]											\
+	\
+	__asm	pmullw	xmm4, [ebx + 32]	/* xmm4 = 27 26 25 24 23 22 21 20 */	\
+	__asm	pmullw	xmm5, [ebx + 64]	/* xmm5	= 47 46 45 44 43 42 41 40 */	\
+	\
+	__asm	movdqa	xmm6, [ecx + 16]	/* xmm6 = -- -- FF FF -- -- -- -- */	\
+	__asm	pand	xmm7, xmm2			/* xmm7 = -- -- -- -- -- 12 11 -- */	\
+	\
+	__asm	pand	xmm6, xmm4			/* xmm6 = -- -- 25 24 -- -- -- -- */	\
+	__asm	pxor	xmm2, xmm7			/* xmm2 = 17 16 15 14 13 -- -- 10 */	\
+	\
+	__asm	pxor	xmm4, xmm6			/* xmm4 = 27 26 -- -- 23 22 21 20 */	\
+	__asm	pslldq  xmm7, 4				/* xmm7 = -- -- -- 12 11 -- -- -- */	\
+	\
+	__asm	pslldq	xmm6, 2				/* xmm6 = -- 25 24 -- -- -- -- -- */	\
+	__asm	por		xmm7, xmm6			/* xmm7 = -- 25 24 12 11 -- -- -- */	\
+	\
+	__asm	movdqa	xmm0, [ecx + 32]	/* xmm0 = -- -- -- -- -- FF FF FF */	\
+	__asm	movdqa	xmm6, [ecx + 48]	/* xmm6 = -- -- -- -- FF -- -- -- */	\
+	\
+	__asm	pand	xmm0, xmm3			/* xmm0 = -- -- -- -- -- 03 02 00 */	\
+	__asm   pand	xmm6, xmm5			/* xmm6 = -- -- -- -- 43 -- -- -- */	\
+	\
+	__asm   pxor	xmm3, xmm0			/* xmm3 = 07 06 05 04 01 -- -- -- */	\
+	__asm	pxor	xmm5, xmm6			/* xmm5 = 47 46 45 44 -- 42 41 40 */	\
+	\
+	__asm	por		xmm0, xmm7			/* xmm0 = -- 25 24 12 11 03 02 00 */	\
+	__asm	pslldq	xmm6, 8			    /* xmm6 = 43 -- -- -- -- -- -- -- */	\
+	\
+ 	__asm	por		xmm0, xmm6		/* O0 =xmm0 = 43 25 24 12 11 03 02 00 */	\
+	/* 02345 in use */ \
+	\
+	__asm	movdqa	xmm1, [ecx + 64 ]	/* xmm1 = -- -- -- FF FF -- -- -- */	\
+	__asm	pshuflw	xmm5, xmm5, 0B4h		/* xmm5 = 47 46 45 44 42 -- 41 40 */	\
+	\
+	__asm	movdqa	xmm7, xmm1			/* xmm7 = -- -- -- FF FF -- -- -- */	\
+	__asm	movdqa	xmm6, xmm1			/* xmm6 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	movdqa	[eax], xmm0			/* write  43 25 24 12 11 03 02 00 */	\
+	__asm	pshufhw	xmm4, xmm4, 0C2h		/* xmm4 = 27 -- -- 26 23 22 21 20 */	\
+	\
+	__asm	pand	xmm7, xmm4			/* xmm7 = -- -- -- 26 23 -- -- -- */	\
+	__asm	pand	xmm1, xmm5			/* xmm1 = -- -- -- 44 42 -- -- -- */	\
+	\
+	__asm	pxor	xmm4, xmm7			/* xmm4 = 27 -- -- -- -- 22 21 20 */	\
+	__asm	pxor	xmm5, xmm1			/* xmm5 = 47 46 45 -- -- -- 41 40 */	\
+	\
+	__asm	pshuflw	xmm2, xmm2, 0C6h		/* xmm2 = 17 16 15 14 13 10 -- -- */	\
+	__asm	movdqa	xmm0, xmm6			/* xmm0 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	pslldq	xmm7, 2				/* xmm7 = -- -- 26 23 -- -- -- -- */	\
+	__asm	pslldq  xmm1, 6				/* xmm1 = 44 42 -- -- -- -- -- -- */	\
+	\
+	__asm	psrldq	xmm0, 2				/* xmm0 = -- -- -- -- FF FF -- -- */	\
+	__asm	pand	xmm6, xmm3			/* xmm6 = -- -- -- 04 01 -- -- -- */	\
+	\
+	__asm	pand	xmm0, xmm2			/* xmm0 = -- -- -- -- 13 10 -- -- */	\
+	__asm	pxor	xmm3, xmm6			/* xmm3 = 07 06 05 -- -- -- -- -- */	\
+	\
+	__asm	pxor	xmm2, xmm0			/* xmm2 = 17 16 15 14 -- -- -- -- */	\
+	__asm	psrldq	xmm6, 6				/* xmm0 = -- -- -- -- -- -- 04 01 */	\
+	\
+	__asm	por		xmm1, xmm7			/* xmm1 = 44 42 26 23 -- -- -- -- */	\
+	__asm	por		xmm0, xmm6			/* xmm1 = -- -- -- -- 13 10 04 01 */	\
+	/* 12345 in use */	\
+	__asm   por		xmm1, xmm0		/* o1 =xmm1 = 44 42 26 23 13 10 04 01 */	\
+	__asm   pshuflw	xmm4, xmm4, 093h		/* xmm4 = 27 -- -- -- 22 21 20 -- */	\
+	\
+	__asm	pshufhw	xmm4, xmm4, 093h		/* xmm4 = -- -- -- 27 22 21 20 -- */	\
+	__asm	movdqa	[eax + 16], xmm1	/* write  44 42 26 23 13 10 04 01 */	\
+	\
+	__asm	pshufhw	xmm3, xmm3, 0D2h		/* xmm3 = 07 05 -- 06 -- -- -- -- */	\
+	__asm	movdqa	xmm0, [ecx + 64]	/* xmm0 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	pand	xmm0, xmm3			/* xmm0 = -- -- -- 06 -- -- -- -- */	\
+	__asm	psrldq	xmm3, 12			/* xmm3 = -- -- -- -- -- -- 07 05 */	\
+	\
+	__asm	psrldq	xmm0, 8				/* xmm0 = -- -- -- -- -- -- -- 06 */	\
+	\
+	__asm	movdqa	xmm6, [ecx + 64]	/* xmm6 = -- -- -- FF FF -- -- -- */	\
+	__asm	movdqa	xmm7, [ecx + 96]	/* xmm7 = -- -- -- -- FF FF -- -- */	\
+	\
+	__asm	pand	xmm6, xmm4			/* xmm6 = -- -- -- 27 22 -- -- -- */	\
+	__asm   pxor	xmm4, xmm6			/* xmm4 = -- -- -- -- -- 21 20 -- */	\
+	\
+	__asm	por		xmm3, xmm6			/* xmm3 = -- -- -- 27 22 -- 07 05 */	\
+	__asm	pand	xmm7, xmm4		    /* xmm7 = -- -- -- -- -- 21 -- -- */	\
+	\
+	__asm	por		xmm0, xmm7			/* xmm0 = -- -- -- -- -- 21 -- 06 */	\
+	__asm	pxor	xmm4, xmm7			/* xmm4 = -- -- -- -- -- -- 20 -- */	\
+	\
+	__asm	movdqa	xmm6, [ecx + 16 ]	/* xmm6 = -- -- FF FF -- -- -- -- */	\
+	__asm	movdqa	xmm1, [ecx + 64 ]	/* xmm1 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	pand	xmm6, xmm2			/* xmm6 = -- -- 15 14 -- -- -- -- */	\
+	__asm	pand	xmm1, xmm6			/* xmm1 = -- -- -- 14 -- -- -- -- */	\
+	\
+	__asm	pxor	xmm2, xmm6			/* xmm2 = 17 16 -- -- -- -- -- -- */	\
+	__asm	pxor	xmm6, xmm1			/* xmm6 = -- -- 15 -- -- -- -- -- */	\
+	\
+	__asm	psrldq	xmm1, 4				/* xmm1 = -- -- -- -- -- 14 -- -- */	\
+	\
+	__asm	psrldq	xmm6, 8				/* xmm6 = -- -- -- -- -- -- 15 -- */	\
+	__asm	por		xmm3, xmm1			/* xmm3 = -- -- -- 27 22 14 07 05 */	\
+	\
+	__asm	por		xmm0, xmm6			/* xmm0 = -- -- -- -- -- 21 15 06 */	\
+	__asm	pshufhw	xmm5, xmm5, 0E1h		/* xmm5 = 47 46 -- 45 -- -- 41 40 */	\
+	\
+	__asm	movdqa	xmm1, [ecx + 64]	/* xmm1 = -- -- -- FF FF -- -- -- */	\
+	__asm	pshuflw	xmm5, xmm5, 072h		/* xmm5 = 47 46 -- 45 41 -- 40 -- */	\
+	\
+	__asm	movdqa	xmm6, xmm1			/* xmm6 = -- -- -- FF FF -- -- -- */	\
+	__asm	pand	xmm1, xmm5			/* xmm1 = -- -- -- 45 41 -- -- -- */	\
+	\
+	__asm	pxor	xmm5, xmm1			/* xmm5 = 47 46 -- -- -- -- 40 -- */	\
+	__asm	pslldq	xmm1, 4				/* xmm1 = -- 45 41 -- -- -- -- -- */	\
+	\
+	__asm	pshufd	xmm5, xmm5, 09Ch		/* xmm5 = -- -- -- -- 47 46 40 -- */	\
+	__asm	por		xmm3, xmm1			/* xmm3 = -- 45 41 27 22 14 07 05 */	\
+	\
+	__asm	movdqa	xmm1, [eax + 96]	/* xmm1 = 67 66 65 64 63 62 61 60 */	\
+	__asm	pmullw	xmm1, [ebx + 96]											\
+	\
+	__asm	movdqa	xmm7, [ecx]		    /* xmm7 = -- -- -- -- -- FF FF -- */	\
+	\
+	__asm	psrldq	xmm6, 8				/* xmm6 = -- -- -- -- -- -- -- FF */	\
+	__asm	pand	xmm7, xmm5			/* xmm7 = -- -- -- -- -- 46 40 -- */	\
+	\
+	__asm	pand	xmm6, xmm1			/* xmm6 = -- -- -- -- -- -- -- 60 */	\
+	__asm	pxor	xmm5, xmm7		    /* xmm5 = -- -- -- -- 47 -- -- -- */	\
+	\
+	__asm	pxor	xmm1, xmm6			/* xmm1 = 67 66 65 64 63 62 61 -- */	\
+	__asm	pslldq	xmm5, 2				/* xmm5 = -- -- -- 47 -- -- -- -- */	\
+	\
+	__asm	pslldq	xmm6, 14			/* xmm6 = 60 -- -- -- -- -- -- -- */	\
+	__asm	por		xmm4, xmm5			/* xmm4 = -- -- -- 47 -- -- 20 -- */	\
+	\
+	__asm	por		xmm3, xmm6		/* O2 = xmm3= 60 45 41 27 22 14 07 05 */	\
+	__asm	pslldq	xmm7, 6				/* xmm7 = -- -- 46 40 -- -- -- -- */	\
+	\
+	__asm	movdqa	[eax+32], xmm3		/* write  60 45 41 27 22 14 07 05 */	\
+	__asm	por		xmm0, xmm7			/* xmm0 = -- -- 46 40 -- 21 15 06 */	\
+	/* 0, 1, 2, 4 in use */	\
+	__asm	movdqa	xmm3, [eax + 48]	/* xmm3 = 37 36 35 34 33 32 31 30 */	\
+	__asm	movdqa	xmm5, [eax + 80]	/* xmm5 = 57 56 55 54 53 52 51 50 */	\
+	\
+	__asm	pmullw	xmm3, [ebx + 48]											\
+	__asm	pmullw	xmm5, [ebx + 80]											\
+	\
+	__asm	movdqa	xmm6, [ecx + 64]	/* xmm6 = -- -- -- FF FF -- -- -- */	\
+	__asm	movdqa	xmm7, [ecx + 64]	/* xmm7 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	psrldq	xmm6, 8				/* xmm6 = -- -- -- -- -- -- -- FF */	\
+	__asm	pslldq	xmm7, 8				/* xmm7 = FF -- -- -- -- -- -- -- */	\
+	\
+	__asm	pand	xmm6, xmm3			/* xmm6 = -- -- -- -- -- -- -- 30 */	\
+	__asm	pand	xmm7, xmm5			/* xmm7 = 57 -- -- -- -- -- -- -- */	\
+	\
+	__asm	pxor	xmm3, xmm6			/* xmm3 = 37 36 35 34 33 32 31 -- */	\
+	__asm	pxor	xmm5, xmm7			/* xmm5 = __ 56 55 54 53 52 51 50 */	\
+	\
+	__asm	pslldq	xmm6, 6				/* xmm6 = -- -- -- -- 30 -- -- -- */	\
+	__asm	psrldq	xmm7, 2				/* xmm7 = -- 57 -- -- -- -- -- -- */	\
+	\
+	__asm	por		xmm6, xmm7			/* xmm6 = -- 57 -- -- 30 -- -- -- */	\
+	__asm	movdqa	xmm7, [ecx]			/* xmm7 = -- -- -- -- -- FF FF -- */	\
+	\
+	__asm	por		xmm0, xmm6			/* xmm0 = -- 57 46 40 30 21 15 06 */	\
+	__asm	psrldq	xmm7, 2				/* xmm7 = -- -- -- -- -- -- FF FF */	\
+	\
+	__asm	movdqa	xmm6, xmm2			/* xmm6 = 17 16 -- -- -- -- -- -- */	\
+	__asm	pand	xmm7, xmm1			/* xmm7 = -- -- -- -- -- -- 61 -- */	\
+	\
+	__asm	pslldq	xmm6, 2				/* xmm6 = 16 -- -- -- -- -- -- -- */	\
+	__asm	psrldq	xmm2, 14			/* xmm2 = -- -- -- -- -- -- -- 17 */	\
+	\
+	__asm	pxor	xmm1, xmm7			/* xmm1 = 67 66 65 64 63 62 -- -- */	\
+	__asm	pslldq	xmm7, 12			/* xmm7 = 61 -- -- -- -- -- -- -- */	\
+	\
+	__asm	psrldq	xmm6, 14			/* xmm6 = -- -- -- -- -- -- -- 16 */	\
+	__asm	por		xmm4, xmm6			/* xmm4 = -- -- -- 47 -- -- 20 16 */	\
+	\
+	__asm	por		xmm0, xmm7			/* xmm0 = 61 57 46 40 30 21 15 06 */	\
+	__asm	movdqa	xmm6, [ecx]			/* xmm6 = -- -- -- -- -- FF FF -- */	\
+	\
+	__asm	psrldq	xmm6, 2				/* xmm6 = -- -- -- -- -- -- FF FF */	\
+	__asm	movdqa	[eax+48], xmm0		/* write  61 57 46 40 30 21 15 06 */	\
+	/* 1, 2, 3, 4, 5 in use */\
+	__asm	movdqa	xmm0, [ecx]			/* xmm0	= -- -- -- -- -- FF FF -- */	\
+	__asm	pand	xmm6, xmm3			/* xmm6 = -- -- -- -- -- -- 31 -- */	\
+	\
+	__asm	movdqa	xmm7, xmm3			/* xmm7 = 37 36 35 34 33 32 31 -- */	\
+	__asm	pxor	xmm3, xmm6			/* xmm3 = 37 36 35 34 33 32 -- -- */	\
+	\
+	__asm	pslldq	xmm3, 2				/* xmm3 = 36 35 34 33 32 -- -- -- */	\
+	__asm	pand	xmm0, xmm1			/* xmm0 = -- -- -- -- -- 62 -- -- */	\
+	\
+	__asm	psrldq	xmm7, 14			/* xmm7 = -- -- -- -- -- -- -- 37 */	\
+	__asm	pxor	xmm1, xmm0			/* xmm1 = 67 66 65 64 63 -- -- -- */	\
+	\
+	__asm	por		xmm6, xmm7			/* xmm6 = -- -- -- -- -- -- 31 37 */	\
+	__asm	movdqa  xmm7, [ecx + 64]	/* xmm7 = -- -- -- FF FF -- -- -- */	\
+	\
+	__asm	pshuflw	xmm6, xmm6, 01Eh		/* xmm6	= -- -- -- -- 37 31 -- -- */	\
+	__asm	pslldq	xmm7, 6				/* xmm7 = FF FF -- -- -- -- -- -- */	\
+	\
+	__asm	por		xmm4, xmm6			/* xmm4 = -- -- -- 47 37 31 20 16 */	\
+	__asm	pand	xmm7, xmm5			/* xmm7 = -- 56 -- -- -- -- -- -- */	\
+	\
+	__asm	pslldq	xmm0, 8				/* xmm0 = -- 62 -- -- -- -- -- -- */	\
+	__asm	pxor	xmm5, xmm7			/* xmm5 = -- -- 55 54 53 52 51 50 */	\
+	\
+	__asm	psrldq	xmm7, 2				/* xmm7 = -- -- 56 -- -- -- -- -- */	\
+	\
+	__asm	pshufhw	xmm3, xmm3, 087h		/* xmm3 = 35 33 34 36 32 -- -- -- */	\
+	__asm	por		xmm0, xmm7			/* xmm0 = -- 62 56 -- -- -- -- -- */	\
+	\
+	__asm	movdqa	xmm7, [eax + 112]	/* xmm7 = 77 76 75 74 73 72 71 70 */	\
+	__asm	pmullw	xmm7, [ebx + 112]											\
+	\
+	__asm	movdqa	xmm6, [ecx + 64]	/* xmm6 = -- -- -- FF FF -- -- -- */	\
+	__asm	por		xmm4, xmm0			/* xmm4 = -- 62 56 47 37 31 20 16 */	\
+	\
+	__asm	pshuflw	xmm7, xmm7, 0E1h		/* xmm7 = 77 76 75 74 73 72 70 71 */	\
+	__asm	psrldq	xmm6, 8				/* xmm6 = -- -- -- -- -- -- -- FF */	\
+	\
+	__asm	movdqa	xmm0, [ecx + 64]	/* xmm0 = -- -- -- FF FF -- -- -- */	\
+	__asm	pand	xmm6, xmm7			/* xmm6 = -- -- -- -- -- -- -- 71 */	\
+	\
+	__asm	pand	xmm0, xmm3			/* xmm0 = -- -- -- 36 32 -- -- -- */	\
+	__asm	pxor	xmm7, xmm6			/* xmm7 = 77 76 75 74 73 72 70 -- */	\
+	\
+	__asm	pxor	xmm3, xmm0			/* xmm3 = 35 33 34 -- -- -- -- -- */	\
+	__asm	pslldq	xmm6, 14			/* xmm6 = 71 -- -- -- -- -- -- -- */	\
+	\
+	__asm	psrldq	xmm0, 4				/* xmm0 = -- -- -- -- -- 36 32 -- */	\
+	__asm	por		xmm4, xmm6			/* xmm4 = 71 62 56 47 37 31 20 16 */	\
+	\
+	__asm	por		xmm2, xmm0			/* xmm2 = -- -- -- -- -- 36 32 17 */	\
+	__asm	movdqa	[eax + 64], xmm4	/* write  71 62 56 47 37 31 20 16 */	\
+	/* 1, 2, 3, 5, 7 in use */ \
+	__asm	movdqa	xmm6, [ecx + 80]	/* xmm6 = -- -- FF -- -- -- -- FF */	\
+	__asm	pshufhw	xmm7, xmm7,	0D2h		/* xmm7 = 77 75 74 76 73 72 70 __ */	\
+	\
+	__asm	movdqa	xmm4, [ecx]			/* xmm4 = -- -- -- -- -- FF FF -- */	\
+	__asm	movdqa	xmm0, [ecx+48]		/* xmm0 = -- -- -- -- FF -- -- -- */	\
+	\
+	__asm	pand	xmm6, xmm5			/* xmm6 = -- -- 55 -- -- -- -- 50 */	\
+	__asm	pand	xmm4, xmm7			/* xmm4 = -- -- -- -- -- 72 70 -- */	\
+	\
+	__asm	pand	xmm0, xmm1			/* xmm0 = -- -- -- -- 63 -- -- -- */	\
+	__asm	pxor	xmm5, xmm6			/* xmm5 = -- -- -- 54 53 52 51 -- */	\
+	\
+	__asm	pxor	xmm7, xmm4			/* xmm7 = 77 75 74 76 73 -- -- -- */	\
+	__asm	pxor	xmm1, xmm0			/* xmm1 = 67 66 65 64 -- -- -- -- */	\
+	\
+	__asm	pshuflw	xmm6, xmm6, 02Bh		/* xmm6 = -- -- 55 -- 50 -- -- -- */	\
+	__asm	pslldq	xmm4, 10				/* xmm4 = 72 20 -- -- -- -- -- -- */	\
+	\
+	__asm	pshufhw	xmm6, xmm6, 0B1h		/* xmm6 = -- -- -- 55 50 -- -- -- */	\
+	__asm	pslldq	xmm0, 4			/* xmm0 = -- -- 63 -- -- -- -- -- */	\
+	\
+	__asm	por		xmm6, xmm4			/* xmm6 = 72 70 -- 55 50 -- -- -- */	\
+	__asm	por		xmm2, xmm0			/* xmm2 = -- -- 63 -- -- 36 32 17 */	\
+	\
+	__asm	por		xmm2, xmm6			/* xmm2 = 72 70 64 55 50 36 32 17 */	\
+	__asm	pshufhw xmm1, xmm1, 0C9h		/* xmm1 = 67 64 66 65 -- -- -- -- */	\
+	\
+	__asm	movdqa	xmm6, xmm3			/* xmm6 = 35 33 34 -- -- -- -- -- */	\
+	__asm	movdqa  [eax+80], xmm2		/* write  72 70 64 55 50 36 32 17 */	\
+	\
+	__asm	psrldq	xmm6, 12			/* xmm6 = -- -- -- -- -- -- 35 33 */	\
+	__asm	pslldq	xmm3, 4				/* xmm3 = 34 -- -- -- -- -- -- -- */	\
+	\
+	__asm	pshuflw	xmm5, xmm5, 04Eh		/* xmm5 = -- -- -- 54 51 -- 53 52 */	\
+	__asm	movdqa	xmm4, xmm7			/* xmm4 = 77 75 74 76 73 -- -- -- */	\
+	\
+	__asm	movdqa	xmm2, xmm5			/* xmm2 = -- -- -- 54 51 -- 53 52 */	\
+	__asm	psrldq	xmm7, 10			/* xmm7 = -- -- -- -- -- 77 75 74 */	\
+	\
+	__asm	pslldq	xmm4, 6				/* xmm4 = 76 73 -- -- -- -- -- -- */	\
+	__asm   pslldq	xmm2, 12			/* xmm2 = 53 52 -- -- -- -- -- -- */	\
+	\
+	__asm	movdqa	xmm0, xmm1			/* xmm0 = 67 64 66 65 -- -- -- -- */	\
+	__asm	psrldq	xmm1, 12			/* xmm1 = -- -- -- -- -- -- 67 64 */	\
+	\
+	__asm	psrldq	xmm5, 6				/* xmm5 = -- -- -- -- -- -- 54 51 */	\
+	__asm	psrldq	xmm3, 14			/* xmm3 = -- -- -- -- -- -- -- 34 */	\
+	\
+	__asm	pslldq	xmm7, 10			/* xmm7 = 77 75 74 -- -- -- -- -- */	\
+	__asm	por		xmm4, xmm6			/* xmm4 = 76 73 -- -- -- -- 35 33 */	\
+	\
+	__asm	psrldq	xmm2, 10			/* xmm2 = -- -- -- -- -- 53 52 -- */	\
+	__asm	pslldq	xmm0, 4				/* xmm0 = 66 65 -- -- -- -- -- -- */	\
+	\
+	__asm	pslldq	xmm1, 8				/* xmm1 = -- -- 67 64 -- -- -- -- */	\
+	__asm	por		xmm3, xmm7			/* xmm3 = 77 75 74 -- -- -- -- 34 */	\
+	\
+	__asm	psrldq	xmm0, 6				/* xmm0 = -- -- -- 66 65 -- -- -- */	\
+	__asm	pslldq	xmm5, 4				/* xmm5 = -- -- -- -- 54 51 -- -- */	\
+	\
+	__asm	por		xmm4, xmm1			/* xmm4 = 76 73 67 64 -- -- 35 33 */	\
+	__asm	por		xmm3, xmm2			/* xmm3 = 77 75 74 -- -- 53 52 34 */	\
+	\
+	__asm	por		xmm4, xmm5			/* xmm4 = 76 73 67 64 54 51 35 33 */	\
+	__asm	por		xmm3, xmm0			/* xmm3 = 77 75 74 66 65 53 52 34 */	\
+	\
+	__asm	movdqa	[eax+96], xmm4		/* write  76 73 67 64 54 51 35 33 */	\
+	__asm	movdqa	[eax+112], xmm3		/* write  77 75 74 66 65 53 52 34 */	\
+	\
+	}/* end of Wmt_Dequant Macro */
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Dequant_Dx
+ *		
+ *		Description:	Dequantizes 64 16-bit coefficients, leaving their order unchanged
+ *
+ *		Input:			[eax], quantized input, 
+ *						[ebx], quantization table,
+ *
+ *		Output:			[edx], word-wise products of input and table (low 16 bits)
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	movdqa/pmullw memory operands must be 16-byte aligned
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant_Dx __asm {		\
+	__asm	movdqa	xmm0, [eax]			/* load quantized row 0 */				\
+	__asm	movdqa	xmm1, [eax + 16]	/* load quantized row 1 */				\
+	\
+	__asm	pmullw	xmm0, [ebx]			/* xmm0 = 07 06 05 04 03 02 01 00 */	\
+	__asm	pmullw	xmm1, [ebx + 16]	/* xmm1 = 17 16 15 14 13 12 11 10 */	\
+	\
+	__asm	movdqa	xmm2, [eax + 32]	/* load quantized row 2 */				\
+	__asm	movdqa	xmm3, [eax + 48]	/* load quantized row 3 */				\
+	\
+	__asm	pmullw	xmm2, [ebx + 32]	/* xmm2 = 27 26 25 24 23 22 21 20 */	\
+	__asm	pmullw	xmm3, [ebx + 48]	/* xmm3 = 37 36 35 34 33 32 31 30 */	\
+	\
+	__asm	movdqa	[edx], xmm0			/* write  07 06 05 04 03 02 01 00 */	\
+	__asm	movdqa	[edx + 16], xmm1	/* write  17 16 15 14 13 12 11 10 */	\
+	\
+	__asm	movdqa	xmm4, [eax + 64]	/* load quantized row 4 */				\
+	__asm	movdqa	xmm5, [eax + 80]	/* load quantized row 5 */				\
+	\
+	__asm	pmullw	xmm4, [ebx + 64]	/* xmm4	= 47 46 45 44 43 42 41 40 */	\
+	__asm	pmullw	xmm5, [ebx + 80]	/* xmm5 = 57 56 55 54 53 52 51 50 */	\
+	\
+	__asm	movdqa	[edx+32], xmm2		/* write  27 26 25 24 23 22 21 20 */	\
+	__asm	movdqa	[edx+48], xmm3		/* write  37 36 35 34 33 32 31 30 */	\
+	\
+	__asm	movdqa	xmm6, [eax + 96]	/* load quantized row 6 */				\
+	__asm	movdqa	xmm7, [eax + 112]	/* load quantized row 7 */				\
+	\
+	__asm	pmullw	xmm6, [ebx + 96]	/* xmm6 = 67 66 65 64 63 62 61 60 */	\
+	__asm	pmullw	xmm7, [ebx + 112]	/* xmm7 = 77 76 75 74 73 72 71 70 */	\
+	\
+	__asm	movdqa	[edx+64], xmm4		/* write  47 46 45 44 43 42 41 40 */		\
+	__asm	movdqa  [edx+80], xmm5		/* write  57 56 55 54 53 52 51 50 */	\
+	\
+	__asm	movdqa	[edx+96], xmm6		/* write  67 66 65 64 63 62 61 60 */	\
+	__asm	movdqa	[edx+112], xmm7		/* write  77 76 75 74 73 72 71 70 */	\
+	\
+	}/* end of Wmt_Dequant_Dx Macro */
+
+
+
+
+/**************************************************************************************
+ *
+ *		Routine:		Wmt_IDct_Dx
+ *		
+ *		Description:	Dequantize an 8x8 coefficient block and perform IDCT on it
+ *
+ *		Input:			InputData (coefficients), QuantizationTable, OutputData (destination)
+ *
+ *		Output:			Written through OutputData; I() and O() both address that buffer
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in raster order
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+void  Wmt_IDct_Dx(short *InputData, short *QuantizationTable, short *OutputData)
+{
+
+	
+	__asm 
+	{
+
+        push    ebx							/* ebx is loaded with the table pointer below */
+
+        mov		eax, InputData				/* eax -> quantized coefficients */
+		mov		ebx, QuantizationTable		/* ebx -> quantization table */
+		mov		edx, OutputData				/* edx -> output block */
+		lea		ecx, WmtIdctConst			/* ecx -> constants addressed by C(i) */
+		
+		Wmt_Dequant_Dx						/* [edx] = [eax] * [ebx], word by word */
+
+#undef	I
+#undef	O
+#undef	C
+#define I(i) [edx + 16 * i ]
+#define O(i) [edx + 16 * i ]
+#define C(i) [ecx + 16 * (i-1) ]
+
+		
+		/* Wmt_Dequant_Dx does no reordering, so no transpose happens before the row pass */
+
+		Wmt_Row_IDCT						/* defined elsewhere; presumably the first 1-D pass */
+
+		Transpose							/* defined elsewhere; presumably an 8x8 transpose */
+		
+		Wmt_Column_IDCT						/* defined elsewhere; presumably the second 1-D pass */
+
+        pop     ebx							/* restore the caller's ebx */
+	}
+
+}
+
+/**************************************************************************************
+ **************  Wmt_IDCT10_Dx   ******************************************************
+ **************************************************************************************
+ 
+
+	In IDCT10, we are dealing with only ten Non-Zero coefficients in the 8x8 block. 
+	In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to 
+	do 1-D row idcts on the first four rows, the remaining four rows stay zero anyway. 
+	After row IDCTs, since every column could have nonzero coefficients, we need to do
+	eight 1-D column IDCTs. However, for each column, there are at most four nonzero
+	coefficients, coefficient 0 to coefficient 3. The same holds for the coefficients of
+	the four 1-D row idcts. For this reason, the process of a 1-D IDCT is simplified 
+	
+	from a full version:
+	
+	A = (C1 * I1) + (C7 * I7)		B = (C7 * I1) - (C1 * I7)
+	C = (C3 * I3) + (C5 * I5)		D = (C3 * I5) - (C5 * I3)
+	A. = C4 * (A - C)				B. = C4 * (B - D)
+    C. = A + C						D. = B + D
+   
+    E = C4 * (I0 + I4)				F = C4 * (I0 - I4)
+    G = (C2 * I2) + (C6 * I6)		H = (C6 * I2) - (C2 * I6)
+    E. = E - G
+    G. = E + G
+   
+    A.. = F + A.					B.. = B. - H
+    F.  = F - A. 					H.  = B. + H
+   
+    R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+    R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+
+	To:
+
+  	A = (C1 * I1)					B = (C7 * I1) 
+	C = (C3 * I3)					D = - (C5 * I3)
+	A. = C4 * (A - C)				B. = C4 * (B - D)
+    C. = A + C						D. = B + D
+   
+    E = C4 * I0						F = E
+    G = (C2 * I2)					H = (C6 * I2)
+    E. = E - G
+    G. = E + G
+   
+    A.. = F + A.					B.. = B. - H
+    F.  = F - A. 					H.  = B. + H
+   
+    R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+    R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+	
+******************************************************************************************/
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Column_IDCT10
+ *		
+ *		Description:	The Macro does 1-D IDct on 8 columns, reading only I(0)..I(3). 
+ *
+ *		Input:			I(0)..I(3); constants C(1)..C(7); rounding term Eight
+ *
+ *		Output:			O(0)..O(7), each result shifted right by 4 after adding Eight
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	I(1) and I(2) are overwritten as scratch for C. and D.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/*	
+	The major difference between Willamette processor and other IA32 processors is that 
+	all of the simd integer instructions now support the 128 bit xmm registers instead 
+	of 64 bit mmx registers. By using these instructions, we can do 8 1-D column idcts 
+	that take shorts as input and output shorts at once
+
+*/
+
+#define Wmt_Column_IDCT10 __asm {		\
+	\
+	__asm	movdqa	xmm2, I(3)		/* xmm2 = i3 */				\
+	__asm	movdqa	xmm6, C(3)		/* xmm6 = c3 */				\
+	\
+	__asm	movdqa	xmm4, xmm2		/* xmm4 = i3 */				\
+	__asm	pmulhw  xmm4, xmm6		/* xmm4 = c3 * i3 - i3 */	\
+	\
+	__asm	movdqa  xmm1, C(5)		/* xmm1 = c5 */				\
+	__asm	movdqa	xmm5, xmm1		/* xmm5 = c5 (never read) */	\
+	\
+	__asm	pmulhw	xmm1, xmm2		/* xmm1 = c5 * i3 - i3 */	\
+	__asm	movdqa  xmm3, I(1)		/* xmm3 = i1 */				\
+	\
+	__asm	movdqa	xmm0, C(1)		/* xmm0 = c1 */				\
+	__asm	paddw   xmm4, xmm2		/* xmm4 = c3 * i3 = C */	\
+	\
+	__asm	movdqa	xmm7, C(7)		/* xmm7 = c7 */				\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = c5 * i3 */		\
+	__asm	movdqa	xmm5, xmm0		/* xmm5 = c1 (never read) */	\
+	\
+	__asm	pmulhw	xmm0, xmm3		/* xmm0 = c1 * i1 - i1 */	\
+	__asm	pxor	xmm6, xmm6		/* clear xmm6 */			\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = - c5 * i3 = D */	\
+	__asm	paddw	xmm0, xmm3		/* xmm0 = c1 * i1 = A */	\
+	\
+	__asm	pmulhw	xmm3, xmm7		/* xmm3 = c7 * i1 = B */		\
+	__asm	movdqa	xmm2, I(2)		/* xmm2 = i2 */				\
+	\
+	__asm	movdqa	xmm1, xmm2		/* xmm1 = i2 */				\
+	__asm	pmulhw	xmm2, C(2)		/* xmm2 = i2 * c2 -i2 */	\
+	\
+	__asm	psubsw	xmm0, xmm4		/* xmm0 = A - C */			\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = i2 * c2 = G */	\
+	__asm	pmulhw	xmm1, C(6)		/* xmm1 = c6 * i2 = H */	\
+	\
+	__asm	paddsw	xmm4, xmm4		/* xmm4 = C + C */			\
+	__asm	paddsw	xmm4, xmm0		/* xmm4 = A + C = C. */		\
+	\
+	__asm	psubsw	xmm3, xmm6		/* xmm3 = B - D */			\
+	__asm	paddsw	xmm6, xmm6		/* xmm6 = D + D */			\
+	\
+	__asm	paddsw	xmm6, xmm3		/* xmm6 = B + D = D. */		\
+	__asm	movdqa	I(1), xmm4		/* Save C. at I(1)	*/		\
+	\
+	__asm	movdqa	xmm4, C(4)		/* xmm4 = c4 */							\
+	__asm	movdqa  xmm5, xmm3		/* xmm5 = B - D */						\
+	\
+	__asm	pmulhw	xmm3, xmm4		/* xmm3 = ( c4 -1 ) * ( B - D ) */		\
+	\
+	__asm	movdqa	xmm7, xmm2		/* xmm7 = c2 * i2 = G */				\
+	__asm	movdqa	I(2), xmm6		/* Save D. at I(2) */		\
+	\
+	__asm	movdqa	xmm2, xmm0		/* xmm2 = A - C */			\
+	__asm	movdqa	xmm6, I(0)		/* xmm6 = i0 */				\
+	\
+	__asm	pmulhw	xmm0, xmm4		/* xmm0 = ( c4 - 1 ) * ( A - C ) */\
+	__asm	paddw	xmm5, xmm3		/* xmm5 = c4 * ( B - D ) = B. */		\
+	\
+	__asm	psubsw	xmm5, xmm1		/* xmm5 = B. - H = B.. */	\
+	__asm	paddw	xmm2, xmm0		/* xmm2 = c4 * ( A - C) = A. */			\
+	\
+	__asm	movdqa	xmm0, xmm6		/* xmm0 = i0 */					\
+	__asm	pmulhw	xmm6, xmm4		/* xmm6 = (c4 - 1) * i0 */	\
+	\
+	__asm	paddsw	xmm1, xmm1		/* xmm1 = H + H */			\
+	__asm	paddsw	xmm1, xmm5		/* xmm1 = B. + H = H. */	\
+	\
+	__asm	paddw	xmm6, xmm0		/* xmm6 = c4 *  i0 = E = F */	\
+	__asm	movdqa	xmm4, xmm6		/* xmm4 = c4 *  i0 = E */		\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = F - A. = F. */	\
+	__asm	paddsw	xmm2, xmm2		/* xmm2 = A. + A. */		\
+	\
+	__asm	movdqa	xmm0, I(1)		/* Load	C. from I(1) */		\
+	__asm	paddsw	xmm2, xmm6		/* xmm2 = F + A. = A.. */	\
+	\
+	__asm	psubsw  xmm2, xmm1		/* xmm2 = A.. - H. = R2 */				\
+	\
+	__asm	paddsw	xmm2, Eight		/* Adjust R2 and R1 before shifting */	\
+	__asm	paddsw  xmm1, xmm1		/* xmm1 = H. + H. */					\
+	\
+	__asm	paddsw  xmm1, xmm2		/* xmm1 = A.. + H. = R1 */	\
+	__asm	psraw	xmm2, 4			/* xmm2 = op2 */			\
+	\
+	__asm	psubsw	xmm4, xmm7		/* xmm4 = E - G = E. */		\
+	__asm	psraw	xmm1, 4			/* xmm1 = op1 */			\
+	\
+	__asm   movdqa	xmm3, I(2)		/* Load D. from I(2) */		\
+	__asm	paddsw	xmm7, xmm7		/* xmm7 = G + G */			\
+	\
+	__asm	movdqa	O(2), xmm2		/* Write out op2 */			\
+	__asm	paddsw  xmm7, xmm4		/* xmm7 = E + G = G. */		\
+	\
+	__asm	movdqa	O(1), xmm1		/* Write out op1 */			\
+	__asm	psubsw  xmm4, xmm3		/* xmm4 = E. - D. = R4 */	\
+	\
+	__asm	paddsw	xmm4, Eight		/* Adjust R4 and R3 before shifting */	\
+	__asm	paddsw  xmm3, xmm3		/* xmm3 = D. + D. */					\
+	\
+	__asm	paddsw	xmm3, xmm4		/* xmm3 = E. + D. = R3 */	\
+	__asm	psraw	xmm4, 4			/* xmm4 = op4 */			\
+	\
+	__asm	psubsw	xmm6, xmm5		/* xmm6 = F. - B..= R6 */	\
+	__asm	psraw	xmm3, 4			/* xmm3 = op3 */			\
+	\
+	__asm	paddsw	xmm6, Eight		/* Adjust R6 and R5 before shifting */	\
+	__asm	paddsw	xmm5, xmm5		/* xmm5 = B.. + B.. */					\
+	\
+	__asm	paddsw	xmm5, xmm6		/* xmm5 = F. + B.. = R5 */	\
+	__asm	psraw	xmm6, 4			/* xmm6 = op6 */			\
+	\
+	__asm	movdqa	O(4), xmm4		/* Write out op4 */			\
+	__asm	psraw	xmm5, 4			/* xmm5 = op5 */			\
+	\
+	__asm 	movdqa	O(3), xmm3		/* Write out op3 */			\
+	__asm	psubsw	xmm7, xmm0		/* xmm7 = G. - C. = R7 */	\
+	\
+	__asm	paddsw  xmm7, Eight		/* Adjust R7 and R0 before shifting */	\
+	__asm	paddsw  xmm0, xmm0		/* xmm0 = C. + C. */					\
+	\
+	__asm	paddsw  xmm0, xmm7		/* xmm0 = G. + C. = R0 */	\
+	__asm	psraw	xmm7, 4			/* xmm7 = op7 */			\
+	\
+	__asm	movdqa	O(6), xmm6		/* Write out op6 */			\
+	__asm	psraw	xmm0, 4			/* xmm0 = op0 */			\
+	\
+	__asm	movdqa	O(5), xmm5		/* Write out op5 */			\
+	__asm	movdqa	O(7), xmm7		/* Write out op7 */			\
+	\
+	__asm	movdqa	O(0), xmm0		/* Write out op0 */			\
+	\
+	} /* End of Wmt_Column_IDCT10 macro */
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Row_IDCT10
+ *		
+ *		Description:	The Macro does the row 1-D IDct pass, reading only I(0)..I(3). 
+ *
+ *		Input:			I(0)..I(3); constants C(1)..C(7)
+ *
+ *		Output:			I(0)..I(7), written in place with no rounding and no shift
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	I(1) and I(2) also serve as scratch for C. and D.
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/*	
+	The major difference between Willamette processor and other IA32 processors is that 
+	all of the simd integer instructions now support the 128 bit xmm registers instead 
+	of 64 bit mmx registers. By using these instructions, we can do 8 1-D idcts 
+	that take shorts as input and output shorts at once
+
+*/
+
+#define Wmt_Row_IDCT10 __asm {		\
+	\
+	__asm	movdqa	xmm2, I(3)		/* xmm2 = i3 */		\
+	__asm	movdqa	xmm6, C(3)		/* xmm6 = c3 */		\
+	\
+	__asm	movdqa	xmm4, xmm2		/* xmm4 = i3 */		\
+	__asm	pmulhw xmm4, xmm6		/* xmm4 = c3 * i3 - i3 */	\
+	\
+	__asm	movdqa  xmm1, C(5)		/* xmm1 = c5 */		\
+	__asm	movdqa	xmm5, xmm1		/* xmm5 = c5 (never read) */		\
+	\
+	__asm	pmulhw	xmm1, xmm2		/* xmm1 = c5 * i3 - i3 */	\
+	__asm	movdqa  xmm3, I(1)		/* xmm3 = i1 */		\
+	\
+	__asm	movdqa	xmm0, C(1)		/* xmm0 = c1 */		\
+	__asm	paddw   xmm4, xmm2		/* xmm4 = c3 * i3 =C */	\
+	\
+	__asm	movdqa	xmm7, C(7)		/* xmm7 = c7 */				\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = c5 * i3 */	\
+	__asm	movdqa	xmm5, xmm0		/* xmm5 = c1 (never read) */		\
+	\
+	__asm	pmulhw	xmm0, xmm3		/* xmm0 = c1 * i1 - i1 */	\
+	__asm	pxor	xmm6, xmm6		/* clear xmm6 */	\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = - c5 * i3 = D */		\
+	__asm	paddw	xmm0, xmm3		/* xmm0 = c1 * i1 = A */	\
+	\
+	__asm	pmulhw	xmm3, xmm7		/* xmm3 = c7 * i1 = B */	\
+	__asm	movdqa	xmm2, I(2)		/* xmm2 = i2 */		\
+	\
+	__asm	movdqa	xmm1, xmm2		/* xmm1 = i2 */		\
+	__asm	pmulhw	xmm2, C(2)		/* xmm2 = i2 * c2 -i2 */	\
+	\
+	__asm	psubsw	xmm0, xmm4		/* xmm0 = A - C */	\
+	\
+	__asm	paddw	xmm2, xmm1		/* xmm2 = i2 * c2 = G */	\
+	__asm	pmulhw	xmm1, C(6)		/* xmm1 = c6 * i2 = H */	\
+	\
+	__asm	paddsw	xmm4, xmm4		/* xmm4 = C + C */			\
+	__asm	paddsw	xmm4, xmm0		/* xmm4 = A + C = C. */		\
+	\
+	__asm	psubsw	xmm3, xmm6		/* xmm3 = B - D */			\
+	__asm	paddsw	xmm6, xmm6		/* xmm6 = D + D */			\
+	\
+	__asm	paddsw	xmm6, xmm3		/* xmm6 = B + D = D. */		\
+	__asm	movdqa	I(1), xmm4		/* Save C. at I(1)	*/		\
+	\
+	__asm	movdqa	xmm4, C(4)		/* xmm4 = c4 */				\
+	\
+	__asm	movdqa  xmm5, xmm3		/* xmm5 = B - D */			\
+	__asm	pmulhw	xmm3, xmm4		/* xmm3 = ( c4 -1 ) * ( B - D ) */		\
+	\
+	__asm	movdqa	xmm7, xmm2		/* xmm7 = c2 * i2 = G */				\
+	__asm	movdqa	I(2), xmm6		/* Save D. at I(2) */					\
+	\
+	__asm	movdqa	xmm2, xmm0		/* xmm2 = A - C */	\
+	__asm	movdqa	xmm6, I(0)		/* xmm6 = i0 */		\
+	\
+	__asm	pmulhw	xmm0, xmm4		/* xmm0 = ( c4 - 1 ) * ( A - C ) */	\
+	__asm	paddw	xmm5, xmm3		/* xmm5 = c4 * ( B - D ) = B. */			\
+	\
+	__asm	psubsw	xmm5, xmm1		/* xmm5 = B. - H = B.. */			\
+	__asm	paddw	xmm2, xmm0		/* xmm2 = c4 * ( A - C) = A. */		\
+	\
+	__asm	movdqa	xmm0, xmm6		/* xmm0 = i0  */	\
+	__asm	pmulhw	xmm6, xmm4		/* xmm6 = ( c4 - 1 ) *  i0 */	\
+	\
+	__asm	paddsw	xmm1, xmm1		/* xmm1 = H + H */			\
+	__asm	paddsw	xmm1, xmm5		/* xmm1 = B. + H = H. */	\
+	\
+	__asm	paddw	xmm6, xmm0		/* xmm6 = c4 * i0 = E = F */	\
+	__asm	movdqa	xmm4, xmm6		/* xmm4 = c4 * i0 = E */	\
+	\
+	__asm	psubsw	xmm6, xmm2		/* xmm6 = F - A. = F. */	\
+	__asm	paddsw	xmm2, xmm2		/* xmm2 = A. + A. */	\
+	\
+	__asm	movdqa	xmm0, I(1)		/* Load	C. from I(1) */		\
+	__asm	paddsw	xmm2, xmm6		/* xmm2 = F + A. = A.. */	\
+	\
+	__asm	psubsw  xmm2, xmm1		/* xmm2 = A.. - H. = R2 */	\
+	\
+	__asm	paddsw  xmm1, xmm1		/* xmm1 = H. + H. */	\
+	__asm	paddsw  xmm1, xmm2		/* xmm1 = A.. + H. = R1 */	\
+	\
+	__asm	psubsw	xmm4, xmm7		/* xmm4 = E - G = E. */		\
+	\
+	__asm   movdqa	xmm3, I(2)		/* Load D. from I(2) */		\
+	__asm	paddsw	xmm7, xmm7		/* xmm7 = G + G */	\
+	\
+	__asm	movdqa	I(2), xmm2		/* Write out R2 */		\
+	__asm	paddsw  xmm7, xmm4		/* xmm7 = E + G = G. */		\
+	\
+	__asm	movdqa	I(1), xmm1		/* Write out R1 */		\
+	__asm	psubsw  xmm4, xmm3		/* xmm4 = E. - D. = R4 */	\
+	\
+	__asm	paddsw  xmm3, xmm3		/* xmm3 = D. + D. */	\
+	\
+	__asm	paddsw	xmm3, xmm4		/* xmm3 = E. + D. = R3 */	\
+	\
+	__asm	psubsw	xmm6, xmm5		/* xmm6 = F. - B..= R6 */	\
+	\
+	__asm	paddsw	xmm5, xmm5		/* xmm5 = B.. + B.. */	\
+	\
+	__asm	paddsw	xmm5, xmm6		/* xmm5 = F. + B.. = R5 */	\
+	\
+	__asm	movdqa	I(4), xmm4		/* Write out R4 */		\
+	\
+	__asm 	movdqa	I(3), xmm3		/* Write out R3 */		\
+	__asm	psubsw	xmm7, xmm0		/* xmm7 = G. - C. = R7 */	\
+	\
+	__asm	paddsw  xmm0, xmm0		/* xmm0 = C. + C. */	\
+	\
+	__asm	paddsw  xmm0, xmm7		/* xmm0 = G. + C. = R0 */	\
+	\
+	__asm	movdqa	I(6), xmm6		/* Write out R6 */		\
+	\
+	__asm	movdqa	I(5), xmm5		/* Write out R5 */		\
+	__asm	movdqa	I(7), xmm7		/* Write out R7 */		\
+	\
+	__asm	movdqa	I(0), xmm0		/* Write out R0 */		\
+	\
+	} /* End of Wmt_Row_IDCT10 macro */
+
+/**************************************************************************************
+ *
+ *		Macro:			Transpose10
+ *		
+ *		Description:	The Macro does an 8x8 transpose of 16-bit words, in place
+ *
+ *		Input:			I(0)..I(7), rows a..h in the comments below
+ *
+ *		Output:			I(0)..I(7), where I(n) holds word n of every input row
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	I(6) is used as a spill slot before its final value is stored
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+
+#define Transpose10 __asm {	\
+	\
+	__asm	movdqa		xmm4, I(4)		/* xmm4=e7e6e5e4e3e2e1e0 */	\
+	__asm	movdqa		xmm0, I(5)		/* xmm0=f7f6f5f4f3f2f1f0 */	\
+	\
+	__asm	movdqa		xmm5, xmm4		/* make a copy */			\
+	__asm	punpcklwd	xmm4, xmm0		/* xmm4=f3e3f2e2f1e1f0e0 */	\
+	\
+	__asm	punpckhwd	xmm5, xmm0		/* xmm5=f7e7f6e6f5e5f4e4 */	\
+	__asm	movdqa		xmm6, I(6)		/* xmm6=g7g6g5g4g3g2g1g0 */ \
+	\
+	__asm	movdqa		xmm0, I(7)		/* xmm0=h7h6h5h4h3h2h1h0 */ \
+	__asm	movdqa		xmm7, xmm6		/* make a copy */			\
+	\
+	__asm	punpcklwd	xmm6, xmm0		/* xmm6=h3g3h2g2h1g1h0g0 */ \
+	__asm	punpckhwd	xmm7, xmm0		/* xmm7=h7g7h6g6h5g5h4g4 */ \
+	\
+	__asm	movdqa		xmm3, xmm4		/* make a copy */			\
+	__asm	punpckldq	xmm4, xmm6		/* xmm4=h1g1f1e1h0g0f0e0 */	\
+	\
+	__asm	punpckhdq	xmm3, xmm6		/* xmm3=h3g3f3e3h2g2f2e2 */	\
+	__asm	movdqa		I(6), xmm3		/* save h3g3f3e3h2g2f2e2 */	\
+	/* Free xmm6 */ \
+	__asm	movdqa		xmm6, xmm5		/* make a copy */			\
+	__asm	punpckldq	xmm5, xmm7		/* xmm5=h5g5f5e5h4g4f4e4 */ \
+	\
+	__asm	punpckhdq	xmm6, xmm7		/* xmm6=h7g7f7e7h6g6f6e6 */ \
+	__asm	movdqa		xmm0, I(0)		/* xmm0=a7a6a5a4a3a2a1a0 */	\
+	/* Free xmm7 */ \
+	__asm	movdqa		xmm1, I(1)		/* xmm1=b7b6b5b4b3b2b1b0 */	\
+	__asm	movdqa		xmm7, xmm0		/* make a copy */			\
+	\
+	__asm	punpcklwd	xmm0, xmm1		/* xmm0=b3a3b2a2b1a1b0a0 */	\
+	__asm	punpckhwd	xmm7, xmm1		/* xmm7=b7a7b6a6b5a5b4a4 */ \
+	/* Free xmm1 */ \
+	__asm	movdqa		xmm2, I(2)		/* xmm2=c7c6c5c4c3c2c1c0 */ \
+	__asm	movdqa		xmm3, I(3)	    /* xmm3=d7d6d5d4d3d2d1d0 */ \
+	\
+	__asm	movdqa		xmm1, xmm2		/* make a copy */			\
+	__asm	punpcklwd	xmm2, xmm3		/* xmm2=d3c3d2c2d1c1d0c0 */ \
+	\
+	__asm	punpckhwd	xmm1, xmm3		/* xmm1=d7c7d6c6d5c5d4c4 */ \
+	__asm	movdqa		xmm3, xmm0		/* make a copy	*/			\
+	\
+	__asm	punpckldq	xmm0, xmm2		/* xmm0=d1c1b1a1d0c0b0a0 */ \
+	__asm	punpckhdq	xmm3, xmm2		/* xmm3=d3c3b3a3d2c2b2a2 */ \
+	/* Free xmm2 */ \
+	__asm	movdqa		xmm2, xmm7		/* make a copy */			\
+	__asm	punpckldq	xmm2, xmm1		/* xmm2=d5c5b5a5d4c4b4a4 */	\
+	\
+	__asm	punpckhdq	xmm7, xmm1		/* xmm7=d7c7b7a7d6c6b6a6 */ \
+	__asm	movdqa		xmm1, xmm0		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm0, xmm4		/* xmm0=h0g0f0e0d0c0b0a0 */	\
+	__asm	punpckhqdq	xmm1, xmm4		/* xmm1=h1g1f1e1d1c1b1a1 */ \
+	\
+	__asm	movdqa		I(0), xmm0		/* save I(0) */				\
+	__asm	movdqa		I(1), xmm1		/* save I(1) */				\
+	\
+	__asm	movdqa		xmm0, I(6)		/* load h3g3f3e3h2g2f2e2 */ \
+	__asm	movdqa		xmm1, xmm3		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm1, xmm0		/* xmm1=h2g2f2e2d2c2b2a2 */ \
+	__asm	punpckhqdq	xmm3, xmm0		/* xmm3=h3g3f3e3d3c3b3a3 */	\
+	\
+	__asm	movdqa		xmm4, xmm2		/* make a copy */			\
+	__asm	punpcklqdq	xmm4, xmm5		/* xmm4=h4g4f4e4d4c4b4a4 */	\
+	\
+	__asm	punpckhqdq	xmm2, xmm5		/* xmm2=h5g5f5e5d5c5b5a5 */	\
+	__asm	movdqa		I(2), xmm1		/* save I(2) */				\
+	\
+	__asm	movdqa		I(3), xmm3		/* save I(3) */				\
+	__asm	movdqa		I(4), xmm4		/* save I(4) */				\
+	\
+	__asm	movdqa		I(5), xmm2		/* save I(5) */				\
+	__asm	movdqa		xmm5, xmm7		/* make a copy */			\
+	\
+	__asm	punpcklqdq	xmm5, xmm6		/* xmm5=h6g6f6e6d6c6b6a6 */	\
+	__asm	punpckhqdq	xmm7, xmm6		/* xmm7=h7g7f7e7d7c7b7a7 */	\
+	\
+	__asm	movdqa		I(6), xmm5		/* save I(6) */				\
+	__asm	movdqa		I(7), xmm7		/* save I(7) */				\
+	\
+	}/* End of Transpose10 Macro */
+
+
+/**************************************************************************************
+ *
+ *		Macro:			Wmt_Dequant10_Dx
+ *		
+ *		Description:	Dequantizes the first 32 coefficients (rows 0..3) of a block
+ *
+ *		Input:			[eax], quantized input, 
+ *						[ebx], quantization table,
+ *
+ *		Output:			[edx] .. [edx+63]; the remaining 64 bytes are not written here
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	movdqa/pmullw memory operands must be 16-byte aligned
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+#define Wmt_Dequant10_Dx __asm {		\
+	__asm	movdqa	xmm0, [eax]			/* load quantized row 0 */				\
+	__asm	movdqa	xmm1, [eax + 16]	/* load quantized row 1 */				\
+	\
+	__asm	pmullw	xmm0, [ebx]			/* xmm0 = 07 06 05 04 03 02 01 00 */	\
+	__asm	pmullw	xmm1, [ebx + 16]	/* xmm1 = 17 16 15 14 13 12 11 10 */	\
+	\
+	__asm	movdqa	xmm2, [eax + 32]	/* load quantized row 2 */				\
+	__asm	movdqa	xmm3, [eax + 48]	/* load quantized row 3 */				\
+	\
+	__asm	pmullw	xmm2, [ebx + 32]	/* xmm2 = 27 26 25 24 23 22 21 20 */	\
+	__asm	pmullw	xmm3, [ebx + 48]	/* xmm3 = 37 36 35 34 33 32 31 30 */	\
+	\
+	__asm	movdqa	[edx], xmm0			/* write row 0 */	\
+	__asm	movdqa	[edx + 16], xmm1	/* write row 1 */	\
+	\
+	__asm	movdqa	[edx+32], xmm2		/* write row 2 */	\
+	__asm	movdqa	[edx+48], xmm3		/* write row 3 */	\
+	\
+	}/* end of Wmt_Dequant10_Dx Macro */
+
+
+
+
+/**************************************************************************************
+ *
+ *		Routine:		Wmt_IDct10_Dx
+ *		
+ *		Description:	Perform IDCT on a 8x8 block where only the first 10 coeffs are 
+ *						non-zero coefficients.
+ *
+ *		Input:			InputData (coefficients), QuantizationTable, OutputData (destination)
+ *
+ *		Output:			Written through OutputData; I() and O() both address that buffer
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	The input coefficients are in raster order
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+void  Wmt_IDct10_Dx(short *InputData, short *QuantizationTable, short *OutputData)
+{
+
+	
+	__asm 
+	{
+        push    ebx							/* ebx is loaded with the table pointer below */
+
+		mov		eax, InputData				/* eax -> quantized coefficients */
+		mov		ebx, QuantizationTable		/* ebx -> quantization table */
+		mov		edx, OutputData				/* edx -> output block */
+		lea		ecx, WmtIdctConst			/* ecx -> constants addressed by C(i) */
+		
+		Wmt_Dequant10_Dx					/* rows 0..3 of [edx] = [eax] * [ebx] */
+
+#define I(i) [edx + 16 * i ]
+#define O(i) [edx + 16 * i ]
+#define C(i) [ecx + 16 * (i-1) ]
+
+		
+		/* NOTE(review): I/O/C are redefined without #undef; this relies on identical earlier definitions */
+
+		Wmt_Row_IDCT10						/* reads I(0)..I(3), writes I(0)..I(7) */
+
+		Transpose10							/* 8x8 word transpose of I(0)..I(7) */
+		
+		Wmt_Column_IDCT10					/* reads I(0)..I(3), writes O(0)..O(7) */
+
+        pop     ebx							/* restore the caller's ebx */
+	}
+
+}
+/**************************************************************************************
+ *
+ *		Routine:		Wmt_idct1
+ *		
+ *		Description:	Perform IDCT on a 8x8 block where only the first coeff is used
+ *
+ *		Input:			input (coefficients), qtbl (quantization table), output (destination)
+ *
+ *		Output:			All 64 output words = (input[0] * qtbl[0] + 15) >> 5, 16-bit math
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	We only have one coefficient; output must be 16-byte aligned (movdqa)
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+void Wmt_idct1 (short * input, short * qtbl, short * output) 
+{
+    __asm
+    {
+        mov         eax,    [input]			/* eax -> coefficients */
+        mov         edx,    0xf				/* rounding term 15 */
+
+        movd        xmm2,   edx				/* xmm2 word 0 = 15, other words = 0 */
+
+        mov         ecx,    [qtbl]			/* ecx -> quantization table */
+        mov         edx,    [output]		/* edx -> output block */
+        
+        movq        xmm0,   QWORD ptr [eax]	/* loads 4 words; only word 0 reaches the output */
+        movq        xmm1,   QWORD ptr [ecx]	/* loads 4 words; only word 0 reaches the output */
+
+        pmullw      xmm0,   xmm1;			/* word 0 = low 16 bits of input[0] * qtbl[0] */
+        paddw       xmm0,   xmm2			/* word 0 += 15 (wraps on overflow) */
+
+        psraw       xmm0,   5;        		/* arithmetic shift right by 5 */
+        punpcklwd   xmm0,   xmm0;			/* duplicate word 0 into words 0..1 */
+        
+        punpckldq   xmm0,   xmm0;			/* ... into words 0..3 */
+        punpcklqdq  xmm0,   xmm0;			/* ... into all 8 words */
+
+        movdqa      xmm1,   xmm0			/* second copy of the broadcast value */
+        
+        movdqa      [edx],  xmm0;        	/* row 0 */
+        movdqa      [edx+16], xmm1;			/* row 1 */
+
+        movdqa      [edx+32],  xmm0;        /* row 2 */
+        movdqa      [edx+48], xmm1;			/* row 3 */
+
+        movdqa      [edx+64],  xmm0;        /* row 4 */
+        movdqa      [edx+80], xmm1;			/* row 5 */
+        
+        movdqa      [edx+96],  xmm0;        /* row 6 */
+        movdqa      [edx+112], xmm1;		/* row 7 */
+
+    }
+}
+/**************************************************************************************
+ **************  Wmt_IDCT3       ******************************************************
+ **************************************************************************************
+ */
+
+/**************************************************************************************
+ *
+ *		Routine:		Wmt_IDCT3
+ *		
+ *		Description:	Perform IDCT on a 8x8 block with at most 3 nonzero coefficients
+ *
+ *		Input:			Pointer to input and output buffer				
+ *
+ *		Output:			None
+ *		
+ *		Return:			None			
+ *
+ *		Special Note:	Intel Compiler, Please
+ *
+ *		Error:			None
+ *
+ ***************************************************************************************
+ */
+
+/***************************************************************************************
+	In IDCT 3, we are dealing with only three Non-Zero coefficients in the 8x8 block. 
+	In the case that we work in the fashion RowIDCT -> ColumnIDCT, we only have to 
+	do 1-D row idcts on the first two rows, the remaining six rows stay zero anyway. 
+	After row IDCTs, since every column could have nonzero coefficients, we need to do
+	eight 1-D column IDCTs. However, for each column, there are at most two nonzero
+	coefficients, coefficient 0 and coefficient 1. The same holds for the coefficients of
+	the two 1-D row idcts. For this reason, the process of a 1-D IDCT is simplified 
+	
+	from a full version:
+	
+	A = (C1 * I1) + (C7 * I7)		B = (C7 * I1) - (C1 * I7)
+	C = (C3 * I3) + (C5 * I5)		D = (C3 * I5) - (C5 * I3)
+	A. = C4 * (A - C)				B. = C4 * (B - D)
+    C. = A + C						D. = B + D
+   
+    E = C4 * (I0 + I4)				F = C4 * (I0 - I4)
+    G = (C2 * I2) + (C6 * I6)		H = (C6 * I2) - (C2 * I6)
+    E. = E - G
+    G. = E + G
+   
+    A.. = F + A.					B.. = B. - H
+    F.  = F - A. 					H.  = B. + H
+   
+    R0 = G. + C.	R1 = A.. + H.	R3 = E. + D.	R5 = F. + B..
+    R7 = G. - C.	R2 = A.. - H.	R4 = E. - D.	R6 = F. - B..
+
+	To:
+
+
+	A = (C1 * I1)					B = (C7 * I1)
+	C = 0							D = 0
+	A. = C4 * A 					B. = C4 * B 
+    C. = A							D. = B 
+   
+    E = C4 * I0 					F = E
+    G = 0							H = 0
+    E. = E 
+    G. = E 
+
+    A.. = E + A.					B.. = B. 
+    F.  = E - A. 					H.  = B. 
+   
+    R0 = E + A		R1 = E + A. + B.	R3 = E + B		R5 = E - A. + B.
+    R7 = E - A		R2 = E + A. - B.	R4 = E - B		R6 = F - A. - B.
+	
+******************************************************************************************/
diff --git a/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
new file mode 100644
index 00000000..60436225
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
@@ -0,0 +1,281 @@
+ /****************************************************************************
+ *
+ *   Module Title :     wmtrecon.c (derived from WmtOptFunctions.c)
+ *
+ *   Description  :     willamette processor specific 
+ *                      optimised versions of functions
+ *
+ *   AUTHOR      :		Yaowu Xu
+ *
+ *	 Special Note:		
+ *
+ *****************************************************************************
+ *   Revision History
+ *
+ *
+ *   1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
+ * 			Added push and pop ebx in WmtReconIntra
+ *   1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
+ *   1.01 YWX 13 JUL 00 New Willamette Optimized Functions
+ *   1.00 YWX 14/06/00  Configuration baseline from OptFunctions.c
+ *
+ *****************************************************************************
+ */
+ 
+/* 
+    Use Tim's optimized version.
+*/
+
+/****************************************************************************
+ *  Header Files
+ *****************************************************************************
+ */
+
+#define STRICT              // Strict type checking. 
+
+#include "reconstruct.h"
+
+/****************************************************************************
+ *  Module constants.
+ *****************************************************************************
+ */        
+
+/**************************************************************************** 
+ *  Imports.
+ *****************************************************************************
+ */   
+
+
+/****************************************************************************
+ *  Exported Global Variables
+ *****************************************************************************
+ */
+
+/****************************************************************************
+ *  Exported Functions 
+ *****************************************************************************
+ */              
+
+/****************************************************************************
+ *  Module Statics
+ *****************************************************************************
+ */  
+
+
+
+_declspec(align(16)) static  UINT8 Eight128s[8] =  {128,128,128,128,128,128,128,128};	// 0x80 per byte; WmtReconIntra XORs this into its packed signed bytes (same as adding 128)
+
+#pragma warning( disable : 4799 )  // Disable no emms instruction warning!
+                                      
+/****************************************************************************
+*  Forward References
+*****************************************************************************
+*/  
+
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconIntra
+ *
+ *  INPUTS        :     INT16 *  TmpDataBuffer
+ *                               Unused by this implementation
+ *
+ *                      UINT16 * idct
+ *                               Pointer to the output from the idct for this block
+ *                               (128 bytes, treated as signed 16 bit values, read with movdqa)
+ *
+ *                      UINT32   stride
+ *                               Line Length in pixels in the recon image
+ *  OUTPUTS       :     UINT8 *  dest
+ *                               The reconstruction buffer
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs an intra block - wmt version.
+ *                      Each row: saturate 8 inputs to signed bytes, add 128, store 8 pixels.
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
+{
+	(void)TmpDataBuffer;
+    __asm
+    {
+	
+		push		ebx
+
+        mov         eax,[idct]						; Signed 16 bit inputs
+        mov         edx,[dest]						; Unsigned 8 bit outputs
+
+        movq		xmm0,QWORD PTR [Eight128s]		; Low 8 bytes of xmm0 = 0x80 each (movq zeroes the upper 8 bytes)
+		pxor		xmm3, xmm3						; set xmm3 to 0
+													;
+        mov         ebx,[stride]					; Line stride in output buffer
+        lea         ecx,[eax+128]					; Endpoint in input buffer (8 rows x 16 bytes)
+
+loop_label:                                 
+
+        movdqa		xmm2,XMMWORD PTR [eax]			; Read the eight inputs
+		packsswb	xmm2,xmm3						; Saturate the 8 words to signed bytes (upper 8 bytes come from xmm3 = 0)
+		
+		pxor        xmm2,xmm0						; Convert result to unsigned (same as add 128)
+        lea         eax,[eax + 16]					; Step source buffer
+
+        cmp         eax,ecx							; are we done
+        movq		QWORD PTR [edx],xmm2			; store results (movq and lea leave the flags from cmp intact)
+
+        lea         edx,[edx+ebx]					; Step output buffer
+        jc          loop_label						; Loop back if we are not done
+
+		pop			ebx
+    }
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconInter
+ *
+ *  INPUTS        :     UINT8 *  RefPtr
+ *                               The last frame reference
+ *
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data
+ *
+ *                      UINT32   LineStep
+ *                               Line Length in pixels in recon and ref images
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from last data and change
+ *
+ *  SPECIAL NOTES :     TmpDataBuffer is unused by this implementation.
+ *                      ChangePtr is read with movdqa (128 bytes, 16 per row).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
+{
+    (void) TmpDataBuffer;
+
+ _asm {
+		push	edi
+		
+		mov		ebx, [RefPtr]				; NOTE(review): ebx is not pushed here; presumably relies on MSVC preserving it around __asm - confirm
+		mov		ecx, [ChangePtr]
+
+		mov		eax, [ReconPtr]
+		mov		edx, [LineStep]
+
+		pxor	xmm0, xmm0					; zero, used for unpacking and packing
+		lea		edi, [ecx + 128]			; end of the change data (8 rows x 16 bytes)
+  L:
+		movq	xmm2, QWORD ptr [ebx]		; (+3 misaligned) 8 reference pixels
+		movdqa	xmm4, XMMWORD ptr [ecx]		; 8 changes
+		
+		punpcklbw xmm2, xmm0				; zero-extend the 8 pixels to 16-bit words
+
+		add	ebx, edx						; next row of reference pixels
+		paddsw	xmm2, xmm4					; add in the 8 changes (signed saturating)
+
+		lea		ecx, [ecx + 16]				; next row of changes
+		packuswb xmm2, xmm0					; pack result to unsigned 8-bit values
+
+		cmp		ecx, edi					; are we done?
+		movq	QWORD PTR [eax], xmm2		; store result
+
+		lea		eax, [eax+edx]				; next row of output
+		jc		L							; loop back while ecx is below the end pointer
+
+		pop		edi
+ }
+
+}
+/****************************************************************************
+ * 
+ *  ROUTINE       :     WmtReconInterHalfPixel2
+ *
+ *  INPUTS        :     UINT8 *  RefPtr1, RefPtr2
+ *                               The last frame reference
+ *
+ *                      INT16 *  ChangePtr
+ *                               Pointer to the change data
+ *
+ *                      UINT32   LineStep
+ *                               Line Length in pixels in recon and ref images
+ *                               
+ *
+ *  OUTPUTS       :     UINT8 *  ReconPtr
+ *                               The reconstruction
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Reconstructs data from half pixel reference data and change. 
+ *                      Half pixel data interpolated from 2 references.
+ *
+ *  SPECIAL NOTES :     TmpDataBuffer is unused by this implementation.
+ *                      ChangePtr is read with movdqa (128 bytes, 16 per row).
+ *
+ *  ERRORS        :     None.
+ *
+ ****************************************************************************/
+
+void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, 
+		    	              UINT8 * RefPtr1, UINT8 * RefPtr2, 
+						      INT16 * ChangePtr, UINT32 LineStep )
+{
+	(void)TmpDataBuffer;
+
+ _asm {
+	push	esi
+	push	edi
+
+	mov		ecx, [ChangePtr]
+	mov		esi, [RefPtr1]
+
+	mov		edi, [RefPtr2]
+	mov		ebx, [ReconPtr]
+	
+	mov		edx, [LineStep]
+	lea		eax, [ecx+128]					; end of the change data (8 rows x 16 bytes)
+
+	pxor	xmm0, xmm0						; zero, used for unpacking and packing
+
+  L:
+	
+	movq		xmm2, QWORD PTR [esi]		; (+3 misaligned) xmm2 = 8 pixels from ref1
+	movq		xmm4, QWORD PTR [edi]		; (+3 misaligned) xmm4 = 8 pixels from ref2
+
+	punpcklbw	xmm2, xmm0					; zero-extend ref1 pixels to 16-bit words
+	punpcklbw	xmm4, xmm0					; zero-extend ref2 pixels to 16-bit words
+
+	movdqa		xmm6, [ecx]					; xmm6 = 8 changes for this row
+	paddw		xmm2, xmm4					; xmm2 = ref1 + ref2 (at most 510, cannot overflow a word)
+
+											; The change is added with signed saturation, as in WmtReconInter,
+											; so an extreme change value cannot wrap around before the final pack.
+	psrlw		xmm2, 1						; xmm2 = (ref1 + ref2)/2
+	paddsw		xmm2, xmm6					; add changes (signed saturating)
+
+	lea			ecx, [ecx+16]				; next row idct
+	packuswb	xmm2, xmm0					; pack to unsigned 8-bit with saturation
+	
+	add			esi, edx					; next row ref1
+	add			edi, edx					; next row ref2
+	
+	cmp			ecx, eax
+	movq		QWORD PTR [ebx], xmm2		; store result
+	 ;
+	lea			ebx, [ebx+edx]
+	jc		L				
+
+	pop		edi
+	pop		esi
+ }
+}
+
+
diff --git a/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst b/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst
new file mode 100644
index 00000000..248b173e
--- /dev/null
+++ b/Src/libvpShared/corelibs/cdxv/vpxblit/Releasebcy00.lst
@@ -0,0 +1,495 @@
+Microsoft (R) Macro Assembler Version 9.00.30729.01	    06/23/11 13:09:02
+c:\Winamp\libvp6\corelibs\cdxv\vpxblit\wx86\bcy00.asm	     Page 1 - 1
+
+
+				;//==========================================================================
+				;//
+				;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+				;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+				;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+				;//  PURPOSE.
+				;//
+				;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+				;//
+				;//--------------------------------------------------------------------------
+
+
+				;/***********************************************\
+				;??? bcy00.asm   
+				; yv12 to yuy2 same blitter
+				;\***********************************************/ 
+				 
+				        .586
+				        .387
+				        .MODEL  flat, SYSCALL, os_dos
+						.MMX
+				 
+ 00000000			        .CODE
+
+				NAME x86bcy00
+
+				PUBLIC bcy00_MMX_
+				PUBLIC _bcy00_MMX
+				 
+
+				INCLUDE wilk.ash
+			      C ;//==========================================================================
+			      C ;//
+			      C ;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+			      C ;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+			      C ;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+			      C ;//  PURPOSE.
+			      C ;//
+			      C ;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+			      C ;//
+			      C ;//--------------------------------------------------------------------------
+			      C 
+			      C 
+			      C ;/***********************************************\
+			      C ; 
+			      C ;\***********************************************/ 
+			      C 
+			      C ;;
+			      C ;; YUV buffer configuration structure
+			      C ;;
+			      C ;------------------------------------------------
+ 00000030		      C YUV_BUFFER_CONFIG  STRUC
+ 00000000  00000000	      C     YWidth              dd ?
+ 00000004  00000000	      C     YHeight             dd ?
+ 00000008  00000000	      C     YStride             dd ?
+			      C 
+ 0000000C  00000000	      C     UVWidth             dd ?
+ 00000010  00000000	      C     UVHeight            dd ?
+ 00000014  00000000	      C     UVStride            dd ?
+			      C 
+ 00000018  00000000	      C     YBuffer             dd ?
+ 0000001C  00000000	      C     UBuffer             dd ?
+ 00000020  00000000	      C     VBuffer             dd ?
+			      C 
+ 00000024  00000000	      C     uvStart             dd ?
+ 00000028  00000000	      C     uvDstArea           dd ?
+ 0000002C  00000000	      C     uvUsedArea          dd ?
+			      C YUV_BUFFER_CONFIG  ENDS
+			      C ;------------------------------------------------
+			      C 
+				INCLUDE wblit.ash
+			      C ;//==========================================================================
+			      C ;//
+			      C ;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+			      C ;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+			      C ;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+			      C ;//  PURPOSE.
+			      C ;//
+			      C ;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+			      C ;//
+			      C ;//--------------------------------------------------------------------------
+			      C 
+			      C 
+			      C ;/***********************************************\
+			      C ;??? wblit.ash
+			      C ; 
+			      C ;\***********************************************/ 
+			      C 
+			      C ;------------------------------------------------
+ 00000028		      C x86_Params  STRUC
+ 00000000  00000006 [	      C                     dd  6 dup (?)   ;6 pushed regs
+	    00000000
+	   ]
+ 0DD0000C  00000000	      C                     dd  ?           ;return address
+ 0DD00010  00000000	      C     dst             dd  ?
+ 0DD00014  00000000	      C     scrnPitch       dd  ?
+ 0DD00018  00000000	      C     buffConfig      dd  ?
+			      C x86_Params  ENDS
+			      C ;------------------------------------------------
+			      C 
+			      C EXTERNDEF _WK_YforY:DWORD
+			      C EXTERNDEF _WK_UforBG:DWORD
+			      C EXTERNDEF _WK_VforRG:DWORD
+			      C 
+			      C EXTERNDEF _WK_YforY_MMX:DWORD
+			      C EXTERNDEF _WK_UforBG_MMX:DWORD
+			      C EXTERNDEF _WK_VforRG_MMX:DWORD
+			      C 
+			      C EXTERNDEF _WK_ClampTableR:DWORD
+			      C EXTERNDEF _WK_ClampTableG:DWORD
+			      C EXTERNDEF _WK_ClampTableB:DWORD
+			      C 
+			      C EXTERNDEF _WK_ClampTableR555:DWORD
+			      C EXTERNDEF _WK_ClampTableG555:DWORD
+			      C EXTERNDEF _WK_ClampTableB555:DWORD
+			      C 
+			      C EXTERNDEF _WK_ClampTableR565:DWORD
+			      C EXTERNDEF _WK_ClampTableG565:DWORD
+			      C EXTERNDEF _WK_ClampTableB565:DWORD
+			      C 
+ = 00000600		      C CLAMPCENTER EQU 256*4+128*4
+			      C 
+			      C 
+			      C EXTERNDEF WK_johnsTable_MMX:DWORD
+			      C EXTERNDEF WK_johnsTable:DWORD
+			      C 
+			      C EXTERNDEF WK_RGB_MULFACTOR_555:QWORD
+			      C EXTERNDEF WK_RB_MASK_555:QWORD
+			      C EXTERNDEF WK_G_MASK_555:QWORD
+			      C 
+			      C EXTERNDEF WK_RGB_MULFACTOR_565:QWORD
+			      C EXTERNDEF WK_RB_MASK_565:QWORD
+			      C EXTERNDEF WK_G_MASK_565:QWORD
+			      C 
+			      C EXTERNDEF WK_MASK_YY_MMX:DWORD
+			      C EXTERNDEF WK_MASK_BYTE0:DWORD
+			      C 
+			      C 
+
+				;------------------------------------------------
+				; local vars
+ 00000000			L_3s            QWORD   0003000300030003h      ; 4 3's 
+	   0003000300030003
+ 00000008			L_2s            QWORD   0002000200020002h      ; 4 3's 
+	   0002000200020002
+
+ = 00000000			L_blkWidth      EQU 0
+ = 00000004			L_YStride       EQU L_blkWidth+4
+ = 00000008			L_Height		EQU L_YStride+4
+ = 0000000C			L_extraWidth	EQU L_Height+4
+ = 00000010			L_tempspaceL    EQU L_extraWidth+4
+ = 00000018			L_tempspaceH    EQU L_tempspaceL+8
+ = 00000020			LOCAL_SPACE     EQU L_tempspaceL+16
+				 
+				;------------------------------------------------
+				;void bcy00_MMX(unsigned long *dst, int scrnPitch, YUV_BUFFER_CONFIG *buffConfig); 
+				;
+ 00000010			bcy00_MMX_:
+ 00000010			_bcy00_MMX:
+ 00000010  56			    push    esi
+ 00000011  57			    push    edi
+
+ 00000012  55			    push    ebp
+ 00000013  53			    push    ebx 
+
+ 00000014  51			    push    ecx
+ 00000015  52			    push    edx
+
+ 00000016  8B 7C 24 1C		    mov         edi,[esp].dst				; edi = dst
+ 0000001A  8B 6C 24 24		    mov         ebp,[esp].buffConfig		; ebp = buffConfig
+
+ 0000001E  90			nop
+ 0000001F  83 EC 20		    sub         esp,LOCAL_SPACE				
+
+ 00000022  8B 45 08		    mov         eax,[ebp].YStride			; eax = YStride
+ 00000025  89 44 24 04		    mov         L_YStride[esp],eax			; save to local
+
+ 00000029  8B 45 04		    mov         eax,[ebp].YHeight			; eax = Height
+ 0000002C  48			    dec         eax                         ; 1 less than full height
+
+ 0000002D  89 44 24 08		    mov			L_Height[esp], eax			; save to local
+ 00000031  8B 4D 00		    mov         ecx,[ebp].YWidth			; ecx = YWidth
+
+				;	mov			eax, [esp+LOCAL_SPACE].scrnPitch
+
+ 00000034  8B C1			mov			eax, ecx					; eax = YWidth
+ 00000036  C1 E9 03		    shr         ecx,3                       ;blocks of 8 pixels
+
+ 00000039  8B 75 18		    mov         esi,[ebp].YBuffer			; esi = YBuffer
+ 0000003C  33 DB		    xor         ebx,ebx						; ebx = 0
+
+ 0000003E  89 0C 24		    mov         L_blkWidth[esp],ecx			; Save YWidth/8 to local
+ 00000041  83 E0 07		    and			eax, 7						; extraWidth
+					
+ 00000044  89 44 24 0C			mov			L_extraWidth[esp], eax		; save extraWidth;
+ 00000048  8B C3			mov         eax,ebx						; eax = 0;
+
+ 0000004A  8B 55 20		    mov         edx,[ebp].VBuffer			; edx = YBuffer
+ 0000004D  8B 6D 1C		    mov         ebp,[ebp].UBuffer			; ebp = UBuffer
+
+ 00000050  0F EF E4		    pxor        mm4,mm4			    
+				;
+				;   eax = 0
+				;	ebx = 0;
+				;   ecx = YWidth/8
+				;   edx = VBuffer
+				;   ebp = UBuffer
+				;	esi = YBuffer
+				;   edi = dst
+				;
+
+ 00000053			hloop:
+ 00000053			wloop:
+ 00000053  0F 6F 3C 13		    movq        mm7,[edx+ebx]               ; get 8 v's
+ 00000057  0F 60 FC		    punpcklbw   mm7,mm4                     ; unpack v's with 0's
+
+ 0000005A  0F D5 3D		    pmullw      mm7,L_3s					; v's * 3
+	   00000000 R
+ 00000061  0F 6F 2C 10		    movq        mm5,[edx+eax]               ; get 8 v's pointed to by eax
+
+ 00000065  0F 7F EE		    movq        mm6,mm5						; copy to mm7
+ 00000068  0F 60 F4		    punpcklbw   mm6,mm4                     ; unpack v's with 0's
+
+ 0000006B  0F FD FE		    paddw	    mm7,mm6						; mm7 = 3 * [ebx] + 1 * [eax]
+ 0000006E  0F FD 3D		    paddw       mm7,L_2s					; mm7 = 3 * [ebx] + 1 * [eax] + 2
+	   00000008 R
+
+ 00000075  0F 71 D7 02		    psrlw       mm7,2						; mm7 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 00000079  0F 6F 1C 2B		    movq        mm3,[ebp+ebx]               ; get 8 u's into mm1
+ 0000007D  0F 60 DC		    punpcklbw   mm3,mm4                     ; unpack u's with 0's
+
+ 00000080  0F D5 1D		    pmullw      mm3,L_3s					; u's * 3
+	   00000000 R
+ 00000087  0F 6F 2C 28		    movq        mm5,[ebp+eax]               ; get 8 u's pointed to by eax
+
+ 0000008B  0F 7F EE		    movq        mm6,mm5						; copy to mm3
+ 0000008E  0F 60 F4		    punpcklbw   mm6,mm4                     ; unpack u's with 0's
+
+ 00000091  0F FD DE		    paddw	    mm3,mm6						; mm3 = 3 * [ebx] + 1 * [eax]
+ 00000094  0F FD 1D		    paddw       mm3,L_2s					; mm3 = 3 * [ebx] + 1 * [eax] + 2
+	   00000008 R
+					
+ 0000009B  0F 71 D3 02		    psrlw       mm3,2						; mm3 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 0000009F  0F 71 F7 08		    psllw       mm7,8						; v3 0 v2 0 v1 0 v0 0 
+ 000000A3  0F EB DF		    por			mm3,mm7						; v3 u3 v2 u2 v1 u1 v0 u0 
+
+ 000000A6  0F 6F 04 5E		    movq        mm0,[esi+ebx*2]             ; get the y's
+ 000000AA  0F 7F C1		    movq        mm1,mm0                     ; save upper y's
+
+ 000000AD  0F 60 C3		    punpcklbw   mm0,mm3                     ; v1 y3 u1 y2 v0 y1 u0 y0
+ 000000B0  0F 68 CB		    punpckhbw   mm1,mm3                     ; v3 y7 u3 y6 v2 y5 u2 y4
+
+ 000000B3  49			    dec         ecx
+ 000000B4  0F 7F 04 9F		    movq        [edi+ebx*4],mm0             ;write first 4 pixels
+				;-
+
+ 000000B8  0F 7F 4C 9F 08	    movq        8[edi+ebx*4],mm1			;write next 4 pixels
+				;-
+
+ 000000BD  8D 40 04		    lea         eax,[eax+4]			  ;increment *1 pointer to next pixel
+ 000000C0  8D 5B 04		    lea         ebx,[ebx+4]			  ;increment *3 pointer to next pixel
+				    
+ 000000C3  7F 8E			jg          wloop
+				;------------------------------------------------------------
+				; need to handle the line end condition when YWidth%8 !=0
+				;------------------------------------------------------------
+ 000000C5  8B 4C 24 0C			mov			ecx, L_extraWidth[esp]		; extraWidth
+
+ 000000C9			ewloop:
+ 000000C9  49				dec			ecx
+ 000000CA  78 79			js			phloop
+
+ 000000CC  0F 6F 3C 13		    movq        mm7,[edx+ebx]               ; get 8 v's
+ 000000D0  0F 60 FC		    punpcklbw   mm7,mm4                     ; unpack v's with 0's
+
+ 000000D3  0F D5 3D		    pmullw      mm7,L_3s					; v's * 3
+	   00000000 R
+ 000000DA  0F 6F 2C 10		    movq        mm5,[edx+eax]               ; get 8 v's pointed to by eax
+
+ 000000DE  0F 7F EE		    movq        mm6,mm5						; copy to mm7
+ 000000E1  0F 60 F4		    punpcklbw   mm6,mm4                     ; unpack v's with 0's
+
+ 000000E4  0F FD FE		    paddw	    mm7,mm6						; mm7 = 3 * [ebx] + 1 * [eax]
+ 000000E7  0F FD 3D		    paddw       mm7,L_2s					; mm7 = 3 * [ebx] + 1 * [eax] + 2
+	   00000008 R
+
+ 000000EE  0F 71 D7 02		    psrlw       mm7,2						; mm7 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 000000F2  0F 6F 1C 2B		    movq        mm3,[ebp+ebx]               ; get 8 u's into mm1
+ 000000F6  0F 60 DC		    punpcklbw   mm3,mm4                     ; unpack u's with 0's
+
+ 000000F9  0F D5 1D		    pmullw      mm3,L_3s					; u's * 3
+	   00000000 R
+ 00000100  0F 6F 2C 28		    movq        mm5,[ebp+eax]               ; get 8 u's pointed to by eax
+
+ 00000104  0F 7F EE		    movq        mm6,mm5						; copy to mm3
+ 00000107  0F 60 F4		    punpcklbw   mm6,mm4                     ; unpack u's with 0's
+
+ 0000010A  0F FD DE		    paddw	    mm3,mm6						; mm3 = 3 * [ebx] + 1 * [eax]
+ 0000010D  0F FD 1D		    paddw       mm3,L_2s					; mm3 = 3 * [ebx] + 1 * [eax] + 2
+	   00000008 R
+					
+ 00000114  0F 71 D3 02		    psrlw       mm3,2						; mm3 = ( 3 * [ebx] + 1 * [eax] + 2 ) / 4
+
+ 00000118  0F 71 F7 08		    psllw       mm7,8						; v3 0 v2 0 v1 0 v0 0 
+ 0000011C  0F EB DF		    por			mm3,mm7						; v3 u3 v2 u2 v1 u1 v0 u0 
+
+ 0000011F  0F 6F 04 5E		    movq        mm0,[esi+ebx*2]             ; get the y's
+ 00000123  0F 7F C1		    movq        mm1,mm0                     ; save upper y's
+
+ 00000126  0F 60 C3		    punpcklbw   mm0,mm3                     ; v1 y3 u1 y2 v0 y1 u0 y0
+ 00000129  0F 68 CB		    punpckhbw   mm1,mm3                     ; v3 y7 u3 y6 v2 y5 u2 y4
+
+ 0000012C  0F 7F 44 24 10	    movq        L_tempspaceL[esp],mm0       ;write first 4 pixels
+				;-
+
+ 00000131  0F 7F 4C 24 18	    movq        L_tempspaceH[esp],mm1		;write next 4 pixels
+				;-
+				;------------------------------------------------------------
+				; uncomment the next two line of code will make the image
+				; output have a white last vertical line
+				;------------------------------------------------------------
+				;	mov			eax, 0ff80ff80h
+				;	mov			L_tempspaceH[esp], eax;		; read two bytes			
+				;------------------------------------------------------------
+
+ 00000136  8D 1C 9F			lea			ebx, [edi + ebx*4];get the destination pointer	
+ 00000139			cploop:
+ 00000139  66| 8B 44 4C			mov			ax,  L_tempspaceL[esp + ecx * 2];		; read two bytes			
+	   10
+ 0000013E  66| 89 04 4B			mov			[ebx+ecx*2], ax
+
+ 00000142  49				dec			ecx	
+ 00000143  7D F4			jge			cploop
+
+ 00000145			phloop:
+				;------------------------------------------------------------
+				; prepare for the next line
+				;------------------------------------------------------------
+ 00000145  8B 4C 24 08		    mov         ecx,DWORD PTR L_Height[esp] ;get current line number
+ 00000149  8B 5C 24 40		    mov         ebx,[esp+LOCAL_SPACE].scrnPitch
+
+ 0000014D  C1 E1 1F		    shl         ecx,31                      ; save low bit
+ 00000150  03 FB		    add         edi,ebx
+
+ 00000152  C1 F9 1F		    sar         ecx,31                      ; even lines ecx = 00000000 odd lines it equals FFFFFFFF
+
+ 00000155  8B 5C 24 04		    mov         ebx,L_YStride[esp]		  
+ 00000159  D1 FB		    sar         ebx,1						; ebx is uv stride
+				 
+ 0000015B  8B C3		    mov         eax,ebx                     ; eax is uv stride 
+ 0000015D  23 C1		    and         eax,ecx                     ; odd lines eax equals uvpitch even lines eax = 0 
+
+ 0000015F  F7 D1		    not         ecx                         ; even lines ecx = ffffffff odd lines it equals 00000000
+ 00000161  23 D9		    and         ebx,ecx						; ebx = uv pitch on even lines and 0 on odd lines
+
+ 00000163  2B EB		    sub	    ebp,ebx							; increment u pointer if we're on an odd line
+ 00000165  2B D3		    sub         edx,ebx						; increment v pointer if we're on an odd line
+
+ 00000167  F7 D8		    neg	    eax								; eax = -uvpitch on odd lines and 0 on even lines
+ 00000169  03 C3		    add         eax,ebx						; eax = -uvpitch on odd lines and +uv pitch on even lines
+
+ 0000016B  33 DB		    xor         ebx,ebx						; ebx is used as column pointer so set it to 0  
+ 0000016D  8B 0C 24		    mov         ecx,L_blkWidth[esp]
+
+ 00000170  2B 74 24 04		    sub         esi,DWORD PTR L_YStride[esp]
+
+ 00000174  FF 4C 24 08		    dec         DWORD PTR L_Height[esp]
+ 00000178  0F 8F FFFFFED5	    jg          hloop
+
+ 0000017E  8B C3		    mov	    eax,ebx				  ; last line ebx and eax should point to the same line
+ 00000180  0F 84 FFFFFECD	    jz          hloop 
+				;------------------------------------------------
+
+ 00000186			theExit:
+ 00000186  83 C4 20		    add         esp,LOCAL_SPACE
+ 00000189  90			nop
+
+ 0000018A  0F 77		    emms
+
+ 0000018C  5A			    pop     edx
+ 0000018D  59			    pop     ecx
+ 0000018E  5B			    pop     ebx
+ 0000018F  5D			    pop     ebp
+ 00000190  5F			    pop     edi
+ 00000191  5E			    pop     esi
+
+ 00000192  C3			    ret
+
+				;************************************************
+				        END
+Microsoft (R) Macro Assembler Version 9.00.30729.01	    06/23/11 13:09:02
+c:\Winamp\libvp6\corelibs\cdxv\vpxblit\wx86\bcy00.asm	     Symbols 2 - 1
+
+
+
+
+Structures and Unions:
+
+                N a m e                  Size
+                                         Offset      Type
+
+YUV_BUFFER_CONFIG  . . . . . . .	 00000030
+  YWidth . . . . . . . . . . . .	 00000000	 DWord
+  YHeight  . . . . . . . . . . .	 00000004	 DWord
+  YStride  . . . . . . . . . . .	 00000008	 DWord
+  UVWidth  . . . . . . . . . . .	 0000000C	 DWord
+  UVHeight . . . . . . . . . . .	 00000010	 DWord
+  UVStride . . . . . . . . . . .	 00000014	 DWord
+  YBuffer  . . . . . . . . . . .	 00000018	 DWord
+  UBuffer  . . . . . . . . . . .	 0000001C	 DWord
+  VBuffer  . . . . . . . . . . .	 00000020	 DWord
+  uvStart  . . . . . . . . . . .	 00000024	 DWord
+  uvDstArea  . . . . . . . . . .	 00000028	 DWord
+  uvUsedArea . . . . . . . . . .	 0000002C	 DWord
+x86_Params . . . . . . . . . . .	 00000028
+  dst  . . . . . . . . . . . . .	 0000001C	 DWord
+  scrnPitch  . . . . . . . . . .	 00000020	 DWord
+  buffConfig . . . . . . . . . .	 00000024	 DWord
+
+
+Segments and Groups:
+
+                N a m e                 Size     Length   Align   Combine Class
+
+FLAT . . . . . . . . . . . . . .	GROUP
+_DATA  . . . . . . . . . . . . .	32 Bit	 00000000 Para	  Public  'DATA'	
+_TEXT  . . . . . . . . . . . . .	32 Bit	 00000193 Para	  Public  'CODE'	
+
+
+Symbols:
+
+                N a m e                 Type     Value    Attr
+
+@CodeSize  . . . . . . . . . . .	Number	 00000000h   
+@DataSize  . . . . . . . . . . .	Number	 00000000h   
+@Interface . . . . . . . . . . .	Number	 00000002h   
+@Model . . . . . . . . . . . . .	Number	 00000007h   
+@code  . . . . . . . . . . . . .	Text   	 _TEXT
+@data  . . . . . . . . . . . . .	Text   	 FLAT
+@fardata?  . . . . . . . . . . .	Text   	 FLAT
+@fardata . . . . . . . . . . . .	Text   	 FLAT
+@stack . . . . . . . . . . . . .	Text   	 FLAT
+CLAMPCENTER  . . . . . . . . . .	Number	 00000600h   
+LOCAL_SPACE  . . . . . . . . . .	Number	 00000020h   
+L_2s . . . . . . . . . . . . . .	QWord	 00000008 _TEXT	
+L_3s . . . . . . . . . . . . . .	QWord	 00000000 _TEXT	
+L_Height . . . . . . . . . . . .	Number	 00000008h   
+L_YStride  . . . . . . . . . . .	Number	 00000004h   
+L_blkWidth . . . . . . . . . . .	Number	 00000000h   
+L_extraWidth . . . . . . . . . .	Number	 0000000Ch   
+L_tempspaceH . . . . . . . . . .	Number	 00000018h   
+L_tempspaceL . . . . . . . . . .	Number	 00000010h   
+WK_G_MASK_555  . . . . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_G_MASK_565  . . . . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_MASK_BYTE0  . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+WK_MASK_YY_MMX . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+WK_RB_MASK_555 . . . . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_RB_MASK_565 . . . . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_RGB_MULFACTOR_555 . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_RGB_MULFACTOR_565 . . . . . .	QWord	 00000000 FLAT	External SYSCALL
+WK_johnsTable_MMX  . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+WK_johnsTable  . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableB555 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableB565 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableB  . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableG555 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableG565 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableG  . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableR555 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableR565 . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_ClampTableR  . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_UforBG_MMX . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_UforBG . . . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_VforRG_MMX . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_VforRG . . . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_YforY_MMX  . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_WK_YforY  . . . . . . . . . . .	DWord	 00000000 FLAT	External SYSCALL
+_bcy00_MMX . . . . . . . . . . .	L Near	 00000010 _TEXT	Public SYSCALL
+bcy00_MMX_ . . . . . . . . . . .	L Near	 00000010 _TEXT	Public SYSCALL
+cploop . . . . . . . . . . . . .	L Near	 00000139 _TEXT	
+ewloop . . . . . . . . . . . . .	L Near	 000000C9 _TEXT	
+hloop  . . . . . . . . . . . . .	L Near	 00000053 _TEXT	
+phloop . . . . . . . . . . . . .	L Near	 00000145 _TEXT	
+theExit  . . . . . . . . . . . .	L Near	 00000186 _TEXT	
+wloop  . . . . . . . . . . . . .	L Near	 00000053 _TEXT	
+
+	   0 Warnings
+	   0 Errors
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg
new file mode 100644
index 00000000..83ce8fbe
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.plg
@@ -0,0 +1,1786 @@
+<html>
+<body>
+<pre>
+<h1>Build Log</h1>
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB01.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB01.bat"
+Creating temporary file "C:\tmp\RSPB02.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB02.bat"
+Creating temporary file "C:\tmp\RSPB03.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\release\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB03.bat"
+Creating temporary file "C:\tmp\RSPB04.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\release\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB04.bat"
+Creating temporary file "C:\tmp\RSPB05.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\release\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\release\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB05.bat"
+Creating temporary file "C:\tmp\RSPB06.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\ObjectCode\cpuID\release/" /Fd"..\..\..\ObjectCode\cpuID\release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB06.tmp" 
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB07.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\Release\s_cpuid.lib" 
+\NEWZIP\ObjectCode\cpuID\release\cid.obj
+\NEWZIP\ObjectCode\cpuID\release\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\release\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\release\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\release\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\release\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\release\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB07.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Generating Code...
+Compiling...
+Wmt_CpuID.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: CPUIdLib - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB08.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\VerifyXMMReg.obj .\Win32\VerifyXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB08.bat"
+Creating temporary file "C:\tmp\RSPB09.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\TrashXMMreg.obj .\Win32\TrashXMMreg.asm
+]
+Creating command line "C:\tmp\RSPB09.bat"
+Creating temporary file "C:\tmp\RSPB0A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\InitXMMReg.obj .\Win32\InitXMMReg.asm
+]
+Creating command line "C:\tmp\RSPB0A.bat"
+Creating temporary file "C:\tmp\RSPB0B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\D9xOSSupXMM.obj .\Win32\D9xOSSupXMM.asm
+]
+Creating command line "C:\tmp\RSPB0B.bat"
+Creating temporary file "C:\tmp\RSPB0C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\cpuID\debug\cpuid.lst /Fo .\..\..\..\ObjectCode\cpuID\debug\cpuid.obj .\Win32\cpuid.asm
+]
+Creating command line "C:\tmp\RSPB0C.bat"
+Creating temporary file "C:\tmp\RSPB0D.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\cpuID\debug/CPUIdLib.pch" /YX /Fo"..\..\..\ObjectCode\cpuID\debug/" /Fd"..\..\..\ObjectCode\cpuID\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\cid.c"
+"C:\NEWZIP\VP6\corelibs\CpuID\Win32\Wmt_CpuID.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB0D.tmp" 
+Performing Custom Build Step on .\Win32\VerifyXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\VerifyXMMReg.asm
+Performing Custom Build Step on .\Win32\TrashXMMreg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\TrashXMMreg.asm
+Performing Custom Build Step on .\Win32\InitXMMReg.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\InitXMMReg.asm
+Performing Custom Build Step on .\Win32\D9xOSSupXMM.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\D9xOSSupXMM.asm
+Performing Custom Build Step on .\Win32\cpuid.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\cpuid.asm
+Creating temporary file "C:\tmp\RSPB0E.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cpuid.lib" 
+\NEWZIP\ObjectCode\cpuID\debug\cid.obj
+\NEWZIP\ObjectCode\cpuID\debug\Wmt_CpuID.obj
+\NEWZIP\ObjectCode\cpuID\debug\cpuid.obj
+\NEWZIP\ObjectCode\cpuID\debug\D9xOSSupXMM.obj
+\NEWZIP\ObjectCode\cpuID\debug\InitXMMReg.obj
+\NEWZIP\ObjectCode\cpuID\debug\TrashXMMreg.obj
+\NEWZIP\ObjectCode\cpuID\debug\VerifyXMMReg.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB0E.tmp"
+<h3>Output Window</h3>
+Compiling...
+cid.c
+Wmt_CpuID.cpp
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cpuid.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB0F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB0F.bat"
+Creating temporary file "C:\tmp\RSPB10.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB10.bat"
+Creating temporary file "C:\tmp\RSPB11.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB11.bat"
+Creating temporary file "C:\tmp\RSPB12.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB12.bat"
+Creating temporary file "C:\tmp\RSPB13.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\..\include" /I "..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Release/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Release/" /Fd"..\..\..\ObjectCode\ColorSpaces\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB13.tmp" 
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB14.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\release\s_cconv.lib" 
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Release\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB14.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: colorconversions - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB15.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj .\Win32\rgb32toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB15.bat"
+Creating temporary file "C:\tmp\RSPB16.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj .\Win32\rgb32toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB16.bat"
+Creating temporary file "C:\tmp\RSPB17.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj .\Win32\rgb24toyv12_xmm.asm
+]
+Creating command line "C:\tmp\RSPB17.bat"
+Creating temporary file "C:\tmp\RSPB18.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.lst /Fo .\..\..\..\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj .\Win32\rgb24toyv12_mmx.asm
+]
+Creating command line "C:\tmp\RSPB18.bat"
+Creating temporary file "C:\tmp\RSPB19.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /GX /Z7 /Od /I "..\..\include" /I "..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\ObjectCode\ColorSpaces\Debug/colorconversions.pch" /YX /Fo"..\..\..\ObjectCode\ColorSpaces\Debug/" /Fd"..\..\..\ObjectCode\ColorSpaces\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\uyvytoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yuy2toyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\Win32\yvyutoyv12_mmx.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\ColorConversions.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\lutbl.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb24toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgb32toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\rgbtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\uyvytoyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvitoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtorgb.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuv.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuvtoyuvi.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yuy2toyv12f.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12.c"
+"C:\NEWZIP\VP6\corelibs\ColorSpaces\yvyutoyv12f.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB19.tmp" 
+Performing Custom Build Step on .\Win32\rgb32toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb32toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb32toyv12_mmx.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_xmm.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_xmm.asm
+Performing Custom Build Step on .\Win32\rgb24toyv12_mmx.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\Win32\rgb24toyv12_mmx.asm
+Creating temporary file "C:\tmp\RSPB1A.tmp" with contents
+[
+/nologo /out:"..\..\lib\win32\debug\s_cconv.lib" 
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\ColorConversions.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\lutbl.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgbtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\uyvytoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvitoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtorgb.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuv.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuvtoyuvi.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yuy2toyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\yvyutoyv12f.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb24toyv12_xmm.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_mmx.obj
+\NEWZIP\ObjectCode\ColorSpaces\Debug\rgb32toyv12_xmm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB1A.tmp"
+<h3>Output Window</h3>
+Compiling...
+uyvytoyv12_mmx.c
+yuy2toyv12_mmx.c
+yvyutoyv12_mmx.c
+ColorConversions.c
+lutbl.c
+rgb24toyv12.c
+rgb24toyv12f.c
+rgb32toyv12.c
+rgb32toyv12f.c
+rgbtorgb.c
+rgbtoyuv.c
+rgbtoyuvi.c
+uyvytoyv12.c
+uyvytoyv12f.c
+yuvitorgb.c
+yuvitoyuv.c
+yuvtorgb.c
+yuvtoyuv.c
+yuvtoyuvi.c
+yuy2toyv12.c
+yuy2toyv12f.c
+yvyutoyv12.c
+yvyutoyv12f.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_cconv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1B.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\include" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\Release/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\Release/" /Fd"..\..\..\..\ObjectCode\dxv\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1B.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_dxv.lib"  \NEWZIP\ObjectCode\dxv\Release\vscreen.obj \NEWZIP\ObjectCode\dxv\Release\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: dxv - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1C.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\include" /I "..\..\include\win32" /I "..\..\include" /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\ObjectCode\dxv\debug/dxv.pch" /YX /Fo"..\..\..\..\ObjectCode\dxv\debug/" /Fd"..\..\..\..\ObjectCode\dxv\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\vscreen.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\dxv\generic\ximage.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1C.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_dxv.lib"  \NEWZIP\ObjectCode\dxv\debug\vscreen.obj \NEWZIP\ObjectCode\dxv\debug\ximage.obj "
+<h3>Output Window</h3>
+Compiling...
+vscreen.c
+ximage.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_dxv.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1D.tmp" with contents
+[
+/nologo /MT /W3 /GX /O2 /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\release/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\release/" /Fd"..\..\..\..\obj\on2_mem\win32\release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1D.tmp" 
+Creating temporary file "C:\tmp\RSPB1E.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\release\on2_mem.lib" 
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\release\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB1E.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: on2_mem - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB1F.tmp" with contents
+[
+/nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\common\include" /I "..\..\memory_manager\include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\obj\on2_mem\win32\debug/on2_mem.pch" /YX /Fo"..\..\..\..\obj\on2_mem\win32\debug/" /Fd"..\..\..\..\obj\on2_mem\win32\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_alloc.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_base.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_dflt_abort.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_grow.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_largest.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_resize.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_shrink.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\memory_manager\hmm_true.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem.c"
+"C:\NEWZIP\VP6\corelibs\on2_common\src\on2_mem\on2_mem_tracker.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB1F.tmp" 
+Creating temporary file "C:\tmp\RSPB20.tmp" with contents
+[
+/nologo /out:"..\..\..\..\..\..\lib\win32\debug\on2_mem.lib" 
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_alloc.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_base.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_dflt_abort.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_grow.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_largest.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_resize.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_shrink.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\hmm_true.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem.obj
+\NEWZIP\VP6\corelibs\on2_common\obj\on2_mem\win32\debug\on2_mem_tracker.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB20.tmp"
+<h3>Output Window</h3>
+Compiling...
+hmm_alloc.c
+hmm_base.c
+hmm_dflt_abort.c
+hmm_grow.c
+hmm_largest.c
+hmm_resize.c
+hmm_shrink.c
+hmm_true.c
+on2_mem.c
+on2_mem_tracker.c
+Creating library...
+
+
+
+<h3>Results</h3>
+on2_mem.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB21.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I "..\vp60\include" /I "..\include" /I "..\..\include" /I ".\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /Fp"Release/preproc.pch" /YX /Fo"Release/" /Fd"Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB21.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Release\s_preproc.lib"  .\Release\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: preproc - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB22.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\vp60\include" /I "..\..\include" /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /Fo"Debug/" /Fd"Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\preproc\preproc.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB22.tmp" 
+Creating command line "link.exe -lib /nologo /out:"..\..\..\Lib\Win32\Debug\s_preproc.lib"  .\Debug\preproc.obj "
+<h3>Output Window</h3>
+Compiling...
+preproc.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_preproc.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB23.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Ox /Ot /Oa /Ow /Og /Oi /Ob2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Release/" /Fd"..\..\..\..\ObjectCode\vp6d\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB23.tmp" 
+Creating temporary file "C:\tmp\RSPB24.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60d.lib" 
+\NEWZIP\VP6\ObjectCode\vp6d\Release\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Release\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB24.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6d - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB25.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /I "..\..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6D_EXPORTS" /D "PREDICT_2D" /D "PBDLL" /D "VFW_PB" /D "USE_DRAWDIB" /D "POSTPROCESS" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6d\Debug/" /Fd"..\..\..\..\ObjectCode\vp6d\Debug/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\boolhuff.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\debug.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\decodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\DFrameR.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\FrameIni.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\Huffman.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\pb_globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\quantize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\recon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Generic\TokenEntropy.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\DX\Generic\vfwpbdll_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\dsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\quantindexmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\dx\Win32\vp60dxv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\xprintf\xprintf.cpp"
+]
+Creating command line "cl.exe @C:\tmp\RSPB25.tmp" 
+Creating temporary file "C:\tmp\RSPB26.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60d.lib" 
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\boolhuff.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\debug.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\decodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\DFrameR.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\FrameIni.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\Huffman.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\pb_globals.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantize.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\recon.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\TokenEntropy.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vfwpbdll_if.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\dsystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\quantindexmmx.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\vp60dxv.obj
+\NEWZIP\VP6\ObjectCode\vp6d\Debug\xprintf.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB26.tmp"
+<h3>Output Window</h3>
+Compiling...
+boolhuff.c
+debug.c
+decodembs.c
+decodemode.c
+decodemv.c
+DFrameR.c
+FrameIni.c
+Huffman.c
+pb_globals.c
+quantize.c
+recon.c
+TokenEntropy.c
+vfwpbdll_if.c
+dsystemdependant.c
+quantindexmmx.c
+vp60dxv.c
+Generating Code...
+Compiling...
+xprintf.cpp
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60d.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB27.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPB27.bat"
+Creating temporary file "C:\tmp\RSPB28.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPB28.bat"
+Creating temporary file "C:\tmp\RSPB29.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPB29.bat"
+Creating temporary file "C:\tmp\RSPB2A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\Release\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPB2A.bat"
+Creating temporary file "C:\tmp\RSPB2B.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "vp6E_EXPORTS" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\Release/" /Fd"..\..\..\..\ObjectCode\vp6e\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB2B.tmp" 
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPB2C.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Release\s_vp60e.lib" 
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\Release\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB2C.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vp6e - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB2D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmSAD.obj .\cx\Win32\XmmSAD.asm
+]
+Creating command line "C:\tmp\RSPB2D.bat"
+Creating temporary file "C:\tmp\RSPB2E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetSAD8.obj .\CX\Win32\XmmGetSAD8.asm
+]
+Creating command line "C:\tmp\RSPB2E.bat"
+Creating temporary file "C:\tmp\RSPB2F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\XmmGetError.obj .\cx\Win32\XmmGetError.asm
+]
+Creating command line "C:\tmp\RSPB2F.bat"
+Creating temporary file "C:\tmp\RSPB30.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.lst /Fo .\..\..\..\..\ObjectCode\vp6e\debug\MmxEncodeMath.obj .\cx\Win32\MmxEncodeMath.asm
+]
+Creating command line "C:\tmp\RSPB30.bat"
+Creating temporary file "C:\tmp\RSPB31.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\..\include" /I "..\..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\Include\vp60" /I "..\..\..\..\include\vp60" /D "vp6E_EXPORTS" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PREDICT_2D" /D "VFW_COMP" /D "COMPDLL" /D "POSTPROCESS" /D "CPUISLITTLEENDIAN" /D "NORMALIZED" /D INLINE=__forceinline /D FORCEINLINE=__forceinline /Fo"..\..\..\..\ObjectCode\vp6e\debug/" /Fd"..\..\..\..\ObjectCode\vp6e\debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\Comp_Globals.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Encode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodembs.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemode.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\encodemv.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\fullframefdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\mcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\misc_common.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PackVideo.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\PickModes.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\RawBuffer.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Tokenize.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\Transform.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Generic\twopass.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\CX\Generic\vfwcomp_if.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\COptFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\csystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\CWmtFunctions.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\VP60\vp60\cx\Win32\WmtTransform.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB31.tmp" 
+Performing Custom Build Step on .\cx\Win32\XmmSAD.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmSAD.asm
+Performing Custom Build Step on .\CX\Win32\XmmGetSAD8.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\CX\Win32\XmmGetSAD8.asm
+Performing Custom Build Step on .\cx\Win32\XmmGetError.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\XmmGetError.asm
+Performing Custom Build Step on .\cx\Win32\MmxEncodeMath.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\cx\Win32\MmxEncodeMath.asm
+Creating temporary file "C:\tmp\RSPB32.tmp" with contents
+[
+/nologo /out:"..\..\..\..\Lib\Win32\Debug\s_vp60e.lib" 
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Comp_Globals.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Encode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodembs.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemode.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\encodemv.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\fullframefdct.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\mcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\misc_common.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PackVideo.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\PickModes.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\RawBuffer.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Tokenize.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\Transform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\twopass.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\vfwcomp_if.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\COptFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\csystemdependant.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\CWmtFunctions.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\WmtTransform.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\MmxEncodeMath.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetError.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmGetSAD8.obj
+\NEWZIP\VP6\ObjectCode\vp6e\debug\XmmSAD.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB32.tmp"
+<h3>Output Window</h3>
+Compiling...
+Comp_Globals.c
+Encode.c
+encodembs.c
+encodemode.c
+encodemv.c
+fullframefdct.c
+mcomp.c
+misc_common.c
+PackVideo.c
+PickModes.c
+RawBuffer.c
+Tokenize.c
+Transform.c
+twopass.c
+vfwcomp.c
+vfwcomp_if.c
+COptFunctions.c
+csystemdependant.c
+CWmtFunctions.c
+WmtTransform.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vp60e.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB33.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /Ob2 /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\ObjectCode\vpppd6\Release/" /Fd"..\..\..\..\ObjectCode\vpppd6\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB33.tmp" 
+Creating temporary file "C:\tmp\RSPB34.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\release\s_vpppd.lib" 
+\NEWZIP\ObjectCode\vpppd6\Release\borders.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblock.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlace.obj
+\NEWZIP\ObjectCode\vpppd6\Release\dering.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopfilter.obj
+\NEWZIP\ObjectCode\vpppd6\Release\postproc.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scale.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblocker.obj
+\NEWZIP\ObjectCode\vpppd6\Release\clamp_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vpppd6\Release\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\deringwmtopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vpppd6\Release\loopf_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vpppd6\Release\scaleopt.obj
+\NEWZIP\ObjectCode\vpppd6\Release\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB34.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+Generating Code...
+Compiling...
+simpledeblock_asm.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vppp - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB35.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /Zi /Od /I ".\include" /I "..\include" /I "..\vp60\include" /I "..\..\..\include" /I "..\..\include" /D "_MBCS" /D "_LIB" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\ObjectCode\vppp\Debug/vppp.pch" /YX /Fo"..\..\..\..\ObjectCode\vppp\Debug/" /Fd"..\..\..\..\ObjectCode\vppp\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\borders.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\clamp.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\deblock.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\DeInterlace.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\dering.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\loopfilter.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\postproc.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\scale.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\generic\simpledeblocker.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\clamp_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deblockwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceMmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\DeInterlaceWmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\deringwmtopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\doptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\loopf_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\newlooptest_asm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\scaleopt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vppp\win32\simpledeblock_asm.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB35.tmp" 
+Creating temporary file "C:\tmp\RSPB36.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpppd.lib" 
+\NEWZIP\ObjectCode\vppp\Debug\borders.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblock.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlace.obj
+\NEWZIP\ObjectCode\vppp\Debug\dering.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopfilter.obj
+\NEWZIP\ObjectCode\vppp\Debug\postproc.obj
+\NEWZIP\ObjectCode\vppp\Debug\scale.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblocker.obj
+\NEWZIP\ObjectCode\vppp\Debug\clamp_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deblockwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceMmx.obj
+\NEWZIP\ObjectCode\vppp\Debug\DeInterlaceWmt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\deringwmtopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\doptsystemdependant.obj
+\NEWZIP\ObjectCode\vppp\Debug\loopf_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\newlooptest_asm.obj
+\NEWZIP\ObjectCode\vppp\Debug\scaleopt.obj
+\NEWZIP\ObjectCode\vppp\Debug\simpledeblock_asm.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB36.tmp"
+<h3>Output Window</h3>
+Compiling...
+borders.c
+clamp.c
+deblock.c
+DeInterlace.c
+dering.c
+loopfilter.c
+postproc.c
+scale.c
+simpledeblocker.c
+clamp_asm.c
+deblockopt.c
+deblockwmtopt.c
+DeInterlaceMmx.c
+DeInterlaceWmt.c
+deringopt.c
+deringwmtopt.c
+doptsystemdependant.c
+loopf_asm.c
+newlooptest_asm.c
+scaleopt.c
+simpledeblock_asm.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpppd.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB37.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /O2 /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /Fo"..\..\..\..\ObjectCode\vputil\Release/" /Fd"..\..\..\..\ObjectCode\vputil\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB37.tmp" 
+Creating temporary file "C:\tmp\RSPB38.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vputil.lib" 
+\NEWZIP\ObjectCode\vputil\Release\fdct.obj
+\NEWZIP\ObjectCode\vputil\Release\idctpart.obj
+\NEWZIP\ObjectCode\vputil\Release\reconstruct.obj
+\NEWZIP\ObjectCode\vputil\Release\vputil.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\fdctwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\filtmmx.obj
+\NEWZIP\ObjectCode\vputil\Release\filtwmt.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxidct.obj
+\NEWZIP\ObjectCode\vputil\Release\mmxrecon.obj
+\NEWZIP\ObjectCode\vputil\Release\uoptsystemdependant.obj
+\NEWZIP\ObjectCode\vputil\Release\vputilasm.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtidct.obj
+\NEWZIP\ObjectCode\vputil\Release\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB38.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vputil - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB39.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I ".\include" /I "..\include" /I "..\..\..\include" /I "..\vp60\include" /I "..\..\include" /I "..\..\..\..\include" /I "..\..\..\..\include\vp60" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /Fp"..\..\..\..\..\ObjectCode\vputil\Debug/vputil.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vputil\Debug/" /Fd"..\..\..\..\..\ObjectCode\vputil\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\fdct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\idctpart.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\reconstruct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\generic\vputil.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\fdctwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtmmx.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\filtwmt.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\mmxrecon.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\uoptsystemdependant.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\vputilasm.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtidct.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vputil\win32\wmtrecon.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB39.tmp" 
+Creating temporary file "C:\tmp\RSPB3A.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vputil.lib" 
+\ObjectCode\vputil\Debug\fdct.obj
+\ObjectCode\vputil\Debug\idctpart.obj
+\ObjectCode\vputil\Debug\reconstruct.obj
+\ObjectCode\vputil\Debug\vputil.obj
+\ObjectCode\vputil\Debug\fdctmmx.obj
+\ObjectCode\vputil\Debug\fdctwmt.obj
+\ObjectCode\vputil\Debug\filtmmx.obj
+\ObjectCode\vputil\Debug\filtwmt.obj
+\ObjectCode\vputil\Debug\mmxidct.obj
+\ObjectCode\vputil\Debug\mmxrecon.obj
+\ObjectCode\vputil\Debug\uoptsystemdependant.obj
+\ObjectCode\vputil\Debug\vputilasm.obj
+\ObjectCode\vputil\Debug\wmtidct.obj
+\ObjectCode\vputil\Debug\wmtrecon.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB3A.tmp"
+<h3>Output Window</h3>
+Compiling...
+fdct.c
+idctpart.c
+reconstruct.c
+vputil.c
+fdctmmx.c
+fdctwmt.c
+filtmmx.c
+filtwmt.c
+mmxidct.c
+mmxrecon.c
+uoptsystemdependant.c
+vputilasm.c
+wmtidct.c
+wmtrecon.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vputil.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Release--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB3B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPB3B.bat"
+Creating temporary file "C:\tmp\RSPB3C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPB3C.bat"
+Creating temporary file "C:\tmp\RSPB3D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPB3D.bat"
+Creating temporary file "C:\tmp\RSPB3E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPB3E.bat"
+Creating temporary file "C:\tmp\RSPB3F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPB3F.bat"
+Creating temporary file "C:\tmp\RSPB40.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPB40.bat"
+Creating temporary file "C:\tmp\RSPB41.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPB41.bat"
+Creating temporary file "C:\tmp\RSPB42.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPB42.bat"
+Creating temporary file "C:\tmp\RSPB43.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPB43.bat"
+Creating temporary file "C:\tmp\RSPB44.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPB44.bat"
+Creating temporary file "C:\tmp\RSPB45.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPB45.bat"
+Creating temporary file "C:\tmp\RSPB46.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Release\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPB46.bat"
+Creating temporary file "C:\tmp\RSPB47.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB47.tmp" 
+Creating temporary file "C:\tmp\RSPB48.tmp" with contents
+[
+/nologo /G6 /MT /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "NDEBUG" /D INLINE=__inline /D "WIN32" /Fo"..\..\..\..\..\ObjectCode\vpxblit\Release/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Release/" /FD /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB48.tmp" 
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPB49.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Release\s_vpxblit.lib" 
+\ObjectCode\vpxblit\Release\ctables.obj
+\ObjectCode\vpxblit\Release\wksetblt.obj
+\ObjectCode\vpxblit\Release\bcf00_c.obj
+\ObjectCode\vpxblit\Release\bcf10_c.obj
+\ObjectCode\vpxblit\Release\bcs00_c.obj
+\ObjectCode\vpxblit\Release\bcs10_c.obj
+\ObjectCode\vpxblit\Release\bct00_c.obj
+\ObjectCode\vpxblit\Release\bct10_c.obj
+\ObjectCode\vpxblit\Release\bcu00_c.obj
+\ObjectCode\vpxblit\Release\bcy00_c.obj
+\ObjectCode\vpxblit\Release\vpx_reg.obj
+\ObjectCode\vpxblit\Release\bct00_targa_c.obj
+\ObjectCode\vpxblit\Release\bcc00.obj
+\ObjectCode\vpxblit\Release\bcc10.obj
+\ObjectCode\vpxblit\Release\bcd00.obj
+\ObjectCode\vpxblit\Release\bcf00.obj
+\ObjectCode\vpxblit\Release\bcf10.obj
+\ObjectCode\vpxblit\Release\bcs00.obj
+\ObjectCode\vpxblit\Release\bcs10.obj
+\ObjectCode\vpxblit\Release\bct00.obj
+\ObjectCode\vpxblit\Release\bct10.obj
+\ObjectCode\vpxblit\Release\bcu00.obj
+\ObjectCode\vpxblit\Release\bcy00.obj
+\ObjectCode\vpxblit\Release\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB49.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Generating Code...
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Generating Code...
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+<h3>
+--------------------Configuration: vpxblit - Win32 Debug--------------------
+</h3>
+<h3>Command Lines</h3>
+Creating temporary file "C:\tmp\RSPB4A.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\const.obj .\wx86\const.asm
+]
+Creating command line "C:\tmp\RSPB4A.bat"
+Creating temporary file "C:\tmp\RSPB4B.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcy00.obj .\wx86\bcy00.asm
+]
+Creating command line "C:\tmp\RSPB4B.bat"
+Creating temporary file "C:\tmp\RSPB4C.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcu00.obj .\wx86\bcu00.asm
+]
+Creating command line "C:\tmp\RSPB4C.bat"
+Creating temporary file "C:\tmp\RSPB4D.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct10.obj .\wx86\bct10.asm
+]
+Creating command line "C:\tmp\RSPB4D.bat"
+Creating temporary file "C:\tmp\RSPB4E.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bct00.obj .\wx86\bct00.asm
+]
+Creating command line "C:\tmp\RSPB4E.bat"
+Creating temporary file "C:\tmp\RSPB4F.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs10.obj .\wx86\bcs10.asm
+]
+Creating command line "C:\tmp\RSPB4F.bat"
+Creating temporary file "C:\tmp\RSPB50.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcs00.obj .\wx86\bcs00.asm
+]
+Creating command line "C:\tmp\RSPB50.bat"
+Creating temporary file "C:\tmp\RSPB51.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf10.obj .\wx86\bcf10.asm
+]
+Creating command line "C:\tmp\RSPB51.bat"
+Creating temporary file "C:\tmp\RSPB52.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcf00.obj .\wx86\bcf00.asm
+]
+Creating command line "C:\tmp\RSPB52.bat"
+Creating temporary file "C:\tmp\RSPB53.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcd00.obj .\wx86\bcd00.asm
+]
+Creating command line "C:\tmp\RSPB53.bat"
+Creating temporary file "C:\tmp\RSPB54.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc10.obj .\wx86\bcc10.asm
+]
+Creating command line "C:\tmp\RSPB54.bat"
+Creating temporary file "C:\tmp\RSPB55.bat" with contents
+[
+@echo off
+ml /Zi /Zm /Cx /c /coff /Fl.\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.lst /Fo .\..\..\..\..\..\ObjectCode\vpxblit\Debug\bcc00.obj .\wx86\bcc00.asm
+]
+Creating command line "C:\tmp\RSPB55.bat"
+Creating temporary file "C:\tmp\RSPB56.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /I ".\generic" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\ctables.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\win32\wksetblt.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB56.tmp" 
+Creating temporary file "C:\tmp\RSPB57.tmp" with contents
+[
+/nologo /G6 /MTd /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\..\include\vp31" /I "..\..\..\include" /D "_WINDOWS" /D "_DEBUG" /D INLINE=__inline /D "WIN32" /Fp"..\..\..\..\..\ObjectCode\vpxblit\Debug/vpxblit.pch" /YX /Fo"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /Fd"..\..\..\..\..\ObjectCode\vpxblit\Debug/" /FD /GZ /c 
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcf10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcs10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct10_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcu00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bcy00_c.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\vpx_reg.c"
+"C:\NEWZIP\VP6\corelibs\cdxv\vpxblit\generic\bct00_targa_c.c"
+]
+Creating command line "cl.exe @C:\tmp\RSPB57.tmp" 
+Performing Custom Build Step on .\wx86\const.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\const.asm
+Performing Custom Build Step on .\wx86\bcy00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcy00.asm
+Performing Custom Build Step on .\wx86\bcu00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcu00.asm
+Performing Custom Build Step on .\wx86\bct10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct10.asm
+Performing Custom Build Step on .\wx86\bct00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bct00.asm
+Performing Custom Build Step on .\wx86\bcs10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs10.asm
+Performing Custom Build Step on .\wx86\bcs00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcs00.asm
+Performing Custom Build Step on .\wx86\bcf10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf10.asm
+Performing Custom Build Step on .\wx86\bcf00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcf00.asm
+Performing Custom Build Step on .\wx86\bcd00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcd00.asm
+Performing Custom Build Step on .\wx86\bcc10.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc10.asm
+Performing Custom Build Step on .\wx86\bcc00.asm
+Microsoft (R) Macro Assembler Version 6.15.8803
+Copyright (C) Microsoft Corp 1981-2000.  All rights reserved.
+
+ Assembling: .\wx86\bcc00.asm
+Creating temporary file "C:\tmp\RSPB58.tmp" with contents
+[
+/nologo /out:"..\..\..\Lib\Win32\Debug\s_vpxblit.lib" 
+\ObjectCode\vpxblit\Debug\ctables.obj
+\ObjectCode\vpxblit\Debug\wksetblt.obj
+\ObjectCode\vpxblit\Debug\bcf00_c.obj
+\ObjectCode\vpxblit\Debug\bcf10_c.obj
+\ObjectCode\vpxblit\Debug\bcs00_c.obj
+\ObjectCode\vpxblit\Debug\bcs10_c.obj
+\ObjectCode\vpxblit\Debug\bct00_c.obj
+\ObjectCode\vpxblit\Debug\bct10_c.obj
+\ObjectCode\vpxblit\Debug\bcu00_c.obj
+\ObjectCode\vpxblit\Debug\bcy00_c.obj
+\ObjectCode\vpxblit\Debug\vpx_reg.obj
+\ObjectCode\vpxblit\Debug\bct00_targa_c.obj
+\ObjectCode\vpxblit\Debug\bcc00.obj
+\ObjectCode\vpxblit\Debug\bcc10.obj
+\ObjectCode\vpxblit\Debug\bcd00.obj
+\ObjectCode\vpxblit\Debug\bcf00.obj
+\ObjectCode\vpxblit\Debug\bcf10.obj
+\ObjectCode\vpxblit\Debug\bcs00.obj
+\ObjectCode\vpxblit\Debug\bcs10.obj
+\ObjectCode\vpxblit\Debug\bct00.obj
+\ObjectCode\vpxblit\Debug\bct10.obj
+\ObjectCode\vpxblit\Debug\bcu00.obj
+\ObjectCode\vpxblit\Debug\bcy00.obj
+\ObjectCode\vpxblit\Debug\const.obj
+]
+Creating command line "link.exe -lib @C:\tmp\RSPB58.tmp"
+<h3>Output Window</h3>
+Compiling...
+ctables.c
+wksetblt.c
+Compiling...
+bcf00_c.c
+bcf10_c.c
+bcs00_c.c
+bcs10_c.c
+bct00_c.c
+bct10_c.c
+bcu00_c.c
+bcy00_c.c
+vpx_reg.c
+bct00_targa_c.c
+Creating library...
+
+
+
+<h3>Results</h3>
+s_vpxblit.lib - 0 error(s), 0 warning(s)
+</pre>
+</body>
+</html>
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj
new file mode 100644
index 00000000..1eb11535
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj
@@ -0,0 +1,354 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>17.0</VCProjectVersion>
+    <ProjectGuid>{C3547FC9-A6AC-4706-BED7-D696A8EF9EED}</ProjectGuid>
+    <RootNamespace>on2_mem</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <PlatformToolset>v142</PlatformToolset>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IncludePath>$(IncludePath)</IncludePath>
+    <LibraryPath>$(LibraryPath)</LibraryPath>
+    <OutDir>..\..\..\..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
+    <IntDir>..\..\..\..\..\..\obj\on2_mem\$(PlatformShortName)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg">
+    <VcpkgEnableManifest>false</VcpkgEnableManifest>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+    <VcpkgConfiguration>Debug</VcpkgConfiguration>
+  </PropertyGroup>
+  <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <VcpkgInstalledDir>
+    </VcpkgInstalledDir>
+    <VcpkgUseStatic>false</VcpkgUseStatic>
+    <VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader />
+      <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <StringPooling>true</StringPooling>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader />
+      <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..\..\common\include;..\..\memory_manager\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <PrecompiledHeaderOutputFile>$(IntDir)on2_mem.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
+      <ObjectFileName>$(IntDir)</ObjectFileName>
+      <ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
+      <WarningLevel>Level3</WarningLevel>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CompileAs>Default</CompileAs>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <ResourceCompile>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <Culture>0x0409</Culture>
+    </ResourceCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
+      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\memory_manager\hmm_alloc.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_base.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_dflt_abort.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_grow.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_largest.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_resize.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_shrink.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_true.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\on2_mem.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+    <ClCompile Include="..\..\on2_mem_tracker.c">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\include\on2_mem.h" />
+    <ClInclude Include="..\..\include\on2_mem_tracker.h" />
+    <ClInclude Include="..\..\memory_manager\include\cavl_if.h" />
+    <ClInclude Include="..\..\memory_manager\include\cavl_impl.h" />
+    <ClInclude Include="..\..\memory_manager\include\heapmm.h" />
+    <ClInclude Include="..\..\memory_manager\include\hmm_cnfg.h" />
+    <ClInclude Include="..\..\memory_manager\include\hmm_intrnl.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters
new file mode 100644
index 00000000..517d62e8
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/build/win32/on2_mem.vcxproj.filters
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{9e1216e0-9418-47c8-8b65-d829ad4b69a5}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+    </Filter>
+    <Filter Include="Source Files\memory_manager">
+      <UniqueIdentifier>{4c33a825-25eb-47cb-b841-87f800085fba}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{70389f5d-efd2-456e-a0e2-09499eb80192}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl</Extensions>
+    </Filter>
+    <Filter Include="Header Files\memory_manager_hdrs">
+      <UniqueIdentifier>{7deb3717-4ad2-4cdc-8773-cc326f108428}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\on2_mem.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\on2_mem_tracker.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_alloc.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_base.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_dflt_abort.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_grow.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_largest.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_resize.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_shrink.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\memory_manager\hmm_true.c">
+      <Filter>Source Files\memory_manager</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\include\on2_mem.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\include\on2_mem_tracker.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\memory_manager\include\cavl_if.h">
+      <Filter>Header Files\memory_manager_hdrs</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\memory_manager\include\cavl_impl.h">
+      <Filter>Header Files\memory_manager_hdrs</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\memory_manager\include\heapmm.h">
+      <Filter>Header Files\memory_manager_hdrs</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\memory_manager\include\hmm_cnfg.h">
+      <Filter>Header Files\memory_manager_hdrs</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\memory_manager\include\hmm_intrnl.h">
+      <Filter>Header Files\memory_manager_hdrs</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h
new file mode 100644
index 00000000..6a63475c
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem.h
@@ -0,0 +1,108 @@
+#ifndef __ON2_MEM_H__
+#define __ON2_MEM_H__
+
+/* on2_mem version info */
+#define on2_mem_version "2.0.1.1"
+
+#define ON2_MEM_VERSION_CHIEF 2
+#define ON2_MEM_VERSION_MAJOR 0
+#define ON2_MEM_VERSION_MINOR 1
+#define ON2_MEM_VERSION_PATCH 1
+/* end - on2_mem version info */
+
+#define ON2_TRACK_MEM_USAGE       0  //enable memory tracking/integrity checks
+#define ON2_CHECK_MEM_FUNCTIONS   0  //enable basic safety checks in _memcpy,
+                                     //_memset, and _memmove
+#define REPLACE_BUILTIN_FUNCTIONS 0  //replace builtin functions with their
+                                     //on2_ equivalents
+
+#include <stddef.h>
+#include "on2_mem_tracker.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+    on2_mem_get_version(void)
+    provided for runtime version checking. Returns an unsigned int of the form
+    CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high
+    order byte. Takes no arguments (declared with a full (void) prototype).
+*/
+unsigned int on2_mem_get_version(void);
+
+/*
+    on2_mem_set_heap_size(size_t size)
+      size - size in bytes for the memory manager to allocate for its heap
+    Sets the memory manager's initial heap size
+    Return:
+      0: on success
+      -1: if memory manager calls have not been included in the on2_mem lib
+      -2: if the memory manager has been compiled to use static memory
+      -3: if the memory manager has already allocated its heap
+*/
+int on2_mem_set_heap_size(size_t size);
+
+void* on2_memalign(size_t align, size_t size);
+void* on2_malloc(size_t size);
+void* on2_calloc(size_t num, size_t size);
+void* on2_realloc(void* memblk, size_t size);
+void on2_free(void* memblk);
+
+void* on2_memcpy(void* dest, const void* src, size_t length);
+void* on2_memset(void* dest, int val, size_t length);
+void* on2_memmove(void* dest, const void* src, size_t count);
+
+/* some defines for backward compatibility; duck_memalign, duck_malloc and duck_calloc discard their last argument */
+#define DMEM_GENERAL 0
+
+#define duck_memalign(X,Y,Z) on2_memalign(X,Y)
+#define duck_malloc(X,Y) on2_malloc(X)
+#define duck_calloc(X,Y,Z) on2_calloc(X,Y)
+#define duck_realloc  on2_realloc
+#define duck_free     on2_free
+#define duck_memcpy   on2_memcpy
+#define duck_memmove  on2_memmove
+#define duck_memset   on2_memset
+
+#if REPLACE_BUILTIN_FUNCTIONS
+#define memalign on2_memalign
+#define malloc   on2_malloc
+#define calloc   on2_calloc
+#define realloc  on2_realloc
+#define free     on2_free
+#define memcpy   on2_memcpy
+#define memmove  on2_memmove
+#define memset   on2_memset
+#endif
+
+#if ON2_TRACK_MEM_USAGE
+# ifndef __ON2_MEM_C__
+#  define on2_memalign(align, size) xon2_memalign((align), (size), __FILE__, __LINE__)
+#  define on2_malloc(size)          xon2_malloc((size), __FILE__, __LINE__)
+#  define on2_calloc(num, size)     xon2_calloc(num, size, __FILE__, __LINE__)
+#  define on2_realloc(addr, size)   xon2_realloc(addr, size, __FILE__, __LINE__)
+#  define on2_free(addr)            xon2_free(addr, __FILE__, __LINE__)
+# endif
+
+ void* xon2_memalign(size_t align, size_t size, char* file, int line);
+ void* xon2_malloc(size_t size, char* file, int line);
+ void* xon2_calloc(size_t num, size_t size, char* file, int line);
+ void* xon2_realloc(void* memblk, size_t size, char* file, int line);
+ void xon2_free(void* memblk, char* file, int line);
+#endif
+
+#if !ON2_CHECK_MEM_FUNCTIONS
+# ifndef __ON2_MEM_C__
+#  include <string.h>
+#  define on2_memcpy  memcpy
+#  define on2_memset  memset
+#  define on2_memmove memmove
+# endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __ON2_MEM_H__ */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h
new file mode 100644
index 00000000..4c5e6774
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/include/on2_mem_tracker.h
@@ -0,0 +1,126 @@
+#ifndef __ON2_MEM_TRACKER_H__
+#define __ON2_MEM_TRACKER_H__
+
+/* on2_mem_tracker version info */
+#define on2_mem_tracker_version "2.3.1.2"
+
+#define ON2_MEM_TRACKER_VERSION_CHIEF 2
+#define ON2_MEM_TRACKER_VERSION_MAJOR 3
+#define ON2_MEM_TRACKER_VERSION_MINOR 1
+#define ON2_MEM_TRACKER_VERSION_PATCH 2
+/* END - on2_mem_tracker version info */
+
+struct MemBlock                 /* record type returned by on2_MemoryTrackerFind() */
+{
+    size_t addr;                /* address, held as an integer value */
+    unsigned int size,          /* presumably the size given to on2_MemoryTrackerAdd() - confirm in on2_mem_tracker.c */
+                 line;          /* presumably the source line given to on2_MemoryTrackerAdd() - confirm */
+    char* file;                 /* presumably the source file name given to on2_MemoryTrackerAdd() - confirm */
+    struct MemBlock* prev,      /* links to neighbouring records; list handling is not visible in this header */
+                   * next;
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+    on2_MemoryTrackerInit(int padding_size, int pad_value)
+      padding_size - the size of the padding before and after each mem addr.
+                     Values > 0 indicate that integrity checks can be performed
+                     by inspecting these areas.
+      pad_value - the initial value within the padding area before and after
+                  each mem addr.
+      
+    Initializes the memory tracker interface. Should be called before any
+    other calls to the memory tracker.
+*/
+int on2_MemoryTrackerInit(int padding_size, int pad_value);
+
+/*
+    on2_MemoryTrackerDestroy(void)
+    Deinitializes the memory tracker interface. Takes no arguments.
+*/
+void on2_MemoryTrackerDestroy(void);
+
+/*
+    on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                         char * file, unsigned int line)
+      addr - memory address to be added to list
+      size - size of addr
+      file - the file addr was referenced from
+      line - the line in file addr was referenced from
+    Adds memory address addr, its size, and the file and line it came from
+    to the memory tracker allocation table
+*/
+void on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                          char * file, unsigned int line);
+
+/*
+    on2_MemoryTrackerRemove(size_t addr)
+      addr - memory address to be removed
+    Removes the specified address from the memory tracker's allocation
+    table
+    Return:
+      0: on success
+      -1: if memory allocation table's mutex could not be locked
+      -2: if the addr was not found in the list
+*/
+int on2_MemoryTrackerRemove(size_t addr);
+
+/*
+    on2_MemoryTrackerFind(size_t addr)
+      addr - address to be found in the memory tracker's
+             allocation table
+    Return:
+        If found, pointer to the memory block that matches addr
+        NULL otherwise
+*/
+struct MemBlock* on2_MemoryTrackerFind(size_t addr);
+
+/*
+    on2_MemoryTrackerDump(void)
+    Dumps the current contents of the memory
+    tracker allocation table. Takes no arguments.
+*/
+void on2_MemoryTrackerDump(void);
+
+/*
+    on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+    If a padding_size was provided to on2_MemoryTrackerInit(),
+    this function will verify that the region before and after each
+    memory address contains the specified pad_value. Should the check
+    fail, the filename and line of the check will be printed out.
+*/
+void on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line);
+
+/*
+    on2_MemoryTrackerSetLogType
+      type - value representing the logging type to use
+      option - type specific option. This will be interpreted differently
+               based on the type.
+    Sets the logging type for the memory tracker.
+    Values currently supported:
+      0: if option is NULL, log to stderr, otherwise interpret option as a
+         filename and attempt to open it.
+      -1: Use OutputDebugString (WIN32 only), option ignored
+    Return:
+      0: on success
+      -1: if the logging type could not be set, because the value was invalid
+          or because a file could not be opened
+*/
+int on2_MemoryTrackerSetLogType(int type, char* option);
+
+#if !defined(__ON2_MEM_TRACKER_C__) && !defined(__ON2_MEM_C__)
+#if ON2_TRACK_MEM_USAGE
+#define on2_MemoryTrackerCheckIntegrity() on2_MemoryTrackerCheckIntegrity(__FILE__, __LINE__)
+#else
+#define on2_MemoryTrackerCheckIntegrity()
+#endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif //__ON2_MEM_TRACKER_H__
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c
new file mode 100644
index 00000000..502f4bd9
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_alloc.c
@@ -0,0 +1,48 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void * U(alloc)(U(descriptor) *desc, U(size_aau) n) /* n: payload size in AAUs; result is 0 when no bin is big enough */
+  {
+    #ifdef HMM_AUDIT_FAIL
+    if (desc->avl_tree_root)
+      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+    #endif
+
+    if (desc->last_freed) /* move the pending last-freed block into the free collection first */
+      {
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(desc->last_freed)
+	#endif
+
+	U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+
+	desc->last_freed = 0;
+      }
+
+    /* Add space for block header. */
+    n += HEAD_AAUS;
+
+    /* Convert n from number of address alignment units to block alignment
+    ** units. */
+    n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+
+    if (n < MIN_BLOCK_BAUS) /* never hand out less than the minimum block size */
+      n = MIN_BLOCK_BAUS;
+
+    {
+      /* Search for the first node of the bin containing the smallest
+      ** block big enough to satisfy request. */
+      ptr_record *ptr_rec_ptr =
+	U(avl_search)(
+	  (U(avl_avl) *) &(desc->avl_tree_root), (U(size_bau)) n,
+	  AVL_GREATER_EQUAL);
+
+      /* If an appropriate bin is found, satisfy the allocation request,
+      ** otherwise return null pointer. */
+      return(ptr_rec_ptr ?
+	       U(alloc_from_bin)(desc, ptr_rec_ptr, (U(size_bau)) n) : 0);
+    }
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c
new file mode 100644
index 00000000..ea2a5463
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_base.c
@@ -0,0 +1,418 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void U(init)(U(descriptor) *desc)
+  {
+    desc->last_freed = 0;     /* no pending last-freed block */
+    desc->avl_tree_root = 0;  /* no bins in the AVL tree yet */
+  }
+
+/* Remove a free block from a bin's doubly-linked list when it is not
+** the first block in the bin (prev is dereferenced unconditionally).
+*/
+void U(dll_remove)(
+  /* Pointer to pointer record in the block to be removed. */
+  ptr_record *to_remove)
+  {
+    to_remove->prev->next = to_remove->next;
+    if (to_remove->next) /* a null next means the block was last in the list */
+      to_remove->next->prev = to_remove->prev;
+  }
+
+/* Put a block into the free collection of a heap, either as the front
+** block of a new bin or as the second block of an existing bin. */
+void U(into_free_collection)(
+  /* Pointer to heap descriptor. */
+  U(descriptor) *desc,
+  /* Pointer to _head record of block. */
+  head_record *head_ptr)
+  {
+    ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+
+    ptr_record *bin_front_ptr =
+      U(avl_insert)((U(avl_avl) *) &(desc->avl_tree_root), ptr_rec_ptr);
+
+    if (bin_front_ptr != ptr_rec_ptr)
+      {
+	/* The block was not inserted into the AVL tree because there is
+	** already a bin for the size of the block. */
+
+	MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(head_ptr)
+	ptr_rec_ptr->self = ptr_rec_ptr; /* self-reference is what out_of_free_collection tests for a non-front block */
+
+	/* Make the block the new second block in the bin's doubly-linked
+	** list. */
+	ptr_rec_ptr->prev = bin_front_ptr;
+	ptr_rec_ptr->next = bin_front_ptr->next;
+	bin_front_ptr->next = ptr_rec_ptr;
+	if (ptr_rec_ptr->next)
+	  ptr_rec_ptr->next->prev = ptr_rec_ptr;
+      }
+    else
+      /* Block is first block in new bin. */
+      ptr_rec_ptr->next = 0;
+  }
+
+/* Allocate a block from a given bin.  Returns a pointer to the payload
+** of the removed block.  The "last freed" pointer must be null prior
+** to calling this function; a split-off remainder becomes "last freed".
+*/
+void * U(alloc_from_bin)(
+  /* Pointer to heap descriptor. */
+  U(descriptor) *desc,
+  /* Pointer to pointer record of first block in bin. */
+  ptr_record *bin_front_ptr,
+  /* Number of BAUs needed in the allocated block.  If the block taken
+  ** from the bin is significantly larger than the number of BAUs needed,
+  ** the "extra" BAUs are split off to form a new free block. */
+  U(size_bau) n_baus)
+  {
+    head_record *head_ptr;
+    U(size_bau) rem_baus;
+
+    if (bin_front_ptr->next)
+      {
+	/* There are multiple blocks in this bin.  Use the 2nd block in
+	** the bin to avoid needless change to the AVL tree.
+	** The audit below is guarded by HMM_AUDIT_FAIL like all others. */
+
+	ptr_record *ptr_rec_ptr = bin_front_ptr->next;
+	head_ptr = PTR_REC_TO_HEAD(ptr_rec_ptr);
+
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(head_ptr)
+	#endif
+
+	U(dll_remove)(ptr_rec_ptr);
+      }
+    else
+      {
+	/* There is only one block in the bin, so it has to be removed
+	** from the AVL tree.
+	*/
+
+	head_ptr = PTR_REC_TO_HEAD(bin_front_ptr);
+
+	U(avl_remove)(
+	  (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+      }
+
+    MARK_BLOCK_ALLOCATED(head_ptr)
+
+    rem_baus = BLOCK_BAUS(head_ptr) - n_baus;
+
+    if (rem_baus >= MIN_BLOCK_BAUS)
+      {
+	/* Since there are enough "extra" BAUs, split them off to form
+	** a new free block.
+	*/
+
+	head_record *rem_head_ptr =
+	  (head_record *) BAUS_FORWARD(head_ptr, n_baus);
+
+	/* Change the next block's header to reflect the fact that the
+	** block preceding it is now smaller.
+	*/
+	SET_PREV_BLOCK_BAUS(
+	  BAUS_FORWARD(head_ptr, head_ptr->block_size), rem_baus)
+
+	head_ptr->block_size = n_baus;
+
+	rem_head_ptr->previous_block_size = n_baus;
+	rem_head_ptr->block_size = rem_baus;
+
+	desc->last_freed = rem_head_ptr; /* remainder is parked here, not inserted into a bin */
+      }
+
+    return(HEAD_TO_PTR_REC(head_ptr));
+  }
+
+/* Take a block out of the free collection.  A block whose self field
+** points at the block itself is a non-front member of its bin. */
+void U(out_of_free_collection)(
+  /* Descriptor of heap that block is in. */
+  U(descriptor) *desc,
+  /* Pointer to _head of block to take out of free collection. */
+  head_record *head_ptr)
+  {
+    ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
+
+    if (ptr_rec_ptr->self == ptr_rec_ptr)
+      /* Block is not the front block in its bin, so all we have to
+      ** do is take it out of the bin's doubly-linked list. */
+      U(dll_remove)(ptr_rec_ptr);
+    else
+      {
+	ptr_record *next = ptr_rec_ptr->next;
+
+        if (next)
+	  /* Block is the front block in its bin, and there is at least
+	  ** one other block in the bin.  Substitute the next block for
+	  ** the front block. */
+	  U(avl_subst)((U(avl_avl) *) &(desc->avl_tree_root), next);
+	else
+	  /* Block is the front block in its bin, but there is no other
+	  ** block in the bin.  Eliminate the bin. */
+	  U(avl_remove)(
+	    (U(avl_avl) *) &(desc->avl_tree_root), BLOCK_BAUS(head_ptr));
+      }
+  }
+
+void U(free)(U(descriptor) *desc, void *payload_ptr) /* release the block whose payload starts at payload_ptr */
+  {
+    /* Flags if coalesce with adjacent block. */
+    int coalesce;
+
+    head_record *fwd_head_ptr;
+    head_record *free_head_ptr = PTR_REC_TO_HEAD(payload_ptr);
+
+    desc->num_baus_can_shrink = 0; /* set again below only when the freed block ends its chunk */
+
+    #ifdef HMM_AUDIT_FAIL
+
+    AUDIT_BLOCK(free_head_ptr)
+
+    /* Make sure not freeing an already free block. */
+    if (!IS_BLOCK_ALLOCATED(free_head_ptr))
+      HMM_AUDIT_FAIL
+
+    if (desc->avl_tree_root)
+      /* Audit root block in AVL tree. */
+      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+
+    #endif
+
+    fwd_head_ptr =
+      (head_record *) BAUS_FORWARD(free_head_ptr, free_head_ptr->block_size);
+
+    if (free_head_ptr->previous_block_size) /* zero means this is the first block of its chunk */
+      {
+	/* Coalesce with backward block if possible. */
+
+	head_record *bkwd_head_ptr =
+	  (head_record *) BAUS_BACKWARD(
+	    free_head_ptr, free_head_ptr->previous_block_size);
+
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(bkwd_head_ptr)
+	#endif
+
+	if (bkwd_head_ptr == (head_record *) (desc->last_freed))
+	  {
+	    desc->last_freed = 0;
+	    coalesce = 1;
+	  }
+	else if (IS_BLOCK_ALLOCATED(bkwd_head_ptr))
+	  coalesce = 0;
+	else
+	  {
+	    U(out_of_free_collection)(desc, bkwd_head_ptr);
+	    coalesce = 1;
+	  }
+
+	if (coalesce)
+	  {
+	    bkwd_head_ptr->block_size += free_head_ptr->block_size;
+	    SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(bkwd_head_ptr))
+	    free_head_ptr = bkwd_head_ptr;
+	  }
+      }
+
+    if (fwd_head_ptr->block_size == 0)
+      {
+	/* Block to be freed is last block before dummy end-of-chunk block. */
+	desc->end_of_shrinkable_chunk =
+	  BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+	desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
+	if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
+	  /* Free block is the entire chunk, so shrinking can eliminate
+	  ** entire chunk including dummy end block. */
+	  desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+      }
+    else
+      {
+	/* Coalesce with forward block if possible. */
+
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(fwd_head_ptr)
+	#endif
+
+	if (fwd_head_ptr == (head_record *) (desc->last_freed))
+	  {
+	    desc->last_freed = 0;
+	    coalesce = 1;
+	  }
+	else if (IS_BLOCK_ALLOCATED(fwd_head_ptr))
+	  coalesce = 0;
+	else
+	  {
+	    U(out_of_free_collection)(desc, fwd_head_ptr);
+	    coalesce = 1;
+	  }
+
+	if (coalesce)
+	  {
+	    free_head_ptr->block_size += fwd_head_ptr->block_size;
+
+	    fwd_head_ptr =
+	      (head_record *) BAUS_FORWARD(
+		fwd_head_ptr, BLOCK_BAUS(fwd_head_ptr));
+
+	    SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(free_head_ptr))
+
+	    if (fwd_head_ptr->block_size == 0)
+	      {
+		/* Coalesced block to be freed is last block before dummy
+		** end-of-chunk block. */
+		desc->end_of_shrinkable_chunk =
+		  BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
+		desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
+		if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
+		  /* Free block is the entire chunk, so shrinking can
+		  ** eliminate entire chunk including dummy end block. */
+		  desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
+	      }
+	  }
+      }
+
+    if (desc->last_freed)
+      {
+	/* There is a last freed block, but it is not adjacent to the
+	** block being freed by this call to free, so put the last
+	** freed block into the free collection.
+	*/
+
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(desc->last_freed)
+	#endif
+
+	U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+      }
+    /* The (possibly coalesced) block just freed becomes the new last freed block. */
+    desc->last_freed = free_head_ptr;
+  }
+
+void U(new_chunk)(U(descriptor) *desc, void *start, U(size_bau) n_baus) /* n_baus includes the dummy end block */
+  {
+    #ifdef HMM_AUDIT_FAIL
+    if (desc->avl_tree_root)
+      /* Audit root block in AVL tree. */
+      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+    #endif
+
+    #undef HEAD_PTR
+    #define HEAD_PTR ((head_record *) start)
+
+    /* Make the chunk one big free block followed by a dummy end block.
+    */
+
+    n_baus -= DUMMY_END_BLOCK_BAUS;
+
+    HEAD_PTR->previous_block_size = 0; /* zero previous size marks the first block of a chunk */
+    HEAD_PTR->block_size = n_baus;
+
+    U(into_free_collection)(desc, HEAD_PTR);
+
+    /* Set up the dummy end block. */
+    start = BAUS_FORWARD(start, n_baus); /* HEAD_PTR expands to start, so it now refers to the dummy end block */
+    HEAD_PTR->previous_block_size = n_baus;
+    HEAD_PTR->block_size = 0;
+
+    #undef HEAD_PTR
+  }
+
+#ifdef HMM_AUDIT_FAIL
+
+/* Function that does audit fail actions defined by preprocessor symbol
+** HMM_AUDIT_FAIL, and returns a dummy integer value.
+*/
+int U(audit_block_fail_dummy_return)(void)
+  {
+    HMM_AUDIT_FAIL
+
+    /* Dummy return. */
+    return(0);
+  }
+
+#endif
+
+/* AVL Tree instantiation.  The "less" link lives in a record's self field, the "greater" link in its prev field. */
+
+#ifdef HMM_AUDIT_FAIL
+
+/* The AVL tree generic package passes an ACCESS of 1 when it "touches"
+** a child node for the first time during a particular operation.  I use
+** this feature to audit only one time (per operation) the free blocks
+** that are tree nodes.  Since the root node is not a child node, it has
+** to be audited directly.
+*/
+
+/* The pain you feel while reading these macros will not be in vain.  It
+** will remove all doubt from your mind that C++ inline functions are
+** a very good thing.
+*/
+
+#define AVL_GET_LESS(H, ACCESS) \
+  (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->self)
+#define AVL_GET_GREATER(H, ACCESS) \
+  (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->prev)
+
+#else
+
+#define AVL_GET_LESS(H, ACCESS) ((H)->self)
+#define AVL_GET_GREATER(H, ACCESS) ((H)->prev)
+
+#endif
+
+#define AVL_SET_LESS(H, LH) (H)->self = (LH); /* note: the expansion already ends in ';' */
+#define AVL_SET_GREATER(H, GH) (H)->prev = (GH); /* note: the expansion already ends in ';' */
+
+/*	high bit of	high bit of
+**	block_size	previous_block_size	balance factor
+**	-----------	-------------------	--------------
+**	0		0			n/a (block allocated)
+**	0		1			1
+**	1		0			-1
+**	1		1			0
+*/
+
+#define AVL_GET_BALANCE_FACTOR(H) \
+  ((((head_record *) (PTR_REC_TO_HEAD(H)))->block_size & \
+     HIGH_BIT_BAU_SIZE) ? \
+   (((head_record *) (PTR_REC_TO_HEAD(H)))->previous_block_size & \
+     HIGH_BIT_BAU_SIZE ? 0 : -1) : 1)
+
+#define AVL_SET_BALANCE_FACTOR(H, BF) \
+  {							\
+    register head_record *p =				\
+      (head_record *) PTR_REC_TO_HEAD(H);		\
+    register int bal_f = (BF); 				\
+							\
+    if (bal_f <= 0)					\
+      p->block_size |= HIGH_BIT_BAU_SIZE;		\
+    else						\
+      p->block_size &= ~HIGH_BIT_BAU_SIZE;		\
+    if (bal_f >= 0)					\
+      p->previous_block_size |= HIGH_BIT_BAU_SIZE;	\
+    else						\
+      p->previous_block_size &= ~HIGH_BIT_BAU_SIZE;	\
+  }
+
+#define COMPARE_KEY_KEY(K1, K2) ((K1) == (K2) ? 0 : ((K1) > (K2) ? 1 : -1))
+
+#define AVL_COMPARE_KEY_NODE(K, H) \
+  COMPARE_KEY_KEY(K, BLOCK_BAUS(PTR_REC_TO_HEAD(H)))
+
+#define AVL_COMPARE_NODE_NODE(H1, H2) \
+  COMPARE_KEY_KEY(BLOCK_BAUS(PTR_REC_TO_HEAD(H1)), \
+		  BLOCK_BAUS(PTR_REC_TO_HEAD(H2)))
+
+#define AVL_NULL ((ptr_record *) 0)
+
+#define AVL_IMPL_MASK \
+  ( AVL_IMPL_INSERT | AVL_IMPL_SEARCH | AVL_IMPL_REMOVE | AVL_IMPL_SUBST )
+
+#include "cavl_impl.h"
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c
new file mode 100644
index 00000000..0bbee582
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_dflt_abort.c
@@ -0,0 +1,43 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+/* The function in this file performs default actions if self-auditing
+** finds heap corruption.  Don't rely on my feeble attempt to handle the
+** case where HMM is being used to implement the malloc and free standard
+** library functions.  Rewrite the function if necessary to avoid using
+** I/O and execution termination functions that call malloc or free.
+** In Unix, for example, you would replace the fputs calls with calls
+** to the write system call using file handle number 2.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static int entered = 0; /* set once HMM_dflt_abort has started; guards against re-entry */
+
+/* Print abort message, file and line, then spin forever (never returns).
+*/
+void HMM_dflt_abort(const char *file, const char *line)
+  {
+    /* Avoid use of printf(), which is more likely to use heap. */
+
+    if (entered)
+      /* The standard I/O functions called a heap function and caused
+      ** an indirect recursive call to this function.  So we'll have
+      ** to just hang here without printing a message.  */
+      while(1);
+
+    entered = 1;
+
+    fputs("\nABORT - Heap corruption\n" "File: ", stderr);
+    fputs(file, stderr);
+    fputs("  Line: ", stderr);
+    fputs(line, stderr); /* line is passed in as a string, not a number */
+    fputs("\n\n", stderr);
+    fputs( "HMM_dflt_abort: while(1)!!!\n", stderr );    /* announce the deliberate hang below */
+	fflush(stderr);	/* flush the message before hanging */
+
+	while(1);
+  }
+
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c
new file mode 100644
index 00000000..b90212dc
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_grow.c
@@ -0,0 +1,39 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+void U(grow_chunk)(U(descriptor) *desc, void *end, U(size_bau) n_baus) /* end: address just past the chunk's dummy end block */
+  {
+    #undef HEAD_PTR
+    #define HEAD_PTR ((head_record *) end)
+
+    end = BAUS_BACKWARD(end, DUMMY_END_BLOCK_BAUS); /* end (and so HEAD_PTR) now names the old dummy end block */
+
+    #ifdef HMM_AUDIT_FAIL
+
+    if (HEAD_PTR->block_size != 0)
+      /* Chunk does not have valid dummy end block. */
+      HMM_AUDIT_FAIL
+
+    #endif
+
+    /* Create a new block that absorbs the old dummy end block. */
+    HEAD_PTR->block_size = n_baus;
+
+    /* Set up the new dummy end block. */
+    {
+      head_record *dummy = (head_record *) BAUS_FORWARD(end, n_baus);
+      dummy->previous_block_size = n_baus;
+      dummy->block_size = 0;
+    }
+
+    /* Simply free the new block, allowing it to coalesce with any
+    ** free block that was the last block in the chunk prior to
+    ** growth.
+    */
+    U(free)(desc, HEAD_TO_PTR_REC(end));
+
+    #undef HEAD_PTR
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c
new file mode 100644
index 00000000..60b0cf7f
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_largest.c
@@ -0,0 +1,49 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+U(size_aau) U(largest_available)(U(descriptor) *desc) /* largest free payload size in AAUs, or 0 if nothing is free */
+  {
+    U(size_bau) largest;
+
+    if (!(desc->avl_tree_root))
+      largest = 0;
+    else
+      {
+	#ifdef HMM_AUDIT_FAIL
+	/* Audit root block in AVL tree. */
+	AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+	#endif
+
+	largest =
+	  BLOCK_BAUS(
+	    PTR_REC_TO_HEAD(
+	      U(avl_search)(
+		(U(avl_avl) *) &(desc->avl_tree_root),
+		(U(size_bau)) ~ (U(size_bau)) 0, AVL_LESS))); /* all-ones key with AVL_LESS */
+      }
+
+    if (desc->last_freed) /* the last freed block is not in the tree, so compare it separately */
+      {
+	/* Size of last freed block. */
+	register U(size_bau) lf_size;
+
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(desc->last_freed)
+	#endif
+
+	lf_size = BLOCK_BAUS(desc->last_freed);
+
+	if (lf_size > largest)
+	  largest = lf_size;
+      }
+
+    /* Convert largest size to AAUs and subtract _head size leaving payload
+    ** size.
+    */
+    return(largest ?
+	     ((largest * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) - HEAD_AAUS) :
+	     0);
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c
new file mode 100644
index 00000000..c17d15a7
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_resize.c
@@ -0,0 +1,107 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+int U(resize)(U(descriptor) *desc, void *mem, U(size_aau) n) /* returns 0 on success, -1 if there is not enough room */
+  {
+    U(size_aau) i;
+    head_record *next_head_ptr;
+    head_record *head_ptr = PTR_REC_TO_HEAD(mem);
+
+    /* Flag: nonzero when the block after mem is the last freed block or is not allocated. */
+    int next_block_free;
+
+    /* Convert n from desired block size in AAUs to BAUs. */
+    n += HEAD_AAUS;
+    n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
+    if (n < MIN_BLOCK_BAUS)
+      n = MIN_BLOCK_BAUS;
+
+    #ifdef HMM_AUDIT_FAIL
+
+    AUDIT_BLOCK(head_ptr)
+
+    if (!IS_BLOCK_ALLOCATED(head_ptr))
+      HMM_AUDIT_FAIL
+
+    if (desc->avl_tree_root)
+      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+
+    #endif
+
+    i = head_ptr->block_size; /* i accumulates the BAUs available to the resized block */
+
+    next_head_ptr =
+      (head_record *) BAUS_FORWARD(head_ptr, head_ptr->block_size);
+
+    next_block_free =
+      (next_head_ptr == desc->last_freed) ||
+      !IS_BLOCK_ALLOCATED(next_head_ptr);
+
+    if (next_block_free)
+      /* Block can expand into next free block. */
+      i += BLOCK_BAUS(next_head_ptr);
+
+    if (n > i)
+      /* Not enough room for block to expand. */
+      return(-1);
+
+    if (next_block_free)
+      {
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(next_head_ptr)
+	#endif
+
+	if (next_head_ptr == desc->last_freed)
+	  desc->last_freed = 0;
+	else
+	  U(out_of_free_collection)(desc, next_head_ptr);
+
+	next_head_ptr =
+	  (head_record *) BAUS_FORWARD(head_ptr, (U(size_bau)) i);
+      }
+
+    /* Set i to number of "extra" BAUs. */
+    i -= n;
+
+    if (i < MIN_BLOCK_BAUS)
+      /* Not enough extra BAUs to be a block on their own, so just keep them
+      ** in the block being resized.
+      */
+      {
+	n += i;
+	i = n; /* i is stored below as the following block's previous_block_size */
+      }
+    else
+      {
+	/* There are enough "leftover" BAUs in the next block to
+	** form a remainder block. */
+
+	head_record *rem_head_ptr;
+
+	rem_head_ptr = (head_record *) BAUS_FORWARD(head_ptr, n);
+
+	rem_head_ptr->previous_block_size = (U(size_bau)) n;
+	rem_head_ptr->block_size = (U(size_bau)) i;
+
+	if (desc->last_freed)
+	  {
+	    #ifdef HMM_AUDIT_FAIL
+	    AUDIT_BLOCK(desc->last_freed)
+	    #endif
+
+	    U(into_free_collection)(desc, (head_record *) (desc->last_freed));
+
+	    desc->last_freed = 0;
+	  }
+
+	desc->last_freed = rem_head_ptr; /* the remainder becomes the new last freed block */
+      }
+
+    head_ptr->block_size = (U(size_bau)) n;
+    next_head_ptr->previous_block_size = (U(size_bau)) i;
+
+    return(0);
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c
new file mode 100644
index 00000000..651bac66
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_shrink.c
@@ -0,0 +1,96 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+/* Removes n_baus_to_shrink BAUs from the end of desc's shrinkable chunk. */
+void U(shrink_chunk)(U(descriptor) *desc, U(size_bau) n_baus_to_shrink)
+  {
+    head_record *dummy_end_block = (head_record *)
+      BAUS_BACKWARD(desc->end_of_shrinkable_chunk, DUMMY_END_BLOCK_BAUS);
+    /* (The dummy end block starts DUMMY_END_BLOCK_BAUS before the chunk end.) */
+    #ifdef HMM_AUDIT_FAIL
+
+    if (dummy_end_block->block_size != 0)
+      /* Chunk does not have valid dummy end block. */
+      HMM_AUDIT_FAIL
+
+    #endif
+    /* A request to shrink by zero BAUs changes nothing. */
+    if (n_baus_to_shrink)
+      {
+	head_record *last_block = (head_record *)
+	  BAUS_BACKWARD(
+	    dummy_end_block, dummy_end_block->previous_block_size);
+	/* (previous_block_size of the dummy end block leads to the last block.) */
+	#ifdef HMM_AUDIT_FAIL
+	AUDIT_BLOCK(last_block)
+	#endif
+	/* BAUs can only be taken from a last block held in desc->last_freed. */
+	if (last_block == desc->last_freed)
+	  {
+	    U(size_bau) bs = BLOCK_BAUS(last_block);
+
+	    /* Chunk will not be shrunk out of existence if
+	    ** 1.  There is at least one allocated block in the chunk
+	    **     and the amount to shrink is exactly the size of the
+	    **     last block, OR
+	    ** 2.  After the last block is shrunk, there will be enough
+	    **     BAUs left in it to form a minimal size block. */
+	    int chunk_will_survive =
+	      (PREV_BLOCK_BAUS(last_block) && (n_baus_to_shrink == bs)) ||
+	      (n_baus_to_shrink <= (U(size_bau)) (bs - MIN_BLOCK_BAUS));
+	    /* Else the request must remove the whole chunk: lone block + dummy. */
+	    if (chunk_will_survive ||
+		(!PREV_BLOCK_BAUS(last_block) &&
+		 (n_baus_to_shrink ==
+		  (U(size_bau)) (bs + DUMMY_END_BLOCK_BAUS))))
+	      {
+		desc->last_freed = 0;
+		/* If the whole chunk is going away, nothing else is updated. */
+		if (chunk_will_survive)
+		  {
+		    bs -= n_baus_to_shrink;
+		    if (bs)
+		      {
+			/* The last (non-dummy) block was not completely
+			** eliminated by the shrink. */
+
+			last_block->block_size = bs;
+
+			/* Create new dummy end record.
+			*/
+			dummy_end_block =
+			  (head_record *) BAUS_FORWARD(last_block, bs);
+			dummy_end_block->previous_block_size = bs;
+			dummy_end_block->block_size = 0;
+
+			#ifdef HMM_AUDIT_FAIL
+			if (desc->avl_tree_root)
+			  AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
+			#endif
+			/* The shortened block goes back with the free blocks. */
+			U(into_free_collection)(desc, last_block);
+		      }
+		    else
+		      {
+			/* The last (non-dummy) block was completely
+			** eliminated by the shrink.  Make its _head
+			** the new dummy end block.
+			*/
+			last_block->block_size = 0;
+			last_block->previous_block_size &= ~HIGH_BIT_BAU_SIZE;
+		      }
+		  }
+	      }
+	    #ifdef HMM_AUDIT_FAIL
+	    else
+	      HMM_AUDIT_FAIL
+	    #endif
+	  }
+	#ifdef HMM_AUDIT_FAIL
+	else
+	  HMM_AUDIT_FAIL
+	#endif
+      }
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c
new file mode 100644
index 00000000..7d057a49
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/hmm_true.c
@@ -0,0 +1,21 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#include "hmm_intrnl.h"
+
+U(size_aau) U(true_size)(void *payload_ptr)
+  {
+    head_record *hdr = PTR_REC_TO_HEAD(payload_ptr);
+
+    #ifdef HMM_AUDIT_FAIL
+    AUDIT_BLOCK(hdr)
+    #endif
+
+    /* The block size is kept in BAUs: scale it to AAUs, then take off
+    ** the AAUs used by the _head record so that only the payload size
+    ** is reported. */
+    return(
+      BLOCK_BAUS(hdr) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT) -
+      HEAD_AAUS);
+  }
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h
new file mode 100644
index 00000000..a2df0830
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_if.h
@@ -0,0 +1,216 @@
+/* Abstract AVL Tree Generic C Package.
+** Interface generation header file.
+**
+** This code is in the public domain.  See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5  Author: Walt Karas
+*/
+
+/* This header contains the definition of CHAR_BIT (number of bits in a
+** char). */
+#include <limits.h>
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+
+#ifndef AVL_SEARCH_TYPE_DEFINED_
+#define AVL_SEARCH_TYPE_DEFINED_
+/* Bit flags that tell a keyed search which nodes it is allowed to return. */
+typedef enum
+  {
+    AVL_EQUAL = 1,	/* A node whose key equals the search key. */
+    AVL_LESS = 2,	/* The greatest node whose key is less than it. */
+    AVL_GREATER = 4,	/* The least node whose key is greater than it. */
+    AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS,
+    AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER
+  }
+avl_search_type;
+/* (The guard keeps the type from being redefined on a later inclusion.) */
+#endif
+
+#ifdef AVL_UNIQUE
+/* L__(name) expands through the macro supplied by the instantiation. */
+#define L__ AVL_UNIQUE
+
+#else
+/* Default: L__(name) is just name. */
+#define L__(X) X
+
+#endif
+
+/* Determine storage class for function prototypes. */
+#ifdef AVL_PRIVATE
+
+#define L__SC static
+
+#else
+
+#define L__SC extern
+
+#endif
+
+#ifdef AVL_SIZE
+/* Type of the node count given to build(), as chosen by the instantiation. */
+#define L__SIZE AVL_SIZE
+
+#else
+/* Default type of the node count given to build(). */
+#define L__SIZE unsigned long
+
+#endif
+/* The tree object: the root handle plus any instantiation-specific members. */
+typedef struct
+  {
+    #ifdef AVL_INSIDE_STRUCT
+    /* Extra members supplied by the instantiation. */
+    AVL_INSIDE_STRUCT
+
+    #endif
+    /* Handle of the root node; AVL_NULL when the tree is empty. */
+    AVL_HANDLE root;
+  }
+L__(avl);
+
+/* Function prototypes. */
+/* Makes the tree empty. */
+L__SC void L__(init)(L__(avl) *tree);
+/* Returns nonzero if the tree is empty. */
+L__SC int L__(is_empty)(L__(avl) *tree);
+/* Inserts node h; returns h, or the node already present with an equal key. */
+L__SC AVL_HANDLE L__(insert)(L__(avl) *tree, AVL_HANDLE h);
+/* Returns the node selected by key k and search type st, or AVL_NULL. */
+L__SC AVL_HANDLE L__(search)(L__(avl) *tree, AVL_KEY k, avl_search_type st);
+/* Returns the node with the least key, or AVL_NULL if the tree is empty. */
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *tree);
+/* Returns the node with the greatest key, or AVL_NULL if the tree is empty. */
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *tree);
+/* Removes the node with key k; returns it, or AVL_NULL if there is none. */
+L__SC AVL_HANDLE L__(remove)(L__(avl) *tree, AVL_KEY k);
+/* Swaps new_node in for the equal-keyed node; returns the old node or AVL_NULL. */
+L__SC AVL_HANDLE L__(subst)(L__(avl) *tree, AVL_HANDLE new_node);
+
+#ifdef AVL_BUILD_ITER_TYPE
+/* Builds the tree from num_nodes nodes read through p; returns 0 on read error. */
+L__SC int L__(build)(
+  L__(avl) *tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes);
+
+#endif
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+*/
+/* (Presumably each shift is split so no shift count reaches 32 -- confirm.) */
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+
+/* Number of bits in a long. */
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+/* The macro L__BIT_ARR_DEFN defines a bit array whose index is a (0-based)
+** node depth.  The definition depends on whether the maximum depth may be
+** more than the number of bits in a single long, or is at most that number.
+*/
+/* (Either form of the macro supplies its own terminating semicolon.) */
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* Maximum depth may be more than number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) \
+  unsigned long NAME[((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT];
+
+#else
+
+/* Maximum depth is definitely no more than the number of bits in a long. */
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+
+#endif
+
+/* Iterator structure. */
+typedef struct
+  {
+    /* Tree being iterated over. */
+    L__(avl) *tree_;
+
+    /* Records a path into the tree.  If bit n is true, indicates
+    ** take greater branch from the nth node in the path, otherwise
+    ** take the less branch.  bit 0 gives branch from root, and
+    ** so on. */
+    L__BIT_ARR_DEFN(branch)
+
+    /* Zero-based depth of the current node; ~0 marks an invalid iterator. */
+    unsigned depth;
+    /* Handles of the nodes on the path below the root: path_h[i] is the
+    ** node at depth i + 1.  The root itself is not stored here. */
+    AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
+  }
+L__(iter);
+
+/* Iterator function prototypes. */
+/* Positions iter at the node selected by k and st (invalid if there is none). */
+L__SC void L__(start_iter)(
+  L__(avl) *tree, L__(iter) *iter, AVL_KEY k, avl_search_type st);
+/* Positions iter at the least node (invalid if the tree is empty). */
+L__SC void L__(start_iter_least)(L__(avl) *tree, L__(iter) *iter);
+/* Counterpart of start_iter_least for the greatest node. */
+L__SC void L__(start_iter_greatest)(L__(avl) *tree, L__(iter) *iter);
+/* NOTE(review): presumably returns the node iter is positioned at -- confirm. */
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter);
+/* NOTE(review): presumably steps iter to the next greater node -- confirm. */
+L__SC void L__(incr_iter)(L__(iter) *iter);
+/* NOTE(review): presumably steps iter to the next lesser node -- confirm. */
+L__SC void L__(decr_iter)(L__(iter) *iter);
+/* Marks iter as invalid (sets its depth to ~0). */
+L__SC void L__(init_iter)(L__(iter) *iter);
+/* Bits for AVL_IMPL_MASK; each selects one function for cavl_impl.h to define. */
+#define AVL_IMPL_INIT			1
+#define AVL_IMPL_IS_EMPTY		(1 << 1)
+#define AVL_IMPL_INSERT			(1 << 2)
+#define AVL_IMPL_SEARCH			(1 << 3)
+#define AVL_IMPL_SEARCH_LEAST		(1 << 4)
+#define AVL_IMPL_SEARCH_GREATEST	(1 << 5)
+#define AVL_IMPL_REMOVE			(1 << 6)
+#define AVL_IMPL_BUILD			(1 << 7)
+#define AVL_IMPL_START_ITER		(1 << 8)
+#define AVL_IMPL_START_ITER_LEAST	(1 << 9)
+#define AVL_IMPL_START_ITER_GREATEST	(1 << 10)
+#define AVL_IMPL_GET_ITER		(1 << 11)
+#define AVL_IMPL_INCR_ITER		(1 << 12)
+#define AVL_IMPL_DECR_ITER		(1 << 13)
+#define AVL_IMPL_INIT_ITER		(1 << 14)
+#define AVL_IMPL_SUBST			(1 << 15)
+/* Every bit set: define all of the functions. */
+#define AVL_IMPL_ALL			(~0)
+
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__SC
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h
new file mode 100644
index 00000000..21242da9
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/cavl_impl.h
@@ -0,0 +1,1181 @@
+/* Abstract AVL Tree Generic C Package.
+** Implementation generation header file.
+**
+** This code is in the public domain.  See cavl_tree.html for interface
+** documentation.
+**
+** Version: 1.5  Author: Walt Karas
+*/
+/* Drop any helper macros still defined, so this header can be included again. */
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__tree
+#undef L__MASK_HIGH_BIT
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+#undef L__BIT_ARR_LONGS
+#undef L__IMPL_MASK
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__SC
+#undef L__BALANCE_PARAM_CALL_PREFIX
+#undef L__BALANCE_PARAM_DECL_PREFIX
+#ifdef AVL_UNIQUE
+/* L__(name) expands through the macro supplied by the instantiation. */
+#define L__ AVL_UNIQUE
+
+#else
+/* Default: L__(name) is just name. */
+#define L__(X) X
+
+#endif
+
+/* Determine correct storage class for functions */
+#ifdef AVL_PRIVATE
+
+#define L__SC static
+
+#else
+/* Not private: the definitions get no storage-class specifier. */
+#define L__SC
+
+#endif
+
+#ifdef AVL_SIZE
+/* Type of the node count given to build(), as chosen by the instantiation. */
+#define L__SIZE AVL_SIZE
+
+#else
+/* Default type of the node count given to build(). */
+#define L__SIZE unsigned long
+
+#endif
+/* An int with only its highest bit set; used to test whether two ints agree in sign. */
+#define L__MASK_HIGH_BIT ((int) ~ ((~ (unsigned) 0) >> 1))
+
+/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
+** L__EST_LONG_BIT to be the greatest multiple of 8 in the range
+** 32 - 64 (inclusive) that is less than or equal to the number of
+** bits in a long.
+*/
+/* LONG_MAX and CHAR_BIT need <limits.h>, which is not included here (cavl_if.h does). */
+#if (((LONG_MAX >> 31) >> 7) == 0)
+
+#define L__EST_LONG_BIT 32
+
+#elif (((LONG_MAX >> 31) >> 15) == 0)
+
+#define L__EST_LONG_BIT 40
+
+#elif (((LONG_MAX >> 31) >> 23) == 0)
+
+#define L__EST_LONG_BIT 48
+
+#elif (((LONG_MAX >> 31) >> 31) == 0)
+
+#define L__EST_LONG_BIT 56
+
+#else
+
+#define L__EST_LONG_BIT 64
+
+#endif
+/* Number of bits in a long. */
+#define L__LONG_BIT (sizeof(long) * CHAR_BIT)
+
+#if ((AVL_MAX_DEPTH) > L__EST_LONG_BIT)
+
+/* The maximum depth may be greater than the number of bits in a long,
+** so multiple longs are needed to hold a bit array indexed by node
+** depth. */
+
+#define L__BIT_ARR_LONGS (((AVL_MAX_DEPTH) + L__LONG_BIT - 1) / L__LONG_BIT)
+
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME[L__BIT_ARR_LONGS];
+/* Masks use 1UL: shifting a signed 1L into the sign bit would be undefined. */
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) \
+  ((BIT_ARR)[(BIT_NUM) / L__LONG_BIT] & (1UL << ((BIT_NUM) % L__LONG_BIT)))
+/* Clear bit BIT_NUM. */
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) \
+  (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] &= ~(1UL << ((BIT_NUM) % L__LONG_BIT));
+/* Set bit BIT_NUM. */
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) \
+  (BIT_ARR)[(BIT_NUM) / L__LONG_BIT] |= 1UL << ((BIT_NUM) % L__LONG_BIT);
+/* Give every bit the value BIT_VAL (0 or 1). */
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) \
+  { int i = L__BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); }
+
+#else /* The bit array can definitely fit in one long */
+/* The same five macros, operating on a single unsigned long. */
+#define L__BIT_ARR_DEFN(NAME) unsigned long NAME;
+/* Masks use 1UL: shifting a signed 1L into the sign bit would be undefined. */
+#define L__BIT_ARR_VAL(BIT_ARR, BIT_NUM) ((BIT_ARR) & (1UL << (BIT_NUM)))
+
+#define L__BIT_ARR_0(BIT_ARR, BIT_NUM) (BIT_ARR) &= ~(1UL << (BIT_NUM));
+
+#define L__BIT_ARR_1(BIT_ARR, BIT_NUM) (BIT_ARR) |= 1UL << (BIT_NUM);
+
+#define L__BIT_ARR_ALL(BIT_ARR, BIT_VAL) (BIT_ARR) = 0L - (BIT_VAL);
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN
+/* Makes the enclosing function return ERROR_RETURN when AVL_READ_ERROR is true. */
+#define L__CHECK_READ_ERROR(ERROR_RETURN) \
+{ if (AVL_READ_ERROR) return(ERROR_RETURN); }
+
+#else
+/* Without AVL_READ_ERRORS_HAPPEN the check expands to nothing. */
+#define L__CHECK_READ_ERROR(ERROR_RETURN)
+
+#endif
+
+/* The presumed reason that an instantiation places additional fields
+** inside the AVL tree structure is that the SET_ and GET_ macros
+** need these fields.  The "balance" function does not explicitly use
+** any fields in the AVL tree structure, so only pass an AVL tree
+** structure pointer to "balance" if it has instantiation-specific
+** fields that are (presumably) needed by the SET_/GET_ calls within
+** "balance".
+*/
+#ifdef AVL_INSIDE_STRUCT
+/* Note the trailing commas: each prefix is a complete leading argument. */
+#define L__BALANCE_PARAM_CALL_PREFIX L__tree,
+#define L__BALANCE_PARAM_DECL_PREFIX L__(avl) *L__tree,
+
+#else
+/* No tree pointer is passed, so both prefixes are empty. */
+#define L__BALANCE_PARAM_CALL_PREFIX
+#define L__BALANCE_PARAM_DECL_PREFIX
+
+#endif
+
+#ifdef AVL_IMPL_MASK
+/* Define only the functions whose AVL_IMPL_ bits the instantiation selected. */
+#define L__IMPL_MASK (AVL_IMPL_MASK)
+
+#else
+
+/* Define all functions. */
+#define L__IMPL_MASK AVL_IMPL_ALL
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT)
+/* Makes the tree empty by storing AVL_NULL in its root handle. */
+L__SC void L__(init)(L__(avl) *L__tree) { L__tree->root = AVL_NULL; }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_IS_EMPTY)
+/* Returns nonzero if the tree has no nodes (its root is AVL_NULL). */
+L__SC int L__(is_empty)(L__(avl) *L__tree)
+  { return(L__tree->root == AVL_NULL); }
+
+#endif
+
+/* Put the private balance function in the same compilation module as
+** the insert function.  */
+#if (L__IMPL_MASK & AVL_IMPL_INSERT)
+
+/* Rebalances the subtree rooted at bal_h after one side has become two levels
+** deeper.  Returns the new root of the subtree, or AVL_NULL on a read error. */
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h)
+  {
+    AVL_HANDLE deep_h;
+
+    /* Either the "greater than" or the "less than" subtree of
+    ** this node has to be 2 levels deeper (or else it wouldn't
+    ** need balancing).
+    */
+    if (AVL_GET_BALANCE_FACTOR(bal_h) > 0)
+      {
+	/* "Greater than" subtree is deeper. */
+
+	deep_h = AVL_GET_GREATER(bal_h, 1);
+
+	L__CHECK_READ_ERROR(AVL_NULL)
+
+	if (AVL_GET_BALANCE_FACTOR(deep_h) < 0)
+	  {
+	    int bf;
+	    /* Double rotation: deep_h's less child becomes the subtree root. */
+	    AVL_HANDLE old_h = bal_h;
+	    bal_h = AVL_GET_LESS(deep_h, 1);
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 0))
+	    AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 0))
+	    AVL_SET_LESS(bal_h, old_h)
+	    AVL_SET_GREATER(bal_h, deep_h)
+	    /* (The grandchildren are only re-linked, never read: hence flag 0.) */
+	    bf = AVL_GET_BALANCE_FACTOR(bal_h);
+	    if (bf != 0)
+	      {
+		if (bf > 0)
+		  {
+		    AVL_SET_BALANCE_FACTOR(old_h, -1)
+		    AVL_SET_BALANCE_FACTOR(deep_h, 0)
+		  }
+		else
+		  {
+		    AVL_SET_BALANCE_FACTOR(deep_h, 1)
+		    AVL_SET_BALANCE_FACTOR(old_h, 0)
+		  }
+		AVL_SET_BALANCE_FACTOR(bal_h, 0)
+	      }
+	    else
+	      {
+		AVL_SET_BALANCE_FACTOR(old_h, 0)
+		AVL_SET_BALANCE_FACTOR(deep_h, 0)
+	      }
+	  }
+	else
+	  {
+	    AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
+	    AVL_SET_LESS(deep_h, bal_h)
+	    if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+	      {
+		AVL_SET_BALANCE_FACTOR(deep_h, -1)
+		AVL_SET_BALANCE_FACTOR(bal_h, 1)
+	      }
+	    else
+	      {
+		AVL_SET_BALANCE_FACTOR(deep_h, 0)
+		AVL_SET_BALANCE_FACTOR(bal_h, 0)
+	      }
+	    bal_h = deep_h;
+	  }
+      }
+    else
+      {
+	/* "Less than" subtree is deeper. */
+
+	deep_h = AVL_GET_LESS(bal_h, 1);
+	L__CHECK_READ_ERROR(AVL_NULL)
+
+	if (AVL_GET_BALANCE_FACTOR(deep_h) > 0)
+	  {
+	    int bf;
+	    AVL_HANDLE old_h = bal_h;
+	    bal_h = AVL_GET_GREATER(deep_h, 1);
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
+	    AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
+	    AVL_SET_GREATER(bal_h, old_h)
+	    AVL_SET_LESS(bal_h, deep_h)
+	    /* Mirror image of the double rotation in the other branch. */
+	    bf = AVL_GET_BALANCE_FACTOR(bal_h);
+	    if (bf != 0)
+	      {
+		if (bf < 0)
+		  {
+		    AVL_SET_BALANCE_FACTOR(old_h, 1)
+		    AVL_SET_BALANCE_FACTOR(deep_h, 0)
+		  }
+		else
+		  {
+		    AVL_SET_BALANCE_FACTOR(deep_h, -1)
+		    AVL_SET_BALANCE_FACTOR(old_h, 0)
+		  }
+		AVL_SET_BALANCE_FACTOR(bal_h, 0)
+	      }
+	    else
+	      {
+		AVL_SET_BALANCE_FACTOR(old_h, 0)
+		AVL_SET_BALANCE_FACTOR(deep_h, 0)
+	      }
+	  }
+	else
+	  {
+	    AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
+	    AVL_SET_GREATER(deep_h, bal_h)
+	    if (AVL_GET_BALANCE_FACTOR(deep_h) == 0)
+	      {
+		AVL_SET_BALANCE_FACTOR(deep_h, 1)
+		AVL_SET_BALANCE_FACTOR(bal_h, -1)
+	      }
+	    else
+	      {
+		AVL_SET_BALANCE_FACTOR(deep_h, 0)
+		AVL_SET_BALANCE_FACTOR(bal_h, 0)
+	      }
+	    bal_h = deep_h;
+	  }
+      }
+
+    return(bal_h);
+  }
+/* Inserts h; returns h, the existing equal-keyed node, or AVL_NULL (read error). */
+L__SC AVL_HANDLE L__(insert)(L__(avl) *L__tree, AVL_HANDLE h)
+  {
+    AVL_SET_LESS(h, AVL_NULL)
+    AVL_SET_GREATER(h, AVL_NULL)
+    AVL_SET_BALANCE_FACTOR(h, 0)
+    /* h is now set up as a leaf.  An empty tree simply takes it as its root. */
+    if (L__tree->root == AVL_NULL)
+      L__tree->root = h;
+    else
+      {
+	/* Last unbalanced node encountered in search for insertion point. */
+	AVL_HANDLE unbal = AVL_NULL;
+	/* Parent of last unbalanced node. */
+	AVL_HANDLE parent_unbal = AVL_NULL;
+	/* Balance factor of last unbalanced node. */
+	int unbal_bf;
+
+	/* Zero-based depth in tree. */
+	unsigned depth = 0, unbal_depth = 0;
+
+	/* Records a path into the tree.  If bit n is true, indicates
+	** take greater branch from the nth node in the path, otherwise
+	** take the less branch.  bit 0 gives branch from root, and
+	** so on. */
+	L__BIT_ARR_DEFN(branch)
+	/* hh walks down from the root; parent trails one node behind it. */
+	AVL_HANDLE hh = L__tree->root;
+	AVL_HANDLE parent = AVL_NULL;
+	int cmp;
+	/* Find the insertion point, noting the last node with nonzero balance. */
+	do
+ 	  {
+	    if (AVL_GET_BALANCE_FACTOR(hh) != 0)
+	      {
+		unbal = hh;
+		parent_unbal = parent;
+		unbal_depth = depth;
+	      }
+	    cmp = AVL_COMPARE_NODE_NODE(h, hh);
+	    if (cmp == 0)
+	      /* Duplicate key. */
+	      return(hh);
+	    parent = hh;
+	    if (cmp > 0)
+	      {
+		hh = AVL_GET_GREATER(hh, 1);
+		L__BIT_ARR_1(branch, depth)
+	      }
+	    else
+	      {
+		hh = AVL_GET_LESS(hh, 1);
+		L__BIT_ARR_0(branch, depth)
+	      }
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    depth++;
+	  }
+	while (hh != AVL_NULL);
+
+	/*  Add node to insert as leaf of tree. */
+	if (cmp < 0)
+	  AVL_SET_LESS(parent, h)
+	else
+	  AVL_SET_GREATER(parent, h)
+	/* Go back to the last unbalanced node (or the root) to fix the factors. */
+	depth = unbal_depth;
+
+	if (unbal == AVL_NULL)
+	  hh = L__tree->root;
+	else
+	  {
+	    cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+	    depth++;
+	    unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
+	    if (cmp < 0)
+	      unbal_bf--;
+	    else  /* cmp > 0 */
+	      unbal_bf++;
+	    hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    if ((unbal_bf != -2) && (unbal_bf != 2))
+	      {
+		/* No rebalancing of tree is necessary. */
+		AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
+		unbal = AVL_NULL;
+	      }
+	  }
+	/* Each node from there down to h was balanced, so it now leans toward h. */
+	if (hh != AVL_NULL)
+	  while (h != hh)
+	    {
+	      cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+	      depth++;
+	      if (cmp < 0)
+		{
+		  AVL_SET_BALANCE_FACTOR(hh, -1)
+		  hh = AVL_GET_LESS(hh, 1);
+		}
+	      else /* cmp > 0 */
+		{
+		  AVL_SET_BALANCE_FACTOR(hh, 1)
+		  hh = AVL_GET_GREATER(hh, 1);
+		}
+	      L__CHECK_READ_ERROR(AVL_NULL)
+	    }
+	/* unbal is still set only if its factor reached +/-2: rebalance, re-link. */
+	if (unbal != AVL_NULL)
+	  {
+	    unbal = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX unbal);
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    if (parent_unbal == AVL_NULL)
+	      L__tree->root = unbal;
+	    else
+	      {
+		depth = unbal_depth - 1;
+		cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+		if (cmp < 0)
+		  AVL_SET_LESS(parent_unbal, unbal)
+		else  /* cmp > 0 */
+		  AVL_SET_GREATER(parent_unbal, unbal)
+	      }
+	  }
+
+      }
+
+    return(h);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH)
+
+/* Looks up a node by key.  st selects what may be returned: a node with
+** an equal key (AVL_EQUAL), the nearest node with a lesser key (AVL_LESS),
+** the nearest node with a greater key (AVL_GREATER), or an equal node if
+** there is one and otherwise the nearest.  Returns AVL_NULL if no node
+** qualifies or a read error occurs. */
+L__SC AVL_HANDLE L__(search)(L__(avl) *L__tree, AVL_KEY k, avl_search_type st)
+  {
+    /* Sign that compare(k, node) must have for a non-equal node to
+    ** qualify: positive when lesser nodes are wanted, negative when
+    ** greater nodes are wanted, zero when only equality will do. */
+    int want = (st & AVL_LESS) ? 1 : ((st & AVL_GREATER) ? -1 : 0);
+    int dir;
+    AVL_HANDLE best = AVL_NULL;
+    AVL_HANDLE cur;
+
+    for (cur = L__tree->root; cur != AVL_NULL; )
+      {
+	dir = AVL_COMPARE_KEY_NODE(k, cur);
+	if ((dir == 0) && (st & AVL_EQUAL))
+	  /* An exact match is acceptable; nothing can be closer. */
+	  return(cur);
+	if (dir == 0)
+	  /* An equal key is not acceptable: step past it toward the
+	  ** side on which qualifying nodes lie. */
+	  dir = -want;
+	else if ((want != 0) && !((dir ^ want) & L__MASK_HIGH_BIT))
+	  /* dir and want have the same sign, so this node qualifies; a
+	  ** later qualifying node will be closer to k and replace it. */
+	  best = cur;
+	cur = dir < 0 ? AVL_GET_LESS(cur, 1) : AVL_GET_GREATER(cur, 1);
+	L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    return(best);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_LEAST)
+
+L__SC AVL_HANDLE L__(search_least)(L__(avl) *L__tree)
+  {
+    /* Follow "less" links from the root; the last node visited has the
+    ** least key.  An empty tree yields AVL_NULL, as does a read error. */
+    AVL_HANDLE least = AVL_NULL;
+    AVL_HANDLE next;
+    for (next = L__tree->root; next != AVL_NULL; )
+      {
+	least = next;
+	next = AVL_GET_LESS(least, 1);
+	L__CHECK_READ_ERROR(AVL_NULL)
+      }
+    return(least);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SEARCH_GREATEST)
+
+L__SC AVL_HANDLE L__(search_greatest)(L__(avl) *L__tree)
+  {
+    /* Follow "greater" links from the root; the last node visited has the
+    ** greatest key.  An empty tree yields AVL_NULL, as does a read error. */
+    AVL_HANDLE greatest = AVL_NULL;
+    AVL_HANDLE next;
+    for (next = L__tree->root; next != AVL_NULL; )
+      {
+	greatest = next;
+	next = AVL_GET_GREATER(greatest, 1);
+	L__CHECK_READ_ERROR(AVL_NULL)
+      }
+    return(greatest);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_REMOVE)
+
+/* Prototype of balance function (called by remove) in case not in
+** same compilation unit.
+*/
+L__SC AVL_HANDLE L__(balance)(L__BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h);
+/* Removes the node with key k; returns its handle, or AVL_NULL if not found. */
+L__SC AVL_HANDLE L__(remove)(L__(avl) *L__tree, AVL_KEY k)
+  {
+    /* Zero-based depth in tree. */
+    unsigned depth = 0, rm_depth;
+
+    /* Records a path into the tree.  If bit n is true, indicates
+    ** take greater branch from the nth node in the path, otherwise
+    ** take the less branch.  bit 0 gives branch from root, and
+    ** so on. */
+    L__BIT_ARR_DEFN(branch)
+
+    AVL_HANDLE h = L__tree->root;
+    AVL_HANDLE parent = AVL_NULL;
+    AVL_HANDLE child;
+    AVL_HANDLE path;
+    int cmp, cmp_shortened_sub_with_path;
+    int reduced_depth;
+    int bf;
+    AVL_HANDLE rm;
+    AVL_HANDLE parent_rm;
+    /* Search for the node to remove, recording each branch taken. */
+    for ( ; ; )
+      {
+	if (h == AVL_NULL)
+	  /* No node in tree with given key. */
+	  return(AVL_NULL);
+	cmp = AVL_COMPARE_KEY_NODE(k, h);
+	if (cmp == 0)
+	  /* Found node to remove. */
+	  break;
+	parent = h;
+	if (cmp > 0)
+	  {
+	    h = AVL_GET_GREATER(h, 1);
+	    L__BIT_ARR_1(branch, depth)
+	  }
+	else
+	  {
+	    h = AVL_GET_LESS(h, 1);
+	    L__BIT_ARR_0(branch, depth)
+	  }
+	L__CHECK_READ_ERROR(AVL_NULL)
+	depth++;
+	cmp_shortened_sub_with_path = cmp;
+      }
+    rm = h;
+    parent_rm = parent;
+    rm_depth = depth;
+
+    /* If the node to remove is not a leaf node, we need to get a
+    ** leaf node, or a node with a single leaf as its child, to put
+    ** in the place of the node to remove.  We will get the greatest
+    ** node in the less subtree (of the node to remove), or the least
+    ** node in the greater subtree.  We take the leaf node from the
+    ** deeper subtree, if there is one. */
+
+    if (AVL_GET_BALANCE_FACTOR(h) < 0)
+      {
+	child = AVL_GET_LESS(h, 1);
+	L__BIT_ARR_0(branch, depth)
+	cmp = -1;
+      }
+    else
+      {
+	child = AVL_GET_GREATER(h, 1);
+	L__BIT_ARR_1(branch, depth)
+	cmp = 1;
+      }
+    L__CHECK_READ_ERROR(AVL_NULL)
+    depth++;
+    /* After that first step, keep going the opposite way to the replacement. */
+    if (child != AVL_NULL)
+      {
+	cmp = -cmp;
+	do
+	  {
+	    parent = h;
+	    h = child;
+	    if (cmp < 0)
+	      {
+		child = AVL_GET_LESS(h, 1);
+		L__BIT_ARR_0(branch, depth)
+	      }
+	    else
+	      {
+		child = AVL_GET_GREATER(h, 1);
+		L__BIT_ARR_1(branch, depth)
+	      }
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    depth++;
+	  }
+	while (child != AVL_NULL);
+
+	if (parent == rm)
+	  /* Only went through do loop once.  Deleted node will be replaced
+	  ** in the tree structure by one of its immediate children. */
+	  cmp_shortened_sub_with_path = -cmp;
+        else
+	  cmp_shortened_sub_with_path = cmp;
+
+	/* Get the handle of the opposite child, which may not be null. */
+	child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
+      }
+    /* Unlink h (the replacement, or the leaf being removed) from its parent. */
+    if (parent == AVL_NULL)
+      /* There were only 1 or 2 nodes in this tree. */
+      L__tree->root = child;
+    else if (cmp_shortened_sub_with_path < 0)
+      AVL_SET_LESS(parent, child)
+    else
+      AVL_SET_GREATER(parent, child)
+
+    /* "path" is the parent of the subtree being eliminated or reduced
+    ** from a depth of 2 to 1.  If "path" is the node to be removed, we
+    ** set path to the node we're about to poke into the position of the
+    ** node to be removed. */
+    path = parent == rm ? h : parent;
+
+    if (h != rm)
+      {
+	/* Poke in the replacement for the node to be removed. */
+	AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
+	AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
+	AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
+	if (parent_rm == AVL_NULL)
+	  L__tree->root = h;
+	else
+	  {
+	    depth = rm_depth - 1;
+	    if (L__BIT_ARR_VAL(branch, depth))
+	      AVL_SET_GREATER(parent_rm, h)
+	    else
+	      AVL_SET_LESS(parent_rm, h)
+	  }
+      }
+
+    if (path != AVL_NULL)
+      {
+	/* Create a temporary linked list from the parent of the path node
+	** to the root node. */
+	h = L__tree->root;
+	parent = AVL_NULL;
+	depth = 0;
+	while (h != path)
+	  {
+	    if (L__BIT_ARR_VAL(branch, depth))
+	      {
+	        child = AVL_GET_GREATER(h, 1);
+		AVL_SET_GREATER(h, parent)
+	      }
+	    else
+	      {
+	        child = AVL_GET_LESS(h, 1);
+		AVL_SET_LESS(h, parent)
+	      }
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	    depth++;
+	    parent = h;
+	    h = child;
+	  }
+	/* NOTE(review): a read error in either loop returns with links reversed. */
+	/* Climb from the path node to the root node using the linked
+	** list, restoring the tree structure and rebalancing as necessary.
+	*/
+	reduced_depth = 1;
+	cmp = cmp_shortened_sub_with_path;
+	for ( ; ; )
+	  {
+	    if (reduced_depth)
+	      {
+		bf = AVL_GET_BALANCE_FACTOR(h);
+		if (cmp < 0)
+		  bf++;
+		else  /* cmp > 0 */
+		  bf--;
+		if ((bf == -2) || (bf == 2))
+		  {
+		    h = L__(balance)(L__BALANCE_PARAM_CALL_PREFIX h);
+		    L__CHECK_READ_ERROR(AVL_NULL)
+		    bf = AVL_GET_BALANCE_FACTOR(h);
+		  }
+		else
+		  AVL_SET_BALANCE_FACTOR(h, bf)
+		reduced_depth = (bf == 0);
+	      }
+	    if (parent == AVL_NULL)
+	      break;
+	    child = h;
+	    h = parent;
+	    depth--;
+	    cmp = L__BIT_ARR_VAL(branch, depth) ? 1 : -1;
+	    if (cmp < 0)
+	      {
+		parent = AVL_GET_LESS(h, 1);
+		AVL_SET_LESS(h, child)
+	      }
+	    else
+	      {
+		parent = AVL_GET_GREATER(h, 1);
+		AVL_SET_GREATER(h, child)
+	      }
+	    L__CHECK_READ_ERROR(AVL_NULL)
+	  }
+	L__tree->root = h;
+      }
+
+    return(rm);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_SUBST)
+
+/* Replaces the node whose key equals that of new_node by new_node itself.
+** Returns the replaced node, or AVL_NULL if there is none or a read fails. */
+L__SC AVL_HANDLE L__(subst)(L__(avl) *L__tree, AVL_HANDLE new_node)
+  {
+    AVL_HANDLE old_node = L__tree->root;
+    AVL_HANDLE above = AVL_NULL;
+    int dir = 0, dir_from_above = 0;
+
+    while (old_node != AVL_NULL)
+      {
+	dir = AVL_COMPARE_NODE_NODE(new_node, old_node);
+	if (dir == 0)
+	  break;
+	dir_from_above = dir;
+	above = old_node;
+	if (dir < 0)
+	  old_node = AVL_GET_LESS(old_node, 1);
+	else
+	  old_node = AVL_GET_GREATER(old_node, 1);
+	L__CHECK_READ_ERROR(AVL_NULL)
+      }
+
+    if (old_node == AVL_NULL)
+      /* No node in the tree has the same key as new_node. */
+      return(AVL_NULL);
+
+    /* new_node inherits the old node's links and balance factor. */
+    AVL_SET_LESS(new_node, AVL_GET_LESS(old_node, 0))
+    AVL_SET_GREATER(new_node, AVL_GET_GREATER(old_node, 0))
+    AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(old_node))
+
+    /* Re-point the root, or the parent link that led to the old node. */
+    if (above == AVL_NULL)
+      L__tree->root = new_node;
+    else if (dir_from_above < 0)
+      AVL_SET_LESS(above, new_node)
+    else
+      AVL_SET_GREATER(above, new_node)
+
+    return(old_node);
+  }
+
+#endif
+
+#ifdef AVL_BUILD_ITER_TYPE
+
+#if (L__IMPL_MASK & AVL_IMPL_BUILD)
+/* Builds the tree from num_nodes nodes read via p; returns 1, or 0 on read error. */
+L__SC int L__(build)(
+  L__(avl) *L__tree, AVL_BUILD_ITER_TYPE p, L__SIZE num_nodes)
+  {
+    /* Gives path to subtree being built.  If bit n is false, branch
+    ** less from the node at depth n, if true branch greater. */
+    L__BIT_ARR_DEFN(branch)
+
+    /* If bit n is true, then for the current subtree at depth n, its
+    ** greater subtree has one more node than its less subtree. */
+    L__BIT_ARR_DEFN(rem)
+
+    /* Depth of root node of current subtree. */
+    unsigned depth = 0;
+
+    /* Number of nodes in current subtree. */
+    L__SIZE num_sub = num_nodes;
+
+    /* The algorithm relies on a stack of nodes whose less subtree has
+    ** been built, but whose greater subtree has not yet been built.
+    ** The stack is implemented as linked list.  The nodes are linked
+    ** together by having the "greater" handle of a node set to the
+    ** next node in the list.  "less_parent" is the handle of the first
+    ** node in the list. */
+    AVL_HANDLE less_parent = AVL_NULL;
+
+    /* h is root of current subtree, child is one of its children. */
+    AVL_HANDLE h;
+    AVL_HANDLE child;
+    /* NOTE(review): keys are never compared here; p presumably yields them sorted. */
+    if (num_nodes == 0)
+      {
+	L__tree->root = AVL_NULL;
+	return(1);
+      }
+    /* Each pass builds one 1- or 2-node subtree, then attaches finished subtrees. */
+    for ( ; ; )
+      {
+	while (num_sub > 2)
+	  {
+	    /* Subtract one for root of subtree. */
+	    num_sub--;
+	    if (num_sub & 1)
+	      L__BIT_ARR_1(rem, depth)
+	    else
+	      L__BIT_ARR_0(rem, depth)
+	    L__BIT_ARR_0(branch, depth)
+	    depth++;
+	    num_sub >>= 1;
+	  }
+
+	if (num_sub == 2)
+	  {
+	    /* Build a subtree with two nodes, slanting to greater.
+	    ** I arbitrarily chose to always have the extra node in the
+	    ** greater subtree when there is an odd number of nodes to
+	    ** split between the two subtrees. */
+
+	    h = AVL_BUILD_ITER_VAL(p);
+	    L__CHECK_READ_ERROR(0)
+	    AVL_BUILD_ITER_INCR(p)
+	    child = AVL_BUILD_ITER_VAL(p);
+	    L__CHECK_READ_ERROR(0)
+	    AVL_BUILD_ITER_INCR(p)
+	    AVL_SET_LESS(child, AVL_NULL)
+	    AVL_SET_GREATER(child, AVL_NULL)
+	    AVL_SET_BALANCE_FACTOR(child, 0)
+	    AVL_SET_GREATER(h, child)
+	    AVL_SET_LESS(h, AVL_NULL)
+	    AVL_SET_BALANCE_FACTOR(h, 1)
+	  }
+	else  /* num_sub == 1 */
+	  {
+	    /* Build a subtree with one node. */
+
+	    h = AVL_BUILD_ITER_VAL(p);
+	    L__CHECK_READ_ERROR(0)
+	    AVL_BUILD_ITER_INCR(p)
+	    AVL_SET_LESS(h, AVL_NULL)
+	    AVL_SET_GREATER(h, AVL_NULL)
+	    AVL_SET_BALANCE_FACTOR(h, 0)
+	  }
+	/* Climb for as long as the subtree just completed is a greater subtree. */
+	while (depth)
+	  {
+	    depth--;
+	    if (!L__BIT_ARR_VAL(branch, depth))
+	      /* We've completed a less subtree. */
+	      break;
+
+	    /* We've completed a greater subtree, so attach it to
+	    ** its parent (that is less than it).  We pop the parent
+	    ** off the stack of less parents. */
+	    child = h;
+	    h = less_parent;
+	    less_parent = AVL_GET_GREATER(h, 1);
+	    L__CHECK_READ_ERROR(0)
+	    AVL_SET_GREATER(h, child)
+	    /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
+	    num_sub <<= 1;
+	    num_sub += L__BIT_ARR_VAL(rem, depth) ? 0 : 1;
+	    if (num_sub & (num_sub - 1))
+	      /* num_sub is not a power of 2. */
+	      AVL_SET_BALANCE_FACTOR(h, 0)
+	    else
+	      /* num_sub is a power of 2. */
+	      AVL_SET_BALANCE_FACTOR(h, 1)
+	  }
+
+	if (num_sub == num_nodes)
+	  /* We've completed the full tree. */
+	  break;
+
+	/* The subtree we've completed is the less subtree of the
+	** next node in the sequence. */
+
+	child = h;
+	h = AVL_BUILD_ITER_VAL(p);
+	L__CHECK_READ_ERROR(0)
+	AVL_BUILD_ITER_INCR(p)
+	AVL_SET_LESS(h, child)
+
+	/* Put h into stack of less parents. */
+	AVL_SET_GREATER(h, less_parent)
+	less_parent = h;
+
+	/* Proceed to creating greater than subtree of h. */
+	L__BIT_ARR_1(branch, depth)
+	num_sub += L__BIT_ARR_VAL(rem, depth) ? 1 : 0;
+	depth++;
+
+      } /* end for ( ; ; ) */
+
+    L__tree->root = h;
+
+    return(1);
+  }
+
+#endif
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INIT_ITER)
+
+/* Gives the iterator the out-of-range depth ~0, which marks it as invalid
+** (valid depths are zero-based).  Iterators need not be initialized
+** before being passed to a "start" function. */
+L__SC void L__(init_iter)(L__(iter) *iter)
+  { iter->depth = ~0; }
+
+#endif
+
+#ifdef AVL_READ_ERRORS_HAPPEN
+/* On a read error, invalidate the variable named "iter" and return (void). */
+#define L__CHECK_READ_ERROR_INV_DEPTH \
+{ if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
+
+#else
+/* Without AVL_READ_ERRORS_HAPPEN the check expands to nothing. */
+#define L__CHECK_READ_ERROR_INV_DEPTH
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER)
+/* Positions iter at the node selected by k and st (left invalid if none). */
+L__SC void L__(start_iter)(
+  L__(avl) *L__tree, L__(iter) *iter, AVL_KEY k, avl_search_type st)
+  {
+    AVL_HANDLE h = L__tree->root;
+    unsigned d = 0;
+    int cmp, target_cmp;
+
+    /* Save the tree that we're going to iterate through in a
+    ** member variable. */
+    iter->tree_ = L__tree;
+    /* The iterator stays invalid unless a qualifying node is found below. */
+    iter->depth = ~0;
+
+    if (h == AVL_NULL)
+      /* Tree is empty. */
+      return;
+
+    if (st & AVL_LESS)
+      /* Key can be greater than key of starting node. */
+      target_cmp = 1;
+    else if (st & AVL_GREATER)
+      /* Key can be less than key of starting node. */
+      target_cmp = -1;
+    else
+      /* Key must be same as key of starting node. */
+      target_cmp = 0;
+    /* Descend, recording the path; depth ends at the last qualifying node. */
+    for ( ; ; )
+      {
+	cmp = AVL_COMPARE_KEY_NODE(k, h);
+	if (cmp == 0)
+	  {
+	    if (st & AVL_EQUAL)
+	      {
+		/* Equal node was sought and found as starting node. */
+		iter->depth = d;
+		break;
+	      }
+	    cmp = -target_cmp;
+	  }
+	else if (target_cmp != 0)
+	  if (!((cmp ^ target_cmp) & L__MASK_HIGH_BIT))
+	    /* cmp and target_cmp are both negative or both positive. */
+	    iter->depth = d;
+	h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
+	L__CHECK_READ_ERROR_INV_DEPTH
+	if (h == AVL_NULL)
+	  break;
+	if (cmp > 0)
+	  L__BIT_ARR_1(iter->branch, d)
+	else
+	  L__BIT_ARR_0(iter->branch, d)
+	iter->path_h[d++] = h;
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_LEAST)
+/* Position iter at the end of the chain of less children from the root; iter stays invalid if the tree is empty. */
+L__SC void L__(start_iter_least)(L__(avl) *L__tree, L__(iter) *iter)
+  {
+    AVL_HANDLE h = L__tree->root;
+
+    iter->tree_ = L__tree;
+
+    iter->depth = ~0;  /* invalid; becomes 0 once the root is visited */
+
+    L__BIT_ARR_ALL(iter->branch, 0)  /* presumably clears every branch bit -- every step below goes less */
+
+    while (h != AVL_NULL)
+      {
+	if (iter->depth != ~0)  /* the root itself is never stored in path_h */
+	  iter->path_h[iter->depth] = h;
+	iter->depth++;
+	h = AVL_GET_LESS(h, 1);
+	L__CHECK_READ_ERROR_INV_DEPTH
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_START_ITER_GREATEST)
+/* Position iter at the end of the chain of greater children from the root; iter stays invalid if the tree is empty. */
+L__SC void L__(start_iter_greatest)(L__(avl) *L__tree, L__(iter) *iter)
+  {
+    AVL_HANDLE h = L__tree->root;
+
+    iter->tree_ = L__tree;
+
+    iter->depth = ~0;  /* invalid; becomes 0 once the root is visited */
+
+    L__BIT_ARR_ALL(iter->branch, 1)  /* presumably sets every branch bit -- every step below goes greater */
+
+    while (h != AVL_NULL)
+      {
+	if (iter->depth != ~0)  /* the root itself is never stored in path_h */
+	  iter->path_h[iter->depth] = h;
+	iter->depth++;
+	h = AVL_GET_GREATER(h, 1);
+	L__CHECK_READ_ERROR_INV_DEPTH
+      }
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_GET_ITER)
+/* Return the node iter currently refers to, or AVL_NULL when iter is invalid. */
+L__SC AVL_HANDLE L__(get_iter)(L__(iter) *iter)
+  {
+    if (iter->depth == ~0)
+      return(AVL_NULL);
+
+    return(iter->depth == 0 ?  /* depth 0 is the root; deeper nodes come from path_h */
+	     iter->tree_->root : iter->path_h[iter->depth - 1]);
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_INCR_ITER)
+/* Advance iter to the next greater node; iter becomes invalid once it moves past the last node. */
+L__SC void L__(incr_iter)(L__(iter) *iter)
+  {
+    #define L__tree (iter->tree_)
+    /* An invalid iterator is left unchanged. */
+    if (iter->depth != ~0)
+      {
+	AVL_HANDLE h =
+	  AVL_GET_GREATER((iter->depth == 0 ?
+	    iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+	L__CHECK_READ_ERROR_INV_DEPTH
+	/* No greater child: climb to an ancestor.  Otherwise: take the greater child, then follow less children. */
+	if (h == AVL_NULL)
+	  do
+	    {
+	      if (iter->depth == 0)
+		{
+		  iter->depth = ~0;  /* climbed past the root: nothing greater remains */
+		  break;
+		}
+	      iter->depth--;
+	    }
+	  while (L__BIT_ARR_VAL(iter->branch, iter->depth));  /* keep climbing while we had descended via a greater branch */
+	else
+	  {
+	    L__BIT_ARR_1(iter->branch, iter->depth)
+	    iter->path_h[iter->depth++] = h;
+	    for ( ; ; )
+	      {
+		h = AVL_GET_LESS(h, 1);
+		L__CHECK_READ_ERROR_INV_DEPTH
+		if (h == AVL_NULL)
+		  break;
+		L__BIT_ARR_0(iter->branch, iter->depth)
+		iter->path_h[iter->depth++] = h;
+	      }
+	  }
+      }
+
+    #undef L__tree
+  }
+
+#endif
+
+#if (L__IMPL_MASK & AVL_IMPL_DECR_ITER)
+/* Move iter back to the next lesser node; iter becomes invalid once it moves past the first node. */
+L__SC void L__(decr_iter)(L__(iter) *iter)
+  {
+    #define L__tree (iter->tree_)
+    /* An invalid iterator is left unchanged. */
+    if (iter->depth != ~0)
+      {
+	AVL_HANDLE h =
+	  AVL_GET_LESS((iter->depth == 0 ?
+	    iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
+	L__CHECK_READ_ERROR_INV_DEPTH
+	/* No less child: climb to an ancestor.  Otherwise: take the less child, then follow greater children. */
+	if (h == AVL_NULL)
+	  do
+	    {
+	      if (iter->depth == 0)
+		{
+		  iter->depth = ~0;  /* climbed past the root: nothing lesser remains */
+		  break;
+		}
+	      iter->depth--;
+	    }
+	  while (!L__BIT_ARR_VAL(iter->branch, iter->depth));  /* keep climbing while we had descended via a less branch */
+	else
+	  {
+	    L__BIT_ARR_0(iter->branch, iter->depth)
+	    iter->path_h[iter->depth++] = h;
+	    for ( ; ; )
+	      {
+		h = AVL_GET_GREATER(h, 1);
+		L__CHECK_READ_ERROR_INV_DEPTH
+		if (h == AVL_NULL)
+		  break;
+		L__BIT_ARR_1(iter->branch, iter->depth)
+		iter->path_h[iter->depth++] = h;
+	      }
+	  }
+      }
+
+    #undef L__tree
+  }
+
+#endif
+
+/* Tidy up the preprocessor symbol name space. */
+#undef L__
+#undef L__EST_LONG_BIT
+#undef L__SIZE
+#undef L__MASK_HIGH_BIT
+#undef L__LONG_BIT
+#undef L__BIT_ARR_DEFN
+#undef L__BIT_ARR_VAL
+#undef L__BIT_ARR_0
+#undef L__BIT_ARR_1
+#undef L__BIT_ARR_ALL
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__BIT_ARR_LONGS
+#undef L__IMPL_MASK
+#undef L__CHECK_READ_ERROR
+#undef L__CHECK_READ_ERROR_INV_DEPTH
+#undef L__SC
+#undef L__BALANCE_PARAM_CALL_PREFIX
+#undef L__BALANCE_PARAM_DECL_PREFIX
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h
new file mode 100644
index 00000000..797e4d07
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/heapmm.h
@@ -0,0 +1,142 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+/* External header file for Heap Memory Manager.  See documentation in
+** heapmm.html.
+*/
+
+#undef HMM_PROCESS
+
+/* Include once per configuration in a particular translation unit. */
+
+#ifndef HMM_CNFG_NUM
+
+/* Default configuration. */
+
+#ifndef HMM_INC_CNFG_DFLT
+#define HMM_INC_CNFG_DFLT
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 0
+
+/* Test configuration. */
+
+#ifndef HMM_INC_CNFG_0
+#define HMM_INC_CNFG_0
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 1
+
+#ifndef HMM_INC_CNFG_1
+#define HMM_INC_CNFG_1
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 2
+
+#ifndef HMM_INC_CNFG_2
+#define HMM_INC_CNFG_2
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 3
+
+#ifndef HMM_INC_CNFG_3
+#define HMM_INC_CNFG_3
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 4
+
+#ifndef HMM_INC_CNFG_4
+#define HMM_INC_CNFG_4
+#define HMM_PROCESS
+#endif
+
+#elif HMM_CNFG_NUM == 5
+
+#ifndef HMM_INC_CNFG_5
+#define HMM_INC_CNFG_5
+#define HMM_PROCESS
+#endif
+
+#endif
+
+#ifdef HMM_PROCESS
+
+#include "hmm_cnfg.h"
+
+/* Heap descriptor. */
+typedef struct HMM_UNIQUE(structure)
+  {
+    /* private: */
+
+    /* Pointer to (payload of) root node in AVL tree.  This field should
+    ** really be the AVL tree descriptor (type avl_avl).  But (in the
+    ** instantiation of the AVL tree generic package used in this package) the
+    ** AVL tree descriptor simply contains a pointer to the root.  So,
+    ** whenever a pointer to the AVL tree descriptor is needed, I use the
+    ** cast:
+    **
+    ** (avl_avl *) &(heap_desc->avl_tree_root)
+    **
+    ** (where heap_desc is a pointer to a heap descriptor).  This trick
+    ** allows me to avoid including cavl_if.h in this external header. */
+    void *avl_tree_root;
+
+    /* Pointer to first byte of last block freed, after any coalescing. */
+    void *last_freed;
+
+    /* public: */
+
+    HMM_UNIQUE(size_bau) num_baus_can_shrink;
+    void *end_of_shrinkable_chunk;
+  }
+HMM_UNIQUE(descriptor);
+
+/* Prototypes for externally-callable functions. */
+
+void HMM_UNIQUE(init)(HMM_UNIQUE(descriptor) *desc);
+
+void * HMM_UNIQUE(alloc)(
+  HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) num_addr_align_units);
+
+/* NOT YET IMPLEMENTED */
+void * HMM_UNIQUE(greedy_alloc)(
+  HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) needed_addr_align_units,
+  HMM_UNIQUE(size_aau) coveted_addr_align_units);
+
+int HMM_UNIQUE(resize)(
+  HMM_UNIQUE(descriptor) *desc, void *mem,
+  HMM_UNIQUE(size_aau) num_addr_align_units);
+
+/* NOT YET IMPLEMENTED */
+int HMM_UNIQUE(greedy_resize)(
+  HMM_UNIQUE(descriptor) *desc, void *mem,
+  HMM_UNIQUE(size_aau) needed_addr_align_units,
+  HMM_UNIQUE(size_aau) coveted_addr_align_units);
+
+void HMM_UNIQUE(free)(HMM_UNIQUE(descriptor) *desc, void *mem);
+
+HMM_UNIQUE(size_aau) HMM_UNIQUE(true_size)(void *mem);
+
+HMM_UNIQUE(size_aau) HMM_UNIQUE(largest_available)(
+  HMM_UNIQUE(descriptor) *desc);
+
+void HMM_UNIQUE(new_chunk)(
+  HMM_UNIQUE(descriptor) *desc, void *start_of_chunk,
+  HMM_UNIQUE(size_bau) num_block_align_units);
+
+void HMM_UNIQUE(grow_chunk)(
+  HMM_UNIQUE(descriptor) *desc, void *end_of_chunk,
+  HMM_UNIQUE(size_bau) num_block_align_units);
+
+/* NOT YET IMPLEMENTED */
+void HMM_UNIQUE(shrink_chunk)(
+  HMM_UNIQUE(descriptor) *desc,
+  HMM_UNIQUE(size_bau) num_block_align_units);
+
+#endif /* defined HMM_PROCESS */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h
new file mode 100644
index 00000000..3a453ef4
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_cnfg.h
@@ -0,0 +1,105 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+/* Configure Heap Memory Manager for processor architecture, compiler,
+** and desired performance characteristics.  This file is included
+** by heapmm.h, so these definitions can be used by code external to
+** HMM.  You can change the default configuration, and/or create alternate
+** configuration(s).
+*/
+
+/* To allow for multiple configurations of HMM to be used in the same
+** compilation unit, undefine all preprocessor symbols that will be
+** defined below.
+*/
+#undef HMM_ADDR_ALIGN_UNIT
+#undef HMM_BLOCK_ALIGN_UNIT
+#undef HMM_UNIQUE
+#undef HMM_DESC_PARAM
+#undef HMM__SYM_TO_STRING
+#undef HMM_SYM_TO_STRING
+#undef HMM_AUDIT_FAIL
+
+/* Turn X into a string after one macro expansion pass of X.  This trick
+** works with both GCC and Visual C++. */
+#define HMM_SYM_TO_STRING(X) HMM__SYM_TO_STRING(X)
+#define HMM__SYM_TO_STRING(X) #X
+
+#ifndef HMM_CNFG_NUM
+
+/* Default configuration. */
+
+/* Use hmm_ prefix to avoid identifier conflicts. */
+#define HMM_UNIQUE(BASE) hmm_ ## BASE
+
+/* Number of bytes in an Address Alignment Unit (AAU). */
+//fwghack
+//#define HMM_ADDR_ALIGN_UNIT sizeof(int) 
+#define HMM_ADDR_ALIGN_UNIT 32
+
+/* Number of AAUs in a Block Alignment Unit (BAU). */
+#define HMM_BLOCK_ALIGN_UNIT 1
+
+/* Type of unsigned integer big enough to hold the size of a Block in AAUs. */
+typedef unsigned long HMM_UNIQUE(size_aau);
+
+/* Type of unsigned integer big enough to hold the size of a Block/Chunk
+** in BAUs.  The high bit will be robbed. */
+typedef unsigned long HMM_UNIQUE(size_bau);
+
+void HMM_dflt_abort(const char *, const char *);
+
+/* Actions upon a self-audit failure.  Must expand to a single complete
+** statement.  If you remove the definition of this macro, no self-auditing
+** will be performed. */
+#define HMM_AUDIT_FAIL \
+  HMM_dflt_abort(__FILE__, HMM_SYM_TO_STRING(__LINE__));
+
+#elif HMM_CNFG_NUM == 0
+
+/* Definitions for testing. */
+
+#define HMM_UNIQUE(BASE) thmm_ ## BASE
+
+#define HMM_ADDR_ALIGN_UNIT sizeof(int) 
+
+#define HMM_BLOCK_ALIGN_UNIT 3
+
+typedef unsigned HMM_UNIQUE(size_aau);
+
+typedef unsigned short HMM_UNIQUE(size_bau);
+
+/* Under this test setup, a long jump is done if there is a self-audit
+** failure.
+*/
+
+extern jmp_buf HMM_UNIQUE(jmp_buf);
+extern const char * HMM_UNIQUE(fail_file);
+extern unsigned HMM_UNIQUE(fail_line);
+
+#define HMM_AUDIT_FAIL \
+   { HMM_UNIQUE(fail_file) = __FILE__; HMM_UNIQUE(fail_line) = __LINE__; \
+     longjmp(HMM_UNIQUE(jmp_buf), 1); }
+
+#elif HMM_CNFG_NUM == 1
+
+/* Put configuration 1 definitions here (if there is a configuration 1). */
+
+#elif HMM_CNFG_NUM == 2
+
+/* Put configuration 2 definitions here. */
+
+#elif HMM_CNFG_NUM == 3
+
+/* Put configuration 3 definitions here. */
+
+#elif HMM_CNFG_NUM == 4
+
+/* Put configuration 4 definitions here. */
+
+#elif HMM_CNFG_NUM == 5
+
+/* Put configuration 5 definitions here. */
+
+#endif
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h
new file mode 100644
index 00000000..bc6500d5
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/memory_manager/include/hmm_intrnl.h
@@ -0,0 +1,149 @@
+/* This code is in the public domain.
+** Version: 1.1  Author: Walt Karas
+*/
+
+#ifndef HMM_INTRNL_H_
+#define HMM_INTRNL_H_
+
+#include "heapmm.h"
+
+#define U(BASE) HMM_UNIQUE(BASE)
+
+/* Mask of high bit of variable of size_bau type. */
+#define HIGH_BIT_BAU_SIZE \
+  ((U(size_bau)) ~ (((U(size_bau)) ~ (U(size_bau)) 0) >> 1))
+
+/* Add a given number of AAUs to pointer. */
+#define AAUS_FORWARD(PTR, AAU_OFFSET) \
+  (((char *) (PTR)) + ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+
+/* Subtract a given number of AAUs from pointer. */
+#define AAUS_BACKWARD(PTR, AAU_OFFSET) \
+  (((char *) (PTR)) - ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
+
+/* Add a given number of BAUs to a pointer. */
+#define BAUS_FORWARD(PTR, BAU_OFFSET) \
+  AAUS_FORWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+
+/* Subtract a given number of BAUs from a pointer. */
+#define BAUS_BACKWARD(PTR, BAU_OFFSET) \
+  AAUS_BACKWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
+
+typedef struct head_struct
+  {
+    /* Sizes in Block Alignment Units. */
+    HMM_UNIQUE(size_bau) previous_block_size, block_size;
+  }
+head_record;
+
+typedef struct ptr_struct
+  {
+    struct ptr_struct *self, *prev, *next;
+  }
+ptr_record;
+
+/* Divide and round up any fraction to the next whole number. */
+#define DIV_ROUND_UP(NUMER, DENOM) (((NUMER) + (DENOM) - 1) / (DENOM))
+
+/* Number of AAUs in a block _head. */
+#define HEAD_AAUS DIV_ROUND_UP(sizeof(head_record), HMM_ADDR_ALIGN_UNIT)
+
+/* Number of AAUs in a block pointer record. */
+#define PTR_RECORD_AAUS DIV_ROUND_UP(sizeof(ptr_record), HMM_ADDR_ALIGN_UNIT)
+
+/* Number of BAUs in a dummy end record (at end of chunk). */
+#define DUMMY_END_BLOCK_BAUS DIV_ROUND_UP(HEAD_AAUS, HMM_BLOCK_ALIGN_UNIT)
+
+/* Minimum number of BAUs in a block (allowing room for the pointer record). */
+#define MIN_BLOCK_BAUS \
+  DIV_ROUND_UP(HEAD_AAUS + PTR_RECORD_AAUS, HMM_BLOCK_ALIGN_UNIT)
+
+/* Return number of BAUs in block (masking off high bit containing block
+** status). */
+#define BLOCK_BAUS(HEAD_PTR) \
+  (((head_record *) (HEAD_PTR))->block_size & ~HIGH_BIT_BAU_SIZE)
+
+/* Return number of BAUs in previous block (masking off high bit containing
+** block status). */
+#define PREV_BLOCK_BAUS(HEAD_PTR) \
+  (((head_record *) (HEAD_PTR))->previous_block_size & ~HIGH_BIT_BAU_SIZE)
+
+/* Set number of BAUs in previous block, preserving high bit containing
+** block status. */
+#define SET_PREV_BLOCK_BAUS(HEAD_PTR, N_BAUS) \
+  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+    h_ptr->previous_block_size &= HIGH_BIT_BAU_SIZE; \
+    h_ptr->previous_block_size |= (N_BAUS); }
+
+/* Convert pointer to pointer record of block to pointer to block's _head
+** record. */
+#define PTR_REC_TO_HEAD(PTR_REC_PTR) \
+  ((head_record *) AAUS_BACKWARD(PTR_REC_PTR, HEAD_AAUS))
+
+/* Convert pointer to block _head to pointer to block's pointer record. */
+#define HEAD_TO_PTR_REC(HEAD_PTR) \
+  ((ptr_record *) AAUS_FORWARD(HEAD_PTR, HEAD_AAUS))
+
+/* Returns non-zero if block is allocated. */
+#define IS_BLOCK_ALLOCATED(HEAD_PTR) \
+  (((((head_record *) (HEAD_PTR))->block_size | \
+     ((head_record *) (HEAD_PTR))->previous_block_size) & \
+    HIGH_BIT_BAU_SIZE) == 0)
+
+#define MARK_BLOCK_ALLOCATED(HEAD_PTR) \
+  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+    h_ptr->block_size &= ~HIGH_BIT_BAU_SIZE; \
+    h_ptr->previous_block_size &= ~HIGH_BIT_BAU_SIZE; }
+
+/* Mark a block as free when it is not the first block in a bin (and
+** therefore not a node in the AVL tree). */
+#define MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(HEAD_PTR) \
+  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
+    h_ptr->block_size |= HIGH_BIT_BAU_SIZE; }
+
+/* Prototypes for internal functions implemented in one file and called in
+** another.
+*/
+
+void U(into_free_collection)(U(descriptor) *desc, head_record *head_ptr);
+
+void U(out_of_free_collection)(U(descriptor) *desc, head_record *head_ptr);
+
+void * U(alloc_from_bin)(
+  U(descriptor) *desc, ptr_record *bin_front_ptr, U(size_bau) n_baus);
+
+#ifdef HMM_AUDIT_FAIL
+
+/* Simply contains a reference to the HMM_AUDIT_FAIL macro and a
+** dummy return. */
+int U(audit_block_fail_dummy_return)(void);
+
+/* More sickness needed because C has no inline function (yes, it's the
+** "use the comma operator like a semicolon" thing.)
+*/
+
+/* Auditing a block consists of checking that the size in its _head
+** matches the previous block size in the _head of the next block. */
+#define AUDIT_BLOCK_AS_EXPR(HEAD_PTR) \
+  ((BLOCK_BAUS(HEAD_PTR) == \
+    PREV_BLOCK_BAUS(BAUS_FORWARD(HEAD_PTR, BLOCK_BAUS(HEAD_PTR)))) ? \
+   0 : U(audit_block_fail_dummy_return)())
+
+#define AUDIT_BLOCK(HEAD_PTR) \
+  { void *h_ptr = (HEAD_PTR); AUDIT_BLOCK_AS_EXPR(h_ptr); }
+
+#endif
+
+/* Interface to AVL tree generic package instantiation. */
+
+#define AVL_UNIQUE(BASE) U(avl_ ## BASE)
+
+#define AVL_HANDLE ptr_record *
+
+#define AVL_KEY U(size_bau)
+
+#define AVL_MAX_DEPTH 64
+
+#include "cavl_if.h"
+
+#endif /* Include once. */
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c
new file mode 100644
index 00000000..3bbebac6
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.c
@@ -0,0 +1,561 @@
+#define __ON2_MEM_C__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "on2_mem.h"
+
+#define INCLUDE_MEMORY_MANAGER  0 //include heap manager functionality
+#define INCLUDE_MEM_TRACKER     0 //include xon2_* calls in the lib
+#define INCLUDE_MEM_CHECKS      1 //include some basic safety checks in
+                                  //on2_memcpy, _memset, and _memmove
+#if INCLUDE_MEM_TRACKER
+# include "on2_mem_tracker.h"
+# if ON2_MEM_TRACKER_VERSION_CHIEF != 2
+#  error "on2_mem requires memory tracker version 2 to track memory usage"
+# endif
+#endif
+
+#define ADDRESS_STORAGE_SIZE      sizeof(size_t)
+
+#if defined(VXWORKS)
+# define DEFAULT_ALIGNMENT        32        //default addr alignment to use in
+                                            //calls to on2_* functions other
+                                            //than on2_memalign
+#else
+# define DEFAULT_ALIGNMENT        1
+#endif
+
+#if INCLUDE_MEM_TRACKER
+# define TRY_BOUNDS_CHECK         1         //when set to 1 pads each allocation,
+                                            //integrity can be checked using 
+                                            //on2_MemoryTrackerCheckIntegrity
+                                            //or on free by defining
+                                            //TRY_BOUNDS_CHECK_ON_FREE
+#else
+# define TRY_BOUNDS_CHECK         0
+#endif
+
+#if TRY_BOUNDS_CHECK
+# define TRY_BOUNDS_CHECK_ON_FREE 0          //checks mem integrity on every
+                                             //free, very expensive
+# define BOUNDS_CHECK_VALUE       0xdeadbeef //value stored before/after ea.
+                                             //mem addr for bounds checking
+# define BOUNDS_CHECK_PAD_SIZE    32         //size of the padding before and
+                                             //after ea allocation to be filled
+                                             //with BOUNDS_CHECK_VALUE.
+                                             //this should be a multiple of 4
+#else
+# define BOUNDS_CHECK_VALUE       0
+# define BOUNDS_CHECK_PAD_SIZE    0
+#endif
+
+unsigned long g_AllocCount = 0;
+
+#if INCLUDE_MEMORY_MANAGER
+# include "heapmm.h"
+# include "hmm_intrnl.h"
+
+# define SHIFT_HMM_ADDR_ALIGN_UNIT 5
+# define TOTAL_MEMORY_TO_ALLOCATE  20971520 // 20 * 1024 * 1024
+//# define TOTAL_MEMORY_TO_ALLOCATE 10485100 // 10 * 1024 * 1024
+//# define TOTAL_MEMORY_TO_ALLOCATE 16777216 // 16 * 1024 * 1024
+
+# define MM_DYNAMIC_MEMORY 1
+# if MM_DYNAMIC_MEMORY
+   unsigned char* g_p_mng_memory_raw = NULL;
+   unsigned char* g_p_mng_memory     = NULL;
+# else
+   unsigned char g_p_mng_memory[TOTAL_MEMORY_TO_ALLOCATE];
+# endif
+
+ size_t g_mm_memory_size = TOTAL_MEMORY_TO_ALLOCATE;
+
+ hmm_descriptor hmm_d;
+ int g_mngMemoryAllocated = 0;
+
+ static int On2_MM_CreateHeapMemory();
+ static void* On2_MM_realloc(void* memblk, size_t size);
+#endif //INCLUDE_MEMORY_MANAGER
+/* Pack the library version one byte per field, most significant first: chief.major.minor.patch. */
+unsigned int on2_mem_get_version()
+{
+    unsigned int packed = (unsigned int)(unsigned char)ON2_MEM_VERSION_CHIEF;
+    packed = (packed << 8) | (unsigned int)(unsigned char)ON2_MEM_VERSION_MAJOR;
+    packed = (packed << 8) | (unsigned int)(unsigned char)ON2_MEM_VERSION_MINOR;
+    packed = (packed << 8) | (unsigned int)(unsigned char)ON2_MEM_VERSION_PATCH;
+    return packed;
+}
+/* Request a size for the built-in heap before it is created; returns 0 on success, a negative code otherwise. */
+int on2_mem_set_heap_size(size_t size)
+{
+    int ret = -1; /* result when the memory manager is compiled out */
+
+    #if INCLUDE_MEMORY_MANAGER
+    #if MM_DYNAMIC_MEMORY
+    if(!g_mngMemoryAllocated && size) {
+        g_mm_memory_size = size;
+        ret = 0;
+    } else
+        ret = -3; /* heap already created, or size is 0 */
+    #else
+    ret = -2; /* heap is a fixed-size static array */
+    #endif
+    #else
+    (void)size;
+    #endif
+
+    return ret;
+}
+/* Allocate size bytes aligned to align (a non-zero power of two); NULL on bad align, size overflow or failure. */
+void* on2_memalign(size_t align, size_t size)
+{
+    void* addr = NULL,
+	    * x = NULL;
+    /* raw request: payload + worst-case alignment slack + the stored raw address */
+    size_t total = size + align + ADDRESS_STORAGE_SIZE;
+    #if INCLUDE_MEMORY_MANAGER
+	int number_aau;
+    #endif
+    /* the mask below needs a power of two, and a wrapped total would under-allocate */
+    if (!align || (align & (align - 1)) || total < size)
+        return NULL;
+    #if INCLUDE_MEMORY_MANAGER
+	if (On2_MM_CreateHeapMemory() < 0)
+	{
+		printf("[on2][mm] ERROR xon2_memalign() Couldn't create memory for Heap.\n");
+		return NULL; /* no heap to allocate from */
+	}
+	number_aau = (total >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+	addr = hmm_alloc(&hmm_d, number_aau);
+    #else
+    addr = malloc(total);
+    #endif //INCLUDE_MEMORY_MANAGER
+
+    if(addr) {
+        x = (void*)(((size_t)
+                ((unsigned char*)addr + ADDRESS_STORAGE_SIZE) + (align - 1)) & ~(align - 1));
+        /* save the actual malloc address */
+        ((size_t*)x)[-1] = (size_t)addr;
+    }
+    return x;
+}
+/* Allocate size bytes through on2_memalign using DEFAULT_ALIGNMENT. */
+void* on2_malloc(size_t size)
+{   
+    return on2_memalign(DEFAULT_ALIGNMENT, size);
+}
+/* Allocate a zero-filled array of num elements of size bytes each; NULL on size overflow or failure. */
+void* on2_calloc(size_t num, size_t size)
+{
+	void *x = NULL;
+	/* refuse element counts whose byte total would wrap around size_t */
+	if (!size || num <= (size_t)-1 / size)
+		x = on2_memalign(DEFAULT_ALIGNMENT, num*size);
+
+	if(x)
+        memset(x, 0, num*size);
+	return x;
+}
+/* Resize a block obtained from on2_malloc/on2_calloc/on2_memalign, keeping its contents; NULL on failure. */
+void* on2_realloc(void* memblk, size_t size)
+{
+    void* addr,
+        * new_addr = NULL;
+    int align = DEFAULT_ALIGNMENT;
+    size_t old_off, total;
+	/*
+	Like realloc(): contents are kept up to the lesser of the old and new
+	sizes; a null memblk acts as on2_malloc(size); zero size frees memblk.
+	On failure NULL is returned and the old block stays valid.
+	*/
+    if(!memblk)
+        new_addr = on2_malloc(size);
+    else if (!size)
+        on2_free(memblk);
+    else
+    {
+        addr    = (void*)(((size_t*)memblk)[-1]);
+        /* payload offset inside the raw block; realloc() preserves it */
+        old_off = (size_t)((unsigned char*)memblk - (unsigned char*)addr);
+        total   = size + old_off + align; /* fits size bytes at either offset */
+        #if INCLUDE_MEMORY_MANAGER
+        new_addr = total < size ? NULL : On2_MM_realloc(addr, total);
+        #else
+        new_addr = total < size ? NULL : realloc(addr, total);
+        #endif
+        if(new_addr) {
+            addr = new_addr;
+            new_addr = (void*)(((size_t)
+                ((unsigned char*)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) &
+                (size_t)-align);
+            /* re-aligning may move the payload; shift it before the header write can clobber it */
+            if ((unsigned char*)new_addr != (unsigned char*)addr + old_off)
+                memmove(new_addr, (unsigned char*)addr + old_off, size);
+            ((size_t*)new_addr)[-1] = (size_t)addr; /* save the actual malloc address */
+        }
+    }
+    return new_addr;
+}
+/* Release a block returned by the on2_ allocation functions; a NULL memblk is ignored. */
+void on2_free(void* memblk)
+{
+	if(memblk) {
+        void* addr = (void*)(((size_t*)memblk)[-1]); /* raw address stored just below the block */
+        #if INCLUDE_MEMORY_MANAGER
+	    hmm_free(&hmm_d, addr);
+        #else
+        free(addr);
+        #endif
+    }
+}
+
+#if INCLUDE_MEM_TRACKER
+/* Tracked on2_memalign: records the allocation with file/line in the memory tracker; NULL on failure. */
+void* xon2_memalign(size_t align, size_t size, char* file, int line)
+{
+    #if TRY_BOUNDS_CHECK
+	unsigned char *xBounds; 
+    #endif
+	void *x = NULL;
+
+	if (g_AllocCount == 0)
+	{
+		int iRv = on2_MemoryTrackerInit(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE);
+		if (iRv < 0)
+		{
+			printf("ERROR xon2_malloc MEM_TRACK_USAGE error on2_MemoryTrackerInit().\n");
+		}
+	}
+
+    #if TRY_BOUNDS_CHECK
+    xBounds = on2_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2));
+    if (xBounds)
+	{
+        int i;
+		unsigned int tempme = BOUNDS_CHECK_VALUE;
+        /* stamp the guard value over the pad before and after the payload */
+        for (i=0;i<BOUNDS_CHECK_PAD_SIZE;i+=sizeof(unsigned int))
+        {
+		    memcpy(xBounds+i, &tempme, sizeof(unsigned int));
+		    memcpy(xBounds + size + BOUNDS_CHECK_PAD_SIZE + i, &tempme, sizeof(unsigned int));
+        }
+		x = (void*)(xBounds + BOUNDS_CHECK_PAD_SIZE);
+	}
+    #else
+    x = on2_memalign(align, size);
+    #endif //TRY_BOUNDS_CHECK
+
+    /* a failed allocation must not be written to, counted or handed to the tracker */
+    if (!x)
+        return NULL;
+	g_AllocCount++;
+    on2_MemoryTrackerAdd((size_t)x, size, file, line);
+    return x;
+}
+/* Tracked on2_malloc: forwards to xon2_memalign with DEFAULT_ALIGNMENT. */
+void* xon2_malloc(size_t size, char *file, int line)
+{
+    return xon2_memalign(DEFAULT_ALIGNMENT, size, file, line);
+}
+/* Tracked on2_calloc: zero-filled array of num elements of size bytes; NULL on size overflow or failure. */
+void* xon2_calloc(size_t num, size_t size, char *file, int line)
+{
+    void* x = NULL;
+    if (!size || num <= (size_t)-1 / size) /* num*size must not wrap around size_t */
+        x = xon2_memalign(DEFAULT_ALIGNMENT, num*size, file, line);
+	if(x)
+        memset(x, 0, num*size);
+    return x;
+}
+/* Tracked on2_realloc: moves the tracker entry for memblk to the resized block; NULL on failure. */
+void* xon2_realloc(void* memblk, size_t size, char *file, int line)
+{
+    struct MemBlock* p = NULL;
+    int orig_size = 0,
+        orig_line = 0;
+    char* orig_file = NULL;
+
+    #if TRY_BOUNDS_CHECK
+	unsigned char *xBounds = memblk ?
+                             (unsigned char*)memblk - BOUNDS_CHECK_PAD_SIZE :
+                             NULL;
+    #endif
+
+	void *x;
+
+	if (g_AllocCount == 0)
+	{
+		/* same convention as xon2_memalign: a negative return is the failure */
+		if (on2_MemoryTrackerInit(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE) < 0)
+		{
+			printf("ERROR xon2_malloc MEM_TRACK_USAGE error on2_MemoryTrackerInit().\n");
+		}
+	}
+
+    /* remember the old entry so it can be restored if the resize fails */
+    if ((p = on2_MemoryTrackerFind((size_t)memblk)) != NULL)
+    {
+        orig_size = p->size;
+        orig_file = p->file;
+        orig_line = p->line;
+    }
+
+    #if TRY_BOUNDS_CHECK_ON_FREE
+    on2_MemoryTrackerCheckIntegrity(file, line);
+    #endif
+
+    //have to do this regardless of success, because
+    //the memory that does get realloc'd may change
+    //the bounds values of this block
+    on2_MemoryTrackerRemove((size_t)memblk);
+
+    #if TRY_BOUNDS_CHECK
+	{
+		xBounds = on2_realloc(xBounds, size + (BOUNDS_CHECK_PAD_SIZE * 2));
+        if (xBounds)
+        {
+            int i;
+		    unsigned int tempme = BOUNDS_CHECK_VALUE;
+            for (i=0;i<BOUNDS_CHECK_PAD_SIZE;i+=sizeof(unsigned int))
+            {
+		        memcpy(xBounds+i, &tempme, sizeof(unsigned int));
+		        memcpy(xBounds + size + BOUNDS_CHECK_PAD_SIZE + i, &tempme, sizeof(unsigned int));
+            }
+		    x = (void*)(xBounds + BOUNDS_CHECK_PAD_SIZE);
+        }
+        else
+            x = NULL;
+	}
+    #else
+    x = on2_realloc(memblk, size);
+    #endif //TRY_BOUNDS_CHECK
+
+    if (x) {
+        if (!memblk) g_AllocCount++; /* acted as a fresh allocation, so count it */
+        on2_MemoryTrackerAdd((size_t)x, size, file, line);
+    } else if (memblk) /* nothing to restore when there was no old block */
+        on2_MemoryTrackerAdd((size_t)memblk, orig_size, orig_file, orig_line);
+
+    return x;
+}
+/* Tracked on2_free: drops pAddress from the memory tracker and releases it; a NULL pAddress is ignored. */
+void xon2_free(void *pAddress, char *file, int line)
+{
+    #if TRY_BOUNDS_CHECK
+	unsigned char *pBoundsAddress = (unsigned char*)pAddress;
+    #endif
+
+    #if !TRY_BOUNDS_CHECK_ON_FREE
+    (void)file; (void)line;
+    #endif
+
+	if(pAddress)
+	{
+	    g_AllocCount--;
+
+        #if TRY_BOUNDS_CHECK_ON_FREE
+        on2_MemoryTrackerCheckIntegrity(file, line);
+        #endif
+
+        #if TRY_BOUNDS_CHECK
+        //if the addr isn't found in the list, assume it was allocated via
+        //on2_ calls not xon2_, therefore it does not contain any padding;
+        //only tracked blocks are stepped back over their leading pad
+        if (on2_MemoryTrackerRemove((size_t)pAddress) != -2)
+            pBoundsAddress -= BOUNDS_CHECK_PAD_SIZE;
+	    on2_free(pBoundsAddress);
+        #else
+        on2_MemoryTrackerRemove((size_t)pAddress);
+	    on2_free(pAddress);
+        #endif
+    }
+}
+
+#endif /*INCLUDE_MEM_TRACKER*/
+
+#if INCLUDE_MEM_CHECKS
+#if defined(VXWORKS)
+/* This function is only used to get a stack trace of the player 
+object so we can see where we are having a problem. */
+int getMyTT(int task)
+{
+	tt(task);
+
+	return 0;
+}
+#endif
+#endif
+/* memcpy wrapper; with INCLUDE_MEM_CHECKS it first warns when either pointer lies below 0x4000. */
+void * on2_memcpy(void *dest, const void *source, size_t length)
+{
+    #if INCLUDE_MEM_CHECKS
+	/* unsigned compare: a signed one also flags every address with the top bit set */
+	if (((uintptr_t)dest < 0x4000) || ((uintptr_t)source < 0x4000))
+	{
+		/* size_t is not an int: print it as unsigned long */
+		printf("WARNING: on2_memcpy dest:0x%p source:0x%p len:%lu\n", dest, source, (unsigned long)length);
+        #if defined(VXWORKS)
+		sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+		on2Timer_Sleep(10000);
+        #endif
+	}
+    #endif
+
+	return memcpy(dest, source, length);
+}
+
+/* memset wrapper; with INCLUDE_MEM_CHECKS it first warns when dest lies below 0x4000. */
+void * on2_memset(void *dest, int val, size_t length)
+{
+    #if INCLUDE_MEM_CHECKS
+	/* unsigned compare: a signed one also flags every address with the top bit set */
+	if ((uintptr_t)dest < 0x4000)
+	{
+		/* size_t is not an int: print it as unsigned long */
+		printf("WARNING: on2_memset dest:0x%p val:%d len:%lu\n", dest, val, (unsigned long)length);
+        #if defined(VXWORKS)
+		sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+		on2Timer_Sleep(10000);
+        #endif
+	}
+    #endif
+
+	return memset(dest, val, length);
+}
+
+/* memmove wrapper; with INCLUDE_MEM_CHECKS it first warns when either pointer lies below 0x4000. */
+void * on2_memmove(void *dest, const void *src, size_t count)
+{
+    #if INCLUDE_MEM_CHECKS
+	/* unsigned compare: a signed one also flags every address with the top bit set */
+	if (((uintptr_t)dest < 0x4000) || ((uintptr_t)src < 0x4000))
+	{
+		/* size_t is not an int: print it as unsigned long */
+		printf("WARNING: on2_memmove dest:0x%p src:0x%p count:%lu\n", dest, src, (unsigned long)count);
+        #if defined(VXWORKS)
+		sp(getMyTT, taskIdSelf(), 0, 0, 0, 0, 0, 0, 0, 0);
+		on2Timer_Sleep(10000);
+        #endif
+	}
+    #endif
+
+    return memmove(dest, src, count);
+}
+
+#if INCLUDE_MEMORY_MANAGER
+/* Create the managed heap on first use and hand it to hmm as one chunk; returns 0 on success, -1 on failure. */
+static int On2_MM_CreateHeapMemory()
+{
+	int iRv = 0;
+
+	if (!g_mngMemoryAllocated)
+	{
+        #if MM_DYNAMIC_MEMORY
+		/* over-allocate by one alignment unit so the start can be rounded up */
+		g_p_mng_memory_raw =
+            (unsigned char*)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT);
+
+		if (g_p_mng_memory_raw)
+		{
+			/* do the rounding in size_t: unsigned int would truncate a
+			   64-bit pointer */
+			g_p_mng_memory = (unsigned char*)((((size_t)g_p_mng_memory_raw) +
+                                               HMM_ADDR_ALIGN_UNIT-1) &
+                                              ~((size_t)HMM_ADDR_ALIGN_UNIT-1));
+
+			printf("[on2][mm] total memory size:%lu g_p_mng_memory_raw:%p g_p_mng_memory:%p\n"
+				, (unsigned long)(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT)
+				, (void*)g_p_mng_memory_raw
+				, (void*)g_p_mng_memory);
+		}
+		/* a failed malloc leaves g_p_mng_memory NULL and is reported by
+		   the else branch further down, once rather than twice */
+
+		if (g_p_mng_memory)
+        #endif
+		{
+			int chunkSize = 0;
+
+			g_mngMemoryAllocated = 1;
+
+			hmm_init(&hmm_d);
+
+			/* heap size in alignment units, less the dummy end block */
+			chunkSize = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT;
+
+			chunkSize -= DUMMY_END_BLOCK_BAUS;
+
+			printf("[on2][mm] memory size:%lu for on2 memory manager. g_p_mng_memory:%p  chunkSize:%d\n"
+				, (unsigned long)g_mm_memory_size
+				, (void*)g_p_mng_memory
+				, chunkSize);
+
+			hmm_new_chunk(&hmm_d, (void*)g_p_mng_memory, chunkSize);
+		}
+        #if MM_DYNAMIC_MEMORY
+		else
+		{
+			printf("[on2][mm] Couldn't allocate memory:%lu for on2 memory manager.\n"
+				, (unsigned long)g_mm_memory_size);
+
+			iRv = -1;
+		}
+        #endif
+	}
+
+	return iRv;
+}
+/* Resize a raw heap-manager block to hold size bytes, moving it when it cannot be resized in place; NULL on failure. */
+static void* On2_MM_realloc(void* memblk, size_t size)
+{
+	void* pRet = NULL;
+
+	if (On2_MM_CreateHeapMemory() < 0)
+	{
+		printf("[on2][mm] ERROR On2_MM_realloc() Couldn't create memory for Heap.\n");
+	}
+	else
+	{
+		int iRv = 0;
+		int old_num_aaus;
+		int new_num_aaus;
+		
+		old_num_aaus = hmm_true_size(memblk);
+		new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
+		
+		if (old_num_aaus == new_num_aaus)
+		{
+			pRet = memblk;
+		}
+		else
+		{
+			iRv = hmm_resize(&hmm_d, memblk, new_num_aaus);
+			if (iRv == 0)
+			{
+                pRet = memblk;
+			}
+			else
+			{
+				/* Error. Try to malloc and then copy data. */
+				void* pFromMalloc = hmm_alloc(&hmm_d, new_num_aaus);
+				/* bytes held by the old block, assuming hmm_true_size reports AAUs */
+				size_t old_bytes = (size_t)old_num_aaus << SHIFT_HMM_ADDR_ALIGN_UNIT;
+
+				if (pFromMalloc)
+				{
+					/* never read past the end of the old block when growing */
+					on2_memcpy(pFromMalloc, memblk, size < old_bytes ? size : old_bytes);
+                    hmm_free(&hmm_d, memblk);
+					
+					pRet = pFromMalloc;
+				}
+			}
+		}
+	}
+
+	return pRet;
+}
+
+#endif //INCLUDE_MEMORY_MANAGER
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj
new file mode 100644
index 00000000..8b6d6d89
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem.xcodeproj/project.pbxproj
@@ -0,0 +1,197 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0CC4DD1E0BB7930400837D4E /* on2_mem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CC4DD1D0BB7930400837D4E /* on2_mem.c */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0CC4DD1D0BB7930400837D4E /* on2_mem.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; path = on2_mem.c; sourceTree = "<group>"; };
+		D2AAC046055464E500DB518D /* libon2_mem.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libon2_mem.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		D289987405E68DCB004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* on2_mem */ = {
+			isa = PBXGroup;
+			children = (
+				08FB7795FE84155DC02AAC07 /* Source */,
+				C6A0FF2B0290797F04C91782 /* Documentation */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = on2_mem;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				0CC4DD1D0BB7930400837D4E /* on2_mem.c */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				D2AAC046055464E500DB518D /* libon2_mem.a */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		C6A0FF2B0290797F04C91782 /* Documentation */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Documentation;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC043055464E500DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		D2AAC045055464E500DB518D /* on2_mem */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "on2_mem" */;
+			buildPhases = (
+				D2AAC043055464E500DB518D /* Headers */,
+				D2AAC044055464E500DB518D /* Sources */,
+				D289987405E68DCB004EDB86 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = on2_mem;
+			productName = on2_mem;
+			productReference = D2AAC046055464E500DB518D /* libon2_mem.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "on2_mem" */;
+			hasScannedForEncodings = 1;
+			mainGroup = 08FB7794FE84155DC02AAC07 /* on2_mem */;
+			projectDirPath = "";
+			targets = (
+				D2AAC045055464E500DB518D /* on2_mem */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		D2AAC044055464E500DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0CC4DD1E0BB7930400837D4E /* on2_mem.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB91EC08733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = YES;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = on2_mem;
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		1DEB91ED08733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = (
+					ppc,
+					i386,
+				);
+				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+				GCC_MODEL_TUNING = G5;
+				INSTALL_PATH = /usr/local/lib;
+				PRODUCT_NAME = on2_mem;
+			};
+			name = Release;
+		};
+		1DEB91F008733DB70010E9CD /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = include;
+			};
+			name = Debug;
+		};
+		1DEB91F108733DB70010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				OBJROOT = build;
+				PREBINDING = NO;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+				SYMROOT = ../../../../lib/osx;
+				USER_HEADER_SEARCH_PATHS = include;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "on2_mem" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91EC08733DB70010E9CD /* Debug */,
+				1DEB91ED08733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "on2_mem" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB91F008733DB70010E9CD /* Debug */,
+				1DEB91F108733DB70010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
diff --git a/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c
new file mode 100644
index 00000000..cf3d0e24
--- /dev/null
+++ b/Src/libvpShared/corelibs/on2_common/src/on2_mem/on2_mem_tracker.c
@@ -0,0 +1,596 @@
+#define __ON2_MEM_TRACKER_C__
+/*
+  on2_mem_tracker.c
+
+  jwz 2003-09-30:
+   Stores a list of addresses, their size, and file and line they came from.
+   All exposed lib functions are prefaced by on2_ and allow the global list
+   to be thread safe.
+   Current supported platforms are:
+    Linux, Win32, WinCE and VxWorks
+   Further support can be added by defining the platform specific mutex
+   in the MemoryTracker struct as well as calls to create/destroy/lock/unlock
+   the mutex in on2_MemoryTrackerInit/Destroy and MemoryTrackerLockMutex/UnlockMutex
+*/
+
+#if defined(LINUX)
+#include <pthread.h>
+#elif defined(WIN32) || defined(_WIN32_WCE)
+#include <windows.h>
+#include <winbase.h>
+#elif defined(VXWORKS)
+#include <semLib.h>
+#endif
+
+#include "on2_mem_tracker.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> //VXWORKS doesn't have a malloc/memory.h file,
+                    //this should pull in malloc,free,etc.
+#include <stdarg.h>
+
+#undef on2_malloc   //undefine any on2_mem macros that may affect calls to
+#undef on2_free     //memory functions in this file
+#undef on2_memcpy
+#undef on2_memset
+
+struct MemoryTracker
+{
+    struct MemBlock * head,
+                    * tail;
+    int len,
+        totalsize;
+    unsigned int current_allocated,
+                 max_allocated;
+
+    #if defined(LINUX)
+    pthread_mutex_t mutex;
+    #elif defined(WIN32) || defined(_WIN32_WCE)
+    HANDLE mutex;
+    #elif defined(VXWORKS)
+    SEM_ID mutex;
+    #else
+    #error "No mutex type defined for this platform!"
+    #endif
+
+    int padding_size,
+        pad_value;
+
+};
+
+
+/* prototypes for internal library functions (all private to this file) */
+static void memtrack_log(const char* fmt, ...);
+static void MemoryTrackerDump();
+static void MemoryTrackerCheckIntegrity(char* file, unsigned int line);
+static void MemoryTrackerAdd(size_t addr, unsigned int size,
+                             char* file, unsigned int line);
+static int MemoryTrackerRemove(size_t addr);
+static struct MemBlock* MemoryTrackerFind(size_t addr);
+
+static int MemoryTrackerLockMutex();
+static int MemoryTrackerUnlockMutex();
+
+static struct MemoryTracker memtrack;   //our global memory allocation list
+static int g_bMemTrackerInited = 0;    //indicates whether the global list has
+                                        //been initialized (1:yes/0:no)
+static FILE* g_logfile = NULL;          //stream used by memtrack_log when g_logtype is 0
+static int g_logtype   = 0;             //0: log to g_logfile, 1: OutputDebugString (Win32 only)
+
+/*
+ *
+ * Exposed library functions
+ *
+*/
+
+/*
+    on2_MemoryTrackerInit(int padding_size, int pad_value)
+      padding_size - number of padding bytes placed before and after each
+                     tracked address; a non-zero value enables the
+                     integrity check over those areas
+      pad_value    - value the padding areas are expected to hold
+
+    Sets up the global tracker once: allocates the zeroed head node of the
+    list, stores the padding settings and creates the platform mutex.
+    Calling it again while the tracker is initialized does nothing.
+    Return:
+      1 if the tracker is (now) initialized, 0 otherwise
+*/
+int on2_MemoryTrackerInit(int padding_size, int pad_value)
+{
+    struct MemBlock* first;
+    int mutex_failed;
+
+    if (g_bMemTrackerInited)
+        return g_bMemTrackerInited;
+
+    first = (struct MemBlock*)malloc(sizeof(struct MemBlock));
+    memtrack.head = first;
+
+    if (!first)
+        return g_bMemTrackerInited;
+
+    memset(first, 0, sizeof(struct MemBlock));
+
+    /* an empty list: head and tail are the same node */
+    memtrack.tail = first;
+
+    memtrack.current_allocated = 0;
+    memtrack.max_allocated     = 0;
+
+    memtrack.padding_size = padding_size;
+    memtrack.pad_value    = pad_value;
+
+#if defined(LINUX)
+    /* NULL attributes = default mutex */
+    mutex_failed = pthread_mutex_init(&memtrack.mutex, NULL);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    /* default security, not initially owned, unnamed */
+    memtrack.mutex = CreateMutex(NULL, FALSE, NULL);
+    mutex_failed = !memtrack.mutex;
+#elif defined(VXWORKS)
+    /* FIFO (non-priority) queueing, initially unlocked */
+    memtrack.mutex = semBCreate(SEM_Q_FIFO, SEM_FULL);
+    mutex_failed = !memtrack.mutex;
+#endif
+
+    if (!mutex_failed)
+    {
+        memtrack_log("Memory Tracker init'd, v."on2_mem_tracker_version"\n");
+        g_bMemTrackerInited = 1;
+    }
+    else
+    {
+        memtrack_log("on2_MemoryTrackerInit: Error creating mutex!\n");
+
+        free(memtrack.head);
+        memtrack.head = NULL;
+    }
+
+    return g_bMemTrackerInited;
+}
+
+/*
+    on2_MemoryTrackerDestroy()
+    If the tracker is initialized and its mutex can be taken: dumps the
+    current list, frees every node (head included), zeroes the counters,
+    closes the log file if it was opened by this library and marks the
+    tracker as uninitialized.
+    NOTE(review): the platform mutex is only unlocked here, it is never
+    destroyed/closed.
+*/
+void on2_MemoryTrackerDestroy()
+{
+    struct MemBlock* node;
+
+    if (MemoryTrackerLockMutex())
+        return;
+
+    MemoryTrackerDump();
+
+    node = memtrack.head;
+    while (node)
+    {
+        struct MemBlock* doomed = node;
+
+        node = node->next;
+        free(doomed);
+    }
+
+    memtrack.head              = NULL;
+    memtrack.tail              = NULL;
+    memtrack.len               = 0;
+    memtrack.current_allocated = 0;
+    memtrack.max_allocated     = 0;
+
+    /* stderr is not ours to close */
+    if (g_logtype == 0 && g_logfile != NULL && g_logfile != stderr)
+    {
+        fclose(g_logfile);
+        g_logfile = NULL;
+    }
+
+    MemoryTrackerUnlockMutex();
+
+    g_bMemTrackerInited = 0;
+}
+
+/*
+    on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                         char * file, unsigned int line)
+      addr - memory address to be added to list
+      size - size of the allocation at addr
+      file - the file addr was referenced from
+      line - the line in file addr was referenced from
+    Public entry point: forwards to the internal MemoryTrackerAdd(), which
+    takes the tracker mutex itself before appending addr, its size, file
+    and line to the global list.
+*/
+void on2_MemoryTrackerAdd(size_t addr, unsigned int size,
+                          char * file, unsigned int line)
+{
+    MemoryTrackerAdd(addr, size, file, line);
+}
+
+/*
+    on2_MemoryTrackerRemove(size_t addr)
+      addr - memory address to be removed from list
+    Public entry point: forwards to the internal MemoryTrackerRemove(),
+    which takes the tracker mutex itself before unlinking addr.
+    Return:
+      whatever MemoryTrackerRemove returns:
+      0: on success
+      -1: if the mutex could not be locked
+      -2: if the addr was not found in the list
+*/
+int on2_MemoryTrackerRemove(size_t addr)
+{
+    return MemoryTrackerRemove(addr);
+}
+
+/*
+    on2_MemoryTrackerFind(size_t addr)
+      addr - address to look up in the list
+    Takes the tracker mutex around the internal lookup.
+    Return:
+        pointer to the memory block whose address equals addr,
+        NULL if there is none or the mutex could not be locked
+*/
+struct MemBlock* on2_MemoryTrackerFind(size_t addr)
+{
+    struct MemBlock* match;
+
+    if (MemoryTrackerLockMutex())
+        return NULL;
+
+    match = MemoryTrackerFind(addr);
+    MemoryTrackerUnlockMutex();
+
+    return match;
+}
+
+/*
+    on2_MemoryTrackerDump()
+    Logs the current contents of the global memory allocation list
+    while holding the tracker mutex. Does nothing if the mutex
+    cannot be locked.
+*/
+void on2_MemoryTrackerDump()
+{
+    if (MemoryTrackerLockMutex())
+        return;
+
+    MemoryTrackerDump();
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+      file - name of the file the check was requested from
+      line - line in file the check was requested from
+    Runs the internal integrity check over every address in the global
+    list while holding the tracker mutex. Does nothing if the mutex
+    cannot be locked.
+*/
+void on2_MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+{
+    if (MemoryTrackerLockMutex())
+        return;
+
+    MemoryTrackerCheckIntegrity(file, line);
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    on2_MemoryTrackerSetLogType(int type, char* option)
+      type   - 0: log to a stdio stream
+               1: log through OutputDebugString (Win32 builds only)
+      option - for type 0, the path of the log file to create/truncate,
+               or NULL to log to stderr; ignored for type 1
+    Sets the logging destination of the memory tracker. The new
+    destination is only installed once it is known to be usable, so a
+    failed call leaves the previous logging setup untouched. A log file
+    opened by an earlier call is closed when it is replaced.
+    NOTE(review): the switch is not synchronized with memtrack_log();
+    change the log type while no other thread is logging.
+    Return:
+      0: on success
+      -1: if the logging type could not be set, because the value was invalid
+          or because a file could not be opened
+*/
+int on2_MemoryTrackerSetLogType(int type, char* option)
+{
+    int ret = -1;
+
+    switch(type) {
+    case 0:
+        {
+            FILE* newfile = option ? fopen(option, "w") : stderr;
+
+            if (newfile) {
+                /* g_logfile only ever holds stderr or a file opened here,
+                   so anything else that is being replaced must be closed */
+                if (g_logfile && g_logfile != stderr)
+                    fclose(g_logfile);
+
+                g_logfile = newfile;
+                g_logtype = 0;
+                ret = 0;
+            }
+        }
+        break;
+#if defined(WIN32) && !defined(_WIN32_WCE)
+    case 1:
+        g_logtype = type;
+        ret = 0;
+        break;
+#endif
+    default:
+        break;
+    }
+
+    //output the version to the new logging destination
+    if(!ret)
+        memtrack_log("Memory Tracker init'd, v."on2_mem_tracker_version"\n");
+
+    return ret;
+}
+
+/*
+ *
+ * END - Exposed library functions
+ *
+*/
+
+
+/*
+ *
+ * Internal library functions
+ *
+*/
+
+/*
+    memtrack_log(const char* fmt, ...)
+      fmt - printf style format string, followed by its arguments
+    Writes a formatted message to the current logging destination:
+      g_logtype 0: to g_logfile (flushed after every message); nothing
+                   is written while g_logfile is NULL
+      g_logtype 1: to OutputDebugString (Win32 builds only), limited to
+                   1023 characters per message
+    Any other log type discards the message.
+*/
+static void memtrack_log(const char* fmt, ...)
+{
+    va_list list;
+
+    va_start(list, fmt);
+    switch(g_logtype) {
+    case 0:
+        if (g_logfile) {
+            vfprintf(g_logfile, fmt, list);
+            fflush(g_logfile);
+        }
+        break;
+#if defined(WIN32) && !defined(_WIN32_WCE)
+    case 1:
+        {
+            char temp[1024];
+            _vsnprintf(temp, sizeof(temp)/sizeof(char)-1, fmt, list);
+            /* _vsnprintf does not terminate the buffer when the message
+               is truncated, so terminate it ourselves */
+            temp[sizeof(temp)/sizeof(char)-1] = '\0';
+            OutputDebugString(temp);
+        }
+        break;
+#endif
+    default:
+        break;
+    }
+    va_end(list);
+}
+
+/*
+    MemoryTrackerDump()
+    Logs the current and maximum allocation totals followed by one line
+    per entry of the global memory allocation list (the head node itself
+    is skipped, entries start at head->next).
+    NOTE: does no locking of its own; callers hold the tracker mutex.
+*/
+static void MemoryTrackerDump()
+{
+    int i = 0;
+    struct MemBlock* p = (memtrack.head ? memtrack.head->next : NULL);
+
+    /* the counters are unsigned, print them as such */
+    memtrack_log("Currently Allocated= %u; Max allocated= %u\n",
+        memtrack.current_allocated, memtrack.max_allocated);
+
+    while(p)
+    {
+        /* addresses are wider than unsigned int on LP64 targets, so they
+           are passed as unsigned long to match the conversion */
+        memtrack_log("memblocks[%d].addr= 0x%.8lx, memblocks[%d].size= %u, file: %s, line: %u\n", i,
+            (unsigned long)p->addr, i, (unsigned int)p->size,
+            p->file, (unsigned int)p->line);
+
+        p = p->next;
+        ++i;
+    }
+}
+
+/*
+    MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+      file - the file name where the check was placed
+      line - the line in file where the check was placed
+    If a padding_size was supplied to on2_MemoryTrackerInit()
+    this function walks each addr in the list and compares, one
+    unsigned int at a time, the padding_size bytes directly before addr
+    and the padding_size bytes starting at addr+size against pad_value.
+    Every mismatching word is logged together with file/line of the
+    check and of the entry.
+    NOTE: does no locking of its own; callers hold the tracker mutex.
+*/
+static void MemoryTrackerCheckIntegrity(char* file, unsigned int line)
+{
+    /* nothing to inspect without padding or without a list */
+    if (memtrack.padding_size && memtrack.head)
+    {
+        int i,
+            index = 0;
+        unsigned int expected = memtrack.pad_value,
+                     before,
+                     after;
+        unsigned char *xBounds;
+        struct MemBlock* p = memtrack.head->next;
+
+        while (p)
+        {
+            //start of the padding in front of the tracked address
+            xBounds = (unsigned char*)p->addr - memtrack.padding_size;
+
+            for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int))
+            {
+                /* memcpy instead of a cast: the padding words need not be
+                   aligned for unsigned int access */
+                memcpy(&before, xBounds + i, sizeof(unsigned int));
+                memcpy(&after, xBounds + p->size + memtrack.padding_size + i,
+                       sizeof(unsigned int));
+
+                if ((before != expected) || (after != expected))
+                {
+                    memtrack_log("\n[on2_mem integrity check failed]:\n"
+                                 "    index[%d] {%s:%u} addr=0x%lx, size= %u,"
+                                 " file: %s, line: %u c0:0x%x c1:0x%x\n",
+                            index, file, line, (unsigned long)p->addr,
+                            (unsigned int)p->size, p->file,
+                            (unsigned int)p->line, before, after);
+                }
+            }
+
+            ++index;
+            p = p->next;
+        }
+    }
+}
+
+/*
+    MemoryTrackerAdd(size_t addr, unsigned int size,
+                     char * file, unsigned int line)
+    Appends a new entry holding addr, its size, file and line number to
+    the end of the list while holding the tracker mutex, then updates the
+    current and maximum allocation totals.
+    If the entry itself cannot be allocated the whole tracker is torn
+    down through on2_MemoryTrackerDestroy(). Nothing happens when the
+    mutex cannot be locked.
+*/
+void MemoryTrackerAdd(size_t addr, unsigned int size,
+                      char * file, unsigned int line)
+{
+    struct MemBlock* entry;
+
+    if (MemoryTrackerLockMutex())
+        return;
+
+    entry = malloc(sizeof(struct MemBlock));
+
+    if (!entry)
+    {
+        memtrack_log("MemoryTrackerAdd: error allocating memory!\n");
+        MemoryTrackerUnlockMutex();
+        on2_MemoryTrackerDestroy();
+        return;
+    }
+
+    entry->addr = addr;
+    entry->size = size;
+    entry->file = file;
+    entry->line = line;
+    entry->next = NULL;
+
+    /* hook the entry in behind the current tail */
+    entry->prev         = memtrack.tail;
+    memtrack.tail->next = entry;
+    memtrack.tail       = entry;
+
+    memtrack.current_allocated += size;
+
+    if (memtrack.max_allocated < memtrack.current_allocated)
+        memtrack.max_allocated = memtrack.current_allocated;
+
+    MemoryTrackerUnlockMutex();
+}
+
+/*
+    MemoryTrackerRemove(size_t addr)
+    Removes an address and its corresponding size (if they exist)
+    from the memory tracker list while holding the tracker mutex and
+    adjusts the current number of bytes allocated. An unknown address
+    is reported in the log.
+    Return:
+      0: on success
+      -1: if the mutex could not be locked
+      -2: if the addr was not found in the list
+*/
+int MemoryTrackerRemove(size_t addr)
+{
+    int ret = -1;
+
+    if (!MemoryTrackerLockMutex())
+    {
+        struct MemBlock* p = MemoryTrackerFind(addr);
+
+        if (p)
+        {
+            memtrack.current_allocated -= p->size;
+
+            /* p is never the head node (Find starts at head->next),
+               so p->prev always exists */
+            p->prev->next = p->next;
+            if (p->next)
+                p->next->prev = p->prev;
+            else
+                memtrack.tail = p->prev;
+
+            ret = 0;
+            free(p);
+        }
+        else
+        {
+            /* size_t may be wider than unsigned int, so pass the address
+               as unsigned long to match the conversion */
+            memtrack_log("MemoryTrackerRemove(): addr not found in list, 0x%.8lx\n",
+                         (unsigned long)addr);
+            ret = -2;
+        }
+
+        MemoryTrackerUnlockMutex();
+    }
+
+    return ret;
+}
+
+/*
+    MemoryTrackerFind(size_t addr)
+    Walks the list, starting behind the head node, looking for the
+    entry whose address equals addr.
+    NOTE: this function does no locking itself; the callers in this
+          file take the mutex first, which lets Remove find and
+          unlink under a single lock.
+    Returns: pointer to the mem block if found, NULL otherwise
+*/
+static struct MemBlock* MemoryTrackerFind(size_t addr)
+{
+    struct MemBlock* cur;
+
+    if (!memtrack.head)
+        return NULL;
+
+    for (cur = memtrack.head->next; cur; cur = cur->next)
+    {
+        if (cur->addr == addr)
+            break;
+    }
+
+    return cur;
+}
+
+/*
+    MemoryTrackerLockMutex()
+    Locks the memory tracker mutex with a platform specific call.
+    A failed lock call is reported in the log.
+    Returns:
+        0: Success
+        non-zero: Failure; -1 when the tracker was not initialized,
+                  otherwise the result of the platform lock call
+*/
+static int MemoryTrackerLockMutex()
+{
+    int status = -1;
+
+    if (!g_bMemTrackerInited)
+        return status;
+
+#if defined(LINUX)
+    status = pthread_mutex_lock(&memtrack.mutex);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    status = WaitForSingleObject(memtrack.mutex, INFINITE);
+#elif defined(VXWORKS)
+    status = semTake(memtrack.mutex, WAIT_FOREVER);
+#endif
+
+    if (status != 0)
+        memtrack_log("MemoryTrackerLockMutex: mutex lock failed\n");
+
+    return status;
+}
+
+/*
+    MemoryTrackerUnlockMutex()
+    Unlocks the memory tracker mutex with a platform specific call.
+    A failed unlock call is reported in the log.
+    Returns:
+        0: Success
+        non-zero: Failure; -1 when the tracker was not initialized,
+                  otherwise the result of the platform unlock call
+*/
+static int MemoryTrackerUnlockMutex()
+{
+    int status = -1;
+
+    if (!g_bMemTrackerInited)
+        return status;
+
+#if defined(LINUX)
+    status = pthread_mutex_unlock(&memtrack.mutex);
+#elif defined(WIN32) || defined(_WIN32_WCE)
+    /* ReleaseMutex reports success as non-zero, flip it */
+    status = !ReleaseMutex(memtrack.mutex);
+#elif defined(VXWORKS)
+    status = semGive(memtrack.mutex);
+#endif
+
+    if (status != 0)
+        memtrack_log("MemoryTrackerUnlockMutex: mutex unlock failed\n");
+
+    return status;
+}
author	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
committer	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
commit	20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree	12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/libvpShared/corelibs
parent	537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download	winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz