/*****************************************************************************
** File:        videoRender.c
**
** Author:      Daniel Vik
**
** Description: Video render methods. 
**
** License:     Freeware. Anyone may distribute, use and modify the file 
**              without notifying the author. Even though it is not a 
**              requirement, the author will be happy if you mention his 
**              name when using the file as is or in modified form.
**
** History:     1.0 - 10/17 2003 Initial version
**              1.1 - 11/26 2003 Added delay and glow in pal 32 rendering.
**              1.2 - 12/01 2003 Added double src width support (for MSX
**                               screen 6,7 and text80).
**              1.3 - 12/05 2003 Added support for scale2x (by SLotman)
**
******************************************************************************
*/
#include "videoRender.h"
#include "scalebit.h"
#include <stdlib.h>
 
/* Mask applied after averaging two packed YCbCr values. It clears bits 4 and
   10, which sit between the Y (bits 0-3), Cb (bits 5-9) and Cr (bits 11-15)
   fields as packed by initYJKtoYCbCrTable(), and everything above bit 15. */
#define YCBCR_MASK 0xFBEF
/* Number of entries in each of the YCbCr-indexed RGB lookup tables below. */
#define MAX_YCBCR_VALUE (1 << 16)

/* RGB lookup tables indexed by packed YCbCr value and filled by
   initRGBTable(). "Color" holds the converted colour, "Green" carries the
   luminance in the green channel only, and "White" repeats the luminance in
   all three channels. The 32 variants store 0xRRGGBB, the 16 variants store
   5-6-5 bit fields. */
static UInt32 pRgbTableColor32[MAX_YCBCR_VALUE];
static UInt32 pRgbTableGreen32[MAX_YCBCR_VALUE];
static UInt32 pRgbTableWhite32[MAX_YCBCR_VALUE];
static UInt16 pRgbTableColor16[MAX_YCBCR_VALUE];
static UInt16 pRgbTableGreen16[MAX_YCBCR_VALUE];
static UInt16 pRgbTableWhite16[MAX_YCBCR_VALUE];

/* Maps a [Y][J][K] triple to a packed YCbCr value. Filled by
   initYJKtoYCbCrTable(), which stores negative J and K at index value + 32.
   Not static, so it is visible to other translation units. */
UInt32 YJKtoYCbCrTable[32][32][32];

/* 480 x 640 buffer of 32-bit values. It is not referenced in this part of the
   file -- presumably it keeps previously rendered pixels for the decay
   effect; TODO confirm against the code that uses it. */
static UInt32 history[480][640];

/* Renderer state. The uses of these fields are outside this part of the file,
   so the notes below are assumptions to be confirmed against that code. */
struct Video {
    UInt16* pRgbTable16;   /* presumably the active 16-bit RGB lookup table -- TODO confirm */
    UInt32* pRgbTable32;   /* presumably the active 32-bit RGB lookup table -- TODO confirm */
    VideoPalMode palMode;  /* PAL emulation mode; VideoPalMode is declared elsewhere */
    UInt32 decay;          /* presumably the strength of the decay effect -- TODO confirm */
};

/* Small arithmetic helpers. Each macro may evaluate its arguments more than
   once, so do not pass expressions with side effects (e.g. i++). */
#define ABS(a) ((a) < 0 ? -1 * (a) : (a))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
** Fills YJKtoYCbCrTable for every Y in 0..31 and every J, K in -16..15.
**
** Red, green and blue are derived from Y, J and K, reduced to a luma value
** and two colour differences (blue - luma and red - luma), clamped, and then
** packed as luma / 2 in the low bits, 16 + Cb / 2 starting at bit 5 and
** 16 + Cr / 2 starting at bit 11. Negative J and K are stored at index
** value + 32.
*/
static void initYJKtoYCbCrTable() {
    int y;
    int j;
    int k;

    for (y = 0; y < 32; y++) {
        for (j = -16; j < 16; j++) {
            const int jSlot = (j + 32) % 32;    /* -16..-1 -> 16..31, 0..15 unchanged */
            const int red   = y + 2 * j;

            for (k = -16; k < 16; k++) {
                const int kSlot = (k + 32) % 32;
                const int green = y + 2 * k;
                const int blue  = (5 * y - 4 * j - 2 * k) / 4;
                int luma;
                int cb;
                int cr;
                UInt32 packed;

                /* The colour differences use the luma value before it is clamped. */
                luma = (int)(0.2989*red + 0.5866*green + 0.1145*blue);
                cb   = blue - luma;
                cr   = red - luma;

                if (luma < 0) {
                    luma = 0;
                }
                else if (luma > 31) {
                    luma = 31;
                }

                if (cb < -32) {
                    cb = -32;
                }
                else if (cb > 31) {
                    cb = 31;
                }

                if (cr < -32) {
                    cr = -32;
                }
                else if (cr > 31) {
                    cr = 31;
                }

                packed = (luma / 2) | ((16 + (cb / 2)) << 5) | ((16 + (cr / 2)) << 11);

                YJKtoYCbCrTable[y][jSlot][kSlot] = packed;
            }
        }
    }
}

/*
** Scales a channel value by 1.12 and limits the result to the range 6..247.
*/
static int rgbTableLevel(int value)
{
    int scaled = (int)(1.12 * value);

    if (scaled > 247) {
        scaled = 247;
    }
    if (scaled < 6) {
        scaled = 6;
    }
    return scaled;
}

/*
** Fills the colour, green and white RGB lookup tables (32-bit and 16-bit)
** for every packed YCbCr index.
**
** The index is split into a 4-bit luma field (bits 0-3) and two 5-bit colour
** difference fields (bits 5-9 and 11-15). Entries whose 32-bit colour value
** is already non-zero are left untouched.
*/
static void initRGBTable() 
{
    int index;

    for (index = 0; index < MAX_YCBCR_VALUE; index++) {
        int luma;
        int cb;
        int cr;
        int red;
        int green;
        int blue;
        int grey;

        /* A computed entry is never zero because every channel is at least 6. */
        if (pRgbTableColor32[index] != 0) {
            continue;
        }

        luma = 8 + 16 * (index & 0x0f);
        cb   = 8 + 16 * (((index >> 5) & 0x1f) - 16);
        cr   = 8 + 16 * (((index >> 11) & 0x1f) - 16);

        red   = cr + luma;
        green = (int)(luma - (0.1145/0.5866)*cb - (0.2989/0.5866)*cr);
        blue  = cb + luma;
        grey  = luma;

        grey  = rgbTableLevel(grey);
        red   = rgbTableLevel(red);
        green = rgbTableLevel(green);
        blue  = rgbTableLevel(blue);

        /* Full colour: 0xRRGGBB and 5-6-5. */
        pRgbTableColor32[index] = (red << 16) | (green << 8) | (blue << 0);
        pRgbTableColor16[index] = ((red >> 3) << 11) | ((green >> 2) << 5) | (blue >> 3);

        /* Green monitor: luminance in the green channel over a fixed red/blue floor. */
        pRgbTableGreen32[index] = 0x100010 | (grey << 8);
        pRgbTableGreen16[index] = 0x0801 | (UInt16)((grey >> 2) << 5);

        /* White monitor: luminance in all three channels. */
        pRgbTableWhite32[index] = (grey << 16) | (grey << 8) | (grey << 0);
        pRgbTableWhite16[index] = (UInt16)(((grey >> 3) << 11) | ((grey >> 2) << 5) | (grey >> 3));
    }
}


/*****************************************************************************
**
** PAL emulation rendering routines
**
******************************************************************************
*/
static void copyMonitor_2x2_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                               int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt16* pRgbTable16 = (UInt16*)pRgbTable;
    UInt32* pSrc        = (UInt32*)pSource;
    UInt16* pDst1       = (UInt16*)pDestination;
    UInt16* pDst2       = pDst1 + dstPitch / sizeof(UInt16);
    UInt16* pDst3       = pDst2;
    int w;
    int h;

    srcPitch *= 2;

    if (interlace) {
        // Draw even page
        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst1[dstIndex++] = 3 * ((colRgb1 >> 2) & 0x39e7) + noise;
                    pDst1[dstIndex++] = 3 * ((colRgb2 >> 2) & 0x39e7) + noise;
                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst1[dstIndex++] = 3 * ((colRgb1 >> 2) & 0x39e7) + noise;
                    pDst1[dstIndex++] = 3 * ((colRgb2 >> 2) & 0x39e7) + noise;
                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst1 = (UInt16*)((UInt8*)pDst1 + dstPitch * 2);
        }

        // Draw odd page
        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex++] = colRgb1 + noise;
                    pDst2[dstIndex++] = colRgb2 + noise;
                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex++] = colRgb1 + noise;
                    pDst2[dstIndex++] = colRgb2 + noise;
                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst2 = (UInt16*)((UInt8*)pDst2 + dstPitch * 2);
        }
    }
    else {
        if (evenOddPage) pSrc += 2 * srcWidth * srcHeight;

        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex] = colRgb1 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb1 >> 3) & 0x18e3)) + noise;
                    dstIndex++;
                    pDst2[dstIndex] = colRgb2 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb2 >> 3) & 0x18e3)) + noise;
                    dstIndex++;

                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex] = colRgb1 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb1 >> 3) & 0x18e3)) + noise;
                    dstIndex++;
                    pDst2[dstIndex] = colRgb2 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb2 >> 3) & 0x18e3)) + noise;
                    dstIndex++;

                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pDst3 = pDst2;
            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst1 = (UInt16*)((UInt8*)pDst1 + dstPitch * 2);
            pDst2 = (UInt16*)((UInt8*)pDst2 + dstPitch * 2);
        }
    }
}

/*
** Averages two packed YCbCr values and masks the result with YCBCR_MASK.
*/
static UInt32 monitor32Mix(UInt32 a, UInt32 b)
{
    return ((a + b) >> 1) & YCBCR_MASK;
}

/*
** Renders one source row of the first interlaced field into dst. The low
** three bits of every colour byte are cleared before the noise term is
** added. Returns the advanced noise state.
*/
static UInt32 monitor32EvenRow(const UInt32* src, int srcWidth, int doubleWidth,
                               const UInt32* rgb, UInt32* dst, UInt32 rnd)
{
    UInt32 cur = src[0];
    int x;

    if (doubleWidth) {
        /* Two source pixels in, two destination pixels out per step. */
        for (x = 0; x < srcWidth; x++) {
            UInt32 next;
            UInt32 first;
            UInt32 second;
            UInt32 noise;

            next  = src[2 * x];
            first = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur   = next;

            next   = src[2 * x + 1];
            second = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur    = next;

            noise = (rnd >> 31) * 0x10101;
            dst[2 * x]     = (first  & 0xf8f8f8) + noise;
            dst[2 * x + 1] = (second & 0xf8f8f8) + noise;
            rnd *= 23;
        }
    }
    else {
        /* One source pixel in, two destination pixels out per step. */
        for (x = 0; x < srcWidth; x++) {
            UInt32 next   = src[x];
            UInt32 mid    = monitor32Mix(next, cur);
            UInt32 first  = rgb[monitor32Mix(monitor32Mix(mid, cur), cur)];
            UInt32 second = rgb[monitor32Mix(monitor32Mix(mid, next), next)];
            UInt32 noise  = (rnd >> 31) * 0x10101;

            dst[2 * x]     = (first  & 0xf8f8f8) + noise;
            dst[2 * x + 1] = (second & 0xf8f8f8) + noise;
            rnd *= 23;
            cur = next;
        }
    }

    return rnd;
}

/*
** Renders one source row of the second interlaced field into dst at the
** looked up colour plus noise. Returns the advanced noise state.
*/
static UInt32 monitor32OddRow(const UInt32* src, int srcWidth, int doubleWidth,
                              const UInt32* rgb, UInt32* dst, UInt32 rnd)
{
    UInt32 cur = src[0];
    int x;

    if (doubleWidth) {
        for (x = 0; x < srcWidth; x++) {
            UInt32 next;
            UInt32 first;
            UInt32 second;
            UInt32 noise;

            next  = src[2 * x];
            first = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur   = next;

            next   = src[2 * x + 1];
            second = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur    = next;

            noise = (rnd >> 31) * 0x10101;
            dst[2 * x]     = first  + noise;
            dst[2 * x + 1] = second + noise;
            rnd *= 23;
        }
    }
    else {
        for (x = 0; x < srcWidth; x++) {
            UInt32 next   = src[x];
            UInt32 mid    = monitor32Mix(next, cur);
            UInt32 first  = rgb[monitor32Mix(monitor32Mix(mid, cur), cur)];
            UInt32 second = rgb[monitor32Mix(monitor32Mix(mid, next), next)];
            UInt32 noise  = (rnd >> 31) * 0x10101;

            dst[2 * x]     = first  + noise;
            dst[2 * x + 1] = second + noise;
            rnd *= 23;
            cur = next;
        }
    }

    return rnd;
}

/*
** Renders one source row of a non-interlaced frame into two destination rows.
** `bright` receives the looked up colour plus noise; `dim` receives a
** weighted blend (2 parts `above`, 6 parts new colour, each taken from the
** value shifted right by three) plus noise. `above` may be the same row as
** `bright`, so each `bright` pixel is written before the matching `above`
** pixel is read. Returns the advanced noise state.
*/
static UInt32 monitor32FrameRow(const UInt32* src, int srcWidth, int doubleWidth, const UInt32* rgb,
                                UInt32* dim, UInt32* bright, const UInt32* above, UInt32 rnd)
{
    UInt32 cur = src[0];
    int x;

    if (doubleWidth) {
        for (x = 0; x < srcWidth; x++) {
            const int left  = 2 * x;
            const int right = 2 * x + 1;
            UInt32 next;
            UInt32 first;
            UInt32 second;
            UInt32 noise;

            next  = src[left];
            first = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur   = next;

            next   = src[right];
            second = rgb[monitor32Mix(monitor32Mix(cur, next), next)];
            cur    = next;

            noise = (rnd >> 31) * 0x10101;
            bright[left]  = first + noise;
            dim[left]     = 2 * ((above[left] >> 3) & 0x1f1f1f) + 6 * ((first >> 3) & 0x1f1f1f) + noise;
            bright[right] = second + noise;
            dim[right]    = 2 * ((above[right] >> 3) & 0x1f1f1f) + 6 * ((second >> 3) & 0x1f1f1f) + noise;

            rnd *= 23;
        }
    }
    else {
        for (x = 0; x < srcWidth; x++) {
            const int left  = 2 * x;
            const int right = 2 * x + 1;
            UInt32 next   = src[x];
            UInt32 mid    = monitor32Mix(next, cur);
            UInt32 first  = rgb[monitor32Mix(monitor32Mix(mid, cur), cur)];
            UInt32 second = rgb[monitor32Mix(monitor32Mix(mid, next), next)];
            UInt32 noise  = (rnd >> 31) * 0x10101;

            bright[left]  = first + noise;
            dim[left]     = 2 * ((above[left] >> 3) & 0x1f1f1f) + 6 * ((first >> 3) & 0x1f1f1f) + noise;
            bright[right] = second + noise;
            dim[right]    = 2 * ((above[right] >> 3) & 0x1f1f1f) + 6 * ((second >> 3) & 0x1f1f1f) + noise;

            rnd *= 23;
            cur = next;
        }
    }

    return rnd;
}

/*
** Converts packed YCbCr source rows to 32-bit pixels at twice the width and
** twice the height, with horizontal filtering and optional noise.
**
** pSource        - UInt32 source pixels; filtered values index pRgbTable.
** srcWidth       - pixels per normal row; rows flagged in srcDoubleWidth
**                  hold 2 * srcWidth pixels.
** srcHeight      - number of source rows per field.
** srcDoubleWidth - one flag per source row, non-zero for a double width row.
** pDestination   - 32-bit destination; two destination rows per source row.
** srcPitch       - doubled and then used as the byte step between source rows.
** dstPitch       - byte distance between destination rows.
** rnd            - noise state; its top bit decides whether 0x10101 is added
**                  to a pixel pair, and it is multiplied by 23 after every pair.
** pRgbTable      - UInt32 lookup table.
** evenOddPage    - non-interlaced only: non-zero skips the first
**                  2 * srcWidth * srcHeight source pixels.
** interlace      - non-zero renders two consecutive source fields, the first
**                  into destination rows 0, 2, 4, ... and the second into rows
**                  1, 3, 5, ...
*/
static void copyMonitor_2x2_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                               int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    const UInt32* rgb    = (const UInt32*)pRgbTable;
    UInt32*       src    = (UInt32*)pSource;
    UInt32*       dim    = (UInt32*)pDestination;
    UInt32*       bright = dim + dstPitch / sizeof(UInt32);
    UInt32*       above  = bright;
    int row;

    srcPitch *= 2;

    if (!interlace) {
        if (evenOddPage) {
            src += 2 * srcWidth * srcHeight;
        }

        for (row = 0; row < srcHeight; row++) {
            rnd = monitor32FrameRow(src, srcWidth, srcDoubleWidth[row], rgb, dim, bright, above, rnd);

            above  = bright;
            src    = (UInt32*)((UInt8*)src    + srcPitch);
            dim    = (UInt32*)((UInt8*)dim    + dstPitch * 2);
            bright = (UInt32*)((UInt8*)bright + dstPitch * 2);
        }
        return;
    }

    /* First field: even destination rows. */
    for (row = 0; row < srcHeight; row++) {
        rnd = monitor32EvenRow(src, srcWidth, srcDoubleWidth[row], rgb, dim, rnd);

        src = (UInt32*)((UInt8*)src + srcPitch);
        dim = (UInt32*)((UInt8*)dim + dstPitch * 2);
    }

    /* Second field: continues in the source where the first field stopped. */
    for (row = 0; row < srcHeight; row++) {
        rnd = monitor32OddRow(src, srcWidth, srcDoubleWidth[row], rgb, bright, rnd);

        src    = (UInt32*)((UInt8*)src    + srcPitch);
        bright = (UInt32*)((UInt8*)bright + dstPitch * 2);
    }
}

static void copySharpPAL_2x2_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                                int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt16* pRgbTable16 = (UInt16*)pRgbTable;
    UInt32* pSrc        = (UInt32*)pSource;
    UInt16* pDst1       = (UInt16*)pDestination;
    UInt16* pDst2       = pDst1 + dstPitch / sizeof(UInt16);
    UInt16* pDst3       = pDst2;
    int w;
    int h;

    srcPitch *= 2;

    if (interlace) {
        // Draw even page
        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst1[dstIndex++] = 3 * ((colRgb1 >> 2) & 0x39e7) + noise;
                    pDst1[dstIndex++] = 3 * ((colRgb2 >> 2) & 0x39e7) + noise;
                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst1[dstIndex++] = 3 * ((colRgb1 >> 2) & 0x39e7) + noise;
                    pDst1[dstIndex++] = 3 * ((colRgb2 >> 2) & 0x39e7) + noise;
                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst1 = (UInt16*)((UInt8*)pDst1 + dstPitch * 2);
        }

        // Draw odd page
        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex++] = colRgb1 + noise;
                    pDst2[dstIndex++] = colRgb2 + noise;
                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex++] = colRgb1 + noise;
                    pDst2[dstIndex++] = colRgb2 + noise;
                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst2 = (UInt16*)((UInt8*)pDst2 + dstPitch * 2);
        }
    }
    else {
        if (evenOddPage) pSrc += 2 * srcWidth * srcHeight;

        for (h = 0; h < srcHeight; h++) {
            UInt32 colCur = pSrc[0];
            UInt32 colPrev1 = colCur;
            int dstIndex = 0;

            if (srcDoubleWidth[h]) {
                for (w = 0; w < 2 * srcWidth;) {
                    UInt32 colNext1;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext1 = pSrc[w++];
                    colTmp   = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp   = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb1  = pRgbTable16[colTmp];

                    colPrev1  = colCur;
                    colCur    = colNext1;

                    colNext1 = pSrc[w++];
                    colTmp  = ((colNext1   + colPrev1) >> 1) & YCBCR_MASK;
                    colTmp  = ((colTmp   + colCur) >> 1) & YCBCR_MASK;
                    colRgb2 = pRgbTable16[colTmp];

                    colPrev1 = colCur;
                    colCur   = colNext1;

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex] = colRgb1 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb1 >> 3) & 0x18e3)) + noise;
                    dstIndex++;
                    pDst2[dstIndex] = colRgb2 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb2 >> 3) & 0x18e3)) + noise;
                    dstIndex++;

                    rnd *= 23;
                }
            }
            else {
                for (w = 0; w < srcWidth; w++) {
                    UInt32 colNext;
                    UInt16 colRgb1;
                    UInt16 colRgb2;
                    UInt32 colTmp;
                    UInt16 noise;

                    colNext = pSrc[w];
                    colTmp  = ((colNext + colCur) >> 1) & YCBCR_MASK;
                    colRgb1 = pRgbTable16[((colTmp + colCur) >> 1) & YCBCR_MASK];
                    colRgb2 = pRgbTable16[((colTmp + colNext) >> 1) & YCBCR_MASK];

                    noise = (UInt16)(rnd >> 31) * 0x0821;
                    pDst2[dstIndex] = colRgb1 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb1 >> 3) & 0x18e3)) + noise;
                    dstIndex++;
                    pDst2[dstIndex] = colRgb2 + noise;
                    pDst1[dstIndex] = 3 * (((pDst3[dstIndex] >> 3) & 0x18e3) + ((colRgb2 >> 3) & 0x18e3)) + noise;
                    dstIndex++;

                    rnd *= 23;
                    colCur = colNext;
                }
            }

            pDst3 = pDst2;
            pSrc  = (UInt32*)((UInt8*)pSrc  + srcPitch);
            pDst1 = (UInt16*)((UInt8*)pDst1 + dstPitch * 2);
            pDst2 = (UInt16*)((UInt8*)pDst2 + dstPitch * 2);
        }
    }
}

/*
** copySharpPAL_2x2_32
**
** Writes a filtered copy of the source image, two output pixels wide and two
** output rows high per source pixel pair / line, to a 32 bit destination.
**
** pSource        Source lines of UInt32 values. Neighbouring values are
**                averaged, masked with YCBCR_MASK and the result is used as
**                an index into pRgbTable. Consecutive source lines are
**                2 * srcPitch bytes apart.
** srcWidth       Number of output pixel pairs per line. A line whose
**                srcDoubleWidth entry is non-zero is read as 2 * srcWidth
**                source values (one per output pixel); any other line is
**                read as srcWidth values (two output pixels each).
** srcHeight      Number of source lines per field (interlace) or per frame.
** srcDoubleWidth Per line flag array, indexed by the line number within the
**                field / frame.
** pDestination   First destination row. Rows are dstPitch bytes apart.
** rnd            Noise seed. Its top three bits are added to each of the
**                three low bytes of both pixels of a pair, then rnd is
**                multiplied by 23.
** pRgbTable      Lookup table of UInt32 output colors.
** evenOddPage    Progressive mode only: when non-zero, reading starts
**                2 * srcWidth * srcHeight values into pSource.
** interlace      Non-zero: pSource is read as two consecutive fields of
**                srcHeight lines. The first field goes to destination rows
**                0, 2, 4, ... scaled by 7/8, the second field goes to rows
**                1, 3, 5, ... unscaled.
**                Zero: every source line is written unscaled to the odd
**                destination row of its pair, and the even row of the pair
**                receives 7/16 of the sum of that row and the previously
**                written odd row (the row itself for the first line).
*/
static void copySharpPAL_2x2_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                                int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt32* rgbLut     = (UInt32*)pRgbTable;
    UInt32* srcLine    = (UInt32*)pSource;
    UInt32* upperRow   = (UInt32*)pDestination;
    UInt32* lowerRow   = upperRow + dstPitch / sizeof(UInt32);
    UInt32* blendRow   = lowerRow;
    int     srcStep    = 2 * srcPitch;
    int     dstStep    = 2 * dstPitch;
    int     fieldCount = interlace ? 2 : 1;
    int     field;

    // The page offset is only honoured when drawing a progressive frame
    if (!interlace && evenOddPage) {
        srcLine += 2 * srcWidth * srcHeight;
    }

    for (field = 0; field < fieldCount; field++) {
        // Exactly one of three output modes is active for a whole field:
        // dimOnly (first interlaced field), plainOnly (second interlaced
        // field) or neither (progressive, both rows written).
        const int dimOnly   = interlace && field == 0;
        const int plainOnly = interlace && field != 0;
        int y;

        for (y = 0; y < srcHeight; y++) {
            UInt32 cur  = srcLine[0];
            UInt32 prev = cur;
            const int wide = srcDoubleWidth[y];
            int pair;

            for (pair = 0; pair < srcWidth; pair++) {
                const int x = 2 * pair;
                UInt32 rgbA;
                UInt32 rgbB;
                UInt32 grain;

                if (wide) {
                    // One source value per output pixel, each one mixed
                    // with the two values read before it
                    UInt32 in  = srcLine[x];
                    UInt32 mix = ((in + prev) >> 1) & YCBCR_MASK;

                    rgbA = rgbLut[((mix + cur) >> 1) & YCBCR_MASK];
                    prev = cur;
                    cur  = in;

                    in   = srcLine[x + 1];
                    mix  = ((in + prev) >> 1) & YCBCR_MASK;
                    rgbB = rgbLut[((mix + cur) >> 1) & YCBCR_MASK];
                    prev = cur;
                    cur  = in;
                }
                else {
                    // One source value per output pair; the left pixel leans
                    // toward the previous value, the right one toward the new
                    const UInt32 in  = srcLine[pair];
                    const UInt32 mix = ((in + cur) >> 1) & YCBCR_MASK;

                    rgbA = rgbLut[((mix + cur) >> 1) & YCBCR_MASK];
                    rgbB = rgbLut[((mix + in) >> 1) & YCBCR_MASK];
                    cur  = in;
                }

                grain = (rnd >> 29) * 0x10101;

                if (dimOnly) {
                    upperRow[x]     = 7 * ((rgbA / 8) & 0x1f1f1f) + grain;
                    upperRow[x + 1] = 7 * ((rgbB / 8) & 0x1f1f1f) + grain;
                }
                else if (plainOnly) {
                    lowerRow[x]     = rgbA + grain;
                    lowerRow[x + 1] = rgbB + grain;
                }
                else {
                    // blendRow may be lowerRow itself (first line), so each
                    // lowerRow pixel is stored before it is read back
                    lowerRow[x]     = rgbA + grain;
                    upperRow[x]     = 7 * (((blendRow[x] >> 4) & 0x0f0f0f) + ((rgbA >> 4) & 0x0f0f0f)) + grain;
                    lowerRow[x + 1] = rgbB + grain;
                    upperRow[x + 1] = 7 * (((blendRow[x + 1] >> 4) & 0x0f0f0f) + ((rgbB >> 4) & 0x0f0f0f)) + grain;
                }

                rnd *= 23;
            }

            srcLine = (UInt32*)((UInt8*)srcLine + srcStep);

            if (!plainOnly) {
                upperRow = (UInt32*)((UInt8*)upperRow + dstStep);
            }
            if (!dimOnly) {
                blendRow = lowerRow;
                lowerRow = (UInt32*)((UInt8*)lowerRow + dstStep);
            }
        }
    }
}

/*
** copyPAL_2x2_16
**
** Writes a horizontally blurred copy of the source image, two output pixels
** wide and two output rows high per source pixel pair / line, to a 16 bit
** destination.
**
** pSource        Source lines of UInt32 values. Neighbouring values are
**                averaged, masked with YCBCR_MASK and the result is used as
**                an index into pRgbTable. Consecutive source lines are
**                2 * srcPitch bytes apart.
** srcWidth       Number of output pixel pairs per line. A line whose
**                srcDoubleWidth entry is non-zero is read as 2 * srcWidth
**                source values (one per output pixel); any other line is
**                read as srcWidth values (two output pixels each).
** srcHeight      Number of source lines per field (interlace) or per frame.
** srcDoubleWidth Per line flag array, indexed by the line number within the
**                field / frame.
** pDestination   First destination row. Rows are dstPitch bytes apart.
** rnd            Noise seed. Its top bit selects whether 0x0821 is added to
**                both pixels of a pair, then rnd is multiplied by 23.
** pRgbTable      Lookup table of UInt16 output colors.
** decay          Scales how far the low four bits of a source value are
**                pulled toward the value kept in the file level history
**                buffer. Only normal width lines use and update history.
** evenOddPage    Progressive mode only: when non-zero, reading starts
**                2 * srcWidth * srcHeight values into pSource.
** interlace      Non-zero: pSource is read as two consecutive fields of
**                srcHeight lines. The first field goes to destination rows
**                0, 2, 4, ... scaled by 3/4, the second field goes to rows
**                1, 3, 5, ... unscaled. Both fields index history from
**                line 0.
**                Zero: every source line is written unscaled to the odd
**                destination row of its pair, and the even row of the pair
**                receives 3/8 of the sum of that row and the previously
**                written odd row (the row itself for the first line).
*/
static void copyPAL_2x2_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, UInt32 decay, int evenOddPage, int interlace)
{
    UInt16* rgbLut     = (UInt16*)pRgbTable;
    UInt32* srcLine    = (UInt32*)pSource;
    UInt16* upperRow   = (UInt16*)pDestination;
    UInt16* lowerRow   = upperRow + dstPitch / sizeof(UInt16);
    UInt16* blendRow   = lowerRow;
    int     srcStep    = 2 * srcPitch;
    int     dstStep    = 2 * dstPitch;
    int     fieldCount = interlace ? 2 : 1;
    int     field;

    // The page offset is only honoured when drawing a progressive frame
    if (!interlace && evenOddPage) {
        srcLine += 2 * srcWidth * srcHeight;
    }

    for (field = 0; field < fieldCount; field++) {
        // Exactly one of three output modes is active for a whole field:
        // dimOnly (first interlaced field), plainOnly (second interlaced
        // field) or neither (progressive, both rows written).
        const int dimOnly   = interlace && field == 0;
        const int plainOnly = interlace && field != 0;
        int y;

        for (y = 0; y < srcHeight; y++) {
            UInt32 cur   = srcLine[0];
            UInt32 prev1 = cur;
            UInt32 prev2 = cur;
            UInt32 next1 = cur;
            const int wide = srcDoubleWidth[y];
            int pair;

            for (pair = 0; pair < srcWidth; pair++) {
                const int x = 2 * pair;
                UInt32 mix;
                UInt16 rgbA;
                UInt16 rgbB;
                UInt16 grain;

                if (wide) {
                    // Five value window sliding one source value per output
                    // pixel. The two inner taps are applied in opposite
                    // order for the left and the right pixel.
                    UInt32 next2 = srcLine[x];

                    mix   = ((next2 + prev2) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + next1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + prev1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + cur)   >> 1) & YCBCR_MASK;
                    rgbA  = rgbLut[mix];

                    prev2 = prev1;
                    prev1 = cur;
                    cur   = next1;
                    next1 = next2;

                    next2 = srcLine[x + 1];

                    mix   = ((next2 + prev2) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + prev1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + next1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + cur)   >> 1) & YCBCR_MASK;
                    rgbB  = rgbLut[mix];

                    prev2 = prev1;
                    prev1 = cur;
                    cur   = next1;
                    next1 = next2;
                }
                else {
                    const UInt32 next = srcLine[pair];
                    UInt32 glow  = cur & 0x0f;
                    UInt32 delta = history[y][pair] - glow;

                    // Unsigned wrap-around selects one of the two terms:
                    // stored value above the current one adds
                    // decay * delta / 5, stored value below it subtracts
                    // decay * -delta / 16 (each limited by the >> 24 mask).
                    // Assumes UInt32 is an unsigned 32 bit type.
                    glow += ((-delta >> 24) & (decay * delta / 5)) - ((delta >> 24) & (decay * -delta / 16));
                    history[y][pair] = glow;
                    glow += cur & 0xfffffff0;

                    // Both output pixels start from the same mix of the
                    // value before the current one and the value after it
                    mix  = ((prev1 + next) >> 1) & YCBCR_MASK;

                    rgbA = rgbLut[((((mix + prev1) >> 1) & YCBCR_MASK) + glow) >> 1 & YCBCR_MASK];
                    rgbB = rgbLut[((((mix + next)  >> 1) & YCBCR_MASK) + glow) >> 1 & YCBCR_MASK];

                    prev1 = cur;
                    cur   = next;
                }

                grain = (UInt16)(rnd >> 31) * 0x0821;

                if (dimOnly) {
                    upperRow[x]     = 3 * ((rgbA / 4) & 0x39e7) + grain;
                    upperRow[x + 1] = 3 * ((rgbB / 4) & 0x39e7) + grain;
                }
                else if (plainOnly) {
                    lowerRow[x]     = rgbA + grain;
                    lowerRow[x + 1] = rgbB + grain;
                }
                else {
                    // blendRow may be lowerRow itself (first line), so each
                    // lowerRow pixel is stored before it is read back
                    lowerRow[x]     = rgbA + grain;
                    upperRow[x]     = 3 * (((blendRow[x] >> 3) & 0x18e3) + ((rgbA >> 3) & 0x18e3)) + grain;
                    lowerRow[x + 1] = rgbB + grain;
                    upperRow[x + 1] = 3 * (((blendRow[x + 1] >> 3) & 0x18e3) + ((rgbB >> 3) & 0x18e3)) + grain;
                }

                rnd *= 23;
            }

            srcLine = (UInt32*)((UInt8*)srcLine + srcStep);

            if (!plainOnly) {
                upperRow = (UInt16*)((UInt8*)upperRow + dstStep);
            }
            if (!dimOnly) {
                blendRow = lowerRow;
                lowerRow = (UInt16*)((UInt8*)lowerRow + dstStep);
            }
        }
    }
}

/*
** copyPAL_2x2_32
**
** Writes a horizontally blurred copy of the source image, two output pixels
** wide and two output rows high per source pixel pair / line, to a 32 bit
** destination.
**
** pSource        Source lines of UInt32 values. Neighbouring values are
**                averaged, masked with YCBCR_MASK and the result is used as
**                an index into pRgbTable. Consecutive source lines are
**                2 * srcPitch bytes apart.
** srcWidth       Number of output pixel pairs per line. A line whose
**                srcDoubleWidth entry is non-zero is read as 2 * srcWidth
**                source values (one per output pixel); any other line is
**                read as srcWidth values (two output pixels each).
** srcHeight      Number of source lines per field (interlace) or per frame.
** srcDoubleWidth Per line flag array, indexed by the line number within the
**                field / frame.
** pDestination   First destination row. Rows are dstPitch bytes apart.
** rnd            Noise seed. Its top three bits are added to each of the
**                three low bytes of both pixels of a pair, then rnd is
**                multiplied by 23.
** pRgbTable      Lookup table of UInt32 output colors.
** decay          Scales how far the low four bits of a source value are
**                pulled toward the value kept in the file level history
**                buffer. history is indexed by [line][source column], so
**                double width lines touch columns up to 2 * srcWidth - 1.
**                NOTE(review): no bounds check is made against the declared
**                size of history; callers must keep the image within it.
** evenOddPage    Progressive mode only: when non-zero, reading starts
**                2 * srcWidth * srcHeight values into pSource.
** interlace      Non-zero: pSource is read as two consecutive fields of
**                srcHeight lines. The first field goes to destination rows
**                0, 2, 4, ... scaled by 3/4, the second field goes to rows
**                1, 3, 5, ... unscaled. Both fields index history from
**                line 0.
**                Zero: every source line is written unscaled to the odd
**                destination row of its pair, and the even row of the pair
**                receives 6/16 of the sum of that row and the previously
**                written odd row (the row itself for the first line).
*/
static void copyPAL_2x2_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, UInt32 decay, int evenOddPage, int interlace)
{
    UInt32* rgbLut     = (UInt32*)pRgbTable;
    UInt32* srcLine    = (UInt32*)pSource;
    UInt32* upperRow   = (UInt32*)pDestination;
    // Divide as int so that the signed pitch is not converted to size_t
    UInt32* lowerRow   = upperRow + dstPitch / (int)sizeof(UInt32);
    UInt32* blendRow   = lowerRow;
    int     srcStep    = 2 * srcPitch;
    int     dstStep    = 2 * dstPitch;
    int     fieldCount = interlace ? 2 : 1;
    int     field;

    // The page offset is only honoured when drawing a progressive frame
    if (!interlace && evenOddPage) {
        srcLine += 2 * srcWidth * srcHeight;
    }

    for (field = 0; field < fieldCount; field++) {
        // Exactly one of three output modes is active for a whole field:
        // dimOnly (first interlaced field), plainOnly (second interlaced
        // field) or neither (progressive, both rows written).
        const int dimOnly   = interlace && field == 0;
        const int plainOnly = interlace && field != 0;
        int y;

        for (y = 0; y < srcHeight; y++) {
            UInt32 cur   = srcLine[0];
            UInt32 prev2 = cur;
            UInt32 prev1 = cur;
            UInt32 next1 = cur;
            const int wide = srcDoubleWidth[y];
            int pair;

            for (pair = 0; pair < srcWidth; pair++) {
                const int x = 2 * pair;
                UInt32 mix;
                UInt32 glow;
                UInt32 delta;
                UInt32 rgbA;
                UInt32 rgbB;
                UInt32 grain;

                if (wide) {
                    // Five value window sliding one source value per output
                    // pixel. The centre tap is the decayed value; the two
                    // inner taps are applied in opposite order for the left
                    // and the right pixel.
                    UInt32 next2 = srcLine[x];

                    // Unsigned wrap-around selects one of the two terms:
                    // stored value above the current one adds
                    // decay * delta / 5, stored value below it subtracts
                    // decay * -delta / 16 (each limited by the >> 24 mask).
                    // Assumes UInt32 is an unsigned 32 bit type.
                    glow  = cur & 0x0f;
                    delta = history[y][x] - glow;
                    glow += ((-delta >> 24) & (decay * delta / 5)) - ((delta >> 24) & (decay * -delta / 16));
                    history[y][x] = glow;
                    glow += cur & 0xfffffff0;

                    mix   = ((prev2 + next2) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + next1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + prev1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + glow)  >> 1) & YCBCR_MASK;
                    rgbA  = rgbLut[mix];

                    prev2 = prev1;
                    prev1 = cur;
                    cur   = next1;
                    next1 = next2;
                    next2 = srcLine[x + 1];

                    glow  = cur & 0x0f;
                    delta = history[y][x + 1] - glow;
                    glow += ((-delta >> 24) & (decay * delta / 5)) - ((delta >> 24) & (decay * -delta / 16));
                    history[y][x + 1] = glow;
                    glow += cur & 0xfffffff0;

                    mix   = ((prev2 + next2) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + prev1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + next1) >> 1) & YCBCR_MASK;
                    mix   = ((mix   + glow)  >> 1) & YCBCR_MASK;
                    rgbB  = rgbLut[mix];

                    prev2 = prev1;
                    prev1 = cur;
                    cur   = next1;
                    next1 = next2;
                }
                else {
                    const UInt32 next = srcLine[pair];

                    // Same decay step as for double width lines, indexed by
                    // the source column of the pair
                    glow  = cur & 0x0f;
                    delta = history[y][pair] - glow;
                    glow += ((-delta >> 24) & (decay * delta / 5)) - ((delta >> 24) & (decay * -delta / 16));
                    history[y][pair] = glow;
                    glow += cur & 0xfffffff0;

                    mix   = ((prev1 + next) >> 1) & YCBCR_MASK;
                    mix   = ((mix + prev1)  >> 1) & YCBCR_MASK;
                    mix   = ((mix + glow)   >> 1) & YCBCR_MASK;
                    rgbA  = rgbLut[mix];

                    mix   = ((next + prev1) >> 1) & YCBCR_MASK;
                    mix   = ((mix + next)   >> 1) & YCBCR_MASK;
                    mix   = ((mix + glow)   >> 1) & YCBCR_MASK;
                    rgbB  = rgbLut[mix];

                    prev1 = cur;
                    cur   = next;
                }

                grain = (rnd >> 29) * 0x10101;

                if (dimOnly) {
                    // The first interlaced field is dimmed to 3/4 for both
                    // line widths, matching copyPAL_2x2_16. Normal width
                    // lines used to be written at full brightness here.
                    upperRow[x]     = 3 * ((rgbA / 4) & 0x3f3f3f) + grain;
                    upperRow[x + 1] = 3 * ((rgbB / 4) & 0x3f3f3f) + grain;
                }
                else if (plainOnly) {
                    lowerRow[x]     = rgbA + grain;
                    lowerRow[x + 1] = rgbB + grain;
                }
                else {
                    // blendRow may be lowerRow itself (first line), so each
                    // lowerRow pixel is stored before it is read back
                    lowerRow[x]     = rgbA + grain;
                    upperRow[x]     = 6 * (((blendRow[x] >> 4) & 0x0f0f0f) + ((rgbA >> 4) & 0x0f0f0f)) + grain;
                    lowerRow[x + 1] = rgbB + grain;
                    upperRow[x + 1] = 6 * (((blendRow[x + 1] >> 4) & 0x0f0f0f) + ((rgbB >> 4) & 0x0f0f0f)) + grain;
                }

                rnd *= 23;
            }

            srcLine = (UInt32*)((UInt8*)srcLine + srcStep);

            if (!plainOnly) {
                upperRow = (UInt32*)((UInt8*)upperRow + dstStep);
            }
            if (!dimOnly) {
                blendRow = lowerRow;
                lowerRow = (UInt32*)((UInt8*)lowerRow + dstStep);
            }
        }
    }
}

/*
** PAL style 1x1 renderer with 16 bit output.
**
** Source lines are consumed in pairs. Every output pixel is a horizontal
** blend of the previous, current and next source colour (all YCbCr table
** indices), translated to RGB through pRgbTable. The second line of each
** pair is written darker (3 * (rgb >> 2), masked per channel with 0x39e7)
** and every pixel gets one bit of pseudo random noise taken from rnd.
**
** pSource        - source pixels (UInt32 table indices); a second page is
**                  located 2 * srcWidth * srcHeight entries further on
** srcWidth       - number of pixels written per destination line
** srcHeight      - number of source lines to process (handled two at a time)
** srcDoubleWidth - per line flag; non zero means the line holds two source
**                  pixels per output pixel, which are averaged
** pDestination   - destination buffer (UInt16 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** rnd            - noise seed, multiplied by 23 after every pixel
** pRgbTable      - UInt16 lookup table indexed by the blended colour
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero averages both pages into every pixel
*/
static void copyPAL_1x1_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt16* rgbLut    = (UInt16*)pRgbTable;
    UInt32* fieldA    = (UInt32*)pSource;
    UInt32* fieldB    = fieldA + 2 * srcWidth * srcHeight;
    UInt16* outRow    = (UInt16*)pDestination;
    int     srcStride = 2 * srcPitch;
    int     y;

    /* Progressive mode renders a single page; pick the second one on request */
    if (!interlace && evenOddPage) {
        fieldA = fieldB;
    }

    for (y = 0; y < srcHeight; y += 2) {
        /* Filter state is seeded once per line pair and carried into the second line */
        UInt32 cur  = fieldA[0];
        UInt32 prev = cur;
        int    line;

        for (line = 0; line < 2; line++) {
            int wide = srcDoubleWidth[y + line];
            int x;

            for (x = 0; x < srcWidth; x++) {
                UInt32 next;
                UInt32 mix;
                int    shade;

                if (wide) {
                    /* Two source pixels per output pixel: average them */
                    next = ((fieldA[2 * x] + fieldA[2 * x + 1]) >> 1) & YCBCR_MASK;
                    if (interlace) {
                        UInt32 other = ((fieldB[2 * x] + fieldB[2 * x + 1]) >> 1) & YCBCR_MASK;
                        next = ((next >> 1) + (other >> 1)) & YCBCR_MASK;
                    }
                }
                else {
                    next = fieldA[x];
                    if (interlace) {
                        next = ((next + fieldB[x]) >> 1) & YCBCR_MASK;
                    }
                }

                /* (prev + next) / 2 blended once more with the current colour */
                mix = ((prev + next) >> 1) & YCBCR_MASK;
                mix = ((mix + cur) >> 1) & YCBCR_MASK;

                shade = rgbLut[mix];
                if (line != 0) {
                    /* Second line of the pair is dimmed */
                    shade = 3 * ((shade >> 2) & 0x39e7);
                }
                outRow[x] = shade + (UInt16)(rnd >> 31) * 0x0821;

                rnd *= 23;
                prev = cur;
                cur  = next;
            }

            fieldA = (UInt32*)((UInt8*)fieldA + srcStride);
            if (interlace) {
                fieldB = (UInt32*)((UInt8*)fieldB + srcStride);
            }
            outRow = (UInt16*)((UInt8*)outRow + dstPitch);
        }
    }
}

/*
** PAL style 1x1 renderer with 32 bit output.
**
** Source lines are consumed in pairs. Every output pixel is a horizontal
** blend of the previous, current and next source colour (all YCbCr table
** indices), translated to RGB through pRgbTable. The first line of a pair
** is written at full brightness with one bit of noise (rnd >> 31); the
** second line is dimmed (7 * (rgb >> 3), masked per channel with 0x1f1f1f)
** and gets two bits of noise (rnd >> 30).
**
** Exactly srcWidth pixels are written per destination line. The previous
** implementation iterated 2 * srcWidth times on the first line of a pair
** when the line was flagged as double width, which read four times and
** wrote twice the row width; that overrun is removed here so the function
** agrees with its second-line loops and with the 16 bit variant.
**
** pSource        - source pixels (UInt32 table indices); a second page is
**                  located 2 * srcWidth * srcHeight entries further on
** srcWidth       - number of pixels written per destination line
** srcHeight      - number of source lines to process (handled two at a time)
** srcDoubleWidth - per line flag; non zero means the line holds two source
**                  pixels per output pixel, which are averaged
** pDestination   - destination buffer (UInt32 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** rnd            - noise seed, multiplied by 23 after every pixel
** pRgbTable      - UInt32 lookup table indexed by the blended colour
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero averages both pages into every pixel
*/
static void copyPAL_1x1_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt32* rgbLut    = (UInt32*)pRgbTable;
    UInt32* fieldA    = (UInt32*)pSource;
    UInt32* fieldB    = fieldA + 2 * srcWidth * srcHeight;
    UInt32* outRow    = (UInt32*)pDestination;
    int     srcStride = 2 * srcPitch;
    int     y;

    /* Progressive mode renders a single page; pick the second one on request */
    if (!interlace && evenOddPage) {
        fieldA = fieldB;
    }

    for (y = 0; y < srcHeight; y += 2) {
        /* Filter state is seeded once per line pair and carried into the second line */
        UInt32 cur  = fieldA[0];
        UInt32 prev = cur;
        int    line;

        for (line = 0; line < 2; line++) {
            int wide = srcDoubleWidth[y + line];
            int x;

            /* Always srcWidth output pixels, also for double width lines */
            for (x = 0; x < srcWidth; x++) {
                UInt32 next;
                UInt32 mix;
                UInt32 rgb;

                if (wide) {
                    /* Two source pixels per output pixel: average them */
                    next = ((fieldA[2 * x] + fieldA[2 * x + 1]) >> 1) & YCBCR_MASK;
                    if (interlace) {
                        UInt32 other = ((fieldB[2 * x] + fieldB[2 * x + 1]) >> 1) & YCBCR_MASK;
                        next = ((next >> 1) + (other >> 1)) & YCBCR_MASK;
                    }
                }
                else {
                    next = fieldA[x];
                    if (interlace) {
                        next = ((next + fieldB[x]) >> 1) & YCBCR_MASK;
                    }
                }

                /* (prev + next) / 2 blended once more with the current colour */
                mix = ((prev + next) >> 1) & YCBCR_MASK;
                mix = ((mix + cur) >> 1) & YCBCR_MASK;

                rgb = rgbLut[mix];
                if (line == 0) {
                    outRow[x] = rgb + (rnd >> 31) * 0x10101;
                }
                else {
                    /* Second line of the pair is dimmed and slightly noisier */
                    outRow[x] = 7 * ((rgb >> 3) & 0x1f1f1f) + (rnd >> 30) * 0x10101;
                }

                rnd *= 23;
                prev = cur;
                cur  = next;
            }

            fieldA = (UInt32*)((UInt8*)fieldA + srcStride);
            if (interlace) {
                fieldB = (UInt32*)((UInt8*)fieldB + srcStride);
            }
            outRow = (UInt32*)((UInt8*)outRow + dstPitch);
        }
    }
}


/*****************************************************************************
**
** Fast rendering routines
**
******************************************************************************
*/
/*
** Unfiltered 1x1 renderer with 16 bit output.
**
** Each destination pixel is a single table lookup. Double width lines
** average two neighbouring source pixels first, and interlaced rendering
** additionally averages the corresponding pixels of both pages.
**
** Only whole groups of eight pixels are rendered, i.e. 8 * (srcWidth / 8)
** pixels per line; rnd is not used by this renderer.
**
** pSource        - source pixels (UInt32 table indices); the interlace
**                  partner page starts 2 * srcWidth * srcHeight entries on
** srcWidth       - line width in pixels (rounded down to a multiple of 8)
** srcHeight      - number of lines to render
** srcDoubleWidth - per line flag; non zero means two source pixels per
**                  output pixel
** pDestination   - destination buffer (UInt16 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** pRgbTable      - UInt16 lookup table
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero blends both pages
*/
static void copy_1x1_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                        int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt16* rgbLut    = (UInt16*)pRgbTable;
    UInt32* fieldA    = (UInt32*)pSource;
    UInt32* fieldB    = fieldA + 2 * srcWidth * srcHeight;
    UInt16* outRow    = (UInt16*)pDestination;
    int     pixels    = 8 * (srcWidth / 8);
    int     srcStride = 2 * srcPitch;
    int     x;
    int     y;

    /* The page offset is based on the width rounded down to a multiple of 8 */
    if (!interlace && evenOddPage) {
        fieldA += 2 * pixels * srcHeight;
    }

    for (y = 0; y < srcHeight; y++) {
        if (srcDoubleWidth[y]) {
            for (x = 0; x < pixels; x++) {
                UInt32 col = ((fieldA[2 * x] + fieldA[2 * x + 1]) >> 1) & YCBCR_MASK;

                if (interlace) {
                    UInt32 other = ((fieldB[2 * x] + fieldB[2 * x + 1]) >> 1) & YCBCR_MASK;
                    col = ((col + other) >> 1) & YCBCR_MASK;
                }
                outRow[x] = rgbLut[col];
            }
        }
        else {
            for (x = 0; x < pixels; x++) {
                UInt32 col = fieldA[x];

                if (interlace) {
                    col = ((col + fieldB[x]) >> 1) & YCBCR_MASK;
                }
                outRow[x] = rgbLut[col];
            }
        }

        fieldA = (UInt32*)((UInt8*)fieldA + srcStride);
        if (interlace) {
            fieldB = (UInt32*)((UInt8*)fieldB + srcStride);
        }
        outRow = (UInt16*)((UInt8*)outRow + dstPitch);
    }
}

/*
** Unfiltered 1x1 renderer with 32 bit output.
**
** Each destination pixel is a single table lookup. Double width lines
** average two neighbouring source pixels first, and interlaced rendering
** additionally averages the corresponding pixels of both pages.
**
** Only whole groups of eight pixels are rendered, i.e. 8 * (srcWidth / 8)
** pixels per line; rnd is not used by this renderer.
**
** pSource        - source pixels (UInt32 table indices); the interlace
**                  partner page starts 2 * srcWidth * srcHeight entries on
** srcWidth       - line width in pixels (rounded down to a multiple of 8)
** srcHeight      - number of lines to render
** srcDoubleWidth - per line flag; non zero means two source pixels per
**                  output pixel
** pDestination   - destination buffer (UInt32 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** pRgbTable      - UInt32 lookup table
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero blends both pages
*/
static void copy_1x1_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                        int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt32* rgbLut    = (UInt32*)pRgbTable;
    UInt32* fieldA    = (UInt32*)pSource;
    UInt32* fieldB    = fieldA + 2 * srcWidth * srcHeight;
    UInt32* outRow    = (UInt32*)pDestination;
    int     pixels    = 8 * (srcWidth / 8);
    int     srcStride = 2 * srcPitch;
    int     x;
    int     y;

    /* The page offset is based on the width rounded down to a multiple of 8 */
    if (!interlace && evenOddPage) {
        fieldA += 2 * pixels * srcHeight;
    }

    for (y = 0; y < srcHeight; y++) {
        if (srcDoubleWidth[y]) {
            for (x = 0; x < pixels; x++) {
                UInt32 col = ((fieldA[2 * x] + fieldA[2 * x + 1]) >> 1) & YCBCR_MASK;

                if (interlace) {
                    UInt32 other = ((fieldB[2 * x] + fieldB[2 * x + 1]) >> 1) & YCBCR_MASK;
                    col = ((col + other) >> 1) & YCBCR_MASK;
                }
                outRow[x] = rgbLut[col];
            }
        }
        else {
            for (x = 0; x < pixels; x++) {
                UInt32 col = fieldA[x];

                if (interlace) {
                    col = ((col + fieldB[x]) >> 1) & YCBCR_MASK;
                }
                outRow[x] = rgbLut[col];
            }
        }

        fieldA = (UInt32*)((UInt8*)fieldA + srcStride);
        if (interlace) {
            fieldB = (UInt32*)((UInt8*)fieldB + srcStride);
        }
        outRow = (UInt32*)((UInt8*)outRow + dstPitch);
    }
}

/*
** Unfiltered 2x2 renderer with 16 bit output.
**
** Every source line produces two destination lines. Normal lines write each
** source pixel twice horizontally; double width lines copy 2 * srcWidth
** source pixels one to one. When interlaced, the upper destination line is
** taken from the first page and the lower one from the second page;
** otherwise both destination lines show the same (selected) page.
**
** Pixels are processed in groups of four, so the per line pixel count is
** effectively rounded up to a multiple of four. rnd is not used.
**
** pSource        - source pixels (UInt32 table indices); the second page
**                  starts 2 * srcWidth * srcHeight entries further on
** srcWidth       - line width in pixels
** srcHeight      - number of source lines
** srcDoubleWidth - per line flag selecting the one to one copy
** pDestination   - destination buffer (UInt16 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** pRgbTable      - UInt16 lookup table
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero renders both pages into alternating lines
*/
static void copy_2x2_16(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                        int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt16* rgbLut    = (UInt16*)pRgbTable;
    UInt32* upperSrc  = (UInt32*)pSource;
    UInt32* lowerSrc;
    UInt16* upperDst  = (UInt16*)pDestination;
    UInt16* lowerDst  = upperDst + dstPitch / sizeof(UInt16);
    int     srcStride = 2 * srcPitch;
    int     group;
    int     x;
    int     y;

    if (interlace) {
        lowerSrc = upperSrc + 2 * srcWidth * srcHeight;
    }
    else {
        if (evenOddPage) {
            upperSrc += 2 * srcWidth * srcHeight;
        }
        /* Progressive: both destination lines come from the same source line */
        lowerSrc = upperSrc;
    }

    for (y = 0; y < srcHeight; y++) {
        if (srcDoubleWidth[y]) {
            for (group = 0; group < 2 * srcWidth; group += 4) {
                for (x = group; x < group + 4; x++) {
                    upperDst[x] = rgbLut[upperSrc[x]];
                    lowerDst[x] = rgbLut[lowerSrc[x]];
                }
            }
        }
        else {
            for (group = 0; group < srcWidth; group += 4) {
                for (x = group; x < group + 4; x++) {
                    UInt16 upperCol = rgbLut[upperSrc[x]];
                    UInt16 lowerCol = rgbLut[lowerSrc[x]];

                    upperDst[2 * x]     = upperCol;
                    lowerDst[2 * x]     = lowerCol;
                    upperDst[2 * x + 1] = upperCol;
                    lowerDst[2 * x + 1] = lowerCol;
                }
            }
        }

        upperSrc = (UInt32*)((UInt8*)upperSrc + srcStride);
        lowerSrc = (UInt32*)((UInt8*)lowerSrc + srcStride);
        upperDst = (UInt16*)((UInt8*)upperDst + dstPitch * 2);
        lowerDst = (UInt16*)((UInt8*)lowerDst + dstPitch * 2);
    }
}

/*
** Unfiltered 2x2 renderer with 32 bit output.
**
** Every source line produces two destination lines. Normal lines write each
** source pixel twice horizontally; double width lines copy 2 * srcWidth
** source pixels one to one. When interlaced, the upper destination line is
** taken from the first page and the lower one from the second page;
** otherwise both destination lines show the same (selected) page.
**
** Pixels are processed in groups of four, so the per line pixel count is
** effectively rounded up to a multiple of four. rnd is not used.
**
** pSource        - source pixels (UInt32 table indices); the second page
**                  starts 2 * srcWidth * srcHeight entries further on
** srcWidth       - line width in pixels
** srcHeight      - number of source lines
** srcDoubleWidth - per line flag selecting the one to one copy
** pDestination   - destination buffer (UInt32 pixels)
** srcPitch       - source pitch in bytes; rows advance by twice this value
** dstPitch       - destination pitch in bytes
** pRgbTable      - UInt32 lookup table
** evenOddPage    - when not interlaced, non zero selects the second page
** interlace      - non zero renders both pages into alternating lines
*/
static void copy_2x2_32(void* pSource, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDestination, 
                        int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage, int interlace)
{
    UInt32* rgbLut    = (UInt32*)pRgbTable;
    UInt32* upperSrc  = (UInt32*)pSource;
    UInt32* lowerSrc;
    UInt32* upperDst  = (UInt32*)pDestination;
    UInt32* lowerDst  = upperDst + dstPitch / sizeof(UInt32);
    int     srcStride = 2 * srcPitch;
    int     group;
    int     x;
    int     y;

    if (interlace) {
        lowerSrc = upperSrc + 2 * srcWidth * srcHeight;
    }
    else {
        if (evenOddPage) {
            upperSrc += 2 * srcWidth * srcHeight;
        }
        /* Progressive: both destination lines come from the same source line */
        lowerSrc = upperSrc;
    }

    for (y = 0; y < srcHeight; y++) {
        if (srcDoubleWidth[y]) {
            for (group = 0; group < 2 * srcWidth; group += 4) {
                for (x = group; x < group + 4; x++) {
                    upperDst[x] = rgbLut[upperSrc[x]];
                    lowerDst[x] = rgbLut[lowerSrc[x]];
                }
            }
        }
        else {
            for (group = 0; group < srcWidth; group += 4) {
                for (x = group; x < group + 4; x++) {
                    UInt32 upperCol = rgbLut[upperSrc[x]];
                    UInt32 lowerCol = rgbLut[lowerSrc[x]];

                    upperDst[2 * x]     = upperCol;
                    lowerDst[2 * x]     = lowerCol;
                    upperDst[2 * x + 1] = upperCol;
                    lowerDst[2 * x + 1] = lowerCol;
                }
            }
        }

        upperSrc = (UInt32*)((UInt8*)upperSrc + srcStride);
        lowerSrc = (UInt32*)((UInt8*)lowerSrc + srcStride);
        upperDst = (UInt32*)((UInt8*)upperDst + dstPitch * 2);
        lowerDst = (UInt32*)((UInt8*)lowerDst + dstPitch * 2);
    }
}

/*
** Renders the source frame at double size in 32 bit color using the
** scale2x filter.
**
** Every source value is translated through pRgbTable into a temporary,
** tightly packed image which is then handed to scale() to produce the
** enlarged picture in pDestination.
**
** pSource      - source frame, read as UInt32 table indices
** srcWidth     - pixels per source row
** srcHeight    - number of source rows
** pDestination - output buffer receiving the enlarged image
** srcPitch     - source row stride; after each row the read pointer is
**                advanced by srcPitch / 2 UInt32 elements
** dstPitch     - destination row stride in bytes, forwarded to scale()
** rnd          - not used by this renderer
** pRgbTable    - UInt32 lookup table indexed by the source values
** evenOddPage  - when non zero, reading starts 2 * srcWidth * srcHeight
**                elements into the source
**
** Frames that are empty, or that hold more pixels than the temporary
** image can store, are not drawn.
*/
static void scale2x_2x2_32(void* pSource, int srcWidth, int srcHeight, void* pDestination,
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage)
{
    enum { IMG_SRC_PIXELS = 80000 };

    UInt32  ImgSrc[IMG_SRC_PIXELS];
    UInt32* pRgbTable32 = (UInt32*)pRgbTable;
    UInt32* pSrc        = (UInt32*)pSource;
    UInt32* pImg        = ImgSrc;
    int w, h;

    (void)rnd;

    /* Refuse frames that do not fit in ImgSrc instead of overrunning the stack. */
    if (srcWidth <= 0 || srcHeight <= 0 || srcWidth > IMG_SRC_PIXELS / srcHeight) {
        return;
    }

    if (evenOddPage) {
        pSrc += 2 * srcWidth * srcHeight;
    }

    srcPitch /= 2;

    /* Build the packed, color converted copy of the frame. */
    for (h = 0; h < srcHeight; h++) {
        for (w = 0; w < srcWidth; w++) {
            *pImg++ = pRgbTable32[pSrc[w]];
        }
        pSrc += srcPitch;
    }

    /* Use the real destination stride so surfaces whose rows are wider
     * than the picture are written correctly. */
    scale(2, pDestination, dstPitch, ImgSrc, srcWidth * (int)sizeof(UInt32), (int)sizeof(UInt32), srcWidth, srcHeight);
}

/*
** Renders the source frame at double size in 16 bit color using the
** scale2x filter.
**
** Every source value is translated through pRgbTable into a temporary,
** tightly packed image which is then handed to scale() to produce the
** enlarged picture in pDestination.
**
** pSource      - source frame, read as UInt32 table indices
** srcWidth     - pixels per source row
** srcHeight    - number of source rows
** pDestination - output buffer receiving the enlarged image
** srcPitch     - source row stride; after each row the read pointer is
**                advanced by srcPitch * 2 bytes
** dstPitch     - destination row stride in bytes, forwarded to scale()
** rnd          - not used by this renderer
** pRgbTable    - UInt16 lookup table indexed by the source values
** evenOddPage  - when non zero, reading starts 2 * srcWidth * srcHeight
**                elements into the source
**
** Frames that are empty, or that hold more pixels than the temporary
** image can store, are not drawn.
*/
static void scale2x_2x2_16(void* pSource, int srcWidth, int srcHeight, void* pDestination,
                           int srcPitch, int dstPitch, UInt32 rnd, void* pRgbTable, int evenOddPage)
{
    enum { IMG_SRC_PIXELS = 320 * 250 };

    UInt16  ImgSrc[IMG_SRC_PIXELS];
    UInt16* pRgbTable16 = (UInt16*)pRgbTable;
    UInt32* pSrc        = (UInt32*)pSource;
    UInt16* pImg        = ImgSrc;
    int w, h;

    (void)rnd;

    /* Refuse frames that do not fit in ImgSrc instead of overrunning the stack. */
    if (srcWidth <= 0 || srcHeight <= 0 || srcWidth > IMG_SRC_PIXELS / srcHeight) {
        return;
    }

    if (evenOddPage) {
        pSrc += 2 * srcWidth * srcHeight;
    }

    srcPitch *= 2;

    /* Build the packed, color converted copy of the frame. */
    for (h = 0; h < srcHeight; h++) {
        for (w = 0; w < srcWidth; w++) {
            *pImg++ = pRgbTable16[pSrc[w]];
        }
        pSrc = (UInt32*)((UInt8*)pSrc + srcPitch);
    }

    /* Use the real destination stride so surfaces whose rows are wider
     * than the picture are written correctly. */
    scale(2, pDestination, dstPitch, ImgSrc, srcWidth * (int)sizeof(UInt16), (int)sizeof(UInt16), srcWidth, srcHeight);
}

/*****************************************************************************
**
** Public interface methods
**
******************************************************************************
*/
/*
** Allocates a zero initialized Video object, builds the color lookup
** tables and selects the fast pal mode with the color tables.
**
** Returns the new object, or NULL if the allocation failed. The object
** is released with videoDestroy().
*/
Video* videoCreate()
{
    Video* pVideo = (Video*)calloc(1, sizeof(Video));

    /* Out of memory: report it to the caller instead of dereferencing NULL. */
    if (pVideo == NULL) {
        return NULL;
    }

    initYJKtoYCbCrTable();
    initRGBTable();

    pVideo->palMode     = VIDEO_PAL_FAST;
    pVideo->pRgbTable16 = pRgbTableColor16;
    pVideo->pRgbTable32 = pRgbTableColor32;

    return pVideo;
}

/*
** Releases a Video object by passing it to free().
*/
void videoDestroy(Video* pVideo) 
{
    free(pVideo);
}

/*
** Derives the decay value from the frame skip count.
**
** The decay is stored as 3 - skipCount; skip counts of 3 or more all
** give a decay of 0.
*/
void videoSetFrameSkip(Video* pVideo, UInt32 skipCount)
{
    const UInt32 maxSkip = 3;

    pVideo->decay = (skipCount < maxSkip) ? (maxSkip - skipCount) : 0;
}

/*
** Gets the YCbCr color index for an RGB color. The color tables are
** updated with the exact color for that index.
**
** pVideo  - not used
** R, G, B - color components; values outside 0..255 are clamped to that
**           range before use
**
** The index is built from Y / 16 in the low bits, 16 + Cb / 16 shifted
** left by 5 and 16 + Cr / 16 shifted left by 11, where Cb = B - Y and
** Cr = R - Y. The colors written to the tables are limited to 6..247 per
** component; the 16 bit tables are packed as 5-6-5.
**
** Returns the index, which is always below MAX_YCBCR_VALUE.
*/
UInt32 videoGetColor(Video* pVideo, int R, int G, int B)
{
    int Y;
    int Cb;
    int Cr;
    int L;
    UInt32 YCbCr;

    (void)pVideo;

    /* Out of range components would shift negative values and index past
     * the end of the tables, so bring them into 0..255 first. */
    R = MAX(0, MIN(255, R));
    G = MAX(0, MIN(255, G));
    B = MAX(0, MIN(255, B));

    Y  = (int)(0.2989*R + 0.5866*G + 0.1145*B);
    Cb = B - Y;
    Cr = R - Y;

    YCbCr = (UInt32)((Y / 16) | ((16 + (Cb / 16)) << 5) | ((16 + (Cr / 16)) << 11));

    /* Limit the stored colors to 6..247 per component. */
    L = MAX(6, MIN(247, Y));
    R = MAX(6, MIN(247, R));
    G = MAX(6, MIN(247, G));
    B = MAX(6, MIN(247, B));

    pRgbTableColor32[YCbCr] = ((R << 16) | (G << 8) | B);
    pRgbTableGreen32[YCbCr] = 0x100010 | (L << 8);
    pRgbTableWhite32[YCbCr] = (L << 16) | (L << 8) | (L << 0);

    pRgbTableColor16[YCbCr] = ((R >> 3) << 11) | ((G >> 2) << 5) | (B >> 3);
    pRgbTableGreen16[YCbCr] = 0x0801 | (UInt16)((L >> 2) << 5);
    pRgbTableWhite16[YCbCr] = (UInt16)(((L >> 3) << 11) | ((L >> 2) << 5) | (L >> 3));

    return YCbCr;
}

/*
** Selects which pair of lookup tables (16 and 32 bit) the Video object
** renders with.
**
** VIDEO_GREEN picks the green tables, VIDEO_BLACKWHITE the white tables
** and every other value, including VIDEO_COLOR, the color tables.
**
** The first call also runs the table initialization.
** NOTE(review): initRGBTable() presumably rewrites entries previously
** stored by videoGetColor() - confirm whether that is intended.
*/
void videoSetColorMode(Video* pVideo, VideoColorMode colorMode)
{
    static int tablesReady = 0;

    if (tablesReady == 0) {
        initYJKtoYCbCrTable();
        initRGBTable();
        tablesReady = 1;
    }

    if (colorMode == VIDEO_GREEN) {
        pVideo->pRgbTable16 = pRgbTableGreen16;
        pVideo->pRgbTable32 = pRgbTableGreen32;
        return;
    }

    if (colorMode == VIDEO_BLACKWHITE) {
        pVideo->pRgbTable16 = pRgbTableWhite16;
        pVideo->pRgbTable32 = pRgbTableWhite32;
        return;
    }

    /* VIDEO_COLOR and any unrecognized mode */
    pVideo->pRgbTable16 = pRgbTableColor16;
    pVideo->pRgbTable32 = pRgbTableColor32;
}

/*
** Stores the pal mode in the Video object. videoRender() reads this
** value to choose which renderer to call.
*/
void videoSetPalMode(Video* pVideo, VideoPalMode palMode)
{
    pVideo->palMode = palMode;
}


/*
** Renders one frame by dispatching to the renderer that matches the bit
** depth, the pal mode stored in pVideo and the zoom factor.
**
** pVideo         - video object supplying pal mode, lookup tables and decay
** bitDepth       - 16 or 32; any other value draws nothing
** zoom           - 2 selects the 2x2 renderers, any other value the 1x1 ones
** evenOddPage    - forwarded to the renderer
** interlace      - forwarded to the renderer; when non zero the scale2x mode
**                  uses the plain 2x2 copy instead
** pSrc           - source frame
** srcWidth       - source width
** srcHeight      - source height
** srcDoubleWidth - forwarded to the renderer; in scale2x mode at zoom 2 the
**                  first value is read and, when non zero, the plain 2x2
**                  copy is used instead
** pDst           - destination buffer
** srcPitch       - source pitch
** dstPitch       - destination pitch
**
** A simple random value is stepped on every call. It is passed to the
** renderer in the noise and monitor modes; the other modes receive 0.
** A pal mode without a matching case draws nothing.
*/
void videoRender(Video* pVideo, int bitDepth, int zoom, int evenOddPage, int interlace,
                 void* pSrc, int srcWidth, int srcHeight, int* srcDoubleWidth, void* pDst, int srcPitch, int dstPitch)
{
    static UInt32 rnd = 51;
    UInt32 noise;
    UInt32 decay;
    void*  table;
    int    mode;
    int    zoom2;

    /* Update simple rand generator */
    rnd *= 13;

    if (bitDepth != 16 && bitDepth != 32) {
        return;
    }

    mode  = (int)pVideo->palMode;
    decay = pVideo->decay;
    zoom2 = (zoom == 2);

    /* Only the noise variants and the monitor mode get the random value */
    if (mode == VIDEO_PAL_SHARP_NOISE || mode == VIDEO_PAL_BLUR_NOISE || mode == VIDEO_MONITOR) {
        noise = rnd;
    }
    else {
        noise = 0;
    }

    if (bitDepth == 16) {
        table = pVideo->pRgbTable16;

        switch (mode) {
        case VIDEO_PAL_FAST:
            if (zoom2) {
                copy_2x2_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            else {
                copy_1x1_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_SHARP:
        case VIDEO_PAL_SHARP_NOISE:
            if (zoom2) {
                copySharpPAL_2x2_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_BLUR:
        case VIDEO_PAL_BLUR_NOISE:
            if (zoom2) {
                copyPAL_2x2_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, decay, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_MONITOR:
            if (zoom2) {
                copyMonitor_2x2_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, decay, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_SCALE2X:
            if (!zoom2) {
                copy_1x1_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            else if (!*srcDoubleWidth && !interlace) {
                scale2x_2x2_16(pSrc, srcWidth, srcHeight, pDst, srcPitch, dstPitch, 0, table, evenOddPage);
            }
            else {
                copy_2x2_16(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            break;

        default:
            break;
        }
    }
    else {
        table = pVideo->pRgbTable32;

        switch (mode) {
        case VIDEO_PAL_FAST:
            if (zoom2) {
                copy_2x2_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            else {
                copy_1x1_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_SHARP:
        case VIDEO_PAL_SHARP_NOISE:
            if (zoom2) {
                copySharpPAL_2x2_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_BLUR:
        case VIDEO_PAL_BLUR_NOISE:
            if (zoom2) {
                copyPAL_2x2_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, decay, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_MONITOR:
            if (zoom2) {
                copyMonitor_2x2_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, decay, evenOddPage, interlace);
            }
            else {
                copyPAL_1x1_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, noise, table, evenOddPage, interlace);
            }
            break;

        case VIDEO_PAL_SCALE2X:
            if (!zoom2) {
                copy_1x1_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            else if (!*srcDoubleWidth && !interlace) {
                scale2x_2x2_32(pSrc, srcWidth, srcHeight, pDst, srcPitch, dstPitch, 0, table, evenOddPage);
            }
            else {
                copy_2x2_32(pSrc, srcWidth, srcHeight, srcDoubleWidth, pDst, srcPitch, dstPitch, 0, table, evenOddPage, interlace);
            }
            break;

        default:
            break;
        }
    }
}
