/* Mednafen - Multi-system Emulator
 * 
 * Copyright notice for this file:
 *  Copyright (C) 1998 BERO
 *  Copyright (C) 2003 Xodnizel
 *  
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or   
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include        <string.h>
#include        <stdio.h>
#include        <stdlib.h>

#include        "../nes.h"
#include        "../x6502.h"
#include	"../nsf.h"
#include        "../sound.h"
#include        "../endian.h"
#include        "../memory.h"

#include        "../cart.h"
#include	"ppu.h"
#include        "palette.h"
#include        "../input.h"  

#define VBlankON        (PPU[0]&0x80)   /* Generate VBlank NMI */
#define Sprite16        (PPU[0]&0x20)   /* Sprites 8x16/8x8        */
#define BGAdrHI         (PPU[0]&0x10)   /* BG pattern adr $0000/$1000 */
#define SpAdrHI         (PPU[0]&0x08)   /* Sprite pattern adr $0000/$1000 */
#define INC32           (PPU[0]&0x04)   /* auto increment 1/32  */

#define SpriteON        (PPU[1]&0x10)   /* Show Sprite             */
#define ScreenON        (PPU[1]&0x08)   /* Show screen             */

#define PPU_status      (PPU[2])

static void FetchSpriteData(void);
static void RefreshLine(int lastpixel);
static void RefreshSprites(void);

static void Fixit1(void);
static uint32 ppulut1[256], ppulut2[256], ppulut3[128];

static void makeppulut(void)
{
 int x;
 int y;

 for(x=0;x<256;x++)
 {
  ppulut1[x]=0;
  for(y=0;y<8;y++) 
  {
   ppulut1[x] |= ((x>>(7 - y))&0x1) << (y*4);
  }
  ppulut2[x]=ppulut1[x]<<1;
 }

 {

  int cc,xo,pixel;

  for(cc=0;cc<16;cc++)
  {
   for(xo=0;xo<8;xo++)
   {
    ppulut3[xo|(cc<<3)]=0;
    for(pixel=0;pixel<8;pixel++)
    {
     int shiftr;
      shiftr=(pixel+xo)/8;
      shiftr*=2;
      ppulut3[xo|(cc<<3)]|=(( cc>>shiftr )&3)<<(2+pixel*4);
    }
//    printf("%08x\n",ppulut3[xo|(cc<<3)]);
   }
  }

 }
} 
  
/* Set to 2 by MDFNPPU_Reset(); while non-zero, MDFNPPU_Loop() runs a frame's
   worth of CPU time without rendering and decrements it. */
static int ppudead;
/* Toggled once per rendered frame in MDFNPPU_Loop(); subtracted from the
   cycle count of the NSF path there. */
static int kook = 0;
/* When non-zero (and MDFNDEF_DEBUGGER is defined), register reads and
   MDFNPPU_LineUpdate() skip their side effects. */
int fceuindbg=0;

/* MMC5 state consulted by the renderer.  These are only read in this file;
   presumably the MMC5 mapper code writes them -- TODO confirm. */
int MMC5Hack;
uint32 MMC5HackCHRBank;
uint32 MMC5HackVROMMask;
uint8 *MMC5HackExNTARAMPtr;
uint8 *MMC5HackVROMPTR;
uint8 MMC5HackCHRMode=0;
uint8 MMC5HackSPMode;   
uint8 MMC5HackSPScroll; 
uint8 MMC5HackSPPage;   


/* VRAMBuffer: value returned by the next $2007 read (see A2007).
   PPUGenLatch: last value written to any PPU register; returned by A200x. */
uint8 VRAMBuffer,PPUGenLatch;
/* Name table pages, indexed by bits 10-11 of the PPU address. */
uint8 *vnapage[4];
/* Bit n set => name table page n may be written through $2007 (see B2007). */
uint8 PPUNTARAM;  
/* Bit n set => 1KiB pattern page n may be written through $2007 (see B2007). */
uint8 PPUCHRRAM;  

/* Optional callbacks invoked from DoLine() on lines where rendering is enabled. */
void (*GameHBIRQHook)(void), (*GameHBIRQHook2)(void);
/* Optional callback invoked with the PPU address of each emulated fetch. */
void (*PPU_hook)(uint32 A);

/* First/second write toggle shared by $2005 and $2006; cleared by $2002 reads. */
static uint8 vtoggle=0;
/* Low three bits of the first $2005 write. */
static uint8 XOffset=0;
   
/* TempAddr is assembled by $2000/$2005/$2006 writes; RefreshAddr is the
   address actually used for $2007 accesses and background fetches. */
static uint32 TempAddr,RefreshAddr;
  
/* Sprites fetched per scanline: 8, or 64 when "nes.no8lim" is set (MDFNPPU_Init). */
static int maxsprites=8;  
    
/* scanline is equal to the current visible scanline we're on. */
     
int scanline;
/* 312 or 262, chosen by MDFNPPU_SetVideoSystem(). */
static uint32 scanlines_per_frame;
    
/* Register file: PPU[0]=$2000, PPU[1]=$2001, PPU[2]=status, PPU[3]=$2003. */
uint8 PPU[4];
/* Secondary sprite-RAM write index used by B2003/B2004. */
uint8 PPUSPL;
uint8 NTARAM[0x800],PALRAM[0x20],PALRAMCache[0x20],SPRAM[0x100],SPRBUF[0x100];
/* PALRAM index used to fill the line when rendering is disabled (see poopoo()). */
static uint8 PALBG;

/* Palette index -> framebuffer color, rebuilt by RefreshPaletteCache(). */
static uint32 PALRAMLUTCache[0x100];

static void RefreshPaletteCache(void)
{
 int x;

 static const double rtmul[8]={1, 1.239,.794,1.019,.905,1.023,.741,.75};
 static const double gtmul[8]={1, .915,1.086,.98,1.026,.908,.987,.75};
 static const double btmul[8]={1, .743,.882,.653,1.277,.979,.101,.75};

 for(x = 0;x < 0x40; x++)
 {
  int emp = PPU[1] >> 5;
  int r = (int)(rtmul[emp] * palo[x].r);
  int g = (int)(gtmul[emp] * palo[x].g);
  int b = (int)(btmul[emp] * palo[x].b);

  if(r > 255) r = 255;
  if(g > 255) g = 255;
  if(b > 255) b = 255;
  PALRAMLUTCache[x] = (PALRAMLUTCache + 0x40)[x] = (PALRAMLUTCache + 0x80)[x] = (PALRAMLUTCache + 0xC0)[x] = MK_COLOR(r,g,b);
 }
}

/* Called when the output pixel format changes.  The r/g/b arguments are not
 * used here; the color cache is simply rebuilt, except for NSF files.
 */
void MDFNNES_SetPixelFormat(int r, int g, int b)
{
 if(MDFNGameInfo->nes.type == GIT_NSF)
  return;

 RefreshPaletteCache();
}


/* Each macro yields a pointer to the byte at pattern address V, using the
   page table selected by address bits 10 and up.  The page tables themselves
   (VPage, MMC5SPRVPage, MMC5BGVPage) are declared outside this file. */
#define MMC5SPRVRAMADR(V)      &MMC5SPRVPage[(V)>>10][(V)]
#define MMC5BGVRAMADR(V)      &MMC5BGVPage[(V)>>10][(V)]  
#define VRAMADR(V)      &VPage[(V)>>10][(V)]


/* X position of a pending sprite 0 hit on the current line; 0x100 means
   "none" (DoLine() resets it to 0x100 at the end of the visible part). */
static int32 sphitx;
/* Opaque-pixel mask of sprite 0's row, one bit per pixel (see RefreshSprites). */
static uint8 sphitdata;

/* $2002 read: status in the upper bits, low five bits from the general latch.
 * Outside of debugger reads this also clears the write toggle and the
 * VBlank flag, and stores the returned value in the latch.
 */
static DECLFR(A2002)
{
	uint8 value;

	/* Catch up rendering first when a sprite 0 hit is still pending. */
	if(sphitx != 0x100)
	 MDFNPPU_LineUpdate();

	value = PPU_status | (PPUGenLatch & 0x1F);

	#ifdef MDFNDEF_DEBUGGER
	if(!fceuindbg)
	#endif
	{
	 PPUGenLatch = value;
	 PPU_status &= 0x7F;
	 vtoggle = 0;
	}

	return value;
}

/* Read handler for the write-only registers: brings rendering up to date and
 * returns the general latch.  Not correct for $2004 reads.
 */
static DECLFR(A200x)
{
	uint8 latch;

	MDFNPPU_LineUpdate();
	latch = PPUGenLatch;

	return latch;
}

/* Pixel position within the current line, derived from the time elapsed
   since linestartts (set in ResetRL()).  The divisor is 15 when PAL is set
   and 16 otherwise. */
#define GETLASTPIXEL    (PAL?((timestamp*48-linestartts)/15) : ((timestamp*48-linestartts)>>4) )

/* Palette-index buffer of the line being rendered; NULL (0) outside of a line. */
static uint8 *Pline;
/* Framebuffer row receiving the converted colors of the current line. */
static uint32 *PlineReal;
/* First pixel of the current line that has not been rendered yet. */
static int firstpixel;
/* timestamp*48 + X.count captured at the start of the line (see ResetRL()). */
static int linestartts;


/* $2004 read: returns 0 while the current pixel position is strictly between
 * 320 and 340, and 0xFF at any other position.
 */
static DECLFR(A2004)
{
	const int pixel = GETLASTPIXEL;

	return (pixel > 320 && pixel < 340) ? 0 : 0xFF;
}

static void poopoo();

/* $2007 read: returns the buffered byte, then (outside of debugger reads)
 * refills the buffer from the addressed pattern or name table page and
 * advances RefreshAddr by 1 or 32.
 */
static DECLFR(A2007)
{
	/* Address is sampled before rendering catches up. */
	const uint32 addr = RefreshAddr & 0x3FFF;
	uint8 buffered;

	MDFNPPU_LineUpdate();
	buffered = VRAMBuffer;

	#ifdef MDFNDEF_DEBUGGER
	if(!fceuindbg)
	#endif
	{
	 if(PPU_hook)
	  PPU_hook(addr);

	 PPUGenLatch = VRAMBuffer;
	 VRAMBuffer = (addr < 0x2000) ? VPage[addr >> 10][addr]
				      : vnapage[(addr >> 10) & 0x3][addr & 0x3FF];
	}

	#ifdef MDFNDEF_DEBUGGER
	if(!fceuindbg)
	#endif
	{
	 RefreshAddr += INC32 ? 32 : 1;
	 poopoo();

	 if(PPU_hook)
	  PPU_hook(RefreshAddr & 0x3fff);
	}

	return buffered;
}

/* $2000 write: control register.  Turning NMI generation on while the VBlank
 * flag is set triggers an NMI; the low two bits select the name table in
 * TempAddr.
 */
static DECLFW(B2000)
{
	MDFNPPU_LineUpdate();
	PPUGenLatch = V;

	/* NMI enable going 0 -> 1 while the VBlank flag is set. */
	if((V & 0x80) && (PPU_status & 0x80) && !(PPU[0] & 0x80))
	 TriggerNMI2();

	PPU[0] = V;
	TempAddr = (TempAddr & 0xF3FF) | ((V & 3) << 10);
}

/* Cached "background enabled" / "sprites enabled" flags, combining the
   $2001 enable bits with the layer-disable mask in rendis
   (bit 0 = background, bit 1 = sprites). */
static int RCBGOn;
static int RCSPROn;
static int rendis;

/* Recompute RCBGOn and RCSPROn from PPU[1] and rendis. */
static void RedoRenderCache(void)
{
	RCBGOn = (rendis & 1) ? 0 : ScreenON;
	RCSPROn = (rendis & 2) ? 0 : SpriteON;
}

/* $2001 write: mask register.  Rendering is brought up to date with the old
 * value first, then the cached enable flags and the color cache are rebuilt.
 */
static DECLFW(B2001)
{
	MDFNPPU_LineUpdate();

	PPU[1] = V;
	PPUGenLatch = V;

	RedoRenderCache();
	RefreshPaletteCache();
}
 
/* $2002 write: only the general latch is affected. */
static DECLFW(B2002)
{
	PPUGenLatch = V;
}

/* $2003 write: sets the sprite RAM address (PPU[3]) and loads PPUSPL with
 * its low three bits.
 */
static DECLFW(B2003)
{
	PPUSPL = V & 0x7;
	PPU[3] = V;
	PPUGenLatch = V;
}
 
/* $2004 write: stores V in sprite RAM and advances both address counters.
 * While PPUSPL is below 8 it is used as the index; afterwards PPU[3] is
 * used, but only when it is 8 or higher.
 */
static DECLFW(B2004)
{
	PPUGenLatch = V;

	if(PPUSPL < 8)
	 SPRAM[PPUSPL] = V;
	else if(PPU[3] >= 8)
	 SPRAM[PPU[3]] = V;

	PPUSPL++;
	PPU[3]++;
}
 
/* $2005 write: scroll register.  The first write sets the coarse X bits of
 * TempAddr and XOffset; the second sets coarse Y (bits 5-9) and fine Y
 * (bits 12-14).  The shared write toggle flips on every write.
 */
static DECLFW(B2005)
{
	/* Sampled before rendering catches up. */
	uint32 addr = TempAddr;

	MDFNPPU_LineUpdate();
	PPUGenLatch = V;

	if(vtoggle)
	{
	 addr = (addr & 0x8C1F) | ((V & ~0x7) << 2) | ((V & 7) << 12);
	}
	else
	{
	 XOffset = V & 7;
	 addr = (addr & 0xFFE0) | (V >> 3);
	}

	TempAddr = addr;
	vtoggle ^= 1;
}

/* When RefreshAddr has all of bits 8-13 set (the $3Fxx range), remember
 * bits 2-3 of it in PALBG.
 */
static void poopoo(void)
{
 if((RefreshAddr & 0x3F00) != 0x3F00)
  return;

 PALBG = RefreshAddr & 0xC;
}


/* $2006 write: VRAM address register.  The first write sets the high six
 * bits of TempAddr; the second sets the low byte, copies TempAddr into
 * RefreshAddr and notifies PPU_hook.
 */
static DECLFW(B2006)
{
	MDFNPPU_LineUpdate();
	PPUGenLatch = V;

	if(vtoggle)
	{
	 TempAddr = (TempAddr & 0xFF00) | V;
	 RefreshAddr = TempAddr;

	 poopoo();

	 if(PPU_hook)
	  PPU_hook(RefreshAddr);
	}
	else
	{
	 TempAddr = (TempAddr & 0x00FF) | ((V & 0x3f) << 8);
	}

	vtoggle ^= 1;
}
 
/* $2007 write: stores V at the current VRAM address (pattern page, name
 * table page or palette RAM, subject to the write-enable masks), advances
 * RefreshAddr by 1 or 32 and rebuilds the color cache.
 */
static DECLFW(B2007)
{
	/* Sampled before rendering catches up. */
	uint32 addr = RefreshAddr & 0x3FFF;

	MDFNPPU_LineUpdate();

	PPUGenLatch = V;

	if(addr < 0x2000)
	{
	 /* Pattern tables: only pages flagged as RAM accept the write. */
	 if(PPUCHRRAM & (1 << (addr >> 10)))
	  VPage[addr >> 10][addr] = V;
	}
	else if(addr < 0x3F00)
	{
	 /* Name tables: only pages flagged as writable accept the write. */
	 const uint32 page = (addr & 0xF00) >> 10;

	 if(PPUNTARAM & (1 << page))
	  vnapage[page][addr & 0x3FF] = V;
	}
	else
	{
	 /* Palette: entries with low two bits clear are masked with 0x0C,
	    all others with 0x1F. */
	 addr &= (addr & 3) ? 0x1F : 0x0C;
	 PALRAMCache[addr] = V & 0x3F;
	 PALRAM[addr] = V & 0x3F;
	}

	RefreshAddr += INC32 ? 32 : 1;
	poopoo();

	if(PPU_hook)
	 PPU_hook(RefreshAddr & 0x3fff);

	RefreshPaletteCache();
}
 
/* $4014 write: copies the 256 bytes of CPU page V to $2004, one
 * X6502_DMR/X6502_DMW pair per byte.
 */
static DECLFW(B4014)
{
        const uint32 page_base = V << 8;
        int offset;

        for(offset = 0; offset < 256; offset++)
        {
         X6502_DMW(0x2004, X6502_DMR(page_base + offset));
        }
}

/* Begin a new line: install the index and color targets, reset the
 * rendered-pixel cursor (to 256 on line -1, so nothing is drawn there),
 * record the line start time and render whatever is already due.
 */
static void ResetRL(uint8 *target, uint32 *real_target)
{
 PlineReal = real_target;
 Pline = target;

 firstpixel = (scanline == -1) ? 256 : 0;

 linestartts = timestamp*48 + X.count;
 MDFNPPU_LineUpdate();
}

static uint8 sprlinebuf[256+8] __attribute__ ((aligned (16)));

/* Render the current line up to the pixel matching the current time.
 * Does nothing outside of a line (Pline unset) or, with the debugger built
 * in, while fceuindbg is set.
 */
void MDFNPPU_LineUpdate(void)
{
 #ifdef MDFNDEF_DEBUGGER
 if(fceuindbg)
  return;
 #endif

 if(!Pline)
  return;

 RefreshLine(GETLASTPIXEL);
}
  
/* Flip the disable bit of layer `which` (0 = background, 1 = sprites, as
 * interpreted by RedoRenderCache()) and return true when the layer is
 * enabled afterwards.
 */
bool MDFNNES_ToggleLayer(int which) 
{
 const int layer_bit = 1 << which;

 rendis ^= layer_bit;
 RedoRenderCache();

 return (rendis & layer_bit) == 0;
}

static void EndRL(void)
{
 RefreshLine(341);
 Pline=0;
}
 

/* Name table byte latched by FetchNT() and consumed by FetchCD1/FetchCD2. */
static uint8 NT_TMP = 0;
/* Extended name table byte latched by FetchNT() in MMC5 mode 1. */
static uint8 MMC5NT_TMP;
/* Pattern data: FetchCD1() ORs a byte into [0], FetchCD2() ORs a byte into
   [1] and then shifts both left by 8. */
static uint32 pshift[2];
/* Attribute bits: FetchAT() ORs two bits in at bit 4, FetchCD2() shifts
   right by 2. */
static uint32 atlatch;
/* Tile column / row used to index MMC5HackExNTARAMPtr in MMC5 modes 3 and 4. */
static uint8 xs, ys;
/* Counter decremented per tile by FetchCD2() in MMC5 modes 3 and 4; its sign,
   together with bit 0x40 of MMC5HackSPMode, selects the fetch source. */
static int tochange;

/* Name table fetch: notifies PPU_hook, then latches the tile number at
 * RefreshAddr into NT_TMP.  MMC5 mode 1 additionally latches the extended
 * byte; modes 3/4 replace NT_TMP with the extended name table entry at
 * (xs, ys) when the tochange/MMC5HackSPMode test selects that source.
 */
static INLINE void FetchNT(int MMC5Ex)
{
	if(PPU_hook)
	 PPU_hook((RefreshAddr & 0xfff) | 0x2000);

	/* Fetch name table byte. */
	NT_TMP = vnapage[(RefreshAddr >> 10) & 3][RefreshAddr & 0x3ff];

	if(MMC5Ex == 1)
	{
	 MMC5NT_TMP = MMC5HackExNTARAMPtr[RefreshAddr & 0x3ff];
	 return;
	}

	if(MMC5Ex == 3 || MMC5Ex == 4)
	{
	 const bool side_bit = (MMC5HackSPMode & 0x40) != 0;

	 /* True when tochange <= 0 with the bit set, or tochange > 0 with
	    the bit clear. */
	 if((tochange <= 0) == side_bit)
	  NT_TMP = MMC5HackExNTARAMPtr[xs | (ys << 5)];
	}
}

/* Attribute fetch: obtains the 2-bit palette select for the current tile,
 * ORs it into atlatch at bit 4, steps RefreshAddr to the next tile column
 * (switching bit 10 when column 31 wraps) and notifies PPU_hook.
 */
static INLINE void FetchAT(int MMC5Ex)
{
	uint8 *nt_page = vnapage[(RefreshAddr >> 10) & 3];
	const uint8 tile_x = RefreshAddr & 0x1f;
	const bool mmc5_split = (MMC5Ex == 3 || MMC5Ex == 4) &&
				((tochange <= 0) == ((MMC5HackSPMode & 0x40) != 0));
	uint8 attr;

	if(MMC5Ex == 1)
	{
	 /* Top two bits of the extended byte. */
	 attr = (MMC5HackExNTARAMPtr[RefreshAddr & 0x3ff] & 0xC0) >> 6;
	}
	else if(mmc5_split)
	{
	 const uint8 at_byte = MMC5HackExNTARAMPtr[0x3c0 + (xs >> 2) + ((ys & 0x1C) << 1)];

	 attr = (at_byte >> ((xs & 2) + ((ys & 0x2) << 1))) & 3;
	}
	else
	{
	 /* Fetch attribute table byte. */
	 const uint8 at_byte = nt_page[0x3c0 + (tile_x >> 2) + ((RefreshAddr & 0x380) >> 4)];

	 attr = (at_byte >> ((tile_x & 2) + ((RefreshAddr & 0x40) >> 4))) & 3;
	}

	atlatch |= attr << 4;

	if(tile_x == 0x1f)
	 RefreshAddr ^= 0x41F;
	else
	 RefreshAddr++;

	if(PPU_hook)
	 PPU_hook(RefreshAddr & 0x3FFF);
}

/* First pattern fetch: ORs the byte at offset 0 of the current tile row into
 * pshift[0].  The source depends on MMC5Ex: 0 uses the normal pattern pages,
 * 1 the extended-attribute bank, 2 the MMC5 background pages, and 3/4 either
 * the MMC5HackSPPage bank or the MMC5 background pages.
 */
static INLINE void FetchCD1(int MMC5Ex)
{
 const uint32 fine_y = (RefreshAddr >> 12) & 7;
 const uint32 vadr = (NT_TMP << 4) | (((PPU[0] & 0x10) << 8) | fine_y);
 uint8 *src;

 switch(MMC5Ex)
 {
  case 1:
   src = MMC5HackVROMPTR + (((MMC5NT_TMP) & 0x3f & MMC5HackVROMMask) << 12) + (vadr & 0xfff);
   break;

  case 2:
   src = MMC5BGVRAMADR(vadr);
   break;

  case 3:
  case 4:
   if((tochange <= 0) == ((MMC5HackSPMode & 0x40) != 0))
   {
    src = MMC5HackVROMPTR + ((NT_TMP << 4) | fine_y);
    src += ((MMC5HackSPPage & 0x3f & MMC5HackVROMMask) << 12);
   }
   else
    src = MMC5BGVRAMADR(vadr);
   break;

  default:
   src = VRAMADR(vadr);
   break;
 }

 pshift[0] |= src[0];

 if(PPU_hook)
  PPU_hook(vadr);
}

/* Second pattern fetch: ORs the byte at offset 8 of the current tile row
 * into pshift[1], then advances the pipeline (both pshift words left by 8,
 * atlatch right by 2).  In MMC5 modes 3/4 the tile column xs is advanced and
 * tochange decremented after the source has been selected.
 */
static INLINE void FetchCD2(int MMC5Ex)
{
 const uint32 fine_y = (RefreshAddr >> 12) & 7;
 const uint32 vadr = (NT_TMP << 4) | (((PPU[0] & 0x10) << 8) | fine_y);
 uint8 *src;

 switch(MMC5Ex)
 {
  case 1:
   src = MMC5HackVROMPTR + (((MMC5NT_TMP) & 0x3f & MMC5HackVROMMask) << 12) + (vadr & 0xfff);
   break;

  case 2:
   src = MMC5BGVRAMADR(vadr);
   break;

  case 3:
  case 4:
   if((tochange <= 0) == ((MMC5HackSPMode & 0x40) != 0))
   {
    src = MMC5HackVROMPTR + ((NT_TMP << 4) | fine_y);
    src += ((MMC5HackSPPage & 0x3f & MMC5HackVROMMask) << 12);
   }
   else
    src = MMC5BGVRAMADR(vadr);

   xs++;
   tochange--;
   break;

  default:
   src = VRAMADR(vadr);
   break;
 }

 pshift[1] |= src[8];

 pshift[0] <<= 8;
 pshift[1] <<= 8;
 atlatch >>= 2;

 if(PPU_hook)
  PPU_hook(vadr | 8);
}

static int spork=0;     /* spork the world.  Any sprites on this line?
                           Then this will be set to 1.
                        */
                          
/* Copy bits 0-4 and bit 10 (mask 0x041F) from TempAddr into RefreshAddr,
 * leaving the bits selected by 0xFBE0 untouched.
 */
static INLINE void Fixit2(void)
{
        RefreshAddr = (RefreshAddr & 0xFBE0) | (TempAddr & 0x041f);
}

/* Render the current line from firstpixel up to (not including) lastpixel.
 *
 * With background or sprites enabled, the actual work is done by the code
 * in "ppu-subline.h", which is textually included once per MMC5 mode so that
 * each copy sees a compile-time constant MMC5Mode.  With rendering disabled,
 * the span is filled with the palette entry PALRAM[PALBG].
 *
 * The function guards against re-entry: a nested call (for example from a
 * hook that ends up in MDFNPPU_LineUpdate()) returns immediately.
 */
static void RefreshLine(int lastpixel)
{
 static int norecurse = 0;
 int x;	/* Not used in the visible code; presumably used by ppu-subline.h -- TODO confirm. */

 if(norecurse)
  return;
 norecurse = 1;

 if(ScreenON || SpriteON)
 {
  /* 0x30 when bit 0 of $2001 is set, 0x3F otherwise.  Consumed by the
     included code (not visible here). */
  uint8 pix_mask = 0x3F;

  if(PPU[1]&0x01)
   pix_mask = 0x30;

  /* Entries 0, 4, 8 and 0xC of the cache all mirror PALRAM[0], tagged with bit 6. */
  PALRAMCache[0x0]=PALRAMCache[0x4]=PALRAMCache[0x8]=PALRAMCache[0xC]=PALRAM[0] | 64;

  if(MMC5Hack && geniestage != 1)
  {
   /* Bit 0x80 of MMC5HackSPMode selects modes 4/3 (by CHR mode), which
      also need the tile row ys derived from the scanline and
      MMC5HackSPScroll, wrapped at 0x1E rows. */
   if(MMC5HackCHRMode==0 && (MMC5HackSPMode&0x80))
   {
    const int MMC5Mode = 4;

    ys=((scanline>>3)+MMC5HackSPScroll)&0x1F;
    if(ys>=0x1E) ys-=0x1E;

    #include "ppu-subline.h"
   }
   else if (MMC5HackCHRMode==1 && (MMC5HackSPMode&0x80))
   {
    const int MMC5Mode = 3;

    ys=((scanline>>3)+MMC5HackSPScroll)&0x1F;
    if(ys>=0x1E) ys-=0x1E;

    #include "ppu-subline.h"
   }
   else if(MMC5HackCHRMode == 1)
   {
    const int MMC5Mode = 1;
    #include "ppu-subline.h"
   }
   else
   {
    const int MMC5Mode = 2;
    #include "ppu-subline.h"
   }
  }
  else
  {
   /* No MMC5 (or geniestage == 1): plain rendering. */
   const int MMC5Mode = 0;
   #include "ppu-subline.h"
  }

 }
 else
 {
  /* Rendering disabled: fill the pending span, clamped to the 256 visible pixels. */
  int count = lastpixel - firstpixel;

  if((count + firstpixel) > 256) count = 256 - firstpixel;

  if(count > 0)
  {
   int x;
   memset(Pline + firstpixel, PALRAM[PALBG], count);

   /* Convert the palette indices just written into framebuffer colors. */
   for(x=0; x<count;x++)
   {
    PlineReal[firstpixel + x] = PALRAMLUTCache[Pline[firstpixel + x]];
   }
  }
 }

 if(InputScanlineHook)
  InputScanlineHook(Pline,0,firstpixel,lastpixel);
 /* Everything before lastpixel is now considered rendered. */
 firstpixel = lastpixel;

 norecurse = 0;
}

static void Fixit1(void)
{
    uint32 rad=RefreshAddr;

    if((rad&0x7000)==0x7000)
    {
     rad^=0x7000;
     if((rad&0x3E0)==0x3A0)
     {
      rad^=0x3A0;
      rad^=0x800;
     }
     else
     {
      if((rad&0x3E0)==0x3e0)
       rad^=0x3e0;
      else rad+=0x20;
     }
    }
    else
     rad+=0x1000;
    RefreshAddr=rad;
}

#include "ppu-fastrl.h"

void MMC5_hb(int);     /* Ugh ugh ugh. */
/* Emulate one scanline: render it, run the CPU for the line's duration,
 * fetch the sprites for the next line, fire the mapper's horizontal-blank
 * hooks and finally increment `scanline`.
 *
 *  skip  Non-zero when the frame's video output is being skipped.  Forced
 *        to 0 whenever an InputScanlineHook is installed.
 */
static void DoLine(int skip)
{
 uint8 target[256];
 uint32 *real_target;

 if(InputScanlineHook) // Frame skipping will break zapper emulation soooo muchlybadlydoubleplusungoodly.
  skip = 0;

 /* Line -1 is directed at the first framebuffer row.  The row pointer is
    only computed for non-negative lines so that no pointer before the start
    of the framebuffer is ever formed. */
 if(scanline == -1)
  real_target = (uint32 *)MDFNGameInfo->fb;
 else
  real_target = &((uint32 *)MDFNGameInfo->fb)[scanline * MDFNGameInfo->pitch / 4];

 if(RCSPROn) RefreshSprites();

 ResetRL(target, real_target);

 if(scanline >= 0 && MMC5Hack && (ScreenON || SpriteON)) MMC5_hb(scanline);

 X6502_Run(256);

 if(firstpixel < 240)
 {
  /* Little or nothing was rendered incrementally during the CPU run:
     finish the partially drawn tile (if any) and draw the remainder of
     the line with the fast whole-tile renderer. */
  int newfirst = firstpixel;

  if(newfirst & 7)
  {
   newfirst = (newfirst + 7) &~7;
   //printf("%d\n",newfirst);
   RefreshLine(newfirst);
  }

  Pline=0;	// We don't want any PPU_hook()-calling-RefreshLine()-business going on!

  FastRefreshLine(newfirst >> 3, target);

  if(RCSPROn)
   FastCopySprites(newfirst >> 3, target, skip);

  if(!skip)
   FastLineEffects(newfirst >> 3, target);

  if(InputScanlineHook)
   InputScanlineHook(target, 0, newfirst, 256);

  firstpixel = 256;
  if(ScreenON || SpriteON)
   Fixit1();

  Pline = target; // Restore it!
 }
 else
  MDFNPPU_LineUpdate();

 /* No sprite 0 hit pending any more on this line. */
 sphitx=0x100;

 if(ScreenON || SpriteON)
  FetchSpriteData();

 if(GameHBIRQHook && (ScreenON || SpriteON) && ((PPU[0]&0x38)!=0x18))
 {
  X6502_Run(10);
  GameHBIRQHook();
  X6502_Run(85-16-10);
 }
 else
 {
  X6502_Run(85-16);

  // A semi-hack for Star Trek: 25th Anniversary
  if(GameHBIRQHook && (ScreenON || SpriteON) && ((PPU[0]&0x38)!=0x18))
   GameHBIRQHook();
 }

 if(GameHBIRQHook2 && (ScreenON || SpriteON))
  GameHBIRQHook2();
 X6502_Run(16);
 EndRL();


 scanline++;
}

/* Sprite attribute bits. */
#define V_FLIP  0x80	/* Row order is reversed (see FetchSpriteData). */
#define H_FLIP  0x40	/* Pixel order is reversed (see RefreshSprites). */
#define SP_BACK 0x20	/* When set, RefreshSprites() leaves bit 0x40 of the pixel clear. */

/* Layout of one 4-byte entry in SPRAM. */
typedef struct {
        uint8 y,no,atr,x;
} SPR;

/* Layout of one 4-byte entry in SPRBUF: the two pattern bytes of the sprite
   row on the current line, plus its attributes and X position. */
typedef struct {
        uint8 ca[2],atr,x;
} SPRB;

static uint8 numsprites,SpriteBlurp;
/* Scan the 64 SPRAM entries for sprites whose rows include the current
 * scanline and store their pattern bytes, attributes and X position in
 * SPRBUF (at most `maxsprites` entries).
 *
 * Side effects: sets numsprites and SpriteBlurp (non-zero when the first
 * SPRAM entry is among the fetched sprites), sets bit 0x20 of the status
 * register when more sprites are in range than may be fetched, and, when a
 * PPU_hook is installed, reports the fetch addresses to it.
 *
 * Entries are copied into SPRBUF with memcpy() rather than through a
 * uint32 pointer cast, which would violate the strict aliasing rules.
 */
static void FetchSpriteData(void)
{
        uint8 ns,sb;
        SPR *spr;
        uint8 H;
	int n;
	int vofs;
        uint8 P0=PPU[0];

        spr=(SPR *)SPRAM;
        H=8;

        ns=sb=0;

        /* 0x1000 when 8x8 sprites use the upper pattern table, else 0. */
        vofs=(unsigned int)(P0&0x8&(((P0&0x20)^0x20)>>2))<<9;
        /* Sprite height: 8, or 16 when bit 0x20 of $2000 is set. */
        H+=(P0&0x20)>>2;

        if(!PPU_hook)
         for(n=63;n>=0;n--,spr++)
         {
                /* Unsigned compare rejects lines above the sprite as well. */
                if((unsigned int)(scanline-spr->y)>=H) continue;
                //printf("%d, %u\n",scanline,(unsigned int)(scanline-spr->y));
                if(ns<maxsprites)
                {
                 /* n == 63 is the first SPRAM entry, i.e. sprite 0. */
                 if(n==63) sb=1;

                 {
                  SPRB dst;
                  uint8 *C;
                  int t;
                  unsigned int vadr;

                  /* Row of the sprite that falls on this scanline. */
                  t = (int)scanline-(spr->y);

                  if (Sprite16)
                   vadr = ((spr->no&1)<<12) + ((spr->no&0xFE)<<4);
                  else
                   vadr = (spr->no<<4)+vofs;

                  if (spr->atr&V_FLIP)
                  {
                        vadr+=7;
                        vadr-=t;
                        vadr+=(P0&0x20)>>1;
                        vadr-=t&8;
                  }
                  else
                  {
                        vadr+=t;
                        vadr+=t&8;
                  }

                  /* Fix this geniestage hack */
                  if(MMC5Hack && geniestage!=1) C = MMC5SPRVRAMADR(vadr);
                  else C = VRAMADR(vadr);

                  
                  dst.ca[0]=C[0];
                  dst.ca[1]=C[8];
                  dst.x=spr->x;
                  dst.atr=spr->atr;

                  memcpy(&SPRBUF[ns<<2], &dst, sizeof(dst));
                 }

                 ns++;
                }
                else
                {
                  PPU_status|=0x20;
                  break;
                }
         }
        else
         for(n=63;n>=0;n--,spr++)
         {
                if((unsigned int)(scanline-spr->y)>=H) continue;

                if(ns<maxsprites)
                {
                 if(n==63) sb=1;

                 {
                  SPRB dst;
                  uint8 *C;
                  int t;
                  unsigned int vadr;

                  t = (int)scanline-(spr->y);

                  if (Sprite16)
                   vadr = ((spr->no&1)<<12) + ((spr->no&0xFE)<<4);
                  else
                   vadr = (spr->no<<4)+vofs;

                  if (spr->atr&V_FLIP)
                  {
                        vadr+=7;
                        vadr-=t;
                        vadr+=(P0&0x20)>>1;
                        vadr-=t&8;
                  }
                  else
                  {
                        vadr+=t;
                        vadr+=t&8;
                  }

                  if(MMC5Hack) C = MMC5SPRVRAMADR(vadr);
                  else C = VRAMADR(vadr);
                  dst.ca[0]=C[0];
                  /* Only the first eight sprites are reported to the hook. */
		  if(ns<8)
		  {
		   PPU_hook(0x2000);
		   PPU_hook(vadr);
		  }
                  dst.ca[1]=C[8];
                  if(ns<8)
                   PPU_hook(vadr | 8);
                  dst.x=spr->x;
                  dst.atr=spr->atr;


                  memcpy(&SPRBUF[ns<<2], &dst, sizeof(dst));
                 }

                 ns++;
                }
                else
                {
                  PPU_status|=0x20;
                  break;
                }
         }
        //if(ns>=7)
        //printf("%d %d\n",scanline,ns);
        if(ns>8) PPU_status|=0x20;	/* Handle case when >8 sprites per
					   scanline option is enabled. */
	else if(PPU_hook)
	{
	 /* Report the remaining (unused) sprite slots to the hook as well. */
	 for(n=0;n<(8-ns);n++)
	 {
                 PPU_hook(0x2000);
                 PPU_hook(vofs);
	 }
	}
        numsprites=ns;
        SpriteBlurp=sb;
}

/* Draw the sprites collected in SPRBUF into sprlinebuf for the current line.
 *
 * sprlinebuf is first filled with 0x80 in every byte.  Sprites are then drawn
 * from the last SPRBUF entry down to the first, so earlier entries overwrite
 * later ones.  Each written pixel is a PALRAM sprite-palette value, with bit
 * 0x40 set when the sprite's SP_BACK attribute bit is clear.
 *
 * Side effects: numsprites is decremented by one when it was non-zero;
 * sphitx/sphitdata are loaded when entry 0 is sprite 0 (SpriteBlurp) and the
 * hit flag (status bit 0x40) is not set yet; SpriteBlurp is cleared and spork
 * set to 1 whenever at least one sprite was present.
 */
static void RefreshSprites(void)
{
        int n;
        SPRB *spr;

        spork=0;

        MDFN_FastU32MemsetM8((uint32 *)sprlinebuf, 0x80808080, 256 / sizeof(uint32));
        if(!numsprites) return;

        /* Start at the last fetched sprite and walk backwards. */
        numsprites--;
	spr = (SPRB*)SPRBUF+numsprites;

	for(n=numsprites;n>=0;n--,spr--)
	{
	 register uint32 pixdata;
	 register uint8 J,atr;

	 int x=spr->x;
         uint8 *C;
         uint8 *VB;
                
         /* One 2-bit color per nibble; J has a bit set for every opaque pixel. */
         pixdata=ppulut1[spr->ca[0]]|ppulut2[spr->ca[1]];
         J=spr->ca[0]|spr->ca[1];
         atr=spr->atr;

                       if(J)
                       {
                        if(n==0 && SpriteBlurp && !(PPU_status&0x40))
                        {
                         sphitx=x;
                         sphitdata=J;
                         /* Bit-reverse the mask for horizontally flipped sprites. */
                         if(atr&H_FLIP)
                          sphitdata=    ((J<<7)&0x80) |
                                        ((J<<5)&0x40) |
                                        ((J<<3)&0x20) |
                                        ((J<<1)&0x10) |
                                        ((J>>1)&0x08) |
                                        ((J>>3)&0x04) |
                                        ((J>>5)&0x02) |
                                        ((J>>7)&0x01);                                          
                        }

         C = sprlinebuf+x;
         /* Sprite palette selected by the low two attribute bits. */
         VB = (PALRAM+0x10)+((atr&3)<<2);
	 uint8 pbit = (atr & SP_BACK) ? 0x00 : 0x40;

         /* Nibble k of pixdata belongs to mask bit (0x80 >> k); only opaque
            pixels are written.  The final nibble needs no "&3" because
            nothing is left above it after seven shifts. */
         if (atr&H_FLIP)
         {
	   if(J & 0x80) C[7]=VB[pixdata&3] | pbit;
           pixdata>>=4;
	   if(J & 0x40) C[6]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x20) C[5]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x10) C[4]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x08) C[3]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x04) C[2]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x02) C[1]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x01) C[0]=VB[pixdata] | pbit;
         } else  {
	   if(J & 0x80) C[0]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x40) C[1]=VB[pixdata&3] | pbit;
           pixdata>>=4;
	   if(J & 0x20) C[2]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x10) C[3]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x08) C[4]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x04) C[5]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x02) C[6]=VB[pixdata&3] | pbit;
           pixdata>>=4;
           if(J & 0x01) C[7]=VB[pixdata] | pbit;
         }
        }
      }
     SpriteBlurp=0;
     spork=1;
}

/* Select the number of scanlines per frame: 312 when w is non-zero,
 * 262 otherwise.
 */
void MDFNPPU_SetVideoSystem(int w)
{
 scanlines_per_frame = w ? 312 : 262;
}

/* Reset the register state: all four PPU registers, the read buffer, the
 * latch, both address registers and the write toggle are cleared; ppudead
 * is set to 2 and the cached enable flags are recomputed.
 */
void MDFNPPU_Reset(void)
{
        PPU[0] = 0;
        PPU[1] = 0;
        PPU_status = 0;
        PPU[3] = 0;
        PPUSPL = 0;

        VRAMBuffer = 0;
        PPUGenLatch = 0;

        TempAddr = 0;
        RefreshAddr = 0;
        vtoggle = 0;

        kook = 0;
        ppudead = 2;

        RedoRenderCache();
}

/* Power-on initialisation: clears name table, palette and sprite RAM, resets
 * the registers and installs the read/write handlers for every 8-byte mirror
 * of the PPU registers in $2000-$3FFF, plus the $4014 write handler.
 */
void MDFNPPU_Power(void)
{
        int base;

        memset(NTARAM, 0x00, sizeof(NTARAM));
        memset(PALRAM, 0x00, sizeof(PALRAM));
        memset(SPRAM, 0x00, sizeof(SPRAM));
        MDFNPPU_Reset();

        for(base = 0x2000; base < 0x4000; base += 8)
        {
         /* Read side: only $2002, $2004 and $2007 have dedicated handlers. */
         ARead[base + 0] = A200x;
         ARead[base + 1] = A200x;
         ARead[base + 2] = A2002;
         ARead[base + 3] = A200x;
         ARead[base + 4] = A2004;
         ARead[base + 5] = A200x;
         ARead[base + 6] = A200x;
         ARead[base + 7] = A2007;

         /* Write side: one handler per register. */
         BWrite[base + 0] = B2000;
         BWrite[base + 1] = B2001;
         BWrite[base + 2] = B2002;
         BWrite[base + 3] = B2003;
         BWrite[base + 4] = B2004;
         BWrite[base + 5] = B2005;
         BWrite[base + 6] = B2006;
         BWrite[base + 7] = B2007;
        }

        BWrite[0x4014] = B4014;
}


/* Emulate one frame.
 *
 *  skip  Non-zero when video output for this frame is being skipped.
 *
 * While ppudead is non-zero the frame consists of CPU time only (needed for
 * Knight Rider, possibly others).  Otherwise the vertical blank period is run
 * first, followed by line -1 and the 240 visible lines (or, for NSF files,
 * the equivalent amount of CPU time).
 *
 * Returns 0 when skip is non-zero and 1 otherwise.
 */
int MDFNPPU_Loop(int skip)
{
  const int line_cycles = 256 + 85;

  if(!ppudead)
  {
   X6502_Run(line_cycles);
   PPU_status |= 0x80;

   /* Not sure if this is correct.  According to Matt Conte and my own
      tests, it is.  Timing is probably off, though.  NOTE:  Not having this
      here breaks a Super Donkey Kong game. */
   /* I need to figure out the true nature and length of this delay. */
   PPUSPL = 0;
   PPU[3] = 0;

   X6502_Run(12);

   if(MDFNGameInfo->nes.type == GIT_NSF)
    DoNSFFrame();
   else if(VBlankON)
    TriggerNMI();

   X6502_Run((scanlines_per_frame - 242) * line_cycles - 12);
   PPU_status &= 0x1F;

   scanline = -1;
   DoLine(skip);

   /* Clean this stuff up later. */
   numsprites = 0;
   spork = 0;
   kook ^= 1;

   if(MDFNGameInfo->nes.type == GIT_NSF)
   {
    X6502_Run(line_cycles * 240 - kook);
   }
   else
   {
    /* DoLine() increments scanline itself. */
    scanline = 0;
    while(scanline < 240)
     DoLine(skip);

    if(MMC5Hack && (ScreenON || SpriteON))
     MMC5_hb(scanline);
   }
  }
  else
  {
   if(!skip)
    memset(MDFNGameInfo->fb, 0, 256 * 240 * sizeof(uint32));

   X6502_Run(scanlines_per_frame * line_cycles);
   ppudead--;
  }

  return skip ? 0 : 1;
}

/* Save or load the PPU part of a save state.
 *
 *  sm         State stream handed through to MDFNSS_StateAction().
 *  load       Non-zero when loading, zero when saving.
 *  data_only  Handed through to MDFNSS_StateAction().
 *
 * Returns the result of MDFNSS_StateAction().
 *
 * TempAddr and RefreshAddr are stored as 16-bit values.  The temporaries are
 * always initialised from the live registers, so that a state which lacks
 * these fields (or a failed load) leaves the registers at their current
 * values instead of copying indeterminate data into them.
 *
 * NOTE(review): PALRAMLUTCache depends on the emphasis bits of PPU[1] but is
 * not rebuilt after a load here -- confirm whether a caller takes care of it.
 */
int MDFNPPU_StateAction(StateMem *sm, int load, int data_only)
{
 uint16 TempAddrT = TempAddr;
 uint16 RefreshAddrT = RefreshAddr;

 SFORMAT PPU_STATEINFO[]=
 {
  SFARRAYN(NTARAM, 0x800, "NTAR"),
  SFARRAYN(PALRAM, 0x20, "PRAM"),
  SFARRAYN(SPRAM, 0x100, "SPRA"),
  SFARRAYN(PPU, 0x4, "PPUR"),
  SFVARN(kook, "KOOK"),
  SFVARN(ppudead, "DEAD"),
  SFVARN(PPUSPL, "PSPL"),
  SFVARN(XOffset, "XOFF"),
  SFVARN(vtoggle, "VTOG"),
  SFVARN(RefreshAddrT, "RADD"),
  SFVARN(TempAddrT, "TADD"),
  SFVARN(VRAMBuffer, "VBUF"),
  SFVARN(PPUGenLatch, "PGEN"),
  SFEND
 };

 std::vector <SSDescriptor> love;
 love.push_back(SSDescriptor(PPU_STATEINFO, "PPU"));
 int ret = MDFNSS_StateAction(sm, load, data_only, love);

 if(load)
 {
  int x;

  TempAddr=TempAddrT;
  RefreshAddr=RefreshAddrT;

  /* Sanitise the loaded palette and rebuild the values derived from it. */
  for(x=0;x<0x20;x++)
  {
   PALRAM[x] &= 0x3F;
   PALRAMCache[x] = PALRAM[x];
  }
  RedoRenderCache();
 }
 return(ret);
}

/* One-time setup: builds the pixel lookup tables, enables both layers and
 * reads the "nes.no8lim" setting to pick the per-line sprite limit
 * (64 when set, 8 otherwise).
 */
void MDFNPPU_Init(void) 
{
 rendis = 0;

 if(MDFN_GetSettingB("nes.no8lim"))
  maxsprites = 64;
 else
  maxsprites = 8;

 makeppulut();
}

