#include "stdafx.h"
#include "mem.h"
#include "gpu.h"
#include "regs.h"
#include <stdio.h>

/* Emulated memory image (defined elsewhere). The interpreter indexes it
   directly with 24-bit addresses for instruction fetch and for accesses
   to the 0xF03000-0xF03FFF window. */
extern BYTE* MEM;

/* The single GPU core state: register banks, flags, program counter and
   interrupt bookkeeping used by every function in this file. */
GPUSTATE gpustate;

/* Expand to a bare x86 `mov` of 0/1 into [x]; only meaningful inside an
   inline-assembly block. Not referenced in the visible part of this file. */
#define CLEAR_FLAG(x) mov [x],0x00
#define SET_FLAG(x) mov [x],0x01

/* Defined elsewhere and not referenced in the visible part of this file;
   presumably a switch for GPU logging - TODO confirm. */
extern int gpu_log_enable;

/* Defined elsewhere and not referenced in the visible part of this file;
   presumably the output stream for a GPU disassembly log - TODO confirm. */
extern FILE* gpudisasm;
/* Earlier, wider layout of the table below (kept for reference): */
//BYTE jump_condition[32][256];
/* jump_condition[cc][flags] is non-zero when a JR/JUMP with 5-bit condition
   code `cc` is taken. `flags` is built by gpu_exec as Z | (C << 1) | (N << 2).
   The table is filled by gpu_init(). */
BYTE jump_condition[32][8];

void gpu_init(void)
{
	memset(&gpustate,0,sizeof(GPUSTATE));
	gpustate.reg = gpustate.r0;
	gpustate.alt = gpustate.r1;
	gpu_set_regbank(0);
	/*memset(jump_condition,0,32*256*sizeof(BYTE));
	for(int j=0;j<32;j++) {
		for(int i=0;i<256;i++) {
			int cc = (((i>>6)&0x1)<<2) | (((i>>7)&0x1)<<1) | (i&0x1);
			if(jump_conditions[j][cc])
				jump_condition[j][i] = 0x1;
		}
	}*/
	memset(gpustate.interrupt_stack,0,sizeof(DWORD)*8);
	gpustate.i_pointer = -1;
	for(int i=0;i<5;i++) {
		gpustate.irq_active[i] = 0;
		gpustate.irq_enable[i] = 0;
	}
	memset(jump_condition,0,32*8*sizeof(BYTE));
	for(int j=0;j<32;j++) {
		for(int i=0;i<8;i++) {
			BYTE r = 1;
			if(j & 0x1) {
				if(i & 0x1)
					r = 0;
			}
			if(j & 0x2) {
				if(!(i & 0x1))
					r = 0;
			}
			if(j & 0x4) {
				if(i & (0x2 << (j >> 4)))
					r = 0;
			}
			if(j & 0x8) {
				if(!(i & (0x2 << (j >> 4))))
					r = 0;
			}
			jump_condition[j][i] = r;
		}
	}
}

/* Two-entry Z/N/C flag arrays. No live code in the visible part of this file
   reads or writes them; apparently meant to hold one flag copy per register
   bank - TODO confirm or remove. */
BYTE fz[2],fn[2],fc[2];

/*
 * Select which register bank the interpreter works on.
 *
 * bank == 0 makes r0 the working bank (gpustate.reg) and r1 the alternate
 * bank (gpustate.alt); any other value swaps the two roles.  The chosen bank
 * is recorded in gpustate.regbank as 0 or 1.  The Z/N/C flags are left
 * untouched by a bank switch.
 */
void gpu_set_regbank(int bank)
{
	const int use_second = (bank != 0);

	gpustate.reg = use_second ? gpustate.r1 : gpustate.r0;
	gpustate.alt = use_second ? gpustate.r0 : gpustate.r1;
	gpustate.regbank = use_second ? 1 : 0;
}

/*
 * Report the register bank most recently selected through
 * gpu_set_regbank(): 0 or 1.
 */
int gpu_get_regbank(void)
{
	const int current_bank = gpustate.regbank;

	return current_bank;
}

/*void gpu_interrupt(int irq)
{
	if(gpustate.imask == 0) {
		if(gpustate.irq_enable[irq] != 0) {
			gpu_set_regbank(0);
			gpustate.reg[31] -= 4;
			WriteMem32(gpustate.reg[31],gpustate.pc);
			gpustate.pc = 0xF03000 + (irq * 0x10);
			gpustate.irq_active[irq] = 1;
			gpustate.imask = 1;
		}
	}
}*/

/*
 * Queue an interrupt request for the GPU.
 *
 * The request is only recorded here; gpu_exec() services the most recently
 * queued entry the next time interrupts are unmasked and no delayed jump is
 * in flight.
 *
 * irq: interrupt line number.  Requests for lines outside the range that
 *      gpu_init() initialises (0..4), for lines that are not enabled, or
 *      arriving while the pending stack is already full are dropped.
 */
void gpu_interrupt(int irq)
{
	/* Both limits mirror gpu_init(): five irq_enable/irq_active entries are
	   reset there and interrupt_stack is cleared as 8 DWORDs. */
	enum { GPU_IRQ_LINES = 5, GPU_IRQ_STACK_DEPTH = 8 };

	if(irq < 0 || irq >= GPU_IRQ_LINES)
		return;
	if(gpustate.irq_enable[irq] == 0)
		return;

	/* Without this guard a burst of requests while interrupts are masked
	   would push i_pointer past the end of interrupt_stack. */
	if(gpustate.i_pointer >= GPU_IRQ_STACK_DEPTH - 1)
		return;

	gpustate.i_pointer++;
	gpustate.interrupt_stack[gpustate.i_pointer] = irq;
}

void gpu_exec(int cycles)
{
	DWORD offset;
	DWORD delayed_jump = 0;
	DWORD delayed_jump_address = 0;
	DWORD dw;
	gpustate.pc &= 0xFFFFFF;
	int i=0;
	while(i < cycles || delayed_jump != 0)
	{
		if(gpustate.active == 0)
			return;
		WORD fetch;

		if(gpustate.i_pointer >= 0 && delayed_jump == 0) {
			if(gpustate.imask == 0) {
				int irq = gpustate.interrupt_stack[gpustate.i_pointer];
				gpu_set_regbank(0);
				gpustate.reg[31] -= 4;
				WriteMem32(gpustate.reg[31],gpustate.pc-2);
				gpustate.pc = 0xF03000 + (irq * 0x10);
				gpustate.irq_active[irq] = 1;
				gpustate.imask = 1;
				gpustate.i_pointer--;
				gpustate.reg[30] = gpustate.pc;
				delayed_jump = 0;
			}
		}
		fetch = *(WORD*)(&MEM[gpustate.pc & 0xFFFFFF]);
		DWORD opcode = (fetch >> 10) & 0x3F;
		DWORD rs = (fetch >> 5) & 0x1F;
		DWORD rd = fetch & 0x1F;

		switch(opcode) {
		case 22:  // ABS
			{
				int d = gpustate.reg[rd];
				if(d & 0x80000000) {
					d = abs(d);
					gpustate.c = 1;
				} else {
					gpustate.c = 0;
				}
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = 0;
			}
			break;
		case 0:   // ADD
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				INT64 r = s + d;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 1:   // ADDC
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				int c = gpustate.c;
				INT64 r = s + d + c;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
		    break;
		case 2:   // ADDQ
			{
				int s = qtable[rs];
				int d = gpustate.reg[rd];
				INT64 r = s + d;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
		    break;
		case 3:   // ADDQT
			{
				gpustate.reg[rd] += qtable[rs];
			}
			break;
		case 9:   // AND
			{
				gpustate.reg[rd] &= gpustate.reg[rs];
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 15:  // BLCR
			{
				gpustate.reg[rd] &= ~(1 << rs);
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
		    break;
		case 14:  // BSET
			{
				gpustate.reg[rd] |= 1 << rs;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 13:  // BTST
			{
				gpustate.z = gpustate.reg[rd] & (1 << rs) ? 0 : 1;
			}
		    break;
		case 30:  // CMP
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				gpustate.c = (unsigned int)d < (unsigned int)s;
				d -= s;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 31:  // CMPQ
			{
				int s = sqtable[rs];
				int d = gpustate.reg[rd];
				gpustate.c = (unsigned int)d < (unsigned int)s;
				d -= s;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 21:  // DIV
			{
				if(gpustate.reg[rs] != 0) {
					if(gpustate.divide == 0) {
						DWORD q = gpustate.reg[rd];
						DWORD d = gpustate.reg[rs];
						DWORD r = q / d;
						DWORD r2 = q % d;
						gpustate.reg[rd] = r;
						gpustate.remain = r2;
					} else {
						UINT64 q = (UINT64)(gpustate.reg[rd])<<16;
						UINT64 d = (UINT64)(gpustate.reg[rs]);
						DWORD r = (UINT64)(q / d);
						DWORD r2 = (UINT64)(q % d);
						gpustate.reg[rd] = r;
						gpustate.remain = r2;
					}
				}
			}
			break;
		case 20:  // IMACN   
			{
				short s = gpustate.reg[rs];
				short d = gpustate.reg[rd];
				int r = s * d;
				gpustate.acc += r;
			}
			break;
		case 17:  // IMULT
			{
				short s = gpustate.reg[rs];
				short d = gpustate.reg[rd];
				int r = s * d;
				gpustate.reg[rd] = r;
				gpustate.z = r == 0 ? 1 : 0;
				gpustate.n = r & 0x80000000 ? 1 : 0;
			}
			break;
		case 18:  // IMULTN
			{
				short s = gpustate.reg[rs];
				short d = gpustate.reg[rd];
				int r = s * d;
				gpustate.acc = r;
				gpustate.z = r == 0 ? 1 : 0;
				gpustate.n = r & 0x80000000 ? 1 : 0;
			}
			break;
        case 53:  // JR;
			dw = (gpustate.z & 0x1) | ((gpustate.n & 0x1) << 2) | ((gpustate.c & 0x1) << 1);
			if(jump_condition[rd][dw]) {
				signed int offset = rs & 0x10 ? (0xFFFFFFF0 | (rs & 0xF)) : (rs & 0xF);
				delayed_jump_address = gpustate.pc + 2 + (offset * 2);
				delayed_jump = 1;
			}
			break;
		case 52:  // JUMP
			dw = (gpustate.z & 0x1) | ((gpustate.n & 0x1) << 2) | ((gpustate.c & 0x1) << 1);
			if(jump_condition[rd][dw]) {
				delayed_jump_address = gpustate.reg[rs] & 0xFFFFFE;
				delayed_jump = 1;
			}
			break;
		case 41:  // LOAD
			{
				DWORD address = gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address]),16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address-0x8000]),16);
				} else {
					gpustate.reg[rd] = ReadMem32(address);
				}
			}
			break;
		case 43:  // LOAD (R14+m)
			{
				DWORD address = gpustate.reg[14] + (qtable[rs] << 2);
				if(address >= 0xF03000 && address < 0xF04000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address]),16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address-0x8000]),16);
				} else {
					gpustate.reg[rd] = ReadMem32(address);
				}
			}
			break;
		case 44:  // LOAD (R15+m)
			{
				DWORD address = gpustate.reg[15] + (qtable[rs] << 2);
				if(address >= 0xF03000 && address < 0xF04000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address]),16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address-0x8000]),16);
				} else {
					gpustate.reg[rd] = ReadMem32(address);
				}
			}
			break; 
		case 58:  // LOAD (R14+Rm)
			{
				DWORD address = gpustate.reg[14] + gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address]),16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address-0x8000]),16);
				} else {
					gpustate.reg[rd] = ReadMem32(address);
				}
			}
			break;
		case 59:  // LOAD (R15+Rm)
			{
				DWORD address = gpustate.reg[15] + gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address]),16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					gpustate.reg[rd] = _rotl(*(DWORD*)(&MEM[address-0x8000]),16);
				} else {
					gpustate.reg[rd] = ReadMem32(address);
				}
			}
			break;
		case 39:  // LOADB
			if(gpustate.reg[rs] >= 0xF03000 && gpustate.reg[rs] < 0xF04000) {
				gpustate.reg[rd] = ReadMem32(gpustate.reg[rs]);
			} else {
				gpustate.reg[rd] = ReadMem8(gpustate.reg[rs]);
			}
			break;
		case 40:  // LOADW
			if(gpustate.reg[rs] >= 0xF03000 && gpustate.reg[rs] < 0xF04000) {
				gpustate.reg[rd] = ReadMem32(gpustate.reg[rs]);
			} else {
				gpustate.reg[rd] = ReadMem16(gpustate.reg[rs]);
			}
			break;
		case 42:  // LOADP
			if(gpustate.reg[rs] >= 0xF03000 && gpustate.reg[rs] < 0xF04000) {
				gpustate.reg[rd] = ReadMem32(gpustate.reg[rs]);
			} else {
				gpustate.reg[rd] = ReadMem32(gpustate.reg[rs]);
				gpustate.hidata = ReadMem32(gpustate.reg[rs]+4);
			}
			break;
		case 34:  // MOVE
			{
				gpustate.reg[rd] = gpustate.reg[rs];
			}
			break;
		case 51:  // MOVE PC,Rn
			{
				gpustate.reg[rd] = gpustate.pc;
			}
			break;
		case 37:  // MOVEFA
			{
				gpustate.reg[rd] = gpustate.alt[rs];
			}
			break;
		case 38:  // MOVEI
			{
				gpustate.reg[rd] = *(DWORD*)(&MEM[gpustate.pc+2]);
				gpustate.pc += 4;
			}
			break;
        case 35:  // MOVEQ
			{
				gpustate.reg[rd] = rs;
			}
			break;
		case 36:  // MOVETA
			{
				gpustate.alt[rd] = gpustate.reg[rs];
			}
			break;
		case 55:  // MTOI
			{
				int d = gpustate.reg[rd] & 0x7FFFFF;
				if(gpustate.reg[rd] & 0x80000000) {
					d |= 0xFF800000;
				}
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 16:  // MULT
			{
				unsigned short s = gpustate.reg[rs];
				unsigned short d = gpustate.reg[rd];
				int r = s * d;
				gpustate.reg[rd] = r;
				gpustate.z = r == 0 ? 1 : 0;
				gpustate.n = r & 0x80000000 ? 1 : 0;
			}
			break;
		case 54:  // MMULT
			{
				int size = gpustate.matrix_size & 0xF;
				int address = gpustate.matrix_address;
				int add;
				if(gpustate.matrix_size & 0x10)
					add = size * 4;
				else
					add = 4;
				int result = 0;
				for(int i=0; i<size; i++) {
					short m,r;
					m = ReadMem16(address+2);
					if(i & 0x1)
						r = gpustate.alt[rs+(i>>1)] >> 16;
					else
						r = (gpustate.alt[rs+(i>>1)] & 0xFFFF);
					//result += (int)(r * m);
					int mult = r*m;
					__asm {
						mov eax,[result]
						mov edx,[mult]
						add eax,edx
						setc [gpustate.c]
						mov [result],eax
					}

					address += add;
				}
				gpustate.reg[rd] = result;
				gpustate.n = (result < 0) ? 1 : 0;
				gpustate.z = (result == 0) ? 1 : 0;
			}
			break;
		case 8:   // NEG
			{
				int s = 0;
				int d = gpustate.reg[rd];
				gpustate.c = d - s < d;
				d = s - d;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 57:  // NOP
			break;
		case 56:  // NORMI
			{
				/*unsigned int d = gpustate.reg[rd];
				int r = 0;
				while ((d & 0xffc00000) == 0)
				{
					d <<= 1;
					r--;
				}
				while ((d & 0xff800000) != 0)
				{
					d >>= 1;
					r++;
				}
				gpustate.reg[rd] = r;
				gpustate.z = r == 0 ? 1 : 0;
				gpustate.n = r & 0x80000000 ? 1 : 0;*/
				gpustate.reg[rd] = 0;
			}
			break;
		case 12:  // NOT
			{
				int d = gpustate.reg[rd];
				d ^= 0xFFFFFFFF;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 10:  // OR
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				d |= s;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 63:  // PACK / UNPACK
			{
				if(rs == 0) {
					int c1 = (gpustate.reg[rd] & 0x3C00000) >> 10;
					int c2 = (gpustate.reg[rd] & 0x1E000) >> 5;
					int y = (gpustate.reg[rd] & 0xFF);
					gpustate.reg[rd] = c1 | c2 | y;
				} else {
					int c1 = (gpustate.reg[rd] & 0xF000) << 10;
					int c2 = (gpustate.reg[rd] & 0xF00) << 5;
					int y = (gpustate.reg[rd] & 0xFF);
					gpustate.reg[rd] = c1 | c2 | y;
				}
			}
			break;
		case 19:  // RESMAC
			{
				gpustate.reg[rd] = gpustate.acc;
			}
			break;
		case 28:  // ROR
			{
				unsigned int d = gpustate.reg[rd];
				int shift = gpustate.reg[rs];
				gpustate.c = d & 0x80000000 ? 1 : 0;
				d = _rotr(d,shift);
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 29:  // RORQ
			{
				unsigned int d = gpustate.reg[rd];
				int shift = qtable[rs];
				gpustate.c = d & 0x80000000 ? 1 : 0;
				d = _rotr(d,shift);
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 32:  // SAT8
			{
				int d = gpustate.reg[rd];
				if(d < 0)
					d = 0;
				if(d > 255)
					d = 255;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = 0;
			}
			break;
		case 33:  // SAT16
			{
				int d = gpustate.reg[rd];
				if(d < 0)
					d = 0;
				if(d > 65535)
					d = 65535;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = 0;
			}
			break;
		case 62:  // SAT24
			{
				int d = gpustate.reg[rd];
				if(d < 0)
					d = 0;
				if(d > 16777215)
					d = 16777215;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = 0;
			}
			break;
		case 23:  // SH
			{
				int shift = gpustate.reg[rs];
				if(shift & 0x80000000) {
					gpustate.c = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
					UINT32 d = gpustate.reg[rd];
					d <<= 0-shift;
					gpustate.reg[rd] = d;
					gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
					gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
				} else {
					gpustate.c = gpustate.reg[rd] & 0x1 ? 1 : 0;
					UINT32 d = gpustate.reg[rd];
					d >>= shift;
					gpustate.reg[rd] = d;
					gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
					gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
				}
			}
			break;
		case 26:  // SHA
			{
				int shift = gpustate.reg[rs];
				if(shift & 0x80000000) {
					gpustate.c = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
					INT32 d = gpustate.reg[rd];
					d <<= 0-shift;
					gpustate.reg[rd] = d;
					gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
					gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
				} else {
					gpustate.c = gpustate.reg[rd] & 0x1 ? 1 : 0;
					INT32 d = gpustate.reg[rd];
					d >>= shift;
					gpustate.reg[rd] = d;
					gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
					gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
				}
			}
			break;
		case 27:  // SHARQ
			{
				INT32 d = gpustate.reg[rd];
				int shift = qtable[rs];
				gpustate.c = d & 0x1 ? 1 : 0;
				d >>= shift;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 24:  // SHLQ
			{
				UINT32 d = gpustate.reg[rd];
				int shift = 32 - rs;
				gpustate.c = d & 0x80000000 ? 1 : 0;
				d <<= shift;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
        case 25:  // SHRQ
			{
				UINT32 d = gpustate.reg[rd];
				int shift = qtable[rs];
				gpustate.c = d & 0x1 ? 1 : 0;
				d >>= shift;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		case 47:  // STORE
			{
				DWORD address = gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					*(DWORD*)(&MEM[address]) = _rotl(gpustate.reg[rd],16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					*(DWORD*)(&MEM[address-0x8000]) = _rotl(gpustate.reg[rd],16);
				} else {
					WriteMem32(address,gpustate.reg[rd]);
				}
			}
			break;
		case 49:  // STORE (R14+m)
			{
				DWORD address = gpustate.reg[14] + (qtable[rs] << 2);
				if(address >= 0xF03000 && address < 0xF04000) {
					*(DWORD*)(&MEM[address]) = _rotl(gpustate.reg[rd],16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					*(DWORD*)(&MEM[address-0x8000]) = _rotl(gpustate.reg[rd],16);
				} else {
					WriteMem32(address,gpustate.reg[rd]);
				}
			}
			break;
		case 50:  // STORE (R15+m)
			{
				DWORD address = gpustate.reg[15] + (qtable[rs] << 2);
				if(address >= 0xF03000 && address < 0xF04000) {
					*(DWORD*)(&MEM[address]) = _rotl(gpustate.reg[rd],16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					*(DWORD*)(&MEM[address-0x8000]) = _rotl(gpustate.reg[rd],16);
				} else {
					WriteMem32(address,gpustate.reg[rd]);
				}
			}
			break;
		case 60:  // STORE (R14+Rm)
			{
				DWORD address = gpustate.reg[14] + gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					*(DWORD*)(&MEM[address]) = _rotl(gpustate.reg[rd],16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					*(DWORD*)(&MEM[address-0x8000]) = _rotl(gpustate.reg[rd],16);
				} else {
					WriteMem32(address,gpustate.reg[rd]);
				}
			}
			break;
		case 61:  // STORE (R15+Rm)
			{
				DWORD address = gpustate.reg[15] + gpustate.reg[rs];
				if(address >= 0xF03000 && address < 0xF04000) {
					*(DWORD*)(&MEM[address]) = _rotl(gpustate.reg[rd],16);
				} else if(address >= 0xF0B000 && address < 0xF0C000) {
					*(DWORD*)(&MEM[address-0x8000]) = _rotl(gpustate.reg[rd],16);
				} else {
					WriteMem32(address,gpustate.reg[rd]);
				}
			}
			break;
		case 45:  // STOREB
			if(gpustate.reg[rs]>0xF03000 && gpustate.reg[rs]<0xF04000) {
				WriteMem32(gpustate.reg[rs],gpustate.reg[rd]);
			} else {
				WriteMem8(gpustate.reg[rs],(BYTE)gpustate.reg[rd]);
			}
			break;
		case 46:  // STOREW
			if(gpustate.reg[rs]>0xF03000 && gpustate.reg[rs]<0xF04000) {
				WriteMem32(gpustate.reg[rs],gpustate.reg[rd]);
			} else {
				WriteMem16(gpustate.reg[rs],(WORD)gpustate.reg[rd]);
			}
			break;
		case 48:  // STOREP
			if(gpustate.reg[rs]>0xF03000 && gpustate.reg[rs]<0xF04000) {
				WriteMem32(gpustate.reg[rs],gpustate.reg[rd]);
			} else {
				WriteMem32(gpustate.reg[rs],gpustate.reg[rd]);
				WriteMem32(gpustate.reg[rs]+4,gpustate.hidata);
			}
			break;
		case 4:   // SUB
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				INT64 r = d - s;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 5:   // SUBC
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				int c = gpustate.c;
				INT64 r = d - s - c;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 6:   // SUBQ
			{
				int s = qtable[rs];
				int d = gpustate.reg[rd];
				INT64 r = d - s;
				gpustate.c = r & 0x100000000 ? 1 : 0;
				gpustate.reg[rd] = r;
				gpustate.z = gpustate.reg[rd] == 0 ? 1 : 0;
				gpustate.n = gpustate.reg[rd] & 0x80000000 ? 1 : 0;
			}
			break;
		case 7:   // SUBQT
			{
				gpustate.reg[rd] -= qtable[rs];
			}
			break;
		case 11:  // XOR
			{
				int s = gpustate.reg[rs];
				int d = gpustate.reg[rd];
				d ^= s;
				gpustate.reg[rd] = d;
				gpustate.z = d == 0 ? 1 : 0;
				gpustate.n = d & 0x80000000 ? 1 : 0;
			}
			break;
		default:
			return;
		}
		gpustate.pc += 2;
		if(delayed_jump > 1) {
			gpustate.pc = delayed_jump_address;
			delayed_jump = 0;
		}
		if(delayed_jump == 1) {
			delayed_jump++;
		}
		i += gpu_opcode_times[opcode];
	}
}