#include "stdafx.h"
#include "blitter.h"
#include "mem.h"
#include "regs.h"
#include "mmx.h"

// Emulated memory image, defined in another translation unit. This file
// indexes it directly with emulated addresses (e.g. MEM[0xF02268]).
extern BYTE* MEM;

// Lookup table of pixel counts: each entry is half the previous one,
// starting at 64 and ending at 0.
// NOTE(review): presumably indexed by the 3-bit pixel depth code so that
// entry n == 64 / (1 << n) pixels fit in a 64-bit phrase; it is not
// referenced in the visible part of this file, so confirm against its users.
// The element type is spelled out because implicit int is not valid in
// C99 or C++.
const int pixels_per_phrase[8] = { 64,32,16,8,4,2,1,0};

// Fill a run of 16-bit pixels with shaded values built from the blitter
// shading registers.
//
//   dst          - destination buffer; written as consecutive WORDs.
//   block_length - size of the run in bytes; block_length/2 WORDs are written
//                  (nothing is written when it is less than 2).
//
// Four independent accumulators are kept, one per position within a group of
// four pixels (pixel i uses accumulator i % 4):
//   * intensity: a signed 8.16 fixed-point value. The integer part is the low
//     byte of the 16-bit value at 0xF0227C/80/84/88 (accumulators 3..0) and
//     the fraction is the 16-bit value at 0xF0227E/82/86/8A.
//   * colour: the byte at 0xF02268/6A/6C/6E (accumulators 0..3), kept with
//     4 fractional bits.
// After each pixel the accumulator it used is advanced by the increments
// packed in the 32-bit value at 0xF02270: bits 0-23 are the signed intensity
// step and bits 24-31 the signed colour step.
//
// Each output WORD is (colour integer part << 8) | (intensity integer part).
void hle_gouraud(DWORD* dst,int block_length)
{
	int gd_i[4];
	int gd_c[4];
	int gd_ia,gd_ca;
	DWORD gouraud_add;
	WORD gint[4],gfrac[4];
	BYTE gcolour[4];
	gouraud_add = ReadMem32(0xF02270);
	gcolour[0] = ReadMem8(0xF02268);
	gcolour[1] = ReadMem8(0xF0226A);
	gcolour[2] = ReadMem8(0xF0226C);
	gcolour[3] = ReadMem8(0xF0226E);
	gint[3] = ReadMem16(0xF0227C);
	gint[2] = ReadMem16(0xF02280);
	gint[1] = ReadMem16(0xF02284);
	gint[0] = ReadMem16(0xF02288);
	gfrac[3] = ReadMem16(0xF0227E);
	gfrac[2] = ReadMem16(0xF02282);
	gfrac[1] = ReadMem16(0xF02286);
	gfrac[0] = ReadMem16(0xF0228A);

	// Sign-extend the 24-bit intensity step and the 8-bit colour step.
	gd_ia = gouraud_add & 0xFFFFFF;
	if(gd_ia & 0x800000)
		gd_ia = 0xFF000000 | gd_ia;
	gd_ca = (gouraud_add>>24) & 0xFF;
	if(gd_ca & 0x80)
		gd_ca = 0xFFFFFF00 | gd_ca;

	for(int v=0;v<4;v++) {
		// Build the 8.16 intensity in unsigned arithmetic: shifting the
		// sign-extended value left as a signed int would overflow.
		DWORD hi = gint[v] & 0xFF;
		if(hi & 0x80)
			hi |= 0xFF00;
		gd_i[v] = (int)((hi << 16) | gfrac[v]);
		gd_c[v] = gcolour[v]<<4;
	}

	WORD* dst2 = (WORD*)dst;
	for(int i=0;i<block_length/2;i++) {
		int pixel = i % 4;
		// Bits 16-23 carry the intensity integer part, bits 24-31 the colour
		// integer part; the colour shift is done unsigned so that values with
		// bit 7 set do not overflow a signed int.
		DWORD gdt = ((DWORD)gd_i[pixel] & 0xFFFFFF) | ((DWORD)(gd_c[pixel]>>4) << 24);
		*dst2++ = (WORD)(gdt >> 16);
		gd_i[pixel] += gd_ia;
		gd_c[pixel] += gd_ca;
	}
}

// Copy a run of 16-bit pixels from src to dst while adjusting the low byte
// (intensity) of each pixel.
//
//   dst, src     - buffers accessed as consecutive WORDs.
//   gd_ia        - intensity step; bits 16 and up are used as the per-pixel
//                  delta, with bit 7 of that value treated as the sign bit.
//   block_length - run length in bytes; block_length/2 pixels are processed.
//
// The adjusted intensity is clamped to 0..0xFF; the high byte of every pixel
// is passed through unchanged.
void hle_srcshade(DWORD* dst,DWORD* src,DWORD gd_ia,int block_length)
{
	WORD* out = (WORD*)dst;
	WORD* in = (WORD*)src;
	const int count = block_length/2;

	// The delta does not depend on the pixel, so derive it once.
	int delta = gd_ia >> 16;
	if(delta & 0x80)
		delta = 0xFFFFFF00 | delta;

	for(int n=0;n<count;n++) {
		const WORD pix = in[n];
		int level = (pix & 0xFF) + delta;
		if(level < 0)
			level = 0;
		else if(level > 0xFF)
			level = 0xFF;
		out[n] = (pix & 0xFF00) | level;
	}
}

// Evaluate the blitter logic function for one source/destination pair.
//
// The 4-bit function code is a truth table: each bit enables one minterm of
// (source, destination):
//   bit 0: ~S & ~D     bit 1: ~S & D     bit 2: S & ~D     bit 3: S & D
// so 0 yields all zeros, 12 yields S, 10 yields D, 6 yields S ^ D,
// 9 yields ~(S ^ D) and 15 yields all ones. The result is computed at full
// DWORD width; callers truncate it to the pixel size they operate on.
// NOTE(review): the mmx_copy_N helpers used by the block path are not visible
// here; confirm that mmx_copy_6 / mmx_copy_9 use the same XOR / XNOR mapping.
static DWORD blit_lfu(DWORD logic,DWORD s,DWORD d)
{
	DWORD r = 0;
	if(logic & 0x1) r |= ~s & ~d;
	if(logic & 0x2) r |= ~s &  d;
	if(logic & 0x4) r |=  s & ~d;
	if(logic & 0x8) r |=  s &  d;
	return r;
}

// High-level emulation of a single blitter command.
//
//   cmd - the command word. Individual flag bits and the 4-bit logic function
//         (bits 21-24) are decoded into a BLITCMD below.
//
// The A1 and A2 window descriptions are decoded from their registers, the
// source and destination windows are chosen from the dsta2 flag, and then one
// of two paths runs:
//
//   * Block path (the add-mode field in bits 16-17 of A1_FLAGS is 0): every
//     one of the outer_loop rows is handled as one contiguous run of
//     inner_loop pixels through host pointers obtained from GetRealPointer,
//     using memset/memcpy, the mmx_copy_* helpers, hle_gouraud or
//     hle_srcshade. If GetRealPointer yields 0 for either window the function
//     returns at once, without writing the pixel pointers back.
//
//   * Pixel path (any other add mode): pixels are read, combined and written
//     one at a time with ReadMemN/WriteMemN. A1 coordinates are 16.16 fixed
//     point, A2 coordinates are integers. One loop exists per depth code 0-5;
//     other depth codes transfer nothing.
//
// On normal completion the A1/A2 pixel pointer registers are updated with the
// final coordinates.
void BlitterExec_HLE(DWORD cmd)
{
	DWORD* src,* dst;
	DWORD a1_address, a2_address;
	BLITCMD bcmd;
	BLIT_A1 a1;
	BLIT_A2 a2;
	memset(&a1,0,sizeof(BLIT_A1));
	memset(&a2,0,sizeof(BLIT_A2));
	memset(&bcmd,0,sizeof(BLITCMD));
	DWORD srcd = GetRealPointer(0xF02240);
	DWORD patd = GetRealPointer(0xF02268);

	// Decode the command word.
	bcmd.srcen = (cmd & 0x1) ? 1 : 0;
	bcmd.srcenz = (cmd & 0x2) ? 1 : 0;
	bcmd.srcenx = (cmd & 0x4) ? 1 : 0;
	bcmd.dsten = (cmd & 0x8) ? 1 : 0;
	bcmd.dstenz = (cmd & 0x10) ? 1 : 0;
	bcmd.dstwrz = (cmd & 0x20) ? 1 : 0;
	bcmd.clip_a1 = (cmd & 0x40) ? 1 : 0;
	bcmd.upda1f = (cmd & 0x100) ? 1 : 0;
	bcmd.upda1 = (cmd & 0x200) ? 1 : 0;
	bcmd.upda2 = (cmd & 0x400) ? 1 : 0;
	bcmd.dsta2 = (cmd & 0x800) ? 1 : 0;
	bcmd.gourd = (cmd & 0x1000) ? 1 : 0;
	bcmd.patdsel = (cmd & 0x10000) ? 1 : 0;
	bcmd.cmpdst = (cmd & 0x2000000) ? 1 : 0;
	bcmd.bcompen = (cmd & 0x4000000) ? 1 : 0;
	bcmd.dcompen = (cmd & 0x8000000) ? 1 : 0;
	bcmd.bkgwren = (cmd & 0x10000000) ? 1 : 0;
	bcmd.srcshade = (cmd & 0x40000000) ? 1 : 0;
	bcmd.logic = (cmd >> 21) & 0xF;

	DWORD outer_loop = B_COUNT_OUT;
	DWORD inner_loop = B_COUNT_IN;

	// Decode the A1 window.
	a1.base = DWORDBIG(A1_BASE);
	DWORD flags = DWORDBIG(A1_FLAGS);
	a1.pitch = flags & 0x3;
	a1.depth = (flags >> 3) & 0x7;
	a1.z_offset = (flags >> 6) & 0x7;
	a1.xadd = (short)((flags >> 16) & 0x3);
	a1.pixel_x = A1_PIXEL_X << 16 | A1_FPIXEL_X;
	a1.pixel_y = A1_PIXEL_Y << 16 | A1_FPIXEL_Y;
	a1.step_x = A1_STEP_X << 16;
	a1.step_y = A1_STEP_Y << 16;
	a1.fstep_x = A1_FSTEP_X;
	a1.fstep_y = A1_FSTEP_Y;
	a1.inc_x = A1_INC_X << 16 | A1_FINC_X;
	a1.inc_y = A1_INC_Y << 16 | A1_FINC_Y;
	a1.clip_x = A1_CLIP_X & 0x7FFF;
	a1.clip_y = A1_CLIP_Y & 0x7FFF;
	a1.width = width_table[(flags >> 9) & 0x3F];

	a1.yadd = (short)((flags >> 18) & 0x1);
	a1.xsign = (flags >> 19) & 0x1;
	a1.ysign = (flags >> 20) & 0x1;
	if(a1.ysign)
		a1.yadd = -a1.yadd;
	a1.pixelmode = (flags >> 16) & 0x3;

	// Decode the A2 window.
	a2.base = DWORDBIG(A2_BASE);
	flags = DWORDBIG(A2_FLAGS);
	a2.pitch = flags & 0x3;
	a2.z_offset = (flags >> 6) & 0x7;
	a2.pixel_x = A2_PIXEL_X;
	a2.pixel_y = A2_PIXEL_Y;
	a2.step_x = (signed short)A2_STEP_X;
	a2.step_y = (signed short)A2_STEP_Y;
	a2.mask = DWORDBIG(A2_MASK);
	a2.width = width_table[(flags >> 9) & 0x3F];
	// A2's own Y-add bit (bit 18 of A2_FLAGS) is deliberately ignored: the
	// already sign-adjusted A1 value is used instead.
	a2.yadd = a1.yadd; // Buggy blitter !!?
	a2.pixelmode = (flags >> 16) & 0x3;
	a2.xsign = (flags >> 19) & 0x1;
	a2.ysign = (flags >> 20) & 0x1;
	if(a2.ysign)
		a2.yadd = -a2.yadd;

	// Pick the address variables that act as source and destination. With
	// srcen clear the source is the value in srcd; patdsel overrides both and
	// selects patd.
	if(bcmd.dsta2 == 0) {
		dst = &a1_address;
		src = &a2_address;
	} else {
		src = &a1_address;
		dst = &a2_address;
	}
	if(!bcmd.srcen) {
		src = &srcd;
	}
	if(bcmd.patdsel) {
		src = &patd;
	}

	unsigned int a1_x = a1.pixel_x;
	unsigned int a1_y = a1.pixel_y;
	unsigned int a2_x = a2.pixel_x;
	unsigned int a2_y = a2.pixel_y;
	int a1_size = pitch[a1.pitch];
	int a2_size = pitch[a2.pitch];

	if(!a1.pixelmode) {
		// ---- Block path: integer coordinates, one contiguous run per row.
		a1_x = A1_PIXEL_X;
		a1_y = A1_PIXEL_Y;
		a2_x = A2_PIXEL_X;
		a2_y = A2_PIXEL_Y;
		a1.step_x = (signed short)(A1_STEP_X);
		a1.step_y = (signed short)(A1_STEP_Y);
		a2.step_x = (signed short)(A2_STEP_X);
		a2.step_y = (signed short)(A2_STEP_Y);

		DWORD gouraud_add = ReadMem32(0xF02270);
		int gd_ia = gouraud_add & 0xFFFFFF;
		// Bytes per row; both windows are sized with the A1 depth.
		DWORD blit_size = (inner_loop*bitdepth[a1.depth]) / 8;

		for(DWORD j=0; j<outer_loop; j++) {
			a1_address = GetRealPointer(a1.base + (((a1_y * a1.width + a1_x)*bitdepth[a1.depth])/8));
			a2_address = GetRealPointer(a2.base + (((a2_y * a2.width + a2_x)*bitdepth[a1.depth])/8));
			if(a1_address == 0 || a2_address == 0)
				return;
			DWORD* src_address = (DWORD*)(*src);
			DWORD* dst_address = (DWORD*)(*dst);
			if(bcmd.patdsel) {
				if(bcmd.gourd) {
					hle_gouraud(dst_address,blit_size);
				} else {
					// NOTE(review): memset replicates a single byte, so only
					// the low byte of this 64-bit host-order read reaches the
					// destination; multi-byte patterns are not reproduced.
					UINT64 pattern = *(UINT64*)(&MEM[0xF02268]);
					memset(dst_address,(int)pattern,blit_size);
				}
			} else if(bcmd.srcshade) {
				hle_srcshade(dst_address,src_address,gd_ia,blit_size);
			} else {
				if(bcmd.dsten) {
					// Destination data comes from the destination window.
					switch(bcmd.logic) {
						case 0: memset(dst_address,0,blit_size); break;
						case 1: mmx_copy_1(dst_address,src_address,blit_size); break;
						case 2: mmx_copy_2(dst_address,src_address,blit_size); break;
						case 3: mmx_copy_3(dst_address,src_address,blit_size); break;
						case 4: mmx_copy_4(dst_address,src_address,blit_size); break;
						case 5: mmx_copy_5(dst_address,blit_size); break;
						case 6: mmx_copy_6(dst_address,src_address,blit_size); break;
						case 7: mmx_copy_7(dst_address,src_address,blit_size); break;
						case 8: mmx_copy_8(dst_address,src_address,blit_size); break;
						case 9: mmx_copy_9(dst_address,src_address,blit_size); break;
						case 10: break;
						case 11: mmx_copy_11(dst_address,src_address,blit_size); break;
						case 12: memcpy(dst_address,src_address,blit_size); break;
						case 13: mmx_copy_13(dst_address,src_address,blit_size); break;
						case 14: mmx_copy_14(dst_address,src_address,blit_size); break;
						case 15: memset(dst_address,0xFF,blit_size); break;
					}
				} else {
					// Destination data comes from the 64-bit value at
					// 0xF02248 instead of the destination window.
					UINT64 data;
					data = *(UINT64*)(&MEM[0xF02248]);
					switch(bcmd.logic) {
						case 0: memset(dst_address,0,blit_size); break;
						case 1: mmx_copy_1_data(dst_address,src_address,data,blit_size); break;
						case 2: mmx_copy_2_data(dst_address,src_address,data,blit_size); break;
						case 3: mmx_copy_3(dst_address,src_address,blit_size); break;
						case 4: mmx_copy_4_data(dst_address,src_address,data,blit_size); break;
						case 5: mmx_copy_5_data(dst_address,data,blit_size); break;
						case 6: mmx_copy_6_data(dst_address,src_address,data,blit_size); break;
						case 7: mmx_copy_7_data(dst_address,src_address,data,blit_size); break;
						case 8: mmx_copy_8_data(dst_address,src_address,data,blit_size); break;
						case 9: mmx_copy_9_data(dst_address,src_address,data,blit_size); break;
						case 10: mmx_copy_10_data(dst_address,data,blit_size); break;
						case 11: mmx_copy_11_data(dst_address,src_address,data,blit_size); break;
						case 12: memcpy(dst_address,src_address,blit_size); break;
						case 13: mmx_copy_13_data(dst_address,src_address,data,blit_size); break;
						case 14: mmx_copy_14_data(dst_address,src_address,data,blit_size); break;
						case 15: memset(dst_address,0xFF,blit_size); break;
					}
				}
			}
			// Advance past the row just processed, then apply the row steps.
			a1_x += inner_loop;
			a2_x += inner_loop;
			if(bcmd.upda1) {
				a1_x += a1.step_x;
				a1_y += a1.step_y;
			}
			if(bcmd.upda2) {
				a2_x += a2.step_x;
				a2_y += a2.step_y;
			}
		}
		A1_PIXEL_X = a1_x;
		A1_PIXEL_Y = a1_y;
		A2_PIXEL_X = a2_x;
		A2_PIXEL_Y = a2_y;
	} else {
		// ---- Pixel path: A1 in 16.16 fixed point, A2 in whole pixels.
		// Per-pixel increments by add mode: modes 0/1 step one pixel in X,
		// mode 2 does not move in X, mode 3 uses the A1 increment registers.
		int a1_inc_x = 1<<16;
		int a1_inc_y = a1.yadd*65536;
		short a2_inc_x = 1;
		if(a1.pixelmode == 2) {
			a1_inc_x = 0;
		} else if(a1.pixelmode == 3) {
			a1_inc_x = a1.inc_x;
			a1_inc_y = a1.inc_y;
		}
		if(a1.xsign)
			a1_inc_x = -a1_inc_x;
		if(a2.pixelmode == 2)
			a2_inc_x = 0;
		if(a2.xsign)
			a2_inc_x = -a2_inc_x;

		switch(a1.depth) {
			case 0:
			case 1:
				// Sub-byte depths handled a whole byte at a time.
				for(DWORD j=0; j<outer_loop; j++) {
					for(DWORD i=0; i<inner_loop; i++) {
						BYTE src1;
						BYTE dst1;
						a1_address = a1.base + ((((a1_y>>16) * a1.width + (a1_x>>16)) * bitdepth[a1.depth])/8);
						a2_address = a2.base + (((a2_y * a2.width + a2_x) * bitdepth[a1.depth])/8);
						src1 = ReadMem8(*src);
						dst1 = ReadMem8(*dst);
						if(!bcmd.patdsel) {
							dst1 = (BYTE)blit_lfu(bcmd.logic,src1,dst1);
						} else {
							dst1 = MEM[0xF02268];
						}
						// With clipping enabled, only write inside the A1 clip window.
						if(!bcmd.clip_a1 || ((a1_x>>16) < a1.clip_x && (a1_y>>16) < a1.clip_y)) {
							WriteMem8(*dst,dst1);
						}
						a1_x += a1_inc_x;
						a2_x += a2_inc_x;
						a1_y += a1_inc_y;
						a2_y += a2.yadd;
					}
					if(bcmd.upda1) {
						a1_x += a1.step_x;
						a1_y += a1.step_y;
					}
					if(bcmd.upda1f) {
						a1_x += a1.fstep_x;
						a1_y += a1.fstep_y;
					}
					if(bcmd.upda2) {
						a2_x += a2.step_x;
						a2_y += a2.step_y;
					}
				}
				break;
			case 2:
				// 4 bits per pixel: even X selects the high nibble of a
				// byte, odd X the low nibble.
				for(DWORD j=0; j<outer_loop; j++) {
					for(DWORD i=0; i<inner_loop; i++) {
						BYTE src1;
						BYTE dst1,dst_old;
						int src_bit,dst_bit;
						if(bcmd.dsta2) {
							src_bit = (a1_x>>16) & 0x1;
							dst_bit = (a2_x & 0x1);
						} else {
							src_bit = (a2_x & 0x1);
							dst_bit = (a1_x>>16) & 0x1;
						}
						a1_address = a1.base + ((((a1_y>>16) * a1.width + (a1_x>>16)) * bitdepth[a1.depth])/8);
						a2_address = a2.base + (((a2_y * a2.width + a2_x) * bitdepth[a1.depth])/8);
						src1 = ReadMem8(*src);
						dst1 = ReadMem8(*dst);
						dst_old = dst1;
						if(src_bit == 0) {
							src1 = (src1 >> 4) & 0xF;
						} else {
							src1 = src1 & 0xF;
						}
						if(dst_bit == 0) {
							dst1 = (dst1 >> 4) & 0xF;
						} else {
							dst1 = dst1 & 0xF;
						}
						if(!bcmd.patdsel) {
							dst1 = (BYTE)blit_lfu(bcmd.logic,src1,dst1);
						} else {
							dst1 = MEM[0xF02268];
						}
						// Merge the new nibble with the untouched half of the byte.
						dst1 &= 0xF;
						if(dst_bit) {
							dst1 = (dst_old & 0xF0) | dst1;
						} else {
							dst1 = dst1 << 4 | (dst_old & 0xF);
						}
						if(!bcmd.clip_a1 || ((a1_x>>16) < a1.clip_x && (a1_y>>16) < a1.clip_y)) {
							WriteMem8(*dst,dst1);
						}
						a1_x += a1_inc_x;
						a2_x += a2_inc_x;
						a1_y += a1_inc_y;
						a2_y += a2.yadd;
					}
					if(bcmd.upda1) {
						a1_x += a1.step_x;
						a1_y += a1.step_y;
					}
					if(bcmd.upda1f) {
						a1_x += a1.fstep_x;
						a1_y += a1.fstep_y;
					}
					if(bcmd.upda2) {
						a2_x += a2.step_x;
						a2_y += a2.step_y;
					}
				}
				break;
			case 3:
				// 8 bits per pixel.
				for(DWORD j=0; j<outer_loop; j++) {
					for(DWORD i=0; i<inner_loop; i++) {
						BYTE src1;
						BYTE dst1,dst_old;
						a1_address = a1.base + ((((a1_y>>16) * a1.width + (a1_x>>16)) * bitdepth[a1.depth])/8);
						a2_address = a2.base + (((a2_y * a2.width + a2_x) * bitdepth[a1.depth])/8);
						src1 = ReadMem8(*src);
						dst1 = ReadMem8(*dst);
						dst_old = dst1;
						if(bcmd.patdsel) {
							dst1 = MEM[0xF02268];
						} else if(bcmd.logic == 0) {
							// NOTE(review): the "all zeros" function writes
							// 0x7F at this depth only. Kept as found; confirm
							// whether this is an intentional workaround.
							dst1 = 0x7F;
						} else {
							dst1 = (BYTE)blit_lfu(bcmd.logic,src1,dst1);
						}
						if(bcmd.dcompen) {
							// NOTE(review): the compare is against the logical
							// negation (0 or 1) of the old destination byte,
							// kept exactly as found. With cmpdst clear the
							// computed result is compared; otherwise the
							// pattern byte is.
							BYTE pattern = MEM[0xF02268];
							BYTE compared = bcmd.cmpdst ? pattern : dst1;
							if(compared == !dst_old)
								dst1 = dst_old;
						}
						if(!bcmd.clip_a1 || ((a1_x>>16) < a1.clip_x && (a1_y>>16) < a1.clip_y)) {
							WriteMem8(*dst,dst1);
						}
						a1_x += a1_inc_x;
						a2_x += a2_inc_x;
						a1_y += a1_inc_y;
						a2_y += a2.yadd;
					}
					if(bcmd.upda1) {
						a1_x += a1.step_x;
						a1_y += a1.step_y;
					}
					if(bcmd.upda1f) {
						a1_x += a1.fstep_x;
						a1_y += a1.fstep_y;
					}
					if(bcmd.upda2) {
						a2_x += a2.step_x;
						a2_y += a2.step_y;
					}
				}
				break;
			case 4:{
				// 16 bits per pixel, with optional shading.
				// A single intensity/colour accumulator is kept for the whole
				// blit; it is not reset between rows.
				int gd_i;
				int gd_c;
				int gd_ia,gd_ca;
				int shade_ia;
				DWORD gouraud_add;
				WORD gint,gfrac;
				BYTE gcolour;
				gouraud_add = ReadMem32(0xF02270);
				gcolour = ReadMem8(0xF02268);
				gint = ReadMem16(0xF0227C);
				gfrac = ReadMem16(0xF0227E);
				// Sign-extend the 24-bit intensity step and 8-bit colour step.
				gd_ia = gouraud_add & 0xFFFFFF;
				if(gd_ia & 0x800000)
					gd_ia = 0xFF000000 | gd_ia;
				gd_ca = (gouraud_add>>24) & 0xFF;
				if(gd_ca & 0x80)
					gd_ca = 0xFFFFFF00 | gd_ca;

				// The starting intensity is taken as unsigned 8.16 here.
				gd_i = gint & 0xFF;
				gd_i = (gd_i << 16) | gfrac;
				gd_c = gcolour;

				// Per-pixel source-shade delta: integer part of the intensity
				// step, with bit 7 treated as the sign. It is loop-invariant.
				shade_ia = gd_ia >> 16;
				if(shade_ia & 0x80)
					shade_ia = 0xFFFFFF00 | shade_ia;

				for(DWORD j=0; j<outer_loop; j++) {
					for(DWORD i=0; i<inner_loop; i++) {
						WORD src1;
						WORD dst1,dst_old;
						// Groups of four pixels are spaced by the window
						// pitch (a1_size / a2_size); the low two X bits pick
						// the pixel inside the group.
						a1_address = a1.base + (((a1_y>>16) * a1.width + ((a1_x>>16)&0xFFFFFFFC))*2*(a1_size/8)) + (((a1_x>>16)&0x3)*2);
						a2_address = a2.base + ((a2_y * a2.width + (a2_x&0xFFFFFFFC))*2*(a2_size/8)) + ((a2_x&0x3)*2);
						src1 = ReadMem16(*src);
						dst1 = ReadMem16(*dst);
						dst_old = dst1;
						if(bcmd.patdsel) {
							dst1 = *(WORD*)(&MEM[0xF02268]);
						} else {
							dst1 = (WORD)blit_lfu(bcmd.logic,src1,dst1);
						}
						if(bcmd.gourd) {
							// High byte = colour, low byte = integer intensity.
							// The colour is shifted as unsigned to avoid
							// overflowing a signed int.
							DWORD gdt = ((DWORD)gd_i & 0xFFFFFF) | ((DWORD)gd_c << 24);
							dst1 = (WORD)(gdt >> 16);
						}
						if(bcmd.srcshade) {
							int intensity = (src1 & 0xFF) + shade_ia;
							if(intensity < 0)
								intensity = 0;
							if(intensity > 0xFF)
								intensity = 0xFF;
							dst1 = (src1 & 0xFF00) | intensity;
						}
						if(bcmd.dcompen) {
							// NOTE(review): compares against the logical
							// negation (0 or 1) of the old destination, kept
							// exactly as found. With cmpdst clear the source
							// pixel is compared; otherwise the pattern word.
							WORD pattern = *(WORD*)(&MEM[0xF02268]);
							WORD compared = bcmd.cmpdst ? pattern : src1;
							if(compared == !dst_old)
								dst1 = dst_old;
						}
						if(!bcmd.clip_a1 || ((a1_x>>16) < a1.clip_x && (a1_y>>16) < a1.clip_y)) {
							WriteMem16(*dst,dst1);
						}
						a1_x += a1_inc_x;
						a2_x += a2_inc_x;
						a1_y += a1_inc_y;
						a2_y += a2.yadd;
						// Advance the shading accumulators; intensity
						// saturates at the top of its 24-bit range.
						gd_i += gd_ia;
						gd_c += gd_ca;
						if(gd_i > 0xFFFFFF)
							gd_i = 0xFFFFFF;
					}
					if(bcmd.upda1) {
						a1_x += a1.step_x;
						a1_y += a1.step_y;
					}
					if(bcmd.upda1f) {
						a1_x += a1.fstep_x;
						a1_y += a1.fstep_y;
					}
					if(bcmd.upda2) {
						a2_x += a2.step_x;
						a2_y += a2.step_y;
					}
				}
				break;}
			case 5:
				// 32 bits per pixel. The inner counter advances by two per
				// pixel, so inner_loop/2 (rounded up) pixels are written per row.
				for(DWORD j=0; j<outer_loop; j++) {
					for(DWORD i=0; i<inner_loop; i+=2) {
						DWORD src1;
						DWORD dst1,dst_old;
						a1_address = a1.base + ((( (a1_y>>16) * a1.width + (a1_x>>16)) * bitdepth[a1.depth])/8);
						a2_address = a2.base + (((a2_y * a2.width + a2_x) * bitdepth[a1.depth])/8);
						src1 = ReadMem32(*src);
						dst1 = ReadMem32(*dst);
						dst_old = dst1;
						if(!bcmd.patdsel) {
							dst1 = blit_lfu(bcmd.logic,src1,dst1);
						} else {
							dst1 = *(DWORD*)(&MEM[0xF02268]);
						}
						if(bcmd.dcompen) {
							// NOTE(review): same logical-negation compare as
							// the other depths, kept exactly as found.
							DWORD pattern = *(DWORD*)(&MEM[0xF02268]);
							DWORD compared = bcmd.cmpdst ? pattern : src1;
							if(compared == !dst_old)
								dst1 = dst_old;
						}
						if(!bcmd.clip_a1 || ((a1_x>>16) < a1.clip_x && (a1_y>>16) < a1.clip_y)) {
							WriteMem32(*dst,dst1);
						}
						a1_x += a1_inc_x;
						a2_x += a2_inc_x;
						a1_y += a1_inc_y;
						a2_y += a2.yadd;
					}
					if(bcmd.upda1) {
						a1_x += a1.step_x;
						a1_y += a1.step_y;
					}
					if(bcmd.upda1f) {
						a1_x += a1.fstep_x;
						a1_y += a1.fstep_y;
					}
					if(bcmd.upda2) {
						a2_x += a2.step_x;
						a2_y += a2.step_y;
					}
				}
				break;
			default:
				// Depth codes 6 and 7 are not handled: nothing is transferred.
				break;
		}
		// Write the final coordinates back, splitting A1 into its integer
		// and fractional halves.
		A1_PIXEL_X = a1_x >> 16;
		A1_PIXEL_Y = a1_y >> 16;
		A1_FPIXEL_X = a1_x & 0xFFFF;
		A1_FPIXEL_Y = a1_y & 0xFFFF;
		A2_PIXEL_X = a2_x;
		A2_PIXEL_Y = a2_y;
	}
}