--- /home/manu/hq2x/asm/hq2x16.asm	2003-11-23 22:36:26.000000000 +0100
+++ hq2x16.asm	2008-01-16 14:48:37.791083213 +0100
@@ -38,6 +38,9 @@
 w7        resd 1
 w8        resd 1
 w9        resd 1
+src_add   resd 1          ; added to esi at .nexty: source line length minus Xres*2 (computed in _hq2x_16 setup)
+dst_add   resd 1          ; added to edi at .nexty: 2*pitch minus Xres*4 (computed in _hq2x_16 setup)
+pitch	  resd 1          ; destination line length in bytes, stored by _hq2x_16 setup
 
 SECTION .data
 
@@ -56,7 +59,7 @@
     mov     edx,[%1]
     cmp     edx,[%2]
     je      %%fin
-    mov     ecx,_RGBtoYUV
+    mov     ecx,[_RGBtoYUV]       ; ecx = dword stored at _RGBtoYUV, used as the table base below
     movd    mm1,[ecx+edx*4]
     movq    mm5,mm1
     mov     edx,[%2]
@@ -178,7 +181,7 @@
 %endmacro
 
 %macro Interp6 3
-    mov        ecx, _LUT16to32
+    mov        ecx, [_LUT16to32]  ; ecx = dword stored at _LUT16to32, used as the lookup-table base below
     movd       mm1, [ecx+eax*4]
     mov        edx, %2
     movd       mm2, [ecx+edx*4]
@@ -202,7 +205,7 @@
 %endmacro
 
 %macro Interp7 3
-    mov        ecx, _LUT16to32
+    mov        ecx, [_LUT16to32]  ; ecx = dword stored at _LUT16to32, used as the lookup-table base below
     movd       mm1, [ecx+eax*4]
     mov        edx, %2
     movd       mm2, [ecx+edx*4]
@@ -225,7 +228,7 @@
 %endmacro
 
 %macro Interp9 3
-    mov        ecx, _LUT16to32
+    mov        ecx, [_LUT16to32]  ; ecx = dword stored at _LUT16to32, used as the lookup-table base below
     movd       mm1, [ecx+eax*4]
     mov        edx, %2
     movd       mm2, [ecx+edx*4]
@@ -249,7 +252,7 @@
 %endmacro
 
 %macro Interp10 3
-    mov        ecx, _LUT16to32
+    mov        ecx, [_LUT16to32]  ; ecx = dword stored at _LUT16to32, used as the lookup-table base below
     movd       mm1, [ecx+eax*4]
     mov        edx, %2
     movd       mm2, [ecx+edx*4]
@@ -465,23 +468,58 @@
 
 inbuffer     equ 8
 outbuffer    equ 12
-Xres         equ 16
-Yres         equ 20
-pitch        equ 24
+x1	     equ 16           ; [ebp+16]: source start column; doubled into a byte offset during setup
+y1	     equ 20           ; [ebp+20]: source start row; times 4 it indexes the dword table at inbuffer+64
+x2	     equ 24           ; [ebp+24]: destination start column; doubled into a byte offset during setup
+y2	     equ 28           ; [ebp+28]: destination start row; times 4 it indexes the dword table at outbuffer+64
+Xres         equ 32           ; [ebp+32]: per-row count, reloaded into ecx at .loopy
+Yres         equ 36           ; [ebp+36]: initial value of linesleft
 
 _hq2x_16:
     push ebp
     mov ebp,esp
     pushad
 
+    mov	    ecx,[ebp+Xres]    ; ecx = Xres
+    shl	    ecx,1             ; ecx = Xres*2, used below to derive the row-end skips
+
+    ; setup source
     mov     esi,[ebp+inbuffer]
+    mov	    eax,[ebp+y1]      ; eax = y1
+    shl     eax,2             ; eax = y1*4: byte offset into a table of dwords
+
+    mov    edx,[esi+68]       ; dword at inbuffer+68; NOTE(review): presumably row pointer 1 of a per-row pointer table at +64 - confirm struct layout
+    sub    edx,[esi+64] ; line length in bytes
+    mov	   [nextline],edx     ; nextline = source line length (reloaded into ebx at .nexty)
+    sub    edx,ecx            ; minus Xres*2; assumes esi advances Xres*2 bytes per row - loop body not shown, confirm
+    mov    [src_add],edx      ; src_add = amount added to esi at .nexty
+
+    mov    esi,[esi+eax+64]   ; esi = table entry number y1 (dword at inbuffer+64+y1*4)
+    mov    eax,[ebp+x1]       ; eax = x1
+    shl	    eax,1             ; eax = x1*2 (byte offset of the start column)
+    add    esi,eax            ; esi = address of the first source element to process
+
+    ; setup dest
     mov     edi,[ebp+outbuffer]
+    mov	    eax,[ebp+y2]      ; eax = y2
+    shl     eax,2             ; eax = y2*4: byte offset into a table of dwords
+
+    mov    edx,[edi+68]       ; dword at outbuffer+68; NOTE(review): presumably row pointer 1 of a per-row pointer table at +64 - confirm struct layout
+    sub    edx,[edi+64] ; line length in bytes
+    mov	   [pitch],edx        ; pitch = destination line length (loaded into ebx before the FuncTable jumps)
+    shl	   edx,1 ; 2 lines / loop... !!!
+    sub    edx,ecx            ; subtract Xres*2 twice = Xres*4 in total;
+    sub    edx,ecx            ; assumes edi advances Xres*4 bytes per row - loop body not shown, confirm
+    mov    [dst_add],edx      ; dst_add = amount added to edi at .nexty
+
+    mov    edi,[edi+eax+64]   ; edi = table entry number y2 (dword at outbuffer+64+y2*4)
+    mov    eax,[ebp+x2]       ; eax = x2
+    shl	    eax,1             ; eax = x2*2 (byte offset of the start column)
+    add    edi,eax            ; edi = address of the first destination element to write
+
     mov     edx,[ebp+Yres]
     mov     [linesleft],edx
-    mov     ebx,[ebp+Xres]
-    shl     ebx,1
     mov     dword[prevline],0
-    mov     dword[nextline],ebx
 .loopy
     mov     ecx,[ebp+Xres]
     sub     ecx,2                 ; x={Xres-2, Xres-1} are special cases.
@@ -545,7 +583,7 @@
     movzx   edx,ax  
     mov     [w9],edx
 .flags
-    mov     ebx,_RGBtoYUV
+    mov     ebx,[_RGBtoYUV]       ; ebx = dword stored at _RGBtoYUV, used as the table base below
     mov     eax,[w5]
     xor     ecx,ecx
     movd    mm5,[ebx+eax*4]
@@ -614,7 +652,7 @@
     test    ecx,ecx
     jnz     .testflag1
     mov     ecx,[cross]
-    mov     ebx,[ebp+pitch]
+    mov     ebx,[pitch]           ; ebx = destination line length saved in pitch by the setup code
     jmp     [FuncTable2+ecx*4]
 .testflag1
     mov     edx,[w1]
@@ -673,7 +711,7 @@
     jz      .noflag9
     or      ecx,128
 .noflag9
-    mov  ebx,[ebp+pitch]
+    mov  ebx,[pitch]              ; ebx = destination line length saved in pitch by the setup code
     jmp  [FuncTable+ecx*4]
 
 ..@flag0
@@ -1865,14 +1903,13 @@
     mov     [w9],eax
     jmp     .flags
 .nexty
-    add     edi,ebx
+    add     edi,[dst_add]         ; advance edi by dst_add (2*pitch - Xres*4, from setup)
+    add	    esi,[src_add]         ; advance esi by src_add (source line length - Xres*2, from setup)
     dec     dword[linesleft]
     jz      .fin
-    mov     ebx,[ebp+Xres]
-    shl     ebx,1
+    mov     ebx,[nextline]        ; ebx = source line length stored in nextline during setup
     cmp     dword[linesleft],1
     je      .lastline
-    mov     dword[nextline],ebx
     neg     ebx
     mov     dword[prevline],ebx
     jmp     .loopy
