Author Topic: nasm error: invalid combination of opcode/ope  (Read 7052 times)

nobody

  • Guest
nasm error: invalid combination of opcode/ope
« on: July 31, 2008, 03:08:14 AM »
Hi i am creating a c library in vs2008 that uses assembly functions:

Memory.h:
#ifdef __cplusplus
extern "C" {
#endif

void* __cdecl memcpy(void* dst, void src, size_t count);
void* __cdecl memset(void* dst, int val, size_t count);
int     __cdecl memcmp(void* buf1, void* buf 2, size_t count);
void* __cdecl memmove(void* dst, void* src, size_t count);
void* __cdecl memchr(void* buf, int val, size_t count);

#ifdef __cplusplus
}
#endif

Memory.asm:
bits 32

global _memset
global _memcpy
global _memcmp

section .text

_memcpy:
   push ebp
   push ebx
   push ecx
   mov  ebp, esp

pushad

mov   eax, [ebp+8]
   mov ebx, [ebp+12]
   mov ecx, [ebp+16]

.loop
   mov dword [eax],  [ebx]
   add eax, 2
   add ebx, 2
   loop .loop

popad

pop ecx
   pop ebx
   pop ebp
   ret

_memset:
   push ebp
   mov ebp, esp

pushad

mov eax, [ebp+8]
   mov ebx, [ebp+12]
   mov ecx, [ebp+16]

.loop
   mov [eax], ebx
   add eax, 1
   loop .loop

popad

pop ebp
   ret

_memcmp:
   push ebp
   mov ebp, esp

pushad

mov eax, [ebp+8]
   mov ebx, [ebp+12]
   mov ecx, [ebp+16]

.loop
   cmp dword [eax], [ebx]
   jne .loopDone
   add eax, 2
   add ebx, 2
   loop .loop

.loopDone
   mov eax, [ebp+16]
   sub eax, ecx

popad

pop ebp
   ret

it is given me the above error on lines 20 and 61.
What am i doing wrong?

nobody

  • Guest
Re: nasm error: invalid combination of opcode/ope
« Reply #1 on: July 31, 2008, 05:27:26 AM »
Quite a "bit", actually... The main problem is that we've only got one data bus, so the move memory to memory and compare memory to memory functions don't exist.


Memory.h:
#ifdef __cplusplus
extern "C" {
#endif

void* __cdecl memcpy(void* dst, void src, size_t count);

"void * src", too, eh?

void* __cdecl memset(void* dst, int val, size_t count);

"man 3 memset" declares this the same way you have, but indicates that "val" is a "const char", not "int". Dunno what's right here.

int __cdecl memcmp(void* buf1, void* buf 2, size_t count);

No space in "buf 2"?

void* __cdecl memmove(void* dst, void* src, size_t count);
void* __cdecl memchr(void* buf, int val, size_t count);

Again, "val" is really only a byte.

#ifdef __cplusplus
}
#endif

Memory.asm:
bits 32

global _memset
global _memcpy
global _memcmp

section .text

_memcpy:
push ebp
push ebx
push ecx

With these "push"s *before* the "mov ebp, esp", you're parameters aren't going to be where you expect!!! (the ABI doesn't require you to preserve ecx - you might want to do it anyway...)

mov ebp, esp

pushad

mov eax, [ebp+8]
mov ebx, [ebp+12]
mov ecx, [ebp+16]

.loop
mov dword [eax], [ebx]

Not an x86 instruction. You'll have to do something like:

mov dl, [ebx]
mov [eax], dl

add eax, 2
add ebx, 2

Why 2??? You're going to overrun your buffer!

loop .loop

popad

You're not returning anything in eax...

pop ecx
pop ebx
pop ebp
ret

_memset:
push ebp
mov ebp, esp

pushad

mov eax, [ebp+8]
mov ebx, [ebp+12]
mov ecx, [ebp+16]

.loop
mov [eax], ebx

Probably only want to do "mov [eax], bl"...

add eax, 1
loop .loop

popad

Not returning anything in eax.

pop ebp
ret

_memcmp:
push ebp
mov ebp, esp

pushad

mov eax, [ebp+8]
mov ebx, [ebp+12]
mov ecx, [ebp+16]

.loop
cmp dword [eax], [ebx]

No such instruction!

mov dl, [ebx]
cmp [eax], dl


jne .loopDone
add eax, 2
add ebx, 2
loop .loop

2???

.loopDone
mov eax, [ebp+16]
sub eax, ecx

Okay, but...

popad

You overwrite eax here, and again don't return anything useful.

pop ebp
ret

You might want to look into the "string" instructions, movsb, stosb, cmpsb (and scasb, if I understand what "memchr" does...). They haven't been optimized, so may be slower than "explicit" instructions - "rep movsd" has been optimized and is still fast, I guess... maybe "rep movsb", too...

"Sophisticated" versions of these functions test for alignment, do any "odd bytes" first, then move/set/compare dwords - or bigger (an old trick was to use FPU regs to move 8 bytes at a time... nowadays we got mmx, xmm, ssssee5... okay, not that last one...) - and then clean up any "odd bytes" at the end. Probably "unroll the loop"... maybe use the prefetch instructions... Align loops in your routines - and the "loop" instruction is slow - "sub ecx, (byte) 1"/"jnz ..." is faster... It has become damn complicated to write fast code! Makes me miss the "good old days"...

I see you know how to keep C++ from mutilating your function names. That's a good start! You've got the right idea, but your code needs a little "tweaking"...

Best,
Frank