; int printf(char *buff,const char *fmt,va_list args)
; [ss:sp] = ? in 32-bit by default
global _printf
section .text
countw equ 0 ;to count the number of '%s'
countb equ 0 ;to count the number of '%d' or '*c'
; NOTE(review): countw and countb are equ constants, fixed to 0 when the file
; is assembled; they cannot be incremented while the program runs.
; ----------------------------------------------------------------------
; _printf - unfinished draft of a printf-style formatter.
; Builds an EBP frame, loads the first stack argument ([ebp+8]) into EBX
; and scans it looking for '%s', '%c' and '%d'.
; NOTE(review): the labels the_end, fmt_c and fmt_d are jumped to but are
; not defined in this excerpt, and fmt_s is cut short.
; ----------------------------------------------------------------------
_printf:
push ebp
mov ebp,esp
sub esp,0x40 ;32 words of local stack space,for some temporary variables
mov ebx,[ebp+8] ;the first parameter is a string's pointer
;Such as "Hello %s, welcome to asm world." we deal with it like this:
; "Hello ",'%s',"welcome to asm world." three parts.
; Then replace the %s with the second parameter(ebp+12),
; and combine the parts as a result.Yes ?
xor cx,cx
; NOTE(review): stringtmp is not defined anywhere in this excerpt, and a
; memory destination in NASM needs brackets ([stringtmp]).
mov stringtmp,ebx ;How to define a pointer ? And if it's right ?
loop1:
; NOTE(review): AX is 16 bits but the source is declared as a byte, and a
; 16-bit register (CX) is mixed with a 32-bit one (EBX) in the address;
; this line is not expected to assemble.
mov ax,byte [ebx+cx]
; NOTE(review): NASM does not process escapes inside single quotes, so '\0'
; is presumably the two characters '\' and '0', not a NUL byte - confirm.
cmp ax,'\0' ;judge if it's the end, but I don't know whether it's right...
jz the_end
inc cx
; The three comparisons below treat AX as holding two characters at once
; ('%' plus the conversion letter), while CX advances one byte per pass.
cmp ax,'%s'
jz fmt_s
cmp ax,'%c'
jz fmt_c
cmp ax,'%d'
jz fmt_d
; more ...
jmp loop1
fmt_s:
; DX receives the assemble-time value of countw (0) and is then incremented;
; countw itself is unchanged by this.
mov dx,countw
inc dx
; NOTE(review): in NASM expressions '+' binds tighter than '<<', so the
; shifts here likely need parentheses to mean (countw<<2)+(countb<<1) - verify.
mov eax,[ebp+8+countw<<2+countb<<1]
...
...
The first thing I notice is that you shouldn't be using equ as if it defined a runtime variable. Equates are evaluated at assembly time, and I'm assuming you are trying to use them as incrementing values for tracking arguments, in which case it won't work.
Next thing I notice is that you define variable space on the stack (which isn't needed) then try to access that variable with a literal which hasn't been specified. If you want to use arguments in that way, either use the NASMX project from
http://nasmx.sourceforge.net or make use of the procedure handling directives built-in to NASM.
; ----------------------------------------------------------------------
; slow_swap - demonstration of NASM's %arg / %local stack-frame directives.
; Takes two dword stack arguments (src, dst). Copies the value of the src
; argument into the dst argument's stack slot, going through a local
; variable tmp on the way. EAX holds the src value on return.
; NOTE(review): despite the name, nothing is swapped; only the dst slot in
; the caller-provided argument area is overwritten.
; ----------------------------------------------------------------------
slow_swap:
%push                           ; open a preprocessor context for the %$ names
%stacksize flat                 ; 32-bit flat model: %arg/%local are EBP-relative
%assign %$localsize 0           ; running size of locals; grown by %local below
%arg src:dword, dst:dword       ; name the two dword stack arguments
%local tmp:dword                ; reserve one dword local and name it tmp
enter %$localsize, 0            ; build the frame and allocate the locals
mov eax, [src]                  ; eax = value of the src argument
mov [tmp], eax                  ; tmp = src
mov eax,[tmp]                   ; reload the same value from tmp (redundant)
mov [dst], eax                  ; dst argument slot = src
leave                           ; tear down the frame built by enter
ret
%pop                            ; close the preprocessor context
The above code is horrible and should never be used. But it's a great example of using arguments and locals with NASM's built-in directives. Please read the manual for more information.
http://www.nasm.us/doc/nasmdoc4.html#section-4.8
Finally, in your loop the comparisons are all wrong. AFAIK the line 'mov ax,byte [ebx+cx]' won't even assemble (or shouldn't), since you are trying to typecast a byte value into word-sized storage and 'mov' doesn't zero-extend on its own (that's what the movzx variant is for). Also, when you compare the values you are assuming that a word has been read, so maybe you actually meant the previous line to be 'mov ax, word [ebx+ecx]'. However, even if that was the case, you have to expect that it would be 'c%', 's%', etc. Intel processors are little-endian, so the word value should be reversed. To avoid this type of confusion, I suggest working byte by byte, which will make things a bit easier on you. You aren't really getting any optimization in your code by reading word values, since you are incrementing by one byte at a time, and the word-sized step is where the optimization would have occurred. I don't really care for the "state machine" style comparison you have going on (just personal opinion), so I cooked up an example that uses a more if/elsif/else/endif style code layout.
The following code has been tested and is just unoptimized enough to give you a few things to play around with. For example, once you've added the OS dependent stuff, you might think about calculating the size of the string and using dword incrementing to reduce the number of iterations.
[BITS 32]
[CPU 386]
[GLOBAL print]
[SECTION .text]
;; --------------------------------------------------
; @brief  parses a printf-style format string and dispatches on each
;         '%' token. The actual output code is OS dependent and is
;         left as marked stubs.
; @param  fmt - format string (ASCIIZ), first stack argument [ebp+8]
; @param  ... - variadic argument list, one dword per token, starting
;               at [ebp+12]
; @return EAX = number of variadic arguments consumed, i.e. one per
;         %s, %c, %d or %x token ('%%' consumes none).
;
; Stack-argument calling convention; the caller removes the arguments.
;
; Register roles:
;   ESI = cursor into the format string
;   ECX = index of the next variadic argument; argument n lives at
;         [ebp+8+(n*4)], with n = 0 being fmt itself
;   EDI = current %s / %d / %x argument
;   EAX = current character (zero-extended), or the %c argument
; Clobbers: EAX (return value), ECX, flags.
; Preserves: ESI and EDI (saved and restored here), EBP.
;
; Any output code inserted at the stubs below must preserve ECX, ESI
; and EBP, and must leave the stack balanced.
;; --------------------------------------------------
print:
        push    ebp
        mov     ebp, esp
        push    esi                     ; ESI/EDI are callee-saved in the
        push    edi                     ; 32-bit C calling convention
;; --------------------------------------------------
        xor     ecx, ecx
        mov     esi, [ebp+8+(ecx*4)]    ; argument 0 = fmt
        inc     ecx                     ; ecx = 1: first variadic argument
.run_again:
        ; Zero-extend the byte so that stale upper bits of EAX (left by a
        ; %c argument load or by an output stub) cannot hide the terminator.
        movzx   eax, byte [esi]
        inc     esi
        test    eax, eax
        jz      .is_done
        cmp     al, '%'
        jne     .print_char             ; ordinary character
        movzx   eax, byte [esi]         ; conversion letter after the '%'
        inc     esi
        cmp     al, 's'
        jne     .not_string
;; --------------------------------------------------
        mov     edi, [ebp+8+(ecx*4)]
        inc     ecx
;; --------------------------------------------------
;; PRINT ASCIIZ STRING IN ARGUMENT (IN EDI)
;; --------------------------------------------------
        jmp     .run_again
.not_string:
        cmp     al, 'c'
        jne     .not_character
;; --------------------------------------------------
;; PRINT CHARACTER ARGUMENT (IN EAX)
;; --------------------------------------------------
        mov     eax, [ebp+8+(ecx*4)]
        inc     ecx                     ; consume the argument, as the other
                                        ; conversions do, so the next token
                                        ; reads the following argument
        jmp     .print_char
.not_character:
        cmp     al, 'd'
        jne     .not_decimal
;; --------------------------------------------------
        mov     edi, [ebp+8+(ecx*4)]
        inc     ecx
;; --------------------------------------------------
;; CONVERT NUMBER IN EDI TO DECIMAL STRING THEN PRINT
;; --------------------------------------------------
        jmp     .run_again
.not_decimal:
        cmp     al, 'x'
        jne     .not_hexadecimal
;; --------------------------------------------------
        mov     edi, [ebp+8+(ecx*4)]
        inc     ecx
;; --------------------------------------------------
;; CONVERT NUMBER IN EDI TO HEX STRING THEN PRINT
;; --------------------------------------------------
        jmp     .run_again
.not_hexadecimal:
        cmp     al, '%'
        je      .print_char             ; '%%' prints a literal '%'
;; --------------------------------------------------
;; PRINT FORMAT ERROR MESSAGE
;; (also reached when the string ends right after a lone '%')
;; --------------------------------------------------
        jmp     .is_done
.print_char:
;; --------------------------------------------------
;; PRINT CHARACTER IN AL
;; --------------------------------------------------
        jmp     .run_again
.is_done:
;; --------------------------------------------------
;; Swap EAX & ECX to return number of handled tokens
;; (three-XOR swap: EAX <- ECX, ECX <- old EAX)
;; --------------------------------------------------
        xor     eax, ecx
        xor     ecx, eax
        xor     eax, ecx
        dec     eax                     ; index started at 1, so count = index - 1
;; --------------------------------------------------
        pop     edi
        pop     esi
        leave
        ret
This example 'print' function handles '%%', '%s', '%c', '%d', and '%x' tokens. As I said before, you'll need to write in the OS dependent stuff. This code leaves a lot of room for improvement. The only "optimization" I've done is using the generic swapping algorithm at the end; I point this out because it tends to confuse people. At the end, EAX=n and ECX=0, where 'n' equals the number of % tokens you've handled (just like printf does). I also left you a place to do error handling and possibly clean up the stack in case an invalid/unsupported token modifier is passed. I tested this code by inserting 'printf()' to handle the real output and letting this parser direct the tokenizing. Then I traced the call stack in gdb to make sure it invoked printf the right number of times for the string printf("Hello, %s%c%c", "Bryant", '!', 10).
Regards,
Bryant Keller
Edited for awkward tab-stops.