Hello all, I hope you enjoy winter-holidays! :-)
But now to my problem :
I use 64-bit Linux, and I want to print numbers on the screen using only syscalls: no external functions and no extra variables for storing the numbers. Therefore I use the common approach of pushing every digit onto the stack, to fetch and print it later.
I know the write syscall takes rax=1, rdi=1 to use my screen (stdout), rsi as a pointer to the address where my string/number is, and rdx as the length of the string (in my case 1).
As you can see, I use the stack address in rsi to get my number, and it works with 8 or 16 added, but not with the plain rsp address.
Does anybody have an idea what is going wrong?
Greetz, and thanks in advance, Andy
; Question listing: print the decimal digits of n using only Linux syscalls.
; The first loop pushes one ASCII digit per 8-byte stack slot; the second part
; is meant to write those slots to stdout one character at a time.
; NOTE(review): no `_start:`, `.loop:` or `.loop2:` label definition and no
; `syscall` instruction are visible in this listing, although the code refers
; to or depends on them -- presumably lost when the post was copied; confirm
; against the original post.
[bits 64]
n: dd 1234 ; value to print, declared as a 4-byte dword
global _start
mov rax, [n] ; NOTE(review): 8-byte load from the 4-byte n; the upper 4 bytes are whatever follows n in memory
xor rcx, rcx ; rcx = number of digits pushed so far
mov rbx, 10 ; divisor
xor rdx, rdx ; clear the high half of the dividend RDX:RAX before div
div rbx ; Divide RAX by 10
add rdx, 30h ; remainder 0..9 -> ASCII '0'..'9'
push rdx ; push the remainder (the digit) onto the stack
inc rcx
test rax, rax
jnz .loop ; repeat while the quotient is non-zero
mov rax, 1 ; Write system call
mov rdi, 1 ; STDOUT
xor rsi, rsi
mov r10, rsp
add r10, 0 ; change offset to be read in stack 0, 8 or 16
mov rsi, r10 ; rsi = address of the stack slot to be written
pop r12 ; drops that slot: rsp moves up by 8, so with offset 0 rsi now points just below rsp
mov rdx, 1 ; write one character.
; NOTE(review): this push stores rcx into the slot that was just popped, i.e.
; exactly the address rsi holds when the offset is 0, so the digit there is
; overwritten by the counter before it can be written out. With an offset of
; 8 or 16 rsi points at a slot this push does not touch.
push rcx
pop rcx
dec rcx
test rcx, rcx
jnz .loop2 ; one iteration per pushed digit
mov rax, 1
mov rbx, 0
int 0x80 ; NOTE(review): uses the int 0x80 interface with rax=1/rbx=0 rather than the 64-bit `syscall` instruction
For your study:
; Prints an unsigned 64-bit integer in decimal, followed by a newline, using
; only Linux x86-64 system calls.
; Syntax: NASM (Intel).  Target: Linux x86-64, System V AMD64 ABI.

bits 64
default rel

SYS_WRITE       equ 1
SYS_EXIT        equ 60
STDOUT          equ 1
BUF_SIZE        equ 24                  ; holds the 20 digits of 2^64-1; 24 + the
                                        ; 8-byte return address keeps RSP 16-byte aligned

section .rodata
nl:     db      `\n`

section .text
global _start

        align   4
_start:
        ; RSP is already 16-byte aligned at process entry, so no adjustment
        ; is needed before the call.
        mov     rdi, 12345678           ; # to print.
        call    printUint64Decimal

        ; Print a newline.
        mov     eax, SYS_WRITE
        mov     edi, STDOUT
        lea     rsi, [nl]
        mov     edx, 1                  ; one byte
        syscall

        ; Exit program.
        mov     eax, SYS_EXIT
        xor     edi, edi                ; exit status 0
        syscall                         ; This syscall never returns.

;-----------------------------------------------------------------------
; void printUint64Decimal(uint64_t value)
; ABI:   System V AMD64
; In:    rdi = value to print
; Out:   nothing (the result of the write syscall is ignored)
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, r11, flags
; Stack: BUF_SIZE bytes; the digits are built backwards from the end of
;        that buffer, so nothing is stored below RSP.
;-----------------------------------------------------------------------
        align   4
printUint64Decimal:
        sub     rsp, BUF_SIZE
        lea     rsi, [rsp + BUF_SIZE]   ; rsi = one past the end of the buffer
        mov     r9, rsi                 ; r9  = end of string, used for the length
        mov     r8, 0xcccccccccccccccd  ; ceil(2^67 / 10): reciprocal of 10 scaled by 2^67

        align   4
.loop:
        ; Invariant: rdi = value still to convert, rsi = lowest byte written.
        mov     rax, rdi
        dec     rsi
        ; Multiply by the scaled 1/10 instead of dividing by 10 (faster).
        mul     r8                      ; rdx = high 64 bits of value * r8
        mov     rax, rdi
        shr     rdx, 3                  ; rdx = value / 10 (total shift 64 + 3 = 67)
        lea     rcx, [rdx + rdx*4]      ; rcx = quotient * 5
        add     rcx, rcx                ; rcx = quotient * 10
        sub     rax, rcx                ; rax = value - quotient*10 (remainder)
        ; Store the remainder converted to ASCII.
        add     al, '0'
        mov     [rsi], al
        ; Continue with the quotient until nothing is left.
        mov     rdi, rdx
        test    rdx, rdx
        jnz     .loop

        ; Print the buffer; the length is end - first digit.
        mov     eax, SYS_WRITE
        mov     edi, STDOUT
        mov     rdx, r9
        sub     rdx, rsi
        syscall

        add     rsp, BUF_SIZE
        ret
Many thanks Frederico!
At least I have a working example of printing via the stack. I would have been happy to know why mine does not work as expected, but I can live well with your variant. :-)
Thanks ! :-)
> I would have been happy to know why mine does not work as expected
Pushing a QWORD for each character calculated in the first loop is a huge waste of stack space. It is simpler to pre-allocate the space (10 chars for a DWORD converted to a decimal string), while keeping RSP QWORD (or DQWORD) aligned.
Here's your code modified:
; Prints a 32-bit unsigned number in decimal, followed by a newline, using a
; small buffer reserved on the stack and only Linux x86-64 system calls.
; Syntax: NASM (Intel).  Target: Linux x86-64.

bits 64
default rel

SYS_WRITE       equ 1
SYS_EXIT        equ 60
STDOUT          equ 1
BUF_SIZE        equ 16                  ; 10 digits + '\n' = 11 bytes, rounded up so
                                        ; RSP stays 16-byte (DQWORD) aligned

section .text
global _start

_start:
        ; RSP is 16-byte aligned at process entry; reserving a multiple of 16
        ; keeps it that way. Alignment is not strictly needed in this code,
        ; but it is good practice in case SSE/SSE2 is used.
        sub     rsp, BUF_SIZE

        lea     rsi, [rsp + BUF_SIZE - 1] ; Using RSI because sys_write needs it.
        mov     byte [rsi], `\n`        ; the string ends with a newline
        mov     eax, 1234               ; # to print.
        xor     ecx, ecx                ; Counter = 0.
        mov     r8d, 10                 ; divisor

        align   4
.loop:
        ; Invariant: rsi = lowest byte written so far, ecx = digits stored.
        dec     rsi
        xor     edx, edx                ; clear the high half of EDX:EAX before div
        div     r8d                     ; eax = quotient, edx = remainder
        add     dl, '0'                 ; remainder -> ASCII digit
        mov     [rsi], dl
        inc     ecx                     ; Increment counter.
        test    eax, eax                ; Quotient == 0?
        jnz     .loop                   ; No, stay in the loop.

        ; sys_write(STDOUT, rsi, digits + 1)
        mov     eax, SYS_WRITE
        mov     edi, STDOUT
        lea     edx, [rcx + 1]          ; The final '\n' as well.
        syscall

        add     rsp, BUF_SIZE           ; Restore RSP to its original position.
                                        ; Not really needed since sys_exit will not return.

        ; FIXED: Use 'syscall', not 'int 0x80'.
        mov     eax, SYS_EXIT
        xor     edi, edi                ; exit status 0
        syscall
Many many thanks, I will check this as soon as I will be back at my Computer. Many thanks!