81
Other Discussion / Re: sprintf_s plus printf
« Last post by alCoPaUL on May 15, 2024, 09:23:05 PM »
essential msvcrt.lib for win64asm...
;r9 // sprintf_s + printf
;r8 // by alCoPaUL [GIMO]
;rdx // 5/15/2024
;rcx // Brigada Ocho [b8] Productions
;call // rax
global m
extern printf
extern sprintf_s
section .text
; m -- calls sprintf_s and then printf, passing arguments in rcx, rdx, r8
; (the register order listed in the header comment of this listing).
; The post targets Win64 ("msvcrt.lib for win64asm").
; The definitions of `i` and `b` are not visible in this excerpt, and `x`
; is truncated, so their exact contents cannot be described here.
; NOTE(review): sprintf_s's documented second parameter is the buffer size;
; this code passes the address of `i` there -- confirm that is intentional.
m:sub rsp,28h ; reserve 40 bytes; presumably 32 bytes of shadow space + 8 to realign rsp -- confirm
lea r8,x ; 3rd sprintf_s argument: address of x
lea rdx,i ; 2nd sprintf_s argument: address of i
lea rcx,b ; 1st sprintf_s argument: address of b
call sprintf_s
mov r8, rax ; 3rd printf argument: value returned by sprintf_s
lea rdx,b ; 2nd printf argument: address of b
lea rcx,i ; 1st printf argument: address of i (so i presumably holds a format string -- confirm)
call printf
add rsp,28h ; release the 40 bytes reserved on entry
ret
section .data
x:db 'Revelation 13'
......
.....
....
;r9 // sprintf_s + printf
;r8 // by alCoPaUL [GIMO]
;rdx // 5/15/2024
;rcx // Brigada Ocho [b8] Productions
;call // rax
global m
extern printf
extern sprintf_s
section .text
; m -- calls sprintf_s and then printf, passing arguments in rcx, rdx, r8
; (the register order listed in the header comment of this listing).
; The definitions of `i` and `b` are not visible in this excerpt, and `x`
; is truncated, so their exact contents cannot be described here.
; NOTE(review): sprintf_s's documented second parameter is the buffer size;
; this code passes the address of `i` there -- confirm that is intentional.
m:sub rsp,28h ; reserve 40 bytes; presumably 32 bytes of shadow space + 8 to realign rsp -- confirm
lea r8,x ; 3rd sprintf_s argument: address of x
lea rdx,i ; 2nd sprintf_s argument: address of i
lea rcx,b ; 1st sprintf_s argument: address of b
call sprintf_s
mov r8, rax ; 3rd printf argument: value returned by sprintf_s
lea rdx,b ; 2nd printf argument: address of b
lea rcx,i ; 1st printf argument: address of i (so i presumably holds a format string -- confirm)
call printf
add rsp,28h ; release the 40 bytes reserved on entry
ret
section .data
x:db 'Revelation 13'
....
....
....
;r9 // sprintf_s + printf
;r8 // by alCoPaUL [GIMO]
;rdx // 5/15/2024
;rcx // Brigada Ocho [b8] Productions
;call // rax
extrn printf:proc
extrn sprintf_s:proc
.code
; main -- MASM-syntax variant: calls sprintf_s and then printf, passing
; arguments in rcx, rdx, r8 (the register order listed in the header comment).
; The definitions of `i` and `b` are not visible in this excerpt, and `x`
; is truncated, so their exact contents cannot be described here.
; NOTE(review): sprintf_s's documented second parameter is the buffer size;
; this code passes the address of `i` there -- confirm that is intentional.
main proc
sub rsp,28h ; reserve 40 bytes; presumably 32 bytes of shadow space + 8 to realign rsp -- confirm
lea r8,[x] ; 3rd sprintf_s argument: address of x
lea rdx,[i] ; 2nd sprintf_s argument: address of i
lea rcx,[b] ; 1st sprintf_s argument: address of b
call sprintf_s
mov r8, rax ; 3rd printf argument: value returned by sprintf_s
lea rdx,[b] ; 2nd printf argument: address of b
lea rcx,[i] ; 1st printf argument: address of i (so i presumably holds a format string -- confirm)
call printf
add rsp,28h ; release the 40 bytes reserved on entry
ret
main endp
.data
x db 'Revelation 13'
........
........
; nasm -f elf64 calculatepi1.asm -o calculatepi1.o
; gcc -no-pie -m64 calculatepi1.o -o calculatepi1
; Leibniz says : pi/4 =1/1 - 1/3 + 1/5 - 1/7 + 1/9 - 1/11 + 1/13 .....
LEIBNIZ_TERMS   equ 200000              ; number of series terms to sum

section .data
var_oben        dq 1.0                  ; initial numerator (+1.0; sign flips each term)
var_unten       dq 1.0                  ; initial denominator (1, 3, 5, ...)
var_0           dq 0.0                  ; zero: start value of the running sum
var_2           dq 2.0                  ; two: denominator step, also used to scale by 4
var_minus1      dq -1.0                 ; minus one: sign-flip factor
MSG_RESULT      db "pi = %0.18lf", 10, 0

section .text
extern printf
global main

;-----------------------------------------------------------------------
; int main(void)
; ABI:   SysV AMD64, linked against libc (gcc -no-pie)
; Does:  sums LEIBNIZ_TERMS terms of 1/1 - 1/3 + 1/5 - ... (= pi/4),
;        multiplies the sum by 4 and prints it with printf.
; Out:   eax = 0
; Regs:  rax  = remaining term count
;        xmm1 = current denominator     xmm2 = running sum
;        xmm4 = current numerator (+1.0 / -1.0)
;        xmm5 = -1.0                    xmm6 = 2.0
; Stack: push rbp leaves rsp 16-byte aligned for the call to printf.
;-----------------------------------------------------------------------
main:
        push    rbp
        mov     rbp, rsp

        ; Load the constants once; they never change inside the loop.
        movsd   xmm2, qword [var_0]     ; sum = 0.0
        movsd   xmm4, qword [var_oben]  ; numerator = 1.0
        movsd   xmm1, qword [var_unten] ; denominator = 1.0
        movsd   xmm5, qword [var_minus1]
        movsd   xmm6, qword [var_2]
        mov     eax, LEIBNIZ_TERMS

.leibniz_loop:
        movapd  xmm0, xmm4
        divsd   xmm0, xmm1              ; term = numerator / denominator
        addsd   xmm2, xmm0              ; sum += term   (sum approaches pi/4)
        mulsd   xmm4, xmm5              ; alternate the sign
        addsd   xmm1, xmm6              ; denominator += 2
        dec     rax
        jnz     .leibniz_loop

        ; pi = 4 * sum, done as two exact multiplications by 2.0
        mulsd   xmm2, xmm6
        mulsd   xmm2, xmm6

        movapd  xmm0, xmm2              ; first floating-point argument
        lea     rdi, [rel MSG_RESULT]   ; format string
        mov     eax, 1                  ; variadic call: one vector register used
        call    printf

        ; Return to libc instead of issuing a raw sys_exit: returning from
        ; main lets libc flush stdio, so the output is not lost when stdout
        ; is redirected to a pipe or a file.
        xor     eax, eax                ; exit status 0
        pop     rbp
        ret
That makes sense. When you say main() has a misaligned RSP, how do you know? Is it because of the way nasm puts the binary together? I am doing pure Linux at the moment and using syscalls with _start, not the pseudo-C approach, so I gather from what you're saying that I don't need the prolog. But I will once I switch to main for the pseudo-C stuff, so I'm curious how you know.
; test_win64.asm
;
; c:\work> nasm -fwin64 -o test_win64.o test_win64.asm
; c:\work> ld -s -o test_win64.exe test_win64.o -lkernel32
;
bits 64
default rel
section .data
buffer:
db '0x'
times 16 db '0'
db `\n`
bufferLength equ $ - buffer
section .text
extern __imp_GetStdHandle
extern __imp_WriteConsoleA
extern __imp_ExitProcess
global _start
;-----------------------------------------------------------------------
; _start -- print the value RSP had at process entry as "0x" + 16 hex
; digits + newline on the console, then terminate the process.
; ABI:   Microsoft x64. Every API call gets 32 bytes of shadow space and
;        a 16-byte aligned RSP; WriteConsoleA's 5th argument lives at
;        [rsp+32]. This routine never returns, so RSP is not restored.
;-----------------------------------------------------------------------
_start:
        ; Capture RSP before anything touches the stack.
        mov     rdx, rsp                ; value to format = RSP at entry
        lea     rcx, [buffer+2]         ; digits go right after the "0x" prefix
        call    u64toStr

        ; 32 bytes shadow space + 8 bytes for the 5th argument slot, then
        ; force 16-byte alignment whatever the entry alignment was.
        sub     rsp, 40
        and     rsp, -16

        mov     ecx, -11                ; STD_OUTPUT_HANDLE
        call    [__imp_GetStdHandle]

        mov     rcx, rax                ; hConsoleOutput
        lea     rdx, [buffer]           ; lpBuffer
        mov     r8d, bufferLength       ; nNumberOfCharsToWrite
        xor     r9d, r9d                ; lpNumberOfCharsWritten = NULL
        mov     qword [rsp+32], 0       ; lpReserved = NULL (5th argument)
        call    [__imp_WriteConsoleA]

        xor     ecx, ecx                ; uExitCode = 0
        call    [__imp_ExitProcess]     ; call (not jmp) keeps the callee's stack aligned
        int3                            ; not reached: ExitProcess does not return
;-----------------------------------------------------------------------
; u64toStr -- write a 64-bit value as 16 uppercase hexadecimal digits.
; In:    rcx = address of a 16-byte destination (left unchanged)
;        rdx = value to convert
; Out:   digits stored most-significant first at [rcx .. rcx+15];
;        no terminator is written
; Clobb: rax, rdx, rdi, flags
; Note:  rdi is callee-saved in the Microsoft x64 convention, so a caller
;        that has to return to its own caller must preserve it itself.
;-----------------------------------------------------------------------
        align   4
u64toStr:
        lea     rdi, [rcx + 15]         ; rdi -> last digit slot
.next_digit:
        cmp     rdi, rcx
        jb      .done                   ; walked past the first slot: finished
        mov     rax, rdx
        and     al, 0x0f                ; isolate the low nibble
        add     al, '0'
        cmp     al, '9'
        jbe     .store
        add     al, 'A' - '9' - 1       ; 10..15 -> 'A'..'F'
.store:
        mov     [rdi], al
        shr     rdx, 4                  ; next nibble
        dec     rdi                     ; fill the buffer right to left
        jmp     .next_digit
.done:
        ret
bits 64
default rel
section .data
buffer:
db '0x'
times 16 db '0'
db `\n`
bufferLength equ $ - buffer
section .text
extern __imp_GetStdHandle
extern __imp_WriteConsoleA
extern __imp_ExitProcess
global _start
;-----------------------------------------------------------------------
; _start -- print the value RSP had at process entry as "0x" + 16 hex
; digits + newline on the console, then terminate the process.
; ABI:   Microsoft x64. Every API call gets 32 bytes of shadow space and
;        a 16-byte aligned RSP; WriteConsoleA's 5th argument lives at
;        [rsp+32]. This routine never returns, so RSP is not restored.
;-----------------------------------------------------------------------
_start:
        ; Capture RSP before anything touches the stack.
        mov     rdx, rsp                ; value to format = RSP at entry
        lea     rcx, [buffer+2]         ; digits go right after the "0x" prefix
        call    u64toStr

        ; 32 bytes shadow space + 8 bytes for the 5th argument slot, then
        ; force 16-byte alignment whatever the entry alignment was.
        sub     rsp, 40
        and     rsp, -16

        mov     ecx, -11                ; STD_OUTPUT_HANDLE
        call    [__imp_GetStdHandle]

        mov     rcx, rax                ; hConsoleOutput
        lea     rdx, [buffer]           ; lpBuffer
        mov     r8d, bufferLength       ; nNumberOfCharsToWrite
        xor     r9d, r9d                ; lpNumberOfCharsWritten = NULL
        mov     qword [rsp+32], 0       ; lpReserved = NULL (5th argument)
        call    [__imp_WriteConsoleA]

        xor     ecx, ecx                ; uExitCode = 0
        call    [__imp_ExitProcess]     ; call (not jmp) keeps the callee's stack aligned
        int3                            ; not reached: ExitProcess does not return
;-----------------------------------------------------------------------
; u64toStr -- write a 64-bit value as 16 uppercase hexadecimal digits.
; In:    rcx = address of a 16-byte destination (left unchanged)
;        rdx = value to convert
; Out:   digits stored most-significant first at [rcx .. rcx+15];
;        no terminator is written
; Clobb: rax, rdx, rdi, flags
; Note:  rdi is callee-saved in the Microsoft x64 convention, so a caller
;        that has to return to its own caller must preserve it itself.
;-----------------------------------------------------------------------
        align   4
u64toStr:
        lea     rdi, [rcx + 15]         ; rdi -> last digit slot
.next_digit:
        cmp     rdi, rcx
        jb      .done                   ; walked past the first slot: finished
        mov     rax, rdx
        and     al, 0x0f                ; isolate the low nibble
        add     al, '0'
        cmp     al, '9'
        jbe     .store
        add     al, 'A' - '9' - 1       ; 10..15 -> 'A'..'F'
.store:
        mov     [rdi], al
        shr     rdx, 4                  ; next nibble
        dec     rdi                     ; fill the buffer right to left
        jmp     .next_digit
.done:
        ret
; test_sysv.asm
;
; $ nasm -felf64 -o test_sysv.o test_sysv.asm
; $ ld -s -o test_sysv test_sysv.o
;
bits 64
default rel
section .data
buffer:
db '0x'
times 16 db '0'
db `\n`
bufferLength equ $ - buffer
section .text
global _start
;-----------------------------------------------------------------------
; _start -- print the value RSP had at process entry as "0x" + 16 hex
; digits + newline on stdout, then exit with status 0.
; Uses raw Linux syscalls only: write (rax = 1) and exit (rax = 60).
;-----------------------------------------------------------------------
_start:
        ; NOTE: RSP is already DQWORD-aligned here (SysV ABI)!
        lea     rdi, [buffer + 2]       ; digits go right after the "0x" prefix
        mov     rsi, rsp                ; value to format = RSP at entry
        call    u64toStr

        ; write(1, buffer, bufferLength)
        mov     edx, bufferLength       ; count
        lea     rsi, [buffer]           ; buf
        mov     edi, 1                  ; fd = stdout
        mov     eax, edi                ; syscall number 1 = write
        syscall

        ; exit(0)
        mov     eax, 60                 ; syscall number 60 = exit
        xor     edi, edi                ; status = 0
        syscall
;-----------------------------------------------------------------------
; u64toStr -- write a 64-bit value as 16 uppercase hexadecimal digits.
; In:    rdi = address of a 16-byte destination (left unchanged)
;        rsi = value to convert
; Out:   digits stored most-significant first at [rdi .. rdi+15];
;        no terminator is written
; Clobb: rax, rcx, rsi, flags
;-----------------------------------------------------------------------
        align   4
u64toStr:
        lea     rcx, [rdi + 15]         ; rcx -> last digit slot
.next_digit:
        cmp     rcx, rdi
        jb      .done                   ; walked past the first slot: finished
        mov     rax, rsi
        and     al, 0x0f                ; isolate the low nibble
        add     al, '0'
        cmp     al, '9'
        jbe     .store
        add     al, 'A' - '9' - 1       ; 10..15 -> 'A'..'F'
.store:
        mov     [rcx], al
        shr     rsi, 4                  ; next nibble
        dec     rcx                     ; fill the buffer right to left
        jmp     .next_digit
.done:
        ret
Running both of them (using MinGW64 for Windows):
c:\work> nasm -fwin64 -o test_win64.o test_win64.asm
c:\work> ld -s -o test_win64.exe test_win64.o -lkernel32
c:\work> test_win64
0x00000034601FF858
$ nasm -felf64 -o test_sysv.o test_sysv.asm
$ ld -s -o test_sysv test_sysv.o
$ ./test_sysv
0x00007FFD2BBF2D90
Notice the lowest 4 bits (the last hex digit) of each value: 8 on Windows, 0 on Linux. The SysV ABI says of %rsp: "The stack pointer holds the address of the byte with lowest address which is part of
the stack. It is guaranteed to be 16-byte aligned at process entry."