Well... no luck. I knew that I wasn't familiar with "haddps", but apparently my CPU is unfamiliar with it too. Crashes with "illegal instruction" after printing one line of zeros. I really must upgrade this beast Real Soon Now.
After commenting out those two lines, it prints two lines of zeros (surrounded by parentheses). I don't think this is correct - shouldn't I see some numbers from my array? It at least exits without a segfault...
Best,
Frank
; nasm -f elf32 maria.asm
; ld -o maria maria.o -I/lib/ld-linux.so.2 -lc -melf_i386
%include "ssutils.inc"
global _start
section .data
carray dd 1.0, 2.0, 3.0, 36.0
section .bss
csum reso 1
section .text
_start:
nop
; fake caller
push carray
push 4
push csum
call costTot
add esp, 12
exit:
mov ebx, eax
mov eax, 1 ; sys_exit
int 80h
global costTot
cost equ 8
n equ 12
sum equ 16
costTot:
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi, [ebp+cost] ;
mov esi, [ebp+n]
xorps xmm2,xmm2
mov ecx,0 ;index of the loop
movups xmm0,[edi]
mov edx,16
;if an element is less then 0 it is not added
.loop xorps xmm1,xmm1
cmpltps xmm1,xmm0
printregps xmm0
; this instruction crashes my machine
; obviously, your algorithm won't work without it
; but "just to try"... comment it out
; haddps xmm0,xmm0
; haddps xmm0,xmm0
addss xmm2,xmm0
printregps xmm2
add ecx,4 ; i use the single precision so in each register i put 4 number
movups xmm0,[edi+edx] ;
add edx,16 ;add 16 for the next four number
cmp ecx,esi
jb .loop
; movups [ebp+sum],xmm2
mov ecx, [ebp + sum]
movups [ecx], xmm2
pop edi ; ripristina i registri da preservare
pop esi
pop ebx
mov esp, ebp ; ripristina lo Stack Pointer
pop ebp ; ripristina il Base Pointer
ret ; torna alla funzione C chiamante