If you have an AVX-capable CPU and want to start learning AVX programming, BASELIB offers several utilities for exactly that: the prnymm, dumpymm, clearymm and sse_flags routines. They are available in both binary and source form in the attachment (Revision 2.4) to the post above.
Sample demo code (using the BASE3.DLL binary on 32-bit Windows with AVX support, with GCC as the linker):
;-----------------------------------------------------------------------
; AVX packed-division demo for 32-bit Windows.
; Syntax: NASM (Intel operand order). Links against BASE3.DLL.
; The CPU and OS must support AVX.
;
; Build:
;   nasm -f win32 this.asm
;   gcc -m32 this.obj base3.dll -s -o this.exe
;-----------------------------------------------------------------------
        global  _WinMain@16
        extern  _clearymm
        extern  _dumpymm
        extern  _prnline

DUMP_AS_QUADS   equ     1               ; dumpymm option in EAX: view as packed quads
WINMAIN_ARGS    equ     16              ; bytes of stdcall arguments (the "@16")

        section .data align=32          ; vmovapd with a ymm operand needs 32-byte alignment

w:      dq      10.23, 11.43, 12.54, 17.95
x:      dq      89.54, 98.14, 55.16, 19.11
y:      dq      45.12, 65.22, 14.17, 29.16
z:      dq      56.33, 51.01, 54.86, 28.54
a:      dq      00.00, 05.00, 00.00, 00.00      ; divisor: only element 1 is non-zero

        section .text

;-----------------------------------------------------------------------
; int WINAPI WinMain(HINSTANCE, HINSTANCE, LPSTR, int)
; ABI:   32-bit stdcall - the callee removes the 16 bytes of arguments.
; Out:   eax = 0 (process exit code)
; Uses:  ymm0, ymm1, ymm4-ymm7; BASELIB routines _clearymm, _dumpymm
;        (view option passed in EAX) and _prnline.
;-----------------------------------------------------------------------
_WinMain@16:
        push    ebp                     ; C's standard prologue
        mov     ebp, esp

        call    _clearymm               ; YMMs are dirty on entry

        vmovapd ymm4, [w]               ; data population
        vmovapd ymm5, [x]
        vmovapd ymm6, [y]
        vmovapd ymm7, [z]
        vmovapd ymm0, [a]

        mov     eax, DUMP_AS_QUADS
        call    _dumpymm                ; see the initial population
        call    _prnline

        vdivpd  ymm1, ymm4, ymm0        ; packed division: ymm1 = ymm4 / ymm0

        mov     eax, DUMP_AS_QUADS      ; reloaded before every dump
        call    _dumpymm                ; see the result after division

        vzeroupper                      ; leave no dirty upper halves for non-AVX code in the caller
        xor     eax, eax                ; exit code 0 instead of whatever was left in EAX
        pop     ebp                     ; epilogue
        ret     WINMAIN_ARGS            ; stdcall: pop the four WinMain arguments
The code above produces the following output (a `#` marks an element whose divisor was 0.0):
YMM0: 0.0|0.0|5.0|0.0 ;the YMM registers after data population
YMM1: 0.0|0.0|0.0|0.0
YMM2: 0.0|0.0|0.0|0.0
YMM3: 0.0|0.0|0.0|0.0
YMM4: 17.95|12.54|11.43|10.23
YMM5: 19.11|55.16|98.14|89.54
YMM6: 29.16|14.17|65.22|45.12
YMM7: 28.54|54.86|51.01|56.33
YMM0: 0.0|0.0|5.0|0.0 ;the YMM registers after the packed division of four REAL8 values
YMM1: #|#|2.286|#
YMM2: 0.0|0.0|0.0|0.0
YMM3: 0.0|0.0|0.0|0.0
YMM4: 17.95|12.54|11.43|10.23
YMM5: 19.11|55.16|98.14|89.54
YMM6: 29.16|14.17|65.22|45.12
YMM7: 28.54|54.86|51.01|56.33
EDIT: Added the equivalent code for 32-bit Linux, using BASE3.O with GCC as the linker. Your CPU must have AVX support.
;-----------------------------------------------------------------------
; AVX packed-division demo for 32-bit Linux.
; Syntax: NASM (Intel operand order). Links against BASE3.O.
; The CPU and OS must support AVX.
;
; Build and run:
;   nasm -f elf32 this.asm
;   gcc -m32 this.o base3.o -s -o this
;   ./this
; NOTE(review): if your GCC builds PIE by default and the link complains
; about text relocations, adding -no-pie should help - confirm locally.
;-----------------------------------------------------------------------
        global  main
        extern  clearymm
        extern  dumpymm
        extern  prnline

DUMP_AS_QUADS   equ     1               ; dumpymm option in EAX: view as packed quads

        section .data align=32          ; vmovapd with a ymm operand needs 32-byte alignment

w:      dq      10.23, 11.43, 12.54, 17.95
x:      dq      89.54, 98.14, 55.16, 19.11
y:      dq      45.12, 65.22, 14.17, 29.16
z:      dq      56.33, 51.01, 54.86, 28.54
a:      dq      00.00, 05.00, 00.00, 00.00      ; divisor: only element 1 is non-zero

        section .text

;-----------------------------------------------------------------------
; int main(void)
; ABI:   i386 System V (cdecl)
; Out:   eax = 0 (process exit status)
; Uses:  ymm0, ymm1, ymm4-ymm7; BASELIB routines clearymm, dumpymm
;        (view option passed in EAX) and prnline.
;-----------------------------------------------------------------------
main:
        push    ebp                     ; C's standard prologue
        mov     ebp, esp
        and     esp, -16                ; keep the stack 16-byte aligned at every call

        call    clearymm                ; YMMs are dirty on entry

        vmovapd ymm4, [w]               ; data population
        vmovapd ymm5, [x]
        vmovapd ymm6, [y]
        vmovapd ymm7, [z]
        vmovapd ymm0, [a]

        mov     eax, DUMP_AS_QUADS
        call    dumpymm                 ; see the initial population
        call    prnline

        vdivpd  ymm1, ymm4, ymm0        ; packed division: ymm1 = ymm4 / ymm0

        mov     eax, DUMP_AS_QUADS      ; reloaded before every dump
        call    dumpymm                 ; see the result after division

        vzeroupper                      ; leave no dirty upper halves for non-AVX code in libc
        xor     eax, eax                ; exit status 0 instead of whatever was left in EAX
        leave                           ; epilogue: undo the alignment and restore EBP
        ret