Well... we don't really know what "a$" looks like. I seem to recall that BASIC uses a byte prefix with the length (or am I thinking of Pascal?)... And by "extract to a new string", I guess you mean copy the string (to newly allocated memory?). Depending on what you're doing, you may not need to copy the delimited strings - just "finding" them might be enough. But ASSuming that you've got an "lstring" - byte prefix with the length - and want to copy the delimited strings to similar, newly allocated, strings...
; nasm -f elf32 extract.asm
; ld -o extract extract.o -I/lib/ld-linux.so.2 -lc
; (on an x86-64 host, add "-m elf_i386" to the ld command so it links 32-bit objects)
global _start
extern malloc
%define DELIMITER ' '
section .text
;-----------------------------------------------------------------------
; _start - split the length-prefixed string "basicstring" at DELIMITER
;          and copy every piece into its own malloc'ed, length-prefixed
;          string.
;
; ABI:   32-bit Linux, cdecl call to libc malloc.
; Out:   pointers[0..n-1] = addresses of the new strings
;        [stringcount]    = n (incremented once per extracted string)
; Regs:  esi = read cursor in the source string
;        ebx = one past the last source byte (callee-saved, survives malloc)
;        edx = next free slot in "pointers" (saved around malloc)
;        edi = scratch while scanning, token length across the malloc
;              call, then the copy destination
; Notes: - The source string is assumed to be non-empty: the scan looks
;          at the first byte before it compares against the end.
;        - Two adjacent delimiters produce a zero-length string.
;        - lodsb/stosb/movsb rely on the direction flag being clear.
;        - If malloc fails, the process exits with status 1.
;-----------------------------------------------------------------------
_start:
bp1:                                    ; just a breakpoint for debugging
        mov     esi, basicstring
        lodsb                           ; al = length byte, esi -> first character
        movzx   ebx, al
        add     ebx, esi                ; ebx = end of string (so we know when we're done)
        mov     edx, pointers           ; array of pointers to extracted strings

.top:
        ; first, figure out how long this delimited piece is
        xor     ecx, ecx
.getlen:
        cmp     byte [esi + ecx], DELIMITER
        jz      .gotlen
        inc     ecx
        ; the last piece has no trailing delimiter, so also stop at the end
        lea     edi, [esi + ecx]
        cmp     edi, ebx
        jnz     .getlen

.gotlen:
        ; then, allocate memory for it: ecx characters + 1 length byte
        mov     edi, ecx                ; keep the length in a callee-saved register;
                                        ; the pushed argument belongs to malloc and
                                        ; may not come back unchanged
        inc     ecx                     ; extra byte for the length prefix
        push    edx                     ; malloc may trash edx - save our slot pointer
        sub     esp, 8                  ; padding: 4 + 8 + 4 = 16 bytes, so esp stays
                                        ; 16-byte aligned at the call (i386 psABI;
                                        ; esp is 16-byte aligned at process entry)
        push    ecx                     ; size argument
        call    malloc
        add     esp, 12                 ; drop the argument and the padding
        pop     edx                     ; restore our slot pointer
        test    eax, eax
        jz      .nomem                  ; allocation failed - bail out

        mov     [edx], eax              ; record the new string in "pointers"
        add     edx, 4                  ; and get ready for the next one
        mov     ecx, edi                ; ecx = character count for the copy
        mov     edi, eax                ; destination for stosb/movsb
        mov     al, cl
        stosb                           ; write the length prefix
        rep movsb                       ; copy the characters
        inc     esi                     ; esi was left on the delimiter - move past it
        inc     dword [stringcount]     ; count our delimited strings
        cmp     esi, ebx                ; are we done?
        jb      .top                    ; no? do more.
        jmp     short .extracted

.nomem:
        mov     ebx, 1                  ; exit status 1: out of memory
        mov     eax, 1                  ; __NR_exit
        int     80h

.extracted:
; Extraction is complete - write every extracted string to stdout, each
; followed by a newline, to show that it worked.
; Linux-specific: talks to the kernel directly through int 80h.
%define PRINT_SYS_WRITE 4               ; __NR_write
%define PRINT_STDOUT    1               ; file descriptor of standard output

        mov     esi, pointers           ; esi walks the table of string addresses
print_next:
        mov     eax, [esi]              ; eax -> length byte of the current string
        add     esi, 4                  ; advance to the next table entry
        movzx   edx, byte [eax]         ; write() wants the byte count in edx
        lea     ecx, [eax + 1]          ; ecx -> first character, past the length byte
        mov     ebx, PRINT_STDOUT
        mov     eax, PRINT_SYS_WRITE
        int     80h                     ; write(STDOUT, string, length)

        mov     ecx, newline
        mov     edx, 1
        mov     eax, PRINT_SYS_WRITE    ; ebx still holds STDOUT - int 80h returns its
                                        ; result in eax and leaves the other registers alone
        int     80h                     ; write(STDOUT, newline, 1)

        sub     dword [stringcount], 1  ; one string fewer left to print
        jnz     print_next
; exit - terminate the process and report success to the parent.
exit:
        xor     ebx, ebx                ; exit status 0 - do not inherit whatever the
                                        ; preceding code left in ebx
        mov     eax, 1                  ; __NR_exit
        int     80h
;-----------------------
section .data

; Source string in "lstring" form: one length byte followed by the characters.
basicstring:
        db      basicstring.end - basicstring - 1   ; length byte, computed by the assembler
        db      "string1 string2 string3"
.end:

newline:
        db      10                      ; line feed, written after each printed string

;------------------
section .bss

; Table of addresses of the extracted strings (128 dword slots).
pointers:
        resd    128

; Number of extracted strings; counted up during extraction and counted
; back down while printing. Relies on .bss starting out as zero.
stringcount:
        resd    1
;----------------------
That probably isn't what you want - not in Linux, anyway - but maybe it'll give you an idea how to approach it.
Best,
Frank