You're close, except that I expected you to use MOVSB instead of STOSQ. I suspect you're dealing with UTF characters?
Here is some quick code that deals with ASCII text and stays close to both your interpretation and the algorithm I suggested above...
; Make ConCatProc visible to the linker so other modules can call it.
global ConCatProc
; Uninitialised storage: one fixed 100-byte buffer. ConCatProc always
; writes its result starting at conbuf and returns this address, so each
; call overwrites the previous result.
section .bss
conbuf: resb 100
; Code follows.
section .text
;-----------------------------------------------------------------------
; char *ConCatProc(const char *s1, const char *s2)
;
; Concatenates two NUL-terminated byte strings into the static buffer
; conbuf and returns the buffer's address.
;
; ABI:   Microsoft x64 (arguments arrive in rcx and rdx)
; In:    rcx = s1 (non-null), rdx = s2 (non-null)
; Out:   rax = address of conbuf, holding s1 followed by s2 and a NUL.
;        If the combined text does not fit, it is truncated so that the
;        result plus its terminator never exceeds CONCAT_BUF_CAP bytes.
; Keeps: rsi, rdi (callee-saved in this ABI, so saved/restored here),
;        and the caller's RFLAGS.
; Clobb: rcx, r10
; Note:  relies on str_length (later in this file) leaving rdx, rdi and
;        r10 untouched. The result lives in a single shared buffer, so
;        every call overwrites the previous result.
;-----------------------------------------------------------------------
CONCAT_BUF_CAP  equ     100             ; must equal the size reserved for conbuf

ConCatProc:
        push    rsi                     ; rsi/rdi are nonvolatile for the caller
        push    rdi
        pushfq                          ; keep the caller's flags (incl. DF)
        cld                             ; string ops must run forward

        lea     rdi, [rel conbuf]       ; rdi = write cursor into the buffer
        mov     r10d, CONCAT_BUF_CAP - 1 ; r10 = bytes still free, NUL excluded

        ; --- append s1 ---
        mov     rsi, rcx                ; rsi = s1 (rcx is reused as the count)
        call    str_length              ; rax = strlen(s1)
        cmp     rax, r10
        cmova   rax, r10                ; never copy more than the room left
        sub     r10, rax                ; account for the bytes about to be used
        mov     rcx, rax
        rep movsb                       ; copy; rdi advances past the copied text

        ; --- append s2 ---
        mov     rsi, rdx                ; rsi = s2
        call    str_length              ; rax = strlen(s2)
        cmp     rax, r10
        cmova   rax, r10                ; clamp to the remaining room
        mov     rcx, rax
        rep movsb                       ; rdi now points just past the result

        mov     byte [rdi], 0           ; terminate the result

        lea     rax, [rel conbuf]       ; return the buffer's address
        popfq
        pop     rdi
        pop     rsi
        ret
;-----------------------------------------------------------------------
; str_length -- length of a NUL-terminated byte string
;
; In:    rsi = address of the string
; Out:   rax = number of bytes before the terminating 0
; Keeps: rsi, rdi (rdi is saved and restored around the scan)
; Clobb: rcx, flags
; Note:  scans with SCASB, so the direction flag must be clear on entry.
;-----------------------------------------------------------------------
str_length:
        push    rdi
        mov     rdi, rsi                ; SCASB walks the string through rdi
        xor     al, al                  ; search byte: the 0 terminator
        mov     rcx, -1                 ; effectively unlimited scan count
        repne scasb                     ; rcx drops by len+1 -> rcx = -(len+2)
        lea     rax, [rcx + 2]          ; rax = -len
        neg     rax                     ; rax = len
        pop     rdi
        ret
The key here is not to modify RDI manually as the buffer's pointer. Let it increment naturally as the MOVSB instructions execute.