Try this:
; Real-mode boot sector entry. The far jump reloads CS with the boot
; segment, so every label in this file can be used as a small offset
; from the start of the sector rather than as 0x7c00 + offset.
; NOTE(review): relies on the assembler origin being 0 (no `org` directive
; is visible here) -- confirm the build produces a flat binary.
BOOT_SEG equ 0x07c0                     ; 0x07c0 << 4 = physical 0x7c00

        bits    16
        jmp     BOOT_SEG:_start         ; CS = BOOT_SEG, IP = offset of _start
;-----------------------------------------------------------------------
; _start -- boot entry point, reached through the far jump so that CS is
;           the segment in which this file's labels are valid offsets.
;           Prints the NUL-terminated string at msg one character at a
;           time with BIOS int 0x10 / AH=0x0e, then jumps to _exit.
; In:    CS = segment of this code
; Out:   does not return
; Clobb: AX, BX, SI, DS, SS, SP, flags
;-----------------------------------------------------------------------
STACK_TOP equ 0x7c00                    ; stack grows down from 0000:7c00

_start:
        ; SS:SP is whatever the firmware left behind; int 0x10 needs a
        ; usable stack, so establish one just below the boot sector
        ; before anything is pushed.
        xor     ax, ax
        mov     ss, ax                  ; a load of SS holds off interrupts
        mov     sp, STACK_TOP           ; ...until this instruction completes

        mov     ax, cs
        mov     ds, ax                  ; DS = CS so lodsb reads msg from this sector
        cld                             ; lodsb must walk forwards
        mov     si, msg                 ; DS:SI -> first character
.loop:
        mov     ah, 0x0e                ; reloaded every pass in case int 0x10 does not preserve AX
        lodsb                           ; AL = next character, SI advances
        test    al, al
        jz      _exit                   ; NUL terminator: done
        mov     bx, 7                   ; BH = page 0, BL = foreground colour 7
        int     0x10                    ; BIOS teletype output of AL
        jmp     .loop
; _exit -- park the CPU once the message has been printed. Never returns.
_exit:
.park:
        hlt                             ; idle until an interrupt wakes the CPU
        jmp     .park                   ; woken up: go straight back to sleep
; Message text, followed by zero padding and the two-byte boot signature
; that occupies the last word of the 512-byte sector.
SECTOR_SIZE    equ 512
BOOT_SIGNATURE equ 0xaa55

msg:    db      `Hello!!!\0`            ; backquoted string: \0 is the NUL terminator

        times   (SECTOR_SIZE - 2) - ($ - $$) db 0
        dw      BOOT_SIGNATURE