Author Topic: ASCII -> Integer (64 bit) conversion  (Read 5207 times)

Offline TightCoderEx

  • Full Member
  • **
  • Posts: 103
ASCII -> Integer (64 bit) conversion
« on: December 26, 2012, 02:32:32 PM »
I've finally completed this project, only to discover it doesn't work. Well, not entirely anyway. Decimal, octal and binary conversion work just great; it wasn't until I started testing hexadecimal that the flaw reared its ugly head. The code between 1FH and 94H is where the conversion qualifiers are evaluated. My logic was to first see whether the string has been suffixed with H, B or O (3EH - 55H), and then to see (at 52H - 62H) whether it has been prefixed with 0 (zero) or 0x.

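So 0x3f2a would drop into 57H and return that error, because its trailing 'a' is neither a digit nor one of the suffixes H, B or O, so the 0x prefix is never examined;
     0x3f2b would make it to binary conversion, because its trailing 'b' is taken as the binary suffix, and then invoke an error because the digits are out of range.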

Solution #1: Poke a hack in @ 57H to make it work  :o
               #2: Redesign everything from 1FH - 8EH  ;D

Code size now is 254 bytes, so option #1 is only going to bloat that and hopefully I can streamline a little more by checking leading qualifiers first.


  1f:   48 89 fe                mov    rsi,rdi
  22:   48 83 ee 02             sub    rsi,0x2
  26:   ac                      lods   al,BYTE PTR ds:[rsi]
  27:   41 88 c0                mov    r8b,al
  2a:   48 29 ce                sub    rsi,rcx
  2d:   e8 b6 00 00 00          call   e8
  32:   8a 06                   mov    al,BYTE PTR [rsi]
  34:   56                      push   rsi
  35:   be 00 00 00 00          mov    esi,0x0
  3a:   74 24                   je     60
  3c:   ff c9                   dec    ecx
  3e:   41 80 e0 5f             and    r8b,0x5f
  42:   ad                      lods   eax,DWORD PTR ds:[rsi]
  43:   41 80 f8 4f             cmp    r8b,0x4f
  47:   74 47                   je     90
  49:   ad                      lods   eax,DWORD PTR ds:[rsi]
  4a:   41 80 f8 48             cmp    r8b,0x48
  4e:   74 40                   je     90
  50:   ad                      lods   eax,DWORD PTR ds:[rsi]
  51:   41 80 f8 42             cmp    r8b,0x42
  55:   74 39                   je     90

  57:   48 83 ca ff             or     rdx,0xffffffffffffffff
  5b:   e9 95 00 00 00          jmp    f5

  60:   e8 83 00 00 00          call   e8
  65:   75 f0                   jne    57
  67:   08 c0                   or     al,al
  69:   75 28                   jne    93
  6b:   48 ff c9                dec    rcx
  6e:   ad                      lods   eax,DWORD PTR ds:[rsi]
  6f:   48 89 c3                mov    rbx,rax
  72:   ad                      lods   eax,DWORD PTR ds:[rsi]
  73:   48 87 04 24             xchg   QWORD PTR [rsp],rax
  77:   48 89 c6                mov    rsi,rax
  7a:   ac                      lods   al,BYTE PTR ds:[rsi]
  7b:   8a 06                   mov    al,BYTE PTR [rsi]
  7d:   e8 66 00 00 00          call   e8
  82:   74 10                   je     94
  84:   5b                      pop    rbx
  85:   ff c9                   dec    ecx
  87:   ac                      lods   al,BYTE PTR ds:[rsi]
  88:   24 5f                   and    al,0x5f
  8a:   3c 58                   cmp    al,0x58
  8c:   74 06                   je     94
  8e:   eb c7                   jmp    57

  90:   48 89 c3                mov    rbx,rax
  93:   5e                      pop    rsi

Offline TightCoderEx

  • Full Member
  • **
  • Posts: 103
Re: ASCII -> Integer (64 bit) conversion
« Reply #1 on: December 26, 2012, 09:31:37 PM »
Ok, it is done and working, or at least I tested out each function once and then again with an error. It's a little jumbled up, but that was so all my jumps and conditional branches were only two bytes. Still only saved 6 bytes over the previous version.

Code: [Select]
UCASE       equ     0x5f                    ; AND mask that folds ASCII a-z onto A-Z
RADIX_DEC   equ     10                      ; multiplier used by the decimal converter
HEX_ADJUST  equ     'A' - 10                ; maps 'A'..'F' onto 10..15

            global  A2I

section .text

;-----------------------------------------------------------------------------------------------
; A2I - convert a NUL terminated ASCII string into a 64 bit integer.
;
; Syntax: NASM, x86-64. Register based interface (not a C calling convention).
;
; Accepted forms (qualifier letters are case-insensitive):
;       123             decimal
;       0x1F            hexadecimal, "0x" prefix
;       017             octal, leading zero
;       1Fh  17o  101b  hexadecimal / octal / binary, selected by suffix
;       0               the single character "0" yields zero
; A string that starts with '0' is always treated as prefixed (octal or 0x hex);
; a suffix is only looked for when the first character is not '0'.
;
; ENTRY, either of:
;       EAX = 0         RDI = pointer to the beginning of the string; the length is
;                       found by scanning for the terminator.
;       RAX = length    RDI = pointer to the byte following the string's terminator
;                       (the description in the original source suggests this is what
;                       a routine named GetS returns).
;
; LEAVE:
;       RAX = converted value, or 0xffffffffffffffff on any error (empty string,
;             character not valid for the selected base, qualifier without digits).
;             Overflow is not detected: the value wraps modulo 2^64, and a genuine
;             0xffffffffffffffff cannot be told apart from the error value.
;       RCX, RSI, RDI, RBX and RBP are preserved.
;       RDX and the flags are clobbered.
;
; Register roles inside the routine:
;       RSI = next character to convert       RCX = characters still to convert
;       RDX = accumulator                     RBX = converter for the selected base
;       RAX = current character / digit value (zero extended)
;-----------------------------------------------------------------------------------------------

A2I:
            push    rcx                     ; preserve essential registers
            push    rsi
            push    rdi
            push    rbx

            push    rbp                     ; frame lets .error unwind from inside a
            mov     rbp, rsp                ; converter with return addresses still stacked

            xor     edx, edx                ; accumulator = 0
            lea     rbx, [rel .dec]         ; decimal unless a qualifier says otherwise

            ; Establish RSI = start of string, RCX = number of characters.

            mov     rcx, rax                ; length, when the caller supplied one
            test    eax, eax
            jz      .calc_len

            mov     rsi, rdi                ; RDI is one past the terminator, so the
            sub     rsi, rcx                ; string starts at RDI - length - 1
            dec     rsi
            jmp     .have_len

.calc_len:
            mov     rsi, rdi                ; RDI already points at the first character
            mov     ecx, 0xffffffff         ; scan limit
            repne   scasb                   ; AL = 0 here: find the terminator
            mov     rcx, rdi                ; RDI is now one past the terminator
            sub     rcx, rsi
            dec     rcx                     ; length excludes the terminator

.have_len:
            test    rcx, rcx
            jz      .error                  ; empty string: nothing to convert, and the
                                            ; suffix test below must not read before it

            ; A leading zero selects octal, or hexadecimal when followed by x / X.

            movzx   eax, byte [rsi]
            cmp     al, '0'
            jne     .suffix

            cmp     rcx, 1
            je      .done                   ; the string is exactly "0": result is 0

            lea     rbx, [rel .oct]         ; assume octal
            inc     rsi                     ; step over the leading zero
            dec     rcx
            mov     al, [rsi]
            and     al, UCASE
            cmp     al, 'X'
            jne     .convert                ; octal; .oct rejects anything but 0-7

            lea     rbx, [rel .hex]
            inc     rsi                     ; step over the 'x'
            dec     rcx
            jz      .error                  ; "0x" with no digits
            jmp     .convert

            ; No prefix: the last character may be one of the suffixes O, H or B.

.suffix:
            mov     al, [rsi + rcx - 1]     ; last character (RCX >= 1 here)
            call    .is_digit
            jz      .convert                ; ends in a digit: plain decimal

            and     al, UCASE
            lea     rbx, [rel .oct]         ; LEA leaves the flags alone
            cmp     al, 'O'
            je      .strip_suffix

            lea     rbx, [rel .hex]
            cmp     al, 'H'
            je      .strip_suffix

            lea     rbx, [rel .bin]
            cmp     al, 'B'
            jne     .error                  ; ends in something that is no qualifier

.strip_suffix:
            dec     rcx                     ; the suffix is not part of the number
            jz      .error                  ; suffix with no digits

            ; Primary loop: invariant RCX > 0 on entry, one character per pass.

.convert:
            movzx   eax, byte [rsi]         ; zero extend so converters may use RAX
            inc     rsi
            call    .is_digit               ; ZF = 1 and AL = 0..9 when a digit
            call    rbx                     ; CALL does not change flags: ZF is passed on
            dec     rcx
            jnz     .convert

            ; Procedure postamble

.done:
            leave                           ; drop anything stacked, restore RBP
            pop     rbx
            pop     rdi
            pop     rsi
            pop     rcx
            mov     rax, rdx                ; result into the accumulator
            ret

.error:
            or      rdx, -1                 ; 0xffffffffffffffff marks every error
            jmp     .done

; Classifies the byte in AL.
;   digit     : returns ZF = 1 with AL reduced to its value 0..9
;   otherwise : returns ZF = 0 with AL unchanged

.is_digit:
            cmp     al, '9'
            ja      .not_digit              ; above '9' leaves ZF = 0
            cmp     al, '0'
            jb      .not_digit              ; below '0' leaves ZF = 0
            and     al, 0x0f                ; strip the upper nibble
            cmp     al, al                  ; force ZF = 1
.not_digit:
            ret

; Converters. Entered through RBX straight after .is_digit, so ZF still tells whether
; RAX holds a digit value (ZF = 1) or a raw character (ZF = 0). Each appends one digit
; to RDX or bails out through .error.

.dec:
            jnz     .error                  ; not a digit
            imul    rdx, rdx, RADIX_DEC
            add     rdx, rax
            ret

.oct:
            jnz     .error
            cmp     al, 7
            ja      .error                  ; 8 and 9 are not octal digits
            shl     rdx, 3
            or      rdx, rax                ; low three bits are free after the shift
            ret

.bin:
            jnz     .error
            cmp     al, 1
            ja      .error                  ; only 0 and 1 are binary digits
            shl     rdx, 1
            or      rdx, rax
            ret

.hex:
            jz      .hex_add                ; 0-9 arrive already converted
            and     al, UCASE               ; fold to uppercase
            cmp     al, 'A'
            jb      .error
            cmp     al, 'F'
            ja      .error
            sub     al, HEX_ADJUST          ; 'A'..'F' -> 10..15
.hex_add:
            shl     rdx, 4
            or      rdx, rax                ; low nibble is free after the shift
            ret