; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > s.bastxt
;
; Title  : s.bastxt
; Purpose: to provide BASIC tokenising and detokenising capability
; Version: 0.1 PJC created
;          0.2 PJC altered bastxt_tokenise so that it returns information in
;                  the line number to show:
;                   * whether or not a line number has ever been specified
;                   * whether or not a line number was specified for this line

        GBLL    ModeMayBeNonUser
ModeMayBeNonUser   SETL  {FALSE}
        GET     s.h_Brazil

        AREA    |C$$code|, CODE, READONLY

|x$codeseg|

        EXPORT  bastxt_detokenise

; void bastxt_detokenise(int output_buffer, int input_address,
;                        int *flag, int *detokeniser);
;
; Expands one tokenised BASIC line, starting at its two line-number bytes,
; into plain text in the output buffer.

bastxt_detokenise
; on entry, a1 points to the output buffer
;           a2 points to the text to detokenise: the line number (most
;              significant byte first), one length byte, then the line body
;              terminated by CR (13); a first byte of 255 means end of file
;           a3 points to the flag variable: 0 means output line numbers,
;              non-zero means skip them
;           a4 points to the BASIC detokeniser address
; on exit,  the flag variable holds -1 if a line number reference found
;                                      (only when line numbers are skipped)
;                                    0 if the end of the file reached
;                                    otherwise the updated pointer
;           the value that a4 points to holds the updated output buffer ptr
;           (this overwrites the detokeniser address that was passed in)
;           a completed line is followed in the output by LF (10) and a
;           zero byte; the stored output pointer is just past the zero byte
;
; register usage:
; a1: used to hold the current byte under examination
; a2: used to point into the token table and as scratch
; a3: not used!
; a4: points to the word holding the address of the BASIC detokeniser
; v1: used to indicate whether or not we are expanding tokens
;     (0 = expanding; bit 0 set = inside double-quotes; 4 = after REM)
; v2: skip line numbers flag
; v3: pointer to flag variable
; v4: pointer into output buffer
; v5: not used!
; v6: not used!
; sl: not used!
; fp: not used!
; ip: pointer into input buffer
        STMFD   sp!, {a1-ip,lr}
        MOV     ip, a2
        MOV     v4, a1
        MOV     v3, a3
        LDR     v2, [v3]                  ; get the flag value
;
        LDRB    a1, [ip], #1              ; do the line number first
        TEQ     a1, #255                  ; test for end of file first
        BNE     not_end_of_file
        MOV     a1, #0                    ; end of file: report 0 in the flag
        STR     a1, [v3]
        B       exit_detok
;
not_end_of_file
        LDRB    a2, [ip], #1              ; low byte of the line number
        ADD     a2, a2, a1, LSL #8        ; a2 = (first byte << 8) + second byte
        TEQ     v2, #0                    ; only print the line number when
        BLEQ    nprn                      ; the skip flag is zero
;
        ADD     ip, ip, #1                ; skip length of line indicator
        MOV     v1, #0                    ; expansion flag
det_loop
        LDRB    a1, [ip], #1              ; get the next value
        TEQ     a1, #13                   ; end of line reached?
        BNE     det_cont
        MOV     a1, #10                   ; finish the output line with LF
        BL      store_byte
        MOV     a1, #0                    ; followed by a zero byte
        BL      store_byte
        STR     ip, [v3]                  ; flag := pointer to the next line
        B       exit_detok

det_cont
        TEQ     a1, #34                   ; double-quotes
        EOREQ   v1, v1, #1                ; flip flag
        TEQ     v1, #0                    ; expanding tokens?
        BNE     det_nodetok               ; no - store the char
        CMP     a1, #&7F                  ; if >= &7F, it's a token
        BCS     det_token                 ; so expand it
det_nodetok
        BL      store_byte
        B       det_loop

det_token
        TEQ     a1, #&8D                  ; encoded constant
        BEQ     det_const
        TEQ     a1, #&F4                  ; REM
        MOVEQ   v1, #4                    ; no more detokenising after this
                                          ; (a later quote only flips bit 0,
                                          ; so v1 can never return to zero)
        STMFD   sp!, {a3-v4}              ; keep a3-v4 safe across the call
        ADR     lr, det_token_ret         ; hand-built return address
        LDR     pc, [a4]                  ; enter the detokeniser, token in a1
; NOTE(review): ip (the input pointer) is not stacked around this call, so the
; BASIC routine presumably either preserves it or deliberately advances it
; (e.g. over the second byte of a two-byte token) - confirm against the BASIC
; documentation.
det_token_ret
        LDMFD   sp!, {a3-v4}
det_token_out
        LDRB    a1, [a2], #1              ; a2 (R1) points into the token table
        CMP     a1, #&7F                  ; a value >= &7F marks the end of the text
        BGE     det_loop
        BL      store_byte                ; otherwise store it
        B       det_token_out             ; and loop

det_const
        TEQ     v2, #0                    ; are we skipping line numbers?
        BEQ     det_const_1
        MVN     a1, #0                    ; yes, but we've found a reference
        STR     a1, [v3]                  ; so report -1 in the flag and give up
        B       exit_detok

det_const_1
; Decode the three bytes that follow the &8D token into a number and output
; it as unpadded decimal.
; register usage: a1,a2,a3,a4 (all preserved)
;                 ip (incremented)
        STMFD   sp!, {a1-a4}
        LDRB    a1, [ip], #1              ; decode the constant
        MOV     a3, a1, LSL #2            ; a3 = first byte << 2
        AND     a4, a3, #&C0              ; keep bits 6-7 of that
        LDRB    a1, [ip], #1
        EOR     a4, a4, a1                ; combine with second byte: low byte
        LDRB    a1, [ip], #1
        EOR     a3, a1, a3, LSL #2        ; third byte EOR (first byte << 4)
        AND     a3, a3, #255              ; trimmed to 8 bits: high byte
        ORR     a2, a4, a3, LSL #8        ; a2 = (high << 8) | low
        BL      posite                     ; and stuff it out
        LDMFD   sp!, {a1-a4}
        B       det_loop

; posite: output the number in a2 (R1) as decimal with no leading padding.
; nprn:   output the number in a2 (R1) as decimal with each leading zero
;         replaced by a space (the final digit is always printed).
; Both routines select the mode in v1, call prn, and restore v1 on return.
posite
        STMFD   sp!, {v1, lr}
        MOV     v1, #0                    ; mode 0: suppress leading zeros
        B       pn_common

nprn
        STMFD   sp!, {v1, lr}
        MOV     v1, #5                    ; mode 5: pad leading zeros with spaces
pn_common
        BL      prn                       ; shared call and return for both entries
        LDMFD   sp!, {v1, pc}

prn
; Write the value in a2 to the output buffer as up to five decimal digits,
; working down from the ten-thousands entry of the values table.
; on entry, a2 = value to print
;           v1 = 0: leading zeros are dropped
;                1: digits are printed as they stand
;                anything else: leading zeros are printed as spaces
;           v4 = output pointer (advanced through store_byte)
; The units digit is always printed. a1, a2, a4 and v1-v3 are restored on exit.
        STMFD   sp!, {a1, a2, a4, v1-v3, lr}
        ADR     v2, values                ; v2 -> table of powers of ten
        MOV     a4, #4                    ; a4 = table index, starting at 10000
prn_next_digit
        LDR     v3, [v2, a4, LSL #2]      ; v3 = power of ten for this column
        MOV     a1, #48                   ; digit character starts at '0'
prn_count
        SUBS    a2, a2, v3                ; repeated subtraction counts the digit
        ADDCS   a1, a1, #1                ; no borrow: bump the digit
        BCS     prn_count
        ADD     a2, a2, v3                ; undo the subtraction that borrowed
        TEQ     a1, #48                   ; a non-zero digit ends any padding
        MOVNE   v1, #1
        CMP     v1, #1                    ; print the digit if significant digits
        CMPNE   a4, #0                    ; have started, or this is the units column
        BEQ     prn_emit
        CMP     v1, #0                    ; leading zero with no padding wanted?
        BEQ     prn_advance               ; yes - emit nothing
        MOV     a1, #32                   ; otherwise emit a space in its place
prn_emit
        BL      store_byte
prn_advance
        SUBS    a4, a4, #1                ; step to the next lower power of ten
        BCS     prn_next_digit            ; carry clear once the index passes zero
        LDMFD   sp!, {a1, a2, a4, v1-v3, pc}

values
; Powers of ten as 32-bit words, indexed by exponent (entry n holds 10^n).
; prn starts at index 4, so it reads only the first five entries.
        DCD     1, 10, 100, 1000, 10000
        DCD     100000, 1000000, 10000000
        DCD     100000000, 1000000000

store_byte
; Append the byte in a1 to the output buffer.
; on entry, a1 = byte to store, v4 = output pointer
; on exit,  v4 = output pointer advanced by one; flags are not altered
        STRB    a1, [v4]                  ; write the byte
        ADD     v4, v4, #1                ; and step past it
        MOV     pc, lr

exit_detok
; Common exit for bastxt_detokenise, reached from the end-of-file, end-of-line
; and line-number-reference paths. Writes the current output pointer (v4) to
; the word addressed by a4, replacing the detokeniser address held there.
        STR     v4, [a4]
; Return is a macro that is not defined in this file; presumably it unstacks
; the registers saved on entry and returns to the caller - confirm against
; its definition.
        Return  "a1-ip"

        EXPORT  bastxt_tokenise

; void bastxt_tokenise(char **output_buffer, char **input_buffer,
;                      int *line_number, int *tokeniser, int increment);
;
; Tokenises one line of text into a BASIC line record in the output buffer:
;   line number high byte, line number low byte, length byte, tokenised
;   text, CR (13)
; The length byte counts the whole record, from the first line number byte up
; to and including the CR. Trailing spaces before the CR are removed.

bastxt_tokenise
; on entry, a1 points to the output buffer pointer
;           a2 points to the input buffer pointer
;           a3 points to the last line number
;           a4 points to the BASIC tokeniser address
;           the word at [sp] holds the line number increment
; on exit,  the output buffer pointer points just past the CR written
;           the input buffer pointer holds the tokeniser's updated a2 (R1)
;           the line number word holds this line's number plus the flag bits
;           the word that a4 pointed to holds the value of v2 (R5) as left by
;           the tokeniser (this overwrites the tokeniser address)
;
; Note that the top bits of the line number have the following meaning:
; bit 31: a line number has been specified in the text at some point
; bit 30: the current line does not have a line number
;
; NOTE(review): nothing in this routine clears bit 30; once set it is carried
; forward by the flag-preserving code at finished_line_no, so the caller
; presumably clears it between lines - confirm.
; NOTE(review): the parsed line number is not range checked; only its low 16
; bits are written to the output, and a very large value would run into the
; flag bits.
;
; register usage:
; a1: temp
; a2: pointer to input buffer
; a3: pointer to output buffer
; a4: temp
; v1: temp
; v2: temp
; v3: temp
; v4: pointer to output buffer pointer
; v5: pointer to input buffer pointer
; v6: pointer to last line number
; sl: pointer to BASIC tokeniser address
; fp:
; ip: line number increment
  LDR     ip, [sp]               ; get the increment
  STMFD   sp!, {r0-r11, r14}     ; stack everything!
  MOV     v4, a1                 ; preserve the pointers
  MOV     v5, a2
  MOV     v6, a3
  MOV     sl, a4
  LDR     a3, [v4]               ; point to the output buffer
  LDR     a2, [v5]               ; point to the input buffer
  MOV     a4, a2                 ; check for a line number first
tok_skip
  LDRB    v1, [a4], #1           ; skip leading spaces first
  TEQ     v1, #32
  BEQ     tok_skip
  CMP     v1, #48                ; 0
  BLT     no_line_number
  CMP     v1, #57                ; 9
  BGT     no_line_number

  ; PJC. Need to set the top bit of the line number to show that
  ;      a line number has been specified
  LDR     v2, [v6]
  ORR     v2, v2, #1 << 31
  STR     v2, [v6]

  MOV     v2, #0                 ; got a line number so calculate it
tok_line_no
  MOV     v3, v2, LSL #3         ; v3 = v2 * 8
  ADD     v2, v3, v2, LSL #1     ; v2 = v2 * 8 + v2 * 2 (ie v2 = v2 * 10)
  ADD     v2, v2, v1
  SUB     v2, v2, #48            ; v2 = v2 * 10 + new digit
  LDRB    v1, [a4], #1
  CMP     v1, #48                ; 0
  BLT     finished_line_no
  CMP     v1, #57                ; 9
  BLE     tok_line_no
finished_line_no
  SUB     a2, a4, #1             ; adjust a2 just in case we had a line number!

  ; PJC. Need to store the new line number whilst preserving the flag values
  LDR     a4, [v6]
  AND     a4, a4, #3 << 30
  ORR     v2, v2, a4

  STR     v2, [v6]
  LDRB    v2, [v6, #1]           ; get MSB
  STRB    v2, [a3], #1           ; and put it in the output buffer
  LDRB    v2, [v6]               ; get LSB
  STRB    v2, [a3], #2           ; NB inced by 2 to skip the length byte
  MOV     a4, #0
  MOV     v1, #0
  ADR     lr, token_ret          ; hand-built return address
  LDR     pc, [sl]               ; MATCH: R1 (a2) = points to the source string
                                 ;        R2 (a3) = points to the destination string
                                 ;        R3 (a4) = MODE
                                 ;        R4 (v1) = CONSTA
                                 ; on exit, R1 (a2) and R2 (a3) point one beyond,
                                 ;          R0, R3, R4 and R5 are corrupt
                                 ; NOTE(review): R5 (v2) is nevertheless stored as
                                 ; the result at the end of this routine, so it
                                 ; presumably carries status back - confirm
no_line_number
; NB. This bit of code gets executed if the text in the input buffer doesn't contain
;     a line number. It rejoins the main line at finished_line_no
  LDR     v2, [v6]

  ; PJC. Set the flag to show no line number specified for this line.
  ORR     v2, v2, #1 << 30
  STR     v2, [v6]
  AND     v2, v2, #&3FFFFFFF     ; clear the flag bits

  ADD     v2, v2, ip             ; increment line number
  ADD     a4, a2, #1             ; restore a4 so that leading spaces aren't skipped!
  B       finished_line_no

token_ret
  LDR     v1, [v4]               ; get the original output ptr
  ADD     v1, v1, #3             ; and point to the first byte of data
  STR     a2, [v5]               ; ready for the next line

  SUB     a3, a3, #1             ; back up to the last byte the tokeniser wrote
lose_spaces
  LDRB    a1, [a3, #-1]!          ; get the character
  CMP     a3, v1                  ; have we run out of line?
  BCC     done_spaces             ; yes - give up (addresses are unsigned, so
                                  ; use an unsigned "lower" test, not signed BLT)
  TEQ     a1, #32                 ; if it is a space
  BEQ     lose_spaces             ; continue

done_spaces
  MOV     a1, #13                ; plant a CR there
  STRB    a1, [a3, #1]!          ; and move forward one more
  SUB     v1, v1, #3             ; restore original ptr
  ADD     a3, a3, #1             ; a3 now points just past the CR

  STR     a3, [v4]               ; save the output ptr
  SUB     a1, a3, v1             ; number of bytes in the output buffer
  STRB    a1, [v1,#2]            ; and store it as the length byte
  ADD     a3, v1, #3             ; now check for the ELSE munging
token_check
  LDRB    a1, [a3], #1
  TEQ     a1, #32                ; skip leading spaces
  BEQ     token_check
  SUB     a3, a3, #1             ; step back onto the first non-space byte
  LDRB    a1, [a3]               ; first token in the line
  TEQ     a1, #&8B               ; is it the &8B form of ELSE?
  MOVEQ   a1, #&CC               ; yes - use the &CC form at the start of a line
  STREQB  a1, [a3]               ; munge if ELSE
  STR     v2, [sl]               ; return the results
  Return  "r0-r11"

        END