; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > s.bastxt
;
; Title  : s.bastxt
; Purpose: to provide BASIC tokenising and detokenising capability
; Version: 0.1 PJC created
;          0.2 PJC altered bastxt_tokenise so that it returns information in
;                  the line number to show:
;                  * whether or not a line number has ever been specified
;                  * whether or not a line number was specified for this line
;
; Syntax : Acorn ObjAsm, APCS register names (a1-a4, v1-v6, sl, fp, ip, sp, lr, pc)

        GBLL    ModeMayBeNonUser
ModeMayBeNonUser SETL {FALSE}

        GET     s.h_Brazil

        AREA    |C$$code|, CODE, READONLY

|x$codeseg|

;-----------------------------------------------------------------------------
; void bastxt_detokenise(int output_buffer, int input_address,
;                        int *flag, int *detokeniser);
;
; Expands one tokenised BASIC line into text.
;
; The input record, as read by this code, is:
;       byte 0    line number, high byte (a value of 255 here means end of file)
;       byte 1    line number, low byte
;       byte 2    skipped (length of line indicator)
;       byte 3..  line contents, terminated by 13 (CR)
;
; The text written to the output buffer is the line number (only when the
; flag is zero on entry), the expanded line, then the bytes 10 (LF) and 0.
;-----------------------------------------------------------------------------

        EXPORT  bastxt_detokenise

bastxt_detokenise
; on entry, a1 points to the output buffer
;           a2 points to the text to detokenise
;           a3 points to the flag variable
;              (zero     => print line numbers and expand line number references
;               non-zero => skip line numbers, stop at a line number reference)
;           a4 points to the BASIC detokeniser address
; on exit,  the flag variable holds -1 if a line number reference found
;                                    0 if the end of the file reached
;                                    otherwise the updated pointer
;           the value that a4 points to holds the updated output buffer ptr
;           (so the detokeniser address stored there has been overwritten)
;
; register usage:
;       a1: used to hold the current byte under examination
;       a2: used to point into the token table and as scratch
;       a3: not used!
;       a4: used to hold the address of the BASIC detokeniser
;       v1: used to indicate whether or not we are expanding tokens
;           (0 => expanding; non-zero => inside quotes or after REM)
;       v2: skip line numbers flag
;       v3: pointer to flag variable
;       v4: pointer into output buffer
;       v5: not used!
;       v6: not used!
;       sl: not used!
;       fp: not used!
;       ip: pointer into input buffer

        STMFD   sp!, {a1-ip,lr}
        MOV     ip, a2
        MOV     v4, a1
        MOV     v3, a3
        LDR     v2, [v3]                ; get the flag value
;
        LDRB    a1, [ip], #1            ; do the line number first
        TEQ     a1, #255                ; test for end of file first
        BNE     not_end_of_file
        MOV     a1, #0
        STR     a1, [v3]                ; flag := 0 (end of file)
        B       exit_detok
;
not_end_of_file
        LDRB    a2, [ip], #1
        ADD     a2, a2, a1, LSL #8      ; a2 = (first byte << 8) + second byte
        TEQ     v2, #0
        BLEQ    nprn                    ; print it only if not skipping line numbers
;
        ADD     ip, ip, #1              ; skip length of line indicator
        MOV     v1, #0                  ; expansion flag

det_loop
        LDRB    a1, [ip], #1            ; get the next value
        TEQ     a1, #13                 ; end of line reached?
        BNE     det_cont
        MOV     a1, #10                 ; terminate the output with LF ...
        BL      store_byte
        MOV     a1, #0                  ; ... followed by a zero byte
        BL      store_byte
        STR     ip, [v3]                ; flag := updated input pointer
        B       exit_detok

det_cont
        TEQ     a1, #34                 ; double-quotes
        EOREQ   v1, v1, #1              ; flip flag
        TEQ     v1, #0                  ; expanding tokens?
        BNE     det_nodetok             ; no - store the char
        CMP     a1, #&7F                ; if >= &7F, its a token
        BCS     det_token               ; so expand it
det_nodetok
        BL      store_byte
        B       det_loop

det_token
        TEQ     a1, #&8D                ; encoded constant
        BEQ     det_const
        TEQ     a1, #&F4                ; REM
        MOVEQ   v1, #4                  ; no more detokenising after this
                                        ; (a later quote toggles v1 between 4 and 5,
                                        ;  so it never returns to zero on this line)
        STMFD   sp!, {a3-v4}            ; keep our state across the call
        ADR     lr, det_token_ret
        LDR     pc, [a4]                ; call through the address stored at [a4]
det_token_ret
        LDMFD   sp!, {a3-v4}
det_token_out
        LDRB    a1, [a2], #1            ; a2 (R1) points into the token table
        CMP     a1, #&7F                ; a value >= &7F marks the end of the text
        BCS     det_loop                ; unsigned test, as in det_cont above
        BL      store_byte              ; otherwise store it
        B       det_token_out           ; and loop

det_const
        TEQ     v2, #0                  ; are we skipping line numbers?
        BEQ     det_const_1
        MVN     a1, #0                  ; yes, but we've found a reference
        STR     a1, [v3]                ; flag := -1
        B       exit_detok

det_const_1
; register usage: a1,a2,a3,a4 (all preserved)
;                 ip (incremented)
;
; Three bytes b0, b1, b2 follow the &8D token. As computed below:
;       low byte  = ((b0 << 2) AND &C0) EOR b1
;       high byte = ((b0 << 4) EOR b2) AND &FF
        STMFD   sp!, {a1-a4}
        LDRB    a1, [ip], #1            ; decode the constant
        MOV     a3, a1, LSL #2
        AND     a4, a3, #&C0
        LDRB    a1, [ip], #1
        EOR     a4, a4, a1
        LDRB    a1, [ip], #1
        EOR     a3, a1, a3, LSL #2
        AND     a3, a3, #255
        ORR     a2, a4, a3, LSL #8
        BL      posite                  ; and stuff it out
        LDMFD   sp!, {a1-a4}
        B       det_loop

;-----------------------------------------------------------------------------
; posite / nprn / prn - decimal output helpers (local to this file)
;
; on entry, a2 holds the number to print
;           v4 points into the output buffer (advanced by store_byte)
;
; posite prints the number with leading zeros suppressed (v1 = 0).
; nprn   prints the number with leading zeros replaced by spaces (v1 = 5).
; prn    does the work; v1 selects the mode and becomes 1 once a non-zero
;        digit has been produced. Five digit positions are produced (table
;        entries 4 down to 0, ie 10000 down to 1); the final position is
;        always printed. All registers are preserved by all three entry points.
;-----------------------------------------------------------------------------

posite
; output the number in a2 (R1) as decimal
        STMFD   sp!, {v1,lr}
        MOV     v1, #0
        BL      prn
        LDMFD   sp!, {v1,pc}

nprn
        STMFD   sp!, {v1,lr}
        MOV     v1, #5
        BL      prn
        LDMFD   sp!, {v1,pc}

prn
        STMFD   sp!, {a1,a2,a4,v1,v2,v3,lr}
        MOV     a4, #4                  ; index of the first power of ten to use
        ADR     v2, values
prn_lop
        MOV     a1, #48                 ; "0"
        LDR     v3, [v2, a4, LSL #2]    ; current power of ten
prn_00
        SUBS    a2, a2, v3              ; count how many times it goes
        ADDCS   a1, a1, #1
        BCS     prn_00
        ADD     a2, a2, v3              ; undo the subtraction that went too far
        CMP     a1, #48
        MOVNE   v1, #1                  ; a significant digit has now been seen
        TEQ     v1, #1
        TEQNE   a4, #0
        BEQ     prn_lpa                 ; significant, or last position: print the digit
        TEQ     v1, #0
        BEQ     prn_lpb                 ; suppressing leading zeros: print nothing
        MOV     a1, #32                 ; padding mode: print a space instead
prn_lpa
        BL      store_byte
prn_lpb
        SUBS    a4, a4, #1
        BCS     prn_lop
        LDMFD   sp!, {a1,a2,a4,v1,v2,v3,pc}

values
; powers of ten; prn only indexes the first five entries
        DCD     1
        DCD     10
        DCD     100
        DCD     1000
        DCD     10000
        DCD     100000
        DCD     1000000
        DCD     10000000
        DCD     100000000
        DCD     1000000000

store_byte
; append the byte in a1 to the output buffer and advance v4
        STRB    a1, [v4], #1
        MOV     pc, lr

exit_detok
; common exit for bastxt_detokenise
        STR     v4, [a4]                ; hand back the updated output buffer ptr
        ; Return is a macro from s.h_Brazil, which is not visible here; it is
        ; presumably the counterpart of the STMFD on entry - TODO confirm.
        Return  "a1-ip"

;-----------------------------------------------------------------------------
; void bastxt_tokenise(char **output_buffer, char **input_buffer,
;                      int *line_number, int *tokeniser, int increment);
;
; Tokenises one line of text. The output record written is:
;       byte 0    line number, high byte
;       byte 1    line number, low byte
;       byte 2    length byte (number of bytes in the whole record)
;       byte 3..  tokenised line with trailing spaces removed, ending in 13 (CR)
;
; If the text starts (after optional spaces) with a decimal number, that number
; is used as the line number; otherwise the previous line number plus the
; increment is used and the leading spaces are passed on to the tokeniser.
;-----------------------------------------------------------------------------

        EXPORT  bastxt_tokenise

bastxt_tokenise
; on entry, a1 points to the output buffer pointer
;           a2 points to the input buffer pointer
;           a3 points to the last line number
;           a4 points to the BASIC tokeniser address
;           the fifth argument (increment) is on the stack
; on exit,  the output and input buffer pointers have been advanced,
;           the line number word has been updated, and
;           the word that a4 pointed to holds the tokeniser's R5 (see token_ret)
;
; Note that the top bits of the line number have the following meaning:
;       bit 31: a line number has been specified in the text at some point
;       bit 30: the current line does not have a line number
; NOTE(review): bit 30 is only ever set here, never cleared; presumably the
; caller resets it between lines - confirm against the C caller.
;
; register usage:
;       a1:
;       a2: pointer to input buffer
;       a3: pointer to output buffer
;       a4: temp
;       v1: temp
;       v2: temp
;       v3: temp
;       v4: pointer to output buffer pointer
;       v5: pointer to input buffer pointer
;       v6: pointer to last line number
;       sl: pointer to BASIC tokeniser address
;       fp:
;       ip: line number increment

        LDR     ip, [sp]                ; get the increment
        STMFD   sp!, {r0-r11, r14}      ; stack everything!
        MOV     v4, a1                  ; preserve the pointers
        MOV     v5, a2
        MOV     v6, a3
        MOV     sl, a4
        LDR     a3, [v4]                ; point to the output buffer
        LDR     a2, [v5]                ; point to the input buffer
        MOV     a4, a2                  ; check for a line number first
tok_skip
        LDRB    v1, [a4], #1            ; skip leading spaces first
        TEQ     v1, #32
        BEQ     tok_skip
        CMP     v1, #48                 ; 0
        BLT     no_line_number
        CMP     v1, #57                 ; 9
        BGT     no_line_number

; PJC. Need to set the top bit of the line number to show that
; a line number has been specified
        LDR     v2, [v6]
        ORR     v2, v2, #1 << 31
        STR     v2, [v6]

        MOV     v2, #0                  ; got a line number so calculate it
tok_line_no
        MOV     v3, v2, LSL #3          ; v3 = v2 * 8
        ADD     v2, v3, v2, LSL #1      ; v2 = v2 * 8 + v2 * 2 (ie v2 = v2 * 10)
        ADD     v2, v2, v1
        SUB     v2, v2, #48             ; v2 = v2 * 10 + new digit
        LDRB    v1, [a4], #1
        CMP     v1, #48                 ; 0
        BLT     finished_line_no
        CMP     v1, #57                 ; 9
        BLE     tok_line_no

finished_line_no
        SUB     a2, a4, #1              ; adjust a2 just in case we had a line number!

; PJC. Need to store the new line number whilst preserving the flag values
        LDR     a4, [v6]
        AND     a4, a4, #3 << 30
        ORR     v2, v2, a4
        STR     v2, [v6]

        LDRB    v2, [v6, #1]            ; get MSB
        STRB    v2, [a3], #1            ; and put it in the output buffer
        LDRB    v2, [v6]                ; get LSB
        STRB    v2, [a3], #2            ; NB inced by 2 to skip the length byte
        MOV     a4, #0
        MOV     v1, #0
        ADR     lr, token_ret
        LDR     pc, [sl]                ; call through the address stored at [sl]

; MATCH: R1 (a2) = points to the source string
;        R2 (a3) = points to the destination string
;        R3 (a4) = MODE
;        R4 (v1) = CONSTA
; on exit, R1 (a2) and R2 (a3) point one beyond,
;          R0, R3, R4 and R5 are corrupt

no_line_number
; NB. This bit of code gets executed if the text in the input buffer doesn't contain
; a line number. It drops out of line up to finished_line_no
        LDR     v2, [v6]

; PJC. Set the flag to show no line number specified for this line.
        ORR     v2, v2, #1 << 30
        STR     v2, [v6]

        AND     v2, v2, #&3FFFFFFF      ; clear the flag bits
        ADD     v2, v2, ip              ; increment line number
        ADD     a4, a2, #1              ; restore a4 so that leading spaces aren't skipped!
        B       finished_line_no

token_ret
        LDR     v1, [v4]                ; get the original output ptr
        ADD     v1, v1, #3              ; and point to the first byte of data
        STR     a2, [v5]                ; ready for the next line
        SUB     a3, a3, #1
lose_spaces
        LDRB    a1, [a3, #-1]!          ; get the character
        CMP     a3, v1                  ; have we run out of line?
        BCC     done_spaces             ; yes - give up
                                        ; (unsigned lower: these are addresses, so a
                                        ;  signed test would be wrong above &80000000)
        TEQ     a1, #32                 ; if it is a space
        BEQ     lose_spaces             ; continue
done_spaces
        MOV     a1, #13                 ; plant a CR there
        STRB    a1, [a3, #1]!           ; and move forward one more
        SUB     v1, v1, #3              ; restore original ptr
        ADD     a3, a3, #1
        STR     a3, [v4]                ; save the output ptr
        SUB     a1, a3, v1              ; number of bytes in the output buffer
        STRB    a1, [v1,#2]             ; and store it as the length byte

        ADD     a3, v1, #3              ; now check for the ELSE munging
token_check
        LDRB    a1, [a3], #1
        TEQ     a1, #32                 ; skip leading spaces
        BEQ     token_check
        SUB     a3, a3, #1
        LDRB    a1, [a3]                ; first token in the line
        TEQ     a1, #&8B
        MOVEQ   a1, #&CC
        STREQB  a1, [a3]                ; munge if ELSE

        ; NOTE(review): the MATCH notes above list R5 (v2) as corrupt on exit,
        ; yet it is what gets handed back here - presumably it carries the
        ; tokeniser's status; confirm against the BASIC sources.
        STR     v2, [sl]                ; return the results
        ; Return is a macro from s.h_Brazil, which is not visible here; it is
        ; presumably the counterpart of the STMFD on entry - TODO confirm.
        Return  "r0-r11"

        END